Merge pull request #31 from mirkobunse/devel

Continuous Integration with GitHub Actions
Alejandro Moreo Fernandez 2024-04-18 09:31:58 +02:00 committed by GitHub
commit bee1c4e678
5 changed files with 56 additions and 7 deletions

.github/workflows/ci.yml (new file)

@@ -0,0 +1,33 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - devel
+
+jobs:
+
+  # run unit tests
+  test:
+    name: Unit tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version:
+          - "3.11"
+    env:
+      QUAPY_TESTS_OMIT_LARGE_DATASETS: True
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install -e .[bayes,tests]
+      - name: Test with unittest
+        run: python -m unittest
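
Any non-empty value of QUAPY_TESTS_OMIT_LARGE_DATASETS triggers the skip guards added below, because environment values reach Python as strings. A minimal sketch of this caveat, assuming the tests read the variable with os.environ.get as in the hunks that follow:

import os

# caution: environment values are strings, so even
# QUAPY_TESTS_OMIT_LARGE_DATASETS=False would be truthy here;
# the variable must be unset entirely to run the large-dataset tests
if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
    print('large-dataset tests will be skipped')

The same gate can be reproduced locally with QUAPY_TESTS_OMIT_LARGE_DATASETS=True python -m unittest.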

quapy/data/base.py

@@ -549,7 +549,7 @@ class Dataset:
             yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
 
-    def reduce(self, n_train=100, n_test=100):
+    def reduce(self, n_train=100, n_test=100, random_state=None):
         """
         Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.
 
@@ -557,6 +557,15 @@
         :param n_test: number of test documents to keep (default 100)
+        :param random_state: random seed for reproducible subsampling (default None)
         :return: self
         """
-        self.training = self.training.sampling(n_train, *self.training.prevalence())
-        self.test = self.test.sampling(n_test, *self.test.prevalence())
+        self.training = self.training.sampling(
+            n_train,
+            *self.training.prevalence(),
+            random_state=random_state
+        )
+        self.test = self.test.sampling(
+            n_test,
+            *self.test.prevalence(),
+            random_state=random_state
+        )
         return self
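
A usage sketch for the new parameter (the dataset call is borrowed from the tests below; reduce() returns self, so the call chains):

import quapy as qp

# the same seed now yields the same subsample on every run,
# so quick experiments on the reduced dataset are repeatable
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10)
data = data.reduce(n_train=100, n_test=100, random_state=0)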

quapy/tests/test_datasets.py

@@ -1,3 +1,4 @@
+import os
 import unittest
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -77,6 +78,9 @@ class TestDatasets(unittest.TestCase):
         self._check_dataset(dataset)
 
     def test_lequa2022(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_lequa2022 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
         for dataset_name in LEQUA2022_VECTOR_TASKS:
             print(f'loading dataset {dataset_name}...', end='')
@@ -104,6 +108,10 @@ class TestDatasets(unittest.TestCase):
     def test_IFCB(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
+
         print(f'loading dataset IFCB.')
         for mod_sel in [False, True]:
             train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel)
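
The guards above return early, so the omitted tests still count as passed in the unittest summary. A sketch of an alternative gate (not what this PR does) that would report them as skipped instead:

import os
import unittest

OMIT_LARGE = bool(os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'))

class TestDatasets(unittest.TestCase):

    @unittest.skipIf(OMIT_LARGE, 'QUAPY_TESTS_OMIT_LARGE_DATASETS is set')
    def test_lequa2022(self):
        ...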

quapy/tests/test_modsel.py

@@ -4,7 +4,6 @@ import numpy as np
 from sklearn.linear_model import LogisticRegression
 
 import quapy as qp
-import util
 from quapy.method.aggregative import PACC
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import APP
@@ -20,7 +19,7 @@ class ModselTestCase(unittest.TestCase):
         q = PACC(LogisticRegression(random_state=1, max_iter=5000))
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': [0.000001, 10.]}
@@ -42,7 +41,7 @@ class ModselTestCase(unittest.TestCase):
         q = PACC(LogisticRegression(random_state=1, max_iter=5000))
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500)
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': np.logspace(-3,3,7)}
@@ -80,7 +79,7 @@ class ModselTestCase(unittest.TestCase):
         q = PACC(SlowLR())
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': np.logspace(-1,1,3)}
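
With these changes, the model-selection tests seed every source of randomness. A condensed sketch assembled from the lines above (not a new test):

import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

# seed the classifier, the subsample, and the split: with all three fixed,
# repeated runs of the grid search see identical data
q = PACC(LogisticRegression(random_state=1, max_iter=5000))
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1)
training, validation = data.training.split_stratified(0.7, random_state=1)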

setup.py

@@ -125,6 +125,7 @@ setup(
     # projects.
     extras_require={  # Optional
         'bayes': ['jax', 'jaxlib', 'numpyro'],
+        'tests': ['certifi'],
     },
     # If there are data files included in your packages that need to be
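
The CI install step (python -m pip install -e .[bayes,tests]) pulls in this new extra; certifi presumably supplies an up-to-date CA bundle for the HTTPS dataset downloads the tests exercise. A quick check (sketch) that the extra is present:

import certifi

print(certifi.where())  # path to the CA bundle certifi provides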