Merge pull request #31 from mirkobunse/devel

Continuous Integration with GitHub Actions
Alejandro Moreo Fernandez 2024-04-18 09:31:58 +02:00 committed by GitHub
commit bee1c4e678
5 changed files with 56 additions and 7 deletions

.github/workflows/ci.yml (new file, 33 lines added)

@ -0,0 +1,33 @@
name: CI
on:
  pull_request:
  push:
    branches:
      - main
      - devel
jobs:
  # take out unit tests
  test:
    name: Unit tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.11"
    env:
      QUAPY_TESTS_OMIT_LARGE_DATASETS: True
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install -e .[bayes,tests]
      - name: Test with unittest
        run: python -m unittest
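The workflow sets QUAPY_TESTS_OMIT_LARGE_DATASETS before running the suite, so the large-dataset tests are skipped on CI. A rough local equivalent, sketched in Python (the test directory path is an assumption and may need adjusting):

import os
import unittest

# Rough local equivalent of the CI job (a sketch, not part of this commit):
# set the skip flag before the tests read it, then run discovery.
os.environ['QUAPY_TESTS_OMIT_LARGE_DATASETS'] = 'True'

# 'quapy/tests' is an assumed location of the test modules; adjust as needed.
suite = unittest.defaultTestLoader.discover('quapy/tests')
unittest.TextTestRunner(verbosity=2).run(suite)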


@@ -549,7 +549,7 @@ class Dataset:
         yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
 
-    def reduce(self, n_train=100, n_test=100):
+    def reduce(self, n_train=100, n_test=100, random_state=None):
         """
         Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.
@@ -557,6 +557,14 @@
         :param n_test: number of test documents to keep (default 100)
         :return: self
         """
-        self.training = self.training.sampling(n_train, *self.training.prevalence())
-        self.test = self.test.sampling(n_test, *self.test.prevalence())
+        self.training = self.training.sampling(
+            n_train,
+            *self.training.prevalence(),
+            random_state = random_state
+        )
+        self.test = self.test.sampling(
+            n_test,
+            *self.test.prevalence(),
+            random_state = random_state
+        )
         return self
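The new random_state argument makes the subsampling in reduce() reproducible. A minimal usage sketch (fetch_reviews and the parameter values are taken from the tests touched by this commit; exact repeatability of the drawn sample is the intended behaviour, not verified here):

import quapy as qp

# With a fixed random_state, reduce() is expected to draw the same
# subsample on every run, so quick experiments become repeatable.
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10)
data = data.reduce(n_train=100, n_test=100, random_state=1)

# The reduction preserves the prevalence of the training and test sets.
print(data.training.prevalence())
print(data.test.prevalence())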


@@ -1,3 +1,4 @@
+import os
 import unittest
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -77,6 +78,9 @@ class TestDatasets(unittest.TestCase):
         self._check_dataset(dataset)
 
     def test_lequa2022(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_lequa2022 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
         for dataset_name in LEQUA2022_VECTOR_TASKS:
             print(f'loading dataset {dataset_name}...', end='')
@@ -104,6 +108,10 @@
     def test_IFCB(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
         print(f'loading dataset IFCB.')
         for mod_sel in [False, True]:
             train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel)
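The guard simply returns early when the variable is set. The same effect could also be expressed with unittest's skip decorators, which would report the tests as skipped rather than passed; a hedged sketch of that alternative (not what this commit does):

import os
import unittest

# Alternative formulation (not part of this commit): declare the skip
# condition once, so omitted tests show up as 'skipped' in the report.
OMIT_LARGE = bool(os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'))

class TestLargeDatasets(unittest.TestCase):

    @unittest.skipIf(OMIT_LARGE, 'QUAPY_TESTS_OMIT_LARGE_DATASETS is set')
    def test_lequa2022(self):
        ...  # load and check the LeQua 2022 datasets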


@@ -4,7 +4,6 @@ import numpy as np
 from sklearn.linear_model import LogisticRegression
 
 import quapy as qp
-import util
 from quapy.method.aggregative import PACC
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import APP
@@ -20,7 +19,7 @@ class ModselTestCase(unittest.TestCase):
         q = PACC(LogisticRegression(random_state=1, max_iter=5000))
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': [0.000001, 10.]}
@@ -42,7+41,7 @@
         q = PACC(LogisticRegression(random_state=1, max_iter=5000))
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500)
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': np.logspace(-3,3,7)}
@@ -80,7 +79,7 @@
         q = PACC(SlowLR())
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': np.logspace(-1,1,3)}


@@ -125,6 +125,7 @@ setup(
     # projects.
     extras_require={  # Optional
         'bayes': ['jax', 'jaxlib', 'numpyro'],
+        'tests': ['certifi'],
     },
     # If there are data files included in your packages that need to be