Merge pull request #31 from mirkobunse/devel
Continuous Integration with GitHub Actions
commit bee1c4e678
@@ -0,0 +1,33 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - devel
+
+jobs:
+
+  # run the unit tests
+  test:
+    name: Unit tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version:
+          - "3.11"
+    env:
+      QUAPY_TESTS_OMIT_LARGE_DATASETS: True
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install -e .[bayes,tests]
+      - name: Test with unittest
+        run: python -m unittest
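The workflow gates the slow, download-heavy tests behind the QUAPY_TESTS_OMIT_LARGE_DATASETS variable and then runs standard unittest discovery. A minimal sketch for reproducing the CI test step locally; the subprocess invocation is illustrative, not part of this diff:

# reproduce the CI test step locally; mirrors the env/run lines of the workflow
import os
import subprocess

os.environ['QUAPY_TESTS_OMIT_LARGE_DATASETS'] = 'True'   # same gate the workflow sets
subprocess.run(['python', '-m', 'unittest'], check=True)  # same command the job runs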
@@ -549,7 +549,7 @@ class Dataset:
             yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
 
 
-    def reduce(self, n_train=100, n_test=100):
+    def reduce(self, n_train=100, n_test=100, random_state=None):
         """
         Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.
 
@@ -557,6 +557,14 @@
         :param n_test: number of test documents to keep (default 100)
         :return: self
         """
-        self.training = self.training.sampling(n_train, *self.training.prevalence())
-        self.test = self.test.sampling(n_test, *self.test.prevalence())
+        self.training = self.training.sampling(
+            n_train,
+            *self.training.prevalence(),
+            random_state=random_state
+        )
+        self.test = self.test.sampling(
+            n_test,
+            *self.test.prevalence(),
+            random_state=random_state
+        )
         return self
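The new random_state parameter is forwarded to the underlying sampling calls, so a fixed seed makes the subsampling reproducible. A usage sketch, assuming the quapy API as it appears elsewhere in this PR:

# illustrative: with a fixed seed, reduce() draws the same subsample every run
import quapy as qp

data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10)
data.reduce(n_train=500, n_test=500, random_state=1)  # prevalence-preserving subsample
print(data.training.prevalence(), data.test.prevalence())  # identical across runs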
@@ -1,3 +1,4 @@
+import os
 import unittest
 
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -77,6 +78,9 @@ class TestDatasets(unittest.TestCase):
             self._check_dataset(dataset)
 
     def test_lequa2022(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_lequa2022 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
         for dataset_name in LEQUA2022_VECTOR_TASKS:
             print(f'loading dataset {dataset_name}...', end='')
@@ -104,6 +108,10 @@ class TestDatasets(unittest.TestCase):
 
 
     def test_IFCB(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
+
         print(f'loading dataset IFCB.')
         for mod_sel in [False, True]:
             train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel)
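Both large-dataset tests honor the same environment flag, so the full suite can still be exercised locally by leaving the variable unset. A sketch of driving these tests directly; the module name is an assumption about the repository layout:

# run the dataset tests with the gate disabled; module name is hypothetical
import os
import unittest

os.environ.pop('QUAPY_TESTS_OMIT_LARGE_DATASETS', None)  # ensure nothing is skipped
unittest.main(module='test_datasets', exit=False)  # assumes the test module is importable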
@@ -4,7 +4,6 @@ import numpy as np
 from sklearn.linear_model import LogisticRegression
 
 import quapy as qp
-import util
 from quapy.method.aggregative import PACC
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import APP
@@ -20,7 +19,7 @@ class ModselTestCase(unittest.TestCase):
 
         q = PACC(LogisticRegression(random_state=1, max_iter=5000))
 
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': [0.000001, 10.]}
@@ -42,7 +41,7 @@ class ModselTestCase(unittest.TestCase):
 
         q = PACC(LogisticRegression(random_state=1, max_iter=5000))
 
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500)
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': np.logspace(-3,3,7)}
@@ -80,7 +79,7 @@ class ModselTestCase(unittest.TestCase):
 
         q = PACC(SlowLR())
 
-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)
 
         param_grid = {'classifier__C': np.logspace(-1,1,3)}
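With reduce(random_state=1) in place, every stochastic step of these model-selection tests is seeded. The resulting setup, assembled only from calls that appear in this diff:

# deterministic test setup: classifier, subsample, and split all take seed 1
import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

q = PACC(LogisticRegression(random_state=1, max_iter=5000))
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
training, validation = data.training.split_stratified(0.7, random_state=1)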
setup.py
@@ -125,6 +125,7 @@ setup(
     # projects.
     extras_require={  # Optional
         'bayes': ['jax', 'jaxlib', 'numpyro'],
+        'tests': ['certifi'],
     },
 
     # If there are data files included in your packages that need to be
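The new tests extra lets CI pull in certifi alongside the bayes dependencies via pip install -e .[bayes,tests]. A small sketch to verify the declared extras on an installed copy; importlib.metadata is standard library, and the distribution name is assumed to be quapy:

# check the declared extras of an installed quapy distribution
from importlib.metadata import metadata

md = metadata('quapy')
print(md.get_all('Provides-Extra'))  # expected to list 'bayes' and 'tests'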