Merge pull request #31 from mirkobunse/devel
Continuous Integration with GitHub Actions
commit bee1c4e678
@@ -0,0 +1,33 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - devel
+
+jobs:
+
+  # run unit tests
+  test:
+    name: Unit tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version:
+          - "3.11"
+    env:
+      QUAPY_TESTS_OMIT_LARGE_DATASETS: True
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install -e .[bayes,tests]
+      - name: Test with unittest
+        run: python -m unittest
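For reference, the test job can be reproduced locally. A minimal sketch, assuming the package was installed with the same extras as in the workflow (pip install -e .[bayes,tests]):

# local equivalent of the workflow's "Test with unittest" step
import os
import unittest

# mirror the env: block above so that large-dataset tests are skipped
os.environ['QUAPY_TESTS_OMIT_LARGE_DATASETS'] = 'True'

# `python -m unittest` performs test discovery from the current directory
unittest.main(module=None, argv=['unittest', 'discover'])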
@@ -549,7 +549,7 @@ class Dataset:
         yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')


-    def reduce(self, n_train=100, n_test=100):
+    def reduce(self, n_train=100, n_test=100, random_state=None):
         """
         Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.

@@ -557,6 +557,14 @@ class Dataset:
         :param n_test: number of test documents to keep (default 100)
         :return: self
         """
-        self.training = self.training.sampling(n_train, *self.training.prevalence())
-        self.test = self.test.sampling(n_test, *self.test.prevalence())
+        self.training = self.training.sampling(
+            n_train,
+            *self.training.prevalence(),
+            random_state=random_state
+        )
+        self.test = self.test.sampling(
+            n_test,
+            *self.test.prevalence(),
+            random_state=random_state
+        )
         return self
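The new random_state makes the subsampling reproducible. A usage sketch (fetch_reviews and the 'imdb' dataset appear in the tests below; the seed value is arbitrary):

import quapy as qp

data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10)
# prevalence-preserving subsample of 100 training and 100 test documents;
# the same seed now yields the same subsample on every run
data = data.reduce(n_train=100, n_test=100, random_state=42)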
@@ -1,3 +1,4 @@
+import os
 import unittest

 from sklearn.feature_extraction.text import TfidfVectorizer

@@ -77,6 +78,9 @@ class TestDatasets(unittest.TestCase):
         self._check_dataset(dataset)

     def test_lequa2022(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_lequa2022 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return

         for dataset_name in LEQUA2022_VECTOR_TASKS:
             print(f'loading dataset {dataset_name}...', end='')

@@ -104,6 +108,10 @@ class TestDatasets(unittest.TestCase):


     def test_IFCB(self):
+        if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
+            print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
+            return
+
         print(f'loading dataset IFCB.')
         for mod_sel in [False, True]:
             train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel)
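Both guards follow the same pattern; an equivalent formulation with unittest.skipIf would report the tests as skipped rather than silently passing. A sketch only, not part of this PR:

import os
import unittest

OMIT_LARGE = bool(os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'))

class TestDatasets(unittest.TestCase):
    @unittest.skipIf(OMIT_LARGE, 'QUAPY_TESTS_OMIT_LARGE_DATASETS is set')
    def test_lequa2022(self):
        ...  # dataset-loading checks as in the diff above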
@@ -4,7 +4,6 @@ import numpy as np
 from sklearn.linear_model import LogisticRegression

 import quapy as qp
-import util
 from quapy.method.aggregative import PACC
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import APP

@@ -20,7 +19,7 @@ class ModselTestCase(unittest.TestCase):

         q = PACC(LogisticRegression(random_state=1, max_iter=5000))

-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)

         param_grid = {'classifier__C': [0.000001, 10.]}

@@ -42,7 +41,7 @@ class ModselTestCase(unittest.TestCase):

         q = PACC(LogisticRegression(random_state=1, max_iter=5000))

-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500)
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)

         param_grid = {'classifier__C': np.logspace(-3,3,7)}

@@ -80,7 +79,7 @@ class ModselTestCase(unittest.TestCase):

         q = PACC(SlowLR())

-        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce()
+        data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
         training, validation = data.training.split_stratified(0.7, random_state=1)

         param_grid = {'classifier__C': np.logspace(-1,1,3)}
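Taken together, these seeds make the whole model-selection pipeline repeatable. A rough sketch of the setup the tests exercise (the protocol settings below are assumptions, not taken from the diff):

import quapy as qp
from quapy.method.aggregative import PACC
from quapy.model_selection import GridSearchQ
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

# seeded reduce() + seeded split => identical data in every run
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1)
training, validation = data.training.split_stratified(0.7, random_state=1)

q = GridSearchQ(
    PACC(LogisticRegression(random_state=1, max_iter=5000)),
    param_grid={'classifier__C': [0.000001, 10.]},
    protocol=APP(validation, sample_size=100, random_state=1),  # assumed settings
    error=qp.error.mae,  # assumed metric
).fit(training)
print(q.best_params_)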