1
0
Fork 0

Tests for non-aggregative and meta methods.

This commit is contained in:
Andrea Esuli 2021-05-04 12:14:14 +02:00
parent 8f284e540a
commit 70a3d4bd0f
3 changed files with 109 additions and 6 deletions

View File

@ -11,8 +11,8 @@ from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm from tqdm import tqdm
import quapy as qp import quapy as qp
from data import LabelledCollection from quapy.data import LabelledCollection
from util import EarlyStop from quapy.util import EarlyStop
class NeuralClassifierTrainer: class NeuralClassifierTrainer:

View File

@ -3,21 +3,31 @@ from . import base
from . import meta from . import meta
from . import non_aggregative from . import non_aggregative
# Quantifier classes from the `aggregative` module grouped as explicit-loss-minimization
# methods. Keeping them in their own set lets callers select or exclude them as a group;
# the set is also merged into AGGREGATIVE_METHODS below.
EXPLICIT_LOSS_MINIMIZATION_METHODS = {
aggregative.ELM,
aggregative.SVMQ,
aggregative.SVMAE,
aggregative.SVMKLD,
aggregative.SVMRAE,
aggregative.SVMNKLD
}
AGGREGATIVE_METHODS = { AGGREGATIVE_METHODS = {
aggregative.CC, aggregative.CC,
aggregative.ACC, aggregative.ACC,
aggregative.PCC, aggregative.PCC,
aggregative.PACC, aggregative.PACC,
aggregative.ELM,
aggregative.EMQ, aggregative.EMQ,
aggregative.HDy aggregative.HDy
} } | EXPLICIT_LOSS_MINIMIZATION_METHODS
NON_AGGREGATIVE_METHODS = { NON_AGGREGATIVE_METHODS = {
non_aggregative.MaximumLikelihoodPrevalenceEstimation non_aggregative.MaximumLikelihoodPrevalenceEstimation
} }
META_METHODS = { META_METHODS = {
meta.Ensemble,
meta.QuaNet meta.QuaNet
} }

View File

@ -5,7 +5,8 @@ from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC from sklearn.svm import LinearSVC
import quapy as qp import quapy as qp
from quapy.method import AGGREGATIVE_METHODS from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS
from quapy.method.meta import Ensemble
datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'), datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')] pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
@ -14,12 +15,104 @@ learners = [LogisticRegression, MultinomialNB, LinearSVC]
@pytest.mark.parametrize('dataset', datasets)
@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
@pytest.mark.parametrize('learner', learners)
def test_aggregative_methods(dataset, aggregative_method, learner):
    """Smoke-test every aggregative quantifier with every learner on every dataset.

    The explicit-loss-minimization methods are excluded from the parametrization;
    they are constructed without a learner and are exercised by their own test.

    :param dataset: dataset exposing ``training``, ``test`` and ``binary``
    :param aggregative_method: quantifier class, instantiated with a learner instance
    :param learner: classifier class, instantiated with default arguments
    """
    model = aggregative_method(learner())

    # A binary-only quantifier cannot be fitted on a multiclass dataset. Report the
    # case as skipped (instead of returning, which pytest would count as a pass).
    if model.binary and not dataset.binary:
        pytest.skip(f'skipping the test of binary model {model} on non-binary dataset {dataset}')

    model.fit(dataset.training)

    estim_prevalences = model.quantify(dataset.test.instances)
    true_prevalences = dataset.test.prevalence()
    error = qp.error.mae(true_prevalences, estim_prevalences)

    assert isinstance(error, numpy.float64)
@pytest.mark.parametrize('dataset', datasets)
@pytest.mark.parametrize('elm_method', EXPLICIT_LOSS_MINIMIZATION_METHODS)
def test_elm_methods(dataset, elm_method):
    """Smoke-test the explicit-loss-minimization quantifiers on every dataset.

    The test is skipped when constructing the quantifier fails with the
    "does not seem to point to a valid path" assertion (reported here as a
    missing SVMperf binary). Any other assertion failure is propagated.

    :param dataset: dataset exposing ``training``, ``test`` and ``binary``
    :param elm_method: quantifier class, instantiated with default arguments
    """
    try:
        model = elm_method()
    except AssertionError as ae:
        # str(ae) is safe even for a message-less assertion, and `in` also matches
        # when the text sits at the very start of the message.
        if 'does not seem to point to a valid path' in str(ae):
            pytest.skip('Missing SVMperf binary program, skipping test')
        # Unrelated assertion failures must surface instead of being swallowed
        # (which would later fail with a NameError on `model`).
        raise

    # A binary-only quantifier cannot be fitted on a multiclass dataset. Report the
    # case as skipped (instead of returning, which pytest would count as a pass).
    if model.binary and not dataset.binary:
        pytest.skip(f'skipping the test of binary model {model} on non-binary dataset {dataset}')

    model.fit(dataset.training)

    estim_prevalences = model.quantify(dataset.test.instances)
    true_prevalences = dataset.test.prevalence()
    error = qp.error.mae(true_prevalences, estim_prevalences)

    assert isinstance(error, numpy.float64)
@pytest.mark.parametrize('dataset', datasets)
@pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS)
def test_non_aggregative_methods(dataset, non_aggregative_method):
    """Smoke-test every non-aggregative quantifier on every dataset.

    :param dataset: dataset exposing ``training``, ``test`` and ``binary``
    :param non_aggregative_method: quantifier class, instantiated with default arguments
    """
    model = non_aggregative_method()

    # A binary-only quantifier cannot be fitted on a multiclass dataset. Report the
    # case as skipped (instead of returning, which pytest would count as a pass).
    if model.binary and not dataset.binary:
        pytest.skip(f'skipping the test of binary model {model} on non-binary dataset {dataset}')

    model.fit(dataset.training)

    estim_prevalences = model.quantify(dataset.test.instances)
    true_prevalences = dataset.test.prevalence()
    error = qp.error.mae(true_prevalences, estim_prevalences)

    assert isinstance(error, numpy.float64)
@pytest.mark.parametrize('base_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
@pytest.mark.parametrize('learner', learners)
@pytest.mark.parametrize('dataset', datasets)
@pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES)
def test_ensemble_method(base_method, learner, dataset, policy):
    """Smoke-test ``Ensemble`` over every base quantifier, learner, dataset and policy.

    :param base_method: aggregative quantifier class wrapped by the ensemble
    :param learner: classifier class, instantiated with default arguments
    :param dataset: dataset exposing ``training``, ``test`` and ``binary``
    :param policy: one of ``Ensemble.VALID_POLICIES``
    """
    # NOTE: this writes to the global quapy environment and is not restored afterwards.
    qp.environ['SAMPLE_SIZE'] = len(dataset.training)

    model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1)

    # A binary-only quantifier cannot be fitted on a multiclass dataset. Report the
    # case as skipped (instead of returning, which pytest would count as a pass).
    if model.binary and not dataset.binary:
        pytest.skip(f'skipping the test of binary model {model} on non-binary dataset {dataset}')

    model.fit(dataset.training)

    estim_prevalences = model.quantify(dataset.test.instances)
    true_prevalences = dataset.test.prevalence()
    error = qp.error.mae(true_prevalences, estim_prevalences)

    assert isinstance(error, numpy.float64)
def test_quanet_method():
dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
from quapy.classification.neural import CNNnet
cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
from quapy.classification.neural import NeuralClassifierTrainer
learner = NeuralClassifierTrainer(cnn, device='cuda')
from quapy.method.meta import QuaNet
model = QuaNet(learner, sample_size=len(dataset.training), device='cuda')
if model.binary and not dataset.binary:
print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
return return
model.fit(dataset.training) model.fit(dataset.training)