From 70a3d4bd0f2c9c18b226cf6e9fa6d48be7890086 Mon Sep 17 00:00:00 2001
From: Andrea Esuli
Date: Tue, 4 May 2021 12:14:14 +0200
Subject: [PATCH] Tests for non aggregative and meta methods.

---
 quapy/classification/neural.py |  4 +-
 quapy/method/__init__.py       | 14 ++++-
 quapy/tests/test_methods.py    | 97 +++++++++++++++++++++++++++++++++-
 3 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/quapy/classification/neural.py b/quapy/classification/neural.py
index 68a924e..afeb649 100644
--- a/quapy/classification/neural.py
+++ b/quapy/classification/neural.py
@@ -11,8 +11,8 @@ from torch.nn.utils.rnn import pad_sequence
 from tqdm import tqdm
 
 import quapy as qp
-from data import LabelledCollection
-from util import EarlyStop
+from quapy.data import LabelledCollection
+from quapy.util import EarlyStop
 
 
 class NeuralClassifierTrainer:
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
index 6ef83f1..b69d38f 100644
--- a/quapy/method/__init__.py
+++ b/quapy/method/__init__.py
@@ -3,21 +3,31 @@ from . import base
 from . import meta
 from . import non_aggregative
 
+EXPLICIT_LOSS_MINIMIZATION_METHODS = {
+    aggregative.ELM,
+    aggregative.SVMQ,
+    aggregative.SVMAE,
+    aggregative.SVMKLD,
+    aggregative.SVMRAE,
+    aggregative.SVMNKLD
+}
+
 AGGREGATIVE_METHODS = {
     aggregative.CC,
     aggregative.ACC,
     aggregative.PCC,
     aggregative.PACC,
-    aggregative.ELM,
     aggregative.EMQ,
     aggregative.HDy
-}
+} | EXPLICIT_LOSS_MINIMIZATION_METHODS
+
 
 NON_AGGREGATIVE_METHODS = {
     non_aggregative.MaximumLikelihoodPrevalenceEstimation
 }
 
 META_METHODS = {
+    meta.Ensemble,
     meta.QuaNet
 }
 
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index b59c900..d32916d 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -5,7 +5,8 @@ from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import LinearSVC
 
 import quapy as qp
-from quapy.method import AGGREGATIVE_METHODS
+from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS
+from quapy.method.meta import Ensemble
 
 datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
             pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
@@ -14,12 +15,104 @@ learners = [LogisticRegression, MultinomialNB, LinearSVC]
 
 
 @pytest.mark.parametrize('dataset', datasets)
-@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS)
+@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
 @pytest.mark.parametrize('learner', learners)
 def test_aggregative_methods(dataset, aggregative_method, learner):
     model = aggregative_method(learner())
 
     if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('elm_method', EXPLICIT_LOSS_MINIMIZATION_METHODS)
+def test_elm_methods(dataset, elm_method):
+    try:
+        model = elm_method()
+    except AssertionError as ae:
+        if ae.args[0].find('does not seem to point to a valid path') > 0:
+            print('Missing SVMperf binary program, skipping test')
+            return
+
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS)
+def test_non_aggregative_methods(dataset, non_aggregative_method):
+    model = non_aggregative_method()
+
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+@pytest.mark.parametrize('base_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
+@pytest.mark.parametrize('learner', learners)
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES)
+def test_ensemble_method(base_method, learner, dataset, policy):
+    qp.environ['SAMPLE_SIZE'] = len(dataset.training)
+    model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1)
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+def test_quanet_method():
+    dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
+    qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
+
+    from quapy.classification.neural import CNNnet
+    cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
+
+    from quapy.classification.neural import NeuralClassifierTrainer
+    learner = NeuralClassifierTrainer(cnn, device='cuda')
+
+    from quapy.method.meta import QuaNet
+    model = QuaNet(learner, sample_size=len(dataset.training), device='cuda')
+
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
         return
 
     model.fit(dataset.training)
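
Note (not part of the patch): the new test_ensemble_method can also be exercised by hand outside pytest. The sketch below mirrors one parametrization of that test, assuming QuaPy is installed and the UCI 'ionosphere' dataset can be fetched; the policy is simply taken from the same Ensemble.VALID_POLICIES set the test iterates over.

    # Hand-run version of one ensemble test case (sketch, not the patch's code)
    from sklearn.linear_model import LogisticRegression

    import quapy as qp
    from quapy.method.aggregative import ACC
    from quapy.method.meta import Ensemble

    dataset = qp.datasets.fetch_UCIDataset('ionosphere')
    qp.environ['SAMPLE_SIZE'] = len(dataset.training)

    # pick any policy from the set the test parametrizes over
    policy = next(iter(Ensemble.VALID_POLICIES))
    model = Ensemble(quantifier=ACC(LogisticRegression()), size=5, policy=policy, n_jobs=-1)
    model.fit(dataset.training)

    estim_prevalences = model.quantify(dataset.test.instances)
    error = qp.error.mae(dataset.test.prevalence(), estim_prevalences)
    print(f'MAE={error:.4f}')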