QuaPy/quapy/tests/test_evaluation.py

85 lines
2.9 KiB
Python
Raw Normal View History

2022-05-25 19:14:33 +02:00
import unittest
2023-02-14 11:14:38 +01:00
import numpy as np
2022-05-25 19:14:33 +02:00
import quapy as qp
from sklearn.linear_model import LogisticRegression
from time import time
2023-02-14 11:14:38 +01:00
2023-11-08 09:45:28 +01:00
from quapy.error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \
2023-02-14 11:14:38 +01:00
QUANTIFICATION_ERROR_SINGLE_NAMES
from quapy.method.aggregative import EMQ, PCC
2022-06-01 18:28:59 +02:00
from quapy.method.base import BaseQuantifier
2022-05-25 19:14:33 +02:00
class EvalTestCase(unittest.TestCase):
def test_eval_speedup(self):
data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True)
train, test = data.training, data.test
protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=11, repeats=1, random_state=1)
2022-05-25 19:14:33 +02:00
class SlowLR(LogisticRegression):
def predict_proba(self, X):
import time
time.sleep(1)
return super().predict_proba(X)
emq = EMQ(SlowLR()).fit(train)
tinit = time()
2022-06-01 18:28:59 +02:00
score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True, aggr_speedup='force')
2022-05-25 19:14:33 +02:00
tend_optim = time()-tinit
print(f'evaluation (with optimization) took {tend_optim}s [MAE={score:.4f}]')
class NonAggregativeEMQ(BaseQuantifier):
def __init__(self, cls):
self.emq = EMQ(cls)
def quantify(self, instances):
return self.emq.quantify(instances)
def fit(self, data):
self.emq.fit(data)
return self
emq = NonAggregativeEMQ(SlowLR()).fit(train)
tinit = time()
score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True)
tend_no_optim = time() - tinit
print(f'evaluation (w/o optimization) took {tend_no_optim}s [MAE={score:.4f}]')
2022-06-01 18:28:59 +02:00
self.assertEqual(tend_no_optim>(tend_optim/2), True)
2022-05-25 19:14:33 +02:00
2023-02-14 11:14:38 +01:00
def test_evaluation_output(self):
data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True)
train, test = data.training, data.test
qp.environ['SAMPLE_SIZE']=100
protocol = qp.protocol.APP(test, random_state=0)
q = PCC(LogisticRegression()).fit(train)
single_errors = list(QUANTIFICATION_ERROR_SINGLE_NAMES)
averaged_errors = ['m'+e for e in single_errors]
single_errors = single_errors + [qp.error.from_name(e) for e in single_errors]
averaged_errors = averaged_errors + [qp.error.from_name(e) for e in averaged_errors]
for error_metric, averaged_error_metric in zip(single_errors, averaged_errors):
score = qp.evaluation.evaluate(q, protocol, error_metric=averaged_error_metric)
self.assertTrue(isinstance(score, float))
scores = qp.evaluation.evaluate(q, protocol, error_metric=error_metric)
self.assertTrue(isinstance(scores, np.ndarray))
self.assertEqual(scores.mean(), score)
2022-05-25 19:14:33 +02:00
if __name__ == '__main__':
unittest.main()