tests passed; working on examples

This commit is contained in:
Alejandro Moreo Fernandez 2025-07-13 14:27:14 +02:00
parent c045525075
commit 265fcc2d92
15 changed files with 113 additions and 56 deletions

View File

@ -1,3 +1,7 @@
Adapt examples; remaining: example 4 onwards
Add 'platt' to calib options in EMQ?
Allow n_prevpoints in APP to be specified by a user-defined grid?
Update READMEs, wiki, & examples for new fit-predict interface
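For reference, a minimal sketch of the interface change the last item refers to, pieced together from the examples updated in this commit (the dataset and quantifier below are just the ones that happen to appear elsewhere in the diff; the old call pattern is shown commented out):

import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

train, test = qp.datasets.fetch_UCIMulticlassDataset('letter').train_test

# old interface (see the comment removed in the custom-quantifier example below):
#   quantifier.fit(train, fit_classifier=True, val_split=None)   # took a LabelledCollection
# new fit-predict interface: fit takes (X, y), predict takes X
quantifier = PACC(LogisticRegression())
quantifier.fit(*train.Xy)                  # equivalent to quantifier.fit(train.X, train.y)
estim_prev = quantifier.predict(test.X)    # estimated class prevalences for the test set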

View File

@ -23,6 +23,12 @@ print(f'running model selection with N_JOBS={qp.environ["N_JOBS"]}; '
training, test = qp.datasets.fetch_UCIMulticlassDataset('letter').train_test
# evaluation in terms of MAE with default hyperparameters
model.fit(*training.Xy)
mae_score = qp.evaluation.evaluate(model, protocol=UPP(test), error_metric='mae')
print(f'MAE (non optimized)={mae_score:.5f}')
with qp.util.temp_seed(0):
# The model will be returned by the fit method of GridSearchQ.
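As a reminder of what the comment above alludes to, both ways of recovering the selected quantifier that appear in this commit's examples are sketched below (model, training, validation and the grid values are placeholders):

from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP

param_grid = {'classifier__C': [0.1, 1.0, 10.0]}   # hypothetical grid
grid = GridSearchQ(model, param_grid, protocol=UPP(validation), error='mae', refit=False, verbose=True)

# as in the LeQua examples of this commit: the return value of fit is used directly as the quantifier
quantifier = grid.fit(*training.Xy)

# as in the twitter example of this commit: explicitly ask for the best model found
quantifier = grid.fit(*training.Xy).best_model()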

View File

@ -31,8 +31,7 @@ class MyQuantifier(BaseQuantifier):
self.alpha = alpha
self.classifier = classifier
# in general, we would need to implement the method fit(self, data: LabelledCollection, fit_classifier=True,
# val_split=None); this would amount to:
# in general, we would need to implement the method fit(self, X, y); this would amount to:
def fit(self, X, y):
n_classes = F.num_classes_from_labels(y)
assert n_classes==2, \
@ -61,8 +60,9 @@ class MyQuantifier(BaseQuantifier):
class MyAggregativeSoftQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
def __init__(self, classifier, alpha=0.5):
# aggregative quantifiers have an internal attribute called self.classifier
self.classifier = classifier
# aggregative quantifiers have an internal attribute called self.classifier, but this is defined
# within the super's init
super().__init__(classifier, fit_classifier=True, val_split=None)
self.alpha = alpha
# since this method is of type aggregative, we can simply implement the method aggregation_fit, which
@ -144,7 +144,7 @@ if __name__ == '__main__':
evaluation took 4.66s [MAE = 0.0630]
"""
# Note that the first implementation is much slower, both in terms of grid-search optimization and in terms of
# evaluation. The reason why is that QuaPy is highly optimized for aggregative quantifiers (by far, the most
# evaluation. The reason is that QuaPy is highly optimized for aggregative quantifiers (by far, the most
# popular type of quantification methods), thus significantly speeding up model selection and test routines.
# Furthermore, it is simpler to extend an aggregation type since QuaPy implements boilerplate functions for you.
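The speed gap described in the closing comment comes from QuaPy classifying the evaluation pool only once for aggregative quantifiers and re-using those predictions across protocol samples; the aggr_speedup argument exercised in this commit's evaluation tests exposes that behaviour. A rough sketch (the quantifier, test set and metric names are assumptions, following the example above):

import quapy as qp
from quapy.protocol import UPP

# aggregative quantifier: the classifier runs once over the test pool, and each sample drawn
# by the protocol only needs the cheap aggregation step ('force' is also used in the tests)
mae_fast = qp.evaluation.evaluate(agg_quantifier, protocol=UPP(test), error_metric='mae', aggr_speedup='auto')

# a non-aggregative quantifier (like MyQuantifier above) processes every sample from scratch
mae_slow = qp.evaluation.evaluate(my_quantifier, protocol=UPP(test), error_metric='mae')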

View File

@ -15,7 +15,7 @@ https://lequa2022.github.io/index (the site of the competition)
https://ceur-ws.org/Vol-3180/paper-146.pdf (the overview paper)
"""
# there are 4 tasks (T1A, T1B, T2A, T2B)
# there are 4 tasks (T1A, T1B, T2A, T2B), let us simply consider T1A (binary quantification, vector form)
task = 'T1A'
# set the sample size in the environment. The sample size is task-dependent and can be consulted by doing:
@ -28,6 +28,7 @@ qp.environ['N_JOBS'] = -1
# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition)
# stored in a directory.
training, val_generator, test_generator = fetch_lequa2022(task=task)
Xtr, ytr = training.Xy
# define the quantifier
quantifier = EMQ(classifier=LogisticRegression())
@ -36,10 +37,10 @@ quantifier = EMQ(classifier=LogisticRegression())
param_grid = {
'classifier__C': np.logspace(-3, 3, 7), # classifier-dependent: inverse of regularization strength
'classifier__class_weight': ['balanced', None], # classifier-dependent: weights of each class
'recalib': ['bcts', 'platt', None] # quantifier-dependent: recalibration method (new in v0.1.7)
'calib': ['bcts', None] # quantifier-dependent: recalibration method (new in v0.1.7)
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(training)
quantifier = model_selection.fit(Xtr, ytr)
# evaluation
report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
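evaluation_report returns a per-sample report (a pandas DataFrame in current QuaPy); a typical way of summarizing it is sketched below, with the exact column layout being an assumption worth double-checking:

import pandas as pd

pd.set_option('display.expand_frame_repr', False)
print(report.head())                   # one row per test sample: true/estimated prevalences plus error scores
print(report.mean(numeric_only=True))  # average 'mae', 'mrae' and 'mkld' over all test samples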

View File

@ -27,6 +27,7 @@ qp.environ['N_JOBS'] = -1
# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition)
# stored in a directory.
training, val_generator, test_generator = fetch_lequa2024(task=task)
Xtr, ytr = training.Xy
# define the quantifier
quantifier = KDEyML(classifier=LogisticRegression())
@ -38,7 +39,7 @@ param_grid = {
'bandwidth': np.linspace(0.01, 0.2, 20) # quantifier-dependent: bandwidth of the kernel
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(training)
quantifier = model_selection.fit(Xtr, ytr)
# evaluation
report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae'], verbose=True)

View File

@ -58,11 +58,11 @@ param_grid = {
}
print('starting model selection')
model_selection = GridSearchQ(quantifier, param_grid, protocol=UPP(val), verbose=True, refit=False)
quantifier = model_selection.fit(train_modsel).best_model()
quantifier = model_selection.fit(*train_modsel.Xy).best_model()
print('training on the whole training set')
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
quantifier.fit(train)
quantifier.fit(*train.Xy)
# evaluation
mae = qp.evaluation.evaluate(quantifier, protocol=UPP(test), error_metric='mae')

View File

@ -792,7 +792,7 @@ def _array_replace(arr, repl={"yes": 1, "no": 0}):
def fetch_lequa2022(task, data_home=None):
"""
Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.
Loads the official datasets provided for the `LeQua 2022 <https://lequa2022.github.io/index>`_ competition.
In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.
Tasks T1A and T2A are binary sentiment quantification problems, while T2A and T2B are multiclass quantification
@ -812,7 +812,7 @@ def fetch_lequa2022(task, data_home=None):
~/quapy_data/ directory)
:return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of
:class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of
:class:`quapy.data._lequa2022.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
:class:`quapy.data._lequa.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
that return a series of samples stored in a directory which are labelled by prevalence.
"""
@ -834,7 +834,9 @@ def fetch_lequa2022(task, data_home=None):
tmp_path = join(lequa_dir, task + '_tmp.zip')
download_file_if_not_exists(url, tmp_path)
with zipfile.ZipFile(tmp_path) as file:
print(f'Unzipping {tmp_path}...', end='')
file.extractall(unzipped_path)
print(f'[done]')
os.remove(tmp_path)
if not os.path.exists(join(lequa_dir, task)):
@ -862,6 +864,35 @@ def fetch_lequa2022(task, data_home=None):
def fetch_lequa2024(task, data_home=None, merge_T3=False):
"""
Loads the official datasets provided for the `LeQua 2024 <https://lequa2024.github.io/index>`_ competition.
LeQua 2024 defines four tasks (T1, T2, T3, T4) related to the problem of quantification;
all tasks are affected by some type of dataset shift. Tasks T1 and T2 are akin to tasks T1A and T1B of LeQua 2022,
while T3 and T4 are new tasks introduced in LeQua 2024.
- Task T1 evaluates binary quantifiers under prior probability shift (akin to T1A of LeQua 2022).
- Task T2 evaluates single-label multi-class quantifiers (for n > 2 classes) under prior probability shift (akin to T1B of LeQua 2022).
- Task T3 evaluates ordinal quantifiers, where the classes are totally ordered.
- Task T4 also evaluates binary quantifiers, but under some mix of covariate shift and prior probability shift.
For a broader discussion, we refer to the `online official documentation <https://lequa2024.github.io/tasks/>`_
The datasets are downloaded only once, and stored locally for future reuse.
See `4b.lequa2024_experiments.py` provided in the example folder, which can serve as a guide on how to use these
datasets.
:param task: a string representing the task name; valid ones are T1, T2, T3, and T4
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)
:param merge_T3: bool, if False (default), returns a generator of training collections, corresponding to natural
groups of reviews; if True, returns one single :class:`quapy.data.base.LabelledCollection` representing the
entire training set, as a concatenation of all the training collections
:return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of
:class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of
:class:`quapy.data._lequa.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
that return a series of samples stored in a directory which are labelled by prevalence.
"""
from quapy.data._lequa import load_vector_documents_2024, SamplesFromDir, LabelledCollectionsFromDir
@ -904,11 +935,7 @@ def fetch_lequa2024(task, data_home=None, merge_T3=False):
test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt')
test_gen = SamplesFromDir(test_samples_path, test_true_prev_path, load_fn=load_fn)
if task != 'T3':
tr_path = join(lequa_dir, task, 'public', 'training_data.txt')
train = LabelledCollection.load(tr_path, loader_func=load_fn)
return train, val_gen, test_gen
else:
if task == 'T3':
training_samples_path = join(lequa_dir, task, 'public', 'training_samples')
training_true_prev_path = join(lequa_dir, task, 'public', 'training_prevalences.txt')
train_gen = LabelledCollectionsFromDir(training_samples_path, training_true_prev_path, load_fn=load_fn)
@ -917,7 +944,10 @@ def fetch_lequa2024(task, data_home=None, merge_T3=False):
return train, val_gen, test_gen
else:
return train_gen, val_gen, test_gen
else:
tr_path = join(lequa_dir, task, 'public', 'training_data.txt')
train = LabelledCollection.load(tr_path, loader_func=load_fn)
return train, val_gen, test_gen
def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None):
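A hedged usage sketch of the two return modes documented above for fetch_lequa2024 (T3 vs. the remaining tasks); the snippet is illustrative and not part of the repository:

import quapy as qp

# T1, T2, T4 (and T3 with merge_T3=True): train is a single LabelledCollection
train, val_gen, test_gen = qp.datasets.fetch_lequa2024(task='T1')
X, y = train.Xy

# T3 with merge_T3=False (the default): the training data arrives as a generator of
# LabelledCollection objects, one per natural group of reviews
train_groups, val_gen, test_gen = qp.datasets.fetch_lequa2024(task='T3')
for group in train_groups:
    Xg, yg = group.Xy   # inspect or concatenate the groups as needed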

View File

@ -784,6 +784,8 @@ class EMQ(AggregativeSoftQuantifier):
def _fit_calibration(self, calibrator, P, y):
n_classes = len(self.classes_)
print(y, 'Y')
print(y.dtype, 'DTYPE')
if not np.issubdtype(y.dtype, np.number):
y = np.searchsorted(self.classes_, y)
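For context, the np.searchsorted call above maps non-numeric labels onto their integer positions in the (sorted) self.classes_ array; a standalone illustration with made-up labels:

import numpy as np

classes_ = np.array(['neg', 'pos'])    # class labels, kept sorted as sklearn stores them
y = np.array(['pos', 'neg', 'pos'])
print(np.searchsorted(classes_, y))    # -> [1 0 1], i.e. labels encoded as class indices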

View File

@ -372,7 +372,7 @@ def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfol
total_prev = np.zeros(shape=data.n_classes)
for train, test in data.kFCV(nfolds=nfolds, random_state=random_state):
quantifier.fit(train)
quantifier.fit(*train.Xy)
fold_prev = quantifier.predict(test.X)
rel_size = 1. * len(test) / len(data)
total_prev += fold_prev*rel_size

View File

@ -52,18 +52,12 @@ class TestDatasets(unittest.TestCase):
def test_UCIBinaryDataset(self):
for dataset_name in UCI_BINARY_DATASETS:
try:
print(f'loading dataset {dataset_name}...', end='')
dataset = fetch_UCIBinaryDataset(dataset_name)
dataset.stats()
dataset.reduce()
print(f'[done]')
self._check_dataset(dataset)
except FileNotFoundError as fnfe:
if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
'If this is the first time you attempt to load this dataset') > 0:
print('The pageblocks.5 dataset requires some hand processing to be usable; skipping this test.')
continue
def test_UCIMultiDataset(self):
for dataset_name in UCI_MULTICLASS_DATASETS:
@ -83,7 +77,7 @@ class TestDatasets(unittest.TestCase):
return
for dataset_name in LEQUA2022_VECTOR_TASKS:
print(f'loading dataset {dataset_name}...', end='')
print(f'LeQua2022: loading dataset {dataset_name}...', end='')
train, gen_val, gen_test = fetch_lequa2022(dataset_name)
train.stats()
n_classes = train.n_classes
@ -94,7 +88,7 @@ class TestDatasets(unittest.TestCase):
self._check_samples(gen_test, q, max_samples_test=5)
for dataset_name in LEQUA2022_TEXT_TASKS:
print(f'loading dataset {dataset_name}...', end='')
print(f'LeQua2022: loading dataset {dataset_name}...', end='')
train, gen_val, gen_test = fetch_lequa2022(dataset_name)
train.stats()
n_classes = train.n_classes
@ -106,6 +100,23 @@ class TestDatasets(unittest.TestCase):
self._check_samples(gen_val, q, max_samples_test=5, vectorizer=tfidf)
self._check_samples(gen_test, q, max_samples_test=5, vectorizer=tfidf)
def test_lequa2024(self):
if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
print("omitting test_lequa2024 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
return
for task in LEQUA2024_TASKS:
print(f'LeQua2024: loading task {task}...', end='')
train, gen_val, gen_test = fetch_lequa2024(task, merge_T3=True)
train.stats()
n_classes = train.n_classes
train = train.sampling(100, *F.uniform_prevalence(n_classes))
q = self.new_quantifier()
q.fit(*train.Xy)
self._check_samples(gen_val, q, max_samples_test=5)
self._check_samples(gen_test, q, max_samples_test=5)
def test_IFCB(self):
if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")

View File

@ -29,7 +29,7 @@ class EvalTestCase(unittest.TestCase):
time.sleep(1)
return super().predict_proba(X)
emq = EMQ(SlowLR()).fit(train)
emq = EMQ(SlowLR()).fit(*train.Xy)
tinit = time()
score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True, aggr_speedup='force')
@ -44,11 +44,11 @@ class EvalTestCase(unittest.TestCase):
def predict(self, X):
return self.emq.predict(X)
def fit(self, data):
self.emq.fit(data)
def fit(self, X, y):
self.emq.fit(X, y)
return self
emq = NonAggregativeEMQ(SlowLR()).fit(train)
emq = NonAggregativeEMQ(SlowLR()).fit(*train.Xy)
tinit = time()
score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True)
@ -69,7 +69,7 @@ class EvalTestCase(unittest.TestCase):
protocol = qp.protocol.APP(test, random_state=0)
q = PCC(LogisticRegression()).fit(train)
q = PCC(LogisticRegression()).fit(*train.Xy)
single_errors = list(QUANTIFICATION_ERROR_SINGLE_NAMES)
averaged_errors = ['m'+e for e in single_errors]

View File

@ -10,15 +10,17 @@ from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_ME
from quapy.functional import check_prevalence_vector
# a random selection of composed methods to test the qunfold integration
from quapy.method.composable import check_compatible_qunfold_version
from quapy.method.composable import (
ComposableQuantifier,
LeastSquaresLoss,
HellingerSurrogateLoss,
ClassTransformer,
HistogramTransformer,
CVClassifier,
check_compatible_qunfold_version
CVClassifier
)
COMPOSABLE_METHODS = [
ComposableQuantifier( # ACC
LeastSquaresLoss(),
@ -70,7 +72,6 @@ class TestMethods(unittest.TestCase):
self.assertTrue(check_prevalence_vector(estim_prevalences))
def test_ensembles(self):
qp.environ['SAMPLE_SIZE'] = 10
base_quantifier = ACC(LogisticRegression())

View File

@ -26,7 +26,7 @@ class ModselTestCase(unittest.TestCase):
app = APP(validation, sample_size=100, random_state=1)
q = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, verbose=True, n_jobs=-1
).fit(training)
).fit(*training.Xy)
print('best params', q.best_params_)
print('best score', q.best_score_)
@ -51,7 +51,7 @@ class ModselTestCase(unittest.TestCase):
tinit = time.time()
modsel = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=1, verbose=True
).fit(training)
).fit(*training.Xy)
tend_seq = time.time()-tinit
best_c_seq = modsel.best_params_['classifier__C']
print(f'[done] took {tend_seq:.2f}s best C = {best_c_seq}')
@ -60,7 +60,7 @@ class ModselTestCase(unittest.TestCase):
tinit = time.time()
modsel = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=-1, verbose=True
).fit(training)
).fit(*training.Xy)
tend_par = time.time() - tinit
best_c_par = modsel.best_params_['classifier__C']
print(f'[done] took {tend_par:.2f}s best C = {best_c_par}')
@ -90,7 +90,7 @@ class ModselTestCase(unittest.TestCase):
q, param_grid, protocol=app, timeout=3, n_jobs=-1, verbose=True, raise_errors=True
)
with self.assertRaises(TimeoutError):
modsel.fit(training)
modsel.fit(*training.Xy)
print('Expecting ValueError to be raised')
modsel = GridSearchQ(
@ -99,7 +99,7 @@ class ModselTestCase(unittest.TestCase):
with self.assertRaises(ValueError):
# this exception is not raised because of the timeout, but because no combination of hyperparams
# succeeded (in this case, a ValueError is raised, regardless of "raise_errors")
modsel.fit(training)
modsel.fit(*training.Xy)
if __name__ == '__main__':

View File

@ -71,7 +71,7 @@ class TestProtocols(unittest.TestCase):
# surprisingly enough, for some n_prevalences the test fails, even though
# everything is correct. The problem is that in function APP.prevalence_grid()
# there is sometimes one rounding error that gets cumulated and
# surpasses 1.0 (by a very small float value, 0.0000000000002 or sthe like)
# surpasses 1.0 (by a very small float value, 0.0000000000002 or the like)
# so these tuples are mistakenly removed... I have tried with np.isclose, and
# other workarounds, but it eventually happens that there is some negative probability
# in the sampling function...
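The kind of cumulated rounding the comment describes is easy to reproduce with plain floats (values chosen only for illustration, unrelated to the actual grid):

# three perfectly valid prevalence values whose float sum overshoots the exact total
print(0.1 + 0.2 + 0.3)          # 0.6000000000000001, not 0.6
print(0.1 + 0.2 + 0.3 <= 0.6)   # False, which is how otherwise-valid grid tuples get discarded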

View File

@ -13,17 +13,18 @@ class TestReplicability(unittest.TestCase):
def test_prediction_replicability(self):
dataset = qp.datasets.fetch_UCIBinaryDataset('yeast')
train, test = dataset.train_test
with qp.util.temp_seed(0):
lr = LogisticRegression(random_state=0, max_iter=10000)
pacc = PACC(lr)
prev = pacc.fit(dataset.training).predict(dataset.test.X)
prev = pacc.fit(*train.Xy).predict(test.X)
str_prev1 = strprev(prev, prec=5)
with qp.util.temp_seed(0):
lr = LogisticRegression(random_state=0, max_iter=10000)
pacc = PACC(lr)
prev2 = pacc.fit(dataset.training).predict(dataset.test.X)
prev2 = pacc.fit(*train.Xy).predict(test.X)
str_prev2 = strprev(prev2, prec=5)
self.assertEqual(str_prev1, str_prev2)
@ -83,18 +84,18 @@ class TestReplicability(unittest.TestCase):
test = test.sampling(500, *[0.1, 0.0, 0.1, 0.1, 0.2, 0.5, 0.0])
with qp.util.temp_seed(10):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
pacc = PACC(LogisticRegression(), val_split=.5, n_jobs=2)
pacc.fit(*train.Xy)
prev1 = F.strprev(pacc.predict(test.instances))
with qp.util.temp_seed(0):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
pacc = PACC(LogisticRegression(), val_split=.5, n_jobs=2)
pacc.fit(*train.Xy)
prev2 = F.strprev(pacc.predict(test.instances))
with qp.util.temp_seed(0):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
pacc = PACC(LogisticRegression(), val_split=.5, n_jobs=2)
pacc.fit(*train.Xy)
prev3 = F.strprev(pacc.predict(test.instances))
print(prev1)