tests passed; working on examples

This commit is contained in:
Alejandro Moreo Fernandez 2025-07-13 14:27:14 +02:00
parent c045525075
commit 265fcc2d92
15 changed files with 113 additions and 56 deletions

View File

@ -1,3 +1,7 @@
Adapt examples; remaining: example 4 onwards
Add 'platt' to calib options in EMQ?
Allow n_prevpoints in APP to be specified by a user-defined grid?
Update READMEs, wiki, & examples for new fit-predict interface
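For reference, a minimal sketch of the interface change the last item refers to, pieced together from the examples updated in this commit (the dataset and quantifier below are just the ones that happen to appear elsewhere in the diff; the old call pattern is shown commented out):

import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

train, test = qp.datasets.fetch_UCIMulticlassDataset('letter').train_test

# old interface (see the comment removed in the custom-quantifier example below):
#   quantifier.fit(train, fit_classifier=True, val_split=None)   # took a LabelledCollection
# new fit-predict interface: fit takes (X, y), predict takes X
quantifier = PACC(LogisticRegression())
quantifier.fit(*train.Xy)                  # equivalent to quantifier.fit(train.X, train.y)
estim_prev = quantifier.predict(test.X)    # estimated class prevalences for the test set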

View File

@ -23,6 +23,12 @@ print(f'running model selection with N_JOBS={qp.environ["N_JOBS"]}; '
training, test = qp.datasets.fetch_UCIMulticlassDataset('letter').train_test
# evaluation in terms of MAE with default hyperparameters
model.fit(*training.Xy)
mae_score = qp.evaluation.evaluate(model, protocol=UPP(test), error_metric='mae')
print(f'MAE (non optimized)={mae_score:.5f}')
with qp.util.temp_seed(0):
# The model will be returned by the fit method of GridSearchQ.
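As a reminder of what the comment above alludes to, both ways of recovering the selected quantifier that appear in this commit's examples are sketched below (model, training, validation and the grid values are placeholders):

from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP

param_grid = {'classifier__C': [0.1, 1.0, 10.0]}   # hypothetical grid
grid = GridSearchQ(model, param_grid, protocol=UPP(validation), error='mae', refit=False, verbose=True)

# as in the LeQua examples of this commit: the return value of fit is used directly as the quantifier
quantifier = grid.fit(*training.Xy)

# as in the twitter example of this commit: explicitly ask for the best model found
quantifier = grid.fit(*training.Xy).best_model()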

View File

@ -31,8 +31,7 @@ class MyQuantifier(BaseQuantifier):
self.alpha = alpha
self.classifier = classifier
# in general, we would need to implement the method fit(self, data: LabelledCollection, fit_classifier=True,
# val_split=None); this would amount to:
# in general, we would need to implement the method fit(self, X, y); this would amount to:
def fit(self, X, y):
n_classes = F.num_classes_from_labels(y)
assert n_classes==2, \
@ -61,8 +60,9 @@ class MyQuantifier(BaseQuantifier):
class MyAggregativeSoftQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
def __init__(self, classifier, alpha=0.5):
# aggregative quantifiers have an internal attribute called self.classifier
self.classifier = classifier
# aggregative quantifiers have an internal attribute called self.classifier, but this is defined
# within the super's init
super().__init__(classifier, fit_classifier=True, val_split=None)
self.alpha = alpha
# since this method is of type aggregative, we can simply implement the method aggregation_fit, which
@ -144,7 +144,7 @@ if __name__ == '__main__':
evaluation took 4.66s [MAE = 0.0630]
"""
# Note that the first implementation is much slower, both in terms of grid-search optimization and in terms of
# evaluation. The reason why is that QuaPy is highly optimized for aggregative quantifiers (by far, the most
# evaluation. The reason is that QuaPy is highly optimized for aggregative quantifiers (by far, the most
# popular type of quantification methods), thus significantly speeding up model selection and test routines.
# Furthermore, it is simpler to extend an aggregation type since QuaPy implements boilerplate functions for you.
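The speed gap described in the closing comment comes from QuaPy classifying the evaluation pool only once for aggregative quantifiers and re-using those predictions across protocol samples; the aggr_speedup argument exercised in this commit's evaluation tests exposes that behaviour. A rough sketch (the quantifier, test set and metric names are assumptions, following the example above):

import quapy as qp
from quapy.protocol import UPP

# aggregative quantifier: the classifier runs once over the test pool, and each sample drawn
# by the protocol only needs the cheap aggregation step ('force' is also used in the tests)
mae_fast = qp.evaluation.evaluate(agg_quantifier, protocol=UPP(test), error_metric='mae', aggr_speedup='auto')

# a non-aggregative quantifier (like MyQuantifier above) processes every sample from scratch
mae_slow = qp.evaluation.evaluate(my_quantifier, protocol=UPP(test), error_metric='mae')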

View File

@ -15,7 +15,7 @@ https://lequa2022.github.io/index (the site of the competition)
https://ceur-ws.org/Vol-3180/paper-146.pdf (the overview paper)
"""
# there are 4 tasks (T1A, T1B, T2A, T2B)
# there are 4 tasks (T1A, T1B, T2A, T2B), let us simply consider T1A (binary quantification, vector form)
task = 'T1A'
# set the sample size in the environment. The sample size is task-dependent and can be consulted by doing:
@ -28,6 +28,7 @@ qp.environ['N_JOBS'] = -1
# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition)
# stored in a directory.
training, val_generator, test_generator = fetch_lequa2022(task=task)
Xtr, ytr = training.Xy
# define the quantifier
quantifier = EMQ(classifier=LogisticRegression())
@ -36,10 +37,10 @@ quantifier = EMQ(classifier=LogisticRegression())
param_grid = {
'classifier__C': np.logspace(-3, 3, 7), # classifier-dependent: inverse of regularization strength
'classifier__class_weight': ['balanced', None], # classifier-dependent: weights of each class
'recalib': ['bcts', 'platt', None] # quantifier-dependent: recalibration method (new in v0.1.7)
'calib': ['bcts', None] # quantifier-dependent: recalibration method (new in v0.1.7)
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(training)
quantifier = model_selection.fit(Xtr, ytr)
# evaluation
report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
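evaluation_report returns a per-sample report (a pandas DataFrame in current QuaPy); a typical way of summarizing it is sketched below, with the exact column layout being an assumption worth double-checking:

import pandas as pd

pd.set_option('display.expand_frame_repr', False)
print(report.head())                   # one row per test sample: true/estimated prevalences plus error scores
print(report.mean(numeric_only=True))  # average 'mae', 'mrae' and 'mkld' over all test samples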

View File

@ -27,6 +27,7 @@ qp.environ['N_JOBS'] = -1
# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition)
# stored in a directory.
training, val_generator, test_generator = fetch_lequa2024(task=task)
Xtr, ytr = training.Xy
# define the quantifier
quantifier = KDEyML(classifier=LogisticRegression())
@ -38,7 +39,7 @@ param_grid = {
'bandwidth': np.linspace(0.01, 0.2, 20) # quantifier-dependent: bandwidth of the kernel
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(training)
quantifier = model_selection.fit(Xtr, ytr)
# evaluation
report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae'], verbose=True)

View File

@ -58,11 +58,11 @@ param_grid = {
}
print('starting model selection')
model_selection = GridSearchQ(quantifier, param_grid, protocol=UPP(val), verbose=True, refit=False)
quantifier = model_selection.fit(train_modsel).best_model()
quantifier = model_selection.fit(*train_modsel.Xy).best_model()
print('training on the whole training set')
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
quantifier.fit(train)
quantifier.fit(*train.Xy)
# evaluation
mae = qp.evaluation.evaluate(quantifier, protocol=UPP(test), error_metric='mae')

View File

@ -792,7 +792,7 @@ def _array_replace(arr, repl={"yes": 1, "no": 0}):
def fetch_lequa2022(task, data_home=None):
"""
Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.
Loads the official datasets provided for the `LeQua 2022 <https://lequa2022.github.io/index>`_ competition.
In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.
Tasks T1A and T2A are binary sentiment quantification problems, while T2A and T2B are multiclass quantification
@ -812,7 +812,7 @@ def fetch_lequa2022(task, data_home=None):
~/quapy_data/ directory)
:return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of
:class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of
:class:`quapy.data._lequa2022.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
:class:`quapy.data._lequa.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
that return a series of samples stored in a directory which are labelled by prevalence.
"""
@ -834,7 +834,9 @@ def fetch_lequa2022(task, data_home=None):
tmp_path = join(lequa_dir, task + '_tmp.zip')
download_file_if_not_exists(url, tmp_path)
with zipfile.ZipFile(tmp_path) as file:
print(f'Unzipping {tmp_path}...', end='')
file.extractall(unzipped_path)
print(f'[done]')
os.remove(tmp_path)
if not os.path.exists(join(lequa_dir, task)):
@ -862,6 +864,35 @@ def fetch_lequa2022(task, data_home=None):
def fetch_lequa2024(task, data_home=None, merge_T3=False):
"""
Loads the official datasets provided for the `LeQua 2024 <https://lequa2024.github.io/index>`_ competition.
LeQua 2024 defines four tasks (T1, T2, T3, T4) related to the problem of quantification;
all tasks are affected by some type of dataset shift. Tasks T1 and T2 are akin to tasks T1A and T1B of LeQua 2022,
while T3 and T4 are new tasks introduced in LeQua 2024.
- Task T1 evaluates binary quantifiers under prior probability shift (akin to T1A of LeQua 2022).
- Task T2 evaluates single-label multi-class quantifiers (for n > 2 classes) under prior probability shift (akin to T1B of LeQua 2022).
- Task T3 evaluates ordinal quantifiers, where the classes are totally ordered.
- Task T4 also evaluates binary quantifiers, but under some mix of covariate shift and prior probability shift.
For a broader discussion, we refer to the `online official documentation <https://lequa2024.github.io/tasks/>`_
The datasets are downloaded only once, and stored locally for future reuse.
See `4b.lequa2024_experiments.py` provided in the example folder, which can serve as a guide on how to use these
datasets.
:param task: a string representing the task name; valid ones are T1, T2, T3, and T4
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)
:param merge_T3: bool, if False (default), returns a generator of training collections, corresponding to natural
groups of reviews; if True, returns one single :class:`quapy.data.base.LabelledCollection` representing the
entire training set, as a concatenation of all the training collections
:return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of
:class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of
:class:`quapy.data._lequa.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
that return a series of samples stored in a directory which are labelled by prevalence.
"""
from quapy.data._lequa import load_vector_documents_2024, SamplesFromDir, LabelledCollectionsFromDir
@ -904,11 +935,7 @@ def fetch_lequa2024(task, data_home=None, merge_T3=False):
test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt')
test_gen = SamplesFromDir(test_samples_path, test_true_prev_path, load_fn=load_fn)
if task != 'T3':
tr_path = join(lequa_dir, task, 'public', 'training_data.txt')
train = LabelledCollection.load(tr_path, loader_func=load_fn)
return train, val_gen, test_gen
else:
if task == 'T3':
training_samples_path = join(lequa_dir, task, 'public', 'training_samples')
training_true_prev_path = join(lequa_dir, task, 'public', 'training_prevalences.txt')
train_gen = LabelledCollectionsFromDir(training_samples_path, training_true_prev_path, load_fn=load_fn)
@ -917,7 +944,10 @@ def fetch_lequa2024(task, data_home=None, merge_T3=False):
return train, val_gen, test_gen
else:
return train_gen, val_gen, test_gen
else:
tr_path = join(lequa_dir, task, 'public', 'training_data.txt')
train = LabelledCollection.load(tr_path, loader_func=load_fn)
return train, val_gen, test_gen
def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None):
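A hedged usage sketch of the two return modes documented above for fetch_lequa2024 (T3 vs. the remaining tasks); the snippet is illustrative and not part of the repository:

import quapy as qp

# T1, T2, T4 (and T3 with merge_T3=True): train is a single LabelledCollection
train, val_gen, test_gen = qp.datasets.fetch_lequa2024(task='T1')
X, y = train.Xy

# T3 with merge_T3=False (the default): the training data arrives as a generator of
# LabelledCollection objects, one per natural group of reviews
train_groups, val_gen, test_gen = qp.datasets.fetch_lequa2024(task='T3')
for group in train_groups:
    Xg, yg = group.Xy   # inspect or concatenate the groups as needed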

View File

@ -784,6 +784,8 @@ class EMQ(AggregativeSoftQuantifier):
def _fit_calibration(self, calibrator, P, y):
n_classes = len(self.classes_)
print(y, 'Y')
print(y.dtype, 'DTYPE')
if not np.issubdtype(y.dtype, np.number):
y = np.searchsorted(self.classes_, y)
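For context, the np.searchsorted call above maps non-numeric labels onto their integer positions in the (sorted) self.classes_ array; a standalone illustration with made-up labels:

import numpy as np

classes_ = np.array(['neg', 'pos'])    # class labels, kept sorted as sklearn stores them
y = np.array(['pos', 'neg', 'pos'])
print(np.searchsorted(classes_, y))    # -> [1 0 1], i.e. labels encoded as class indices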

View File

@ -372,7 +372,7 @@ def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfol
total_prev = np.zeros(shape=data.n_classes)
for train, test in data.kFCV(nfolds=nfolds, random_state=random_state):
quantifier.fit(train)
quantifier.fit(*train.Xy)
fold_prev = quantifier.predict(test.X)
rel_size = 1. * len(test) / len(data)
total_prev += fold_prev*rel_size

View File

@ -52,18 +52,12 @@ class TestDatasets(unittest.TestCase):
def test_UCIBinaryDataset(self):
for dataset_name in UCI_BINARY_DATASETS:
try:
print(f'loading dataset {dataset_name}...', end='')
dataset = fetch_UCIBinaryDataset(dataset_name)
dataset.stats()
dataset.reduce()
print(f'[done]')
self._check_dataset(dataset)
except FileNotFoundError as fnfe:
if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
'If this is the first time you attempt to load this dataset') > 0:
print('The pageblocks.5 dataset requires some hand processing to be usable; skipping this test.')
continue
def test_UCIMultiDataset(self):
for dataset_name in UCI_MULTICLASS_DATASETS:
@ -83,7 +77,7 @@ class TestDatasets(unittest.TestCase):
return
for dataset_name in LEQUA2022_VECTOR_TASKS:
print(f'loading dataset {dataset_name}...', end='')
print(f'LeQua2022: loading dataset {dataset_name}...', end='')
train, gen_val, gen_test = fetch_lequa2022(dataset_name)
train.stats()
n_classes = train.n_classes
@ -94,7 +88,7 @@ class TestDatasets(unittest.TestCase):
self._check_samples(gen_test, q, max_samples_test=5)
for dataset_name in LEQUA2022_TEXT_TASKS:
print(f'loading dataset {dataset_name}...', end='')
print(f'LeQua2022: loading dataset {dataset_name}...', end='')
train, gen_val, gen_test = fetch_lequa2022(dataset_name)
train.stats()
n_classes = train.n_classes
@ -106,6 +100,23 @@ class TestDatasets(unittest.TestCase):
self._check_samples(gen_val, q, max_samples_test=5, vectorizer=tfidf)
self._check_samples(gen_test, q, max_samples_test=5, vectorizer=tfidf)
def test_lequa2024(self):
if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
print("omitting test_lequa2024 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
return
for task in LEQUA2024_TASKS:
print(f'LeQua2024: loading task {task}...', end='')
train, gen_val, gen_test = fetch_lequa2024(task, merge_T3=True)
train.stats()
n_classes = train.n_classes
train = train.sampling(100, *F.uniform_prevalence(n_classes))
q = self.new_quantifier()
q.fit(*train.Xy)
self._check_samples(gen_val, q, max_samples_test=5)
self._check_samples(gen_test, q, max_samples_test=5)
def test_IFCB(self):
if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")

View File

@ -29,7 +29,7 @@ class EvalTestCase(unittest.TestCase):
time.sleep(1)
return super().predict_proba(X)
emq = EMQ(SlowLR()).fit(train)
emq = EMQ(SlowLR()).fit(*train.Xy)
tinit = time()
score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True, aggr_speedup='force')
@ -44,11 +44,11 @@ class EvalTestCase(unittest.TestCase):
def predict(self, X):
return self.emq.predict(X)
def fit(self, data):
self.emq.fit(data)
def fit(self, X, y):
self.emq.fit(X, y)
return self
emq = NonAggregativeEMQ(SlowLR()).fit(train)
emq = NonAggregativeEMQ(SlowLR()).fit(*train.Xy)
tinit = time()
score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True)
@ -69,7 +69,7 @@ class EvalTestCase(unittest.TestCase):
protocol = qp.protocol.APP(test, random_state=0)
q = PCC(LogisticRegression()).fit(train)
q = PCC(LogisticRegression()).fit(*train.Xy)
single_errors = list(QUANTIFICATION_ERROR_SINGLE_NAMES)
averaged_errors = ['m'+e for e in single_errors]

View File

@ -10,15 +10,17 @@ from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_ME
from quapy.functional import check_prevalence_vector
# a random selection of composed methods to test the qunfold integration
from quapy.method.composable import check_compatible_qunfold_version
from quapy.method.composable import (
ComposableQuantifier,
LeastSquaresLoss,
HellingerSurrogateLoss,
ClassTransformer,
HistogramTransformer,
CVClassifier,
check_compatible_qunfold_version
CVClassifier
)
COMPOSABLE_METHODS = [
ComposableQuantifier( # ACC
LeastSquaresLoss(),
@ -70,7 +72,6 @@ class TestMethods(unittest.TestCase):
self.assertTrue(check_prevalence_vector(estim_prevalences))
def test_ensembles(self):
qp.environ['SAMPLE_SIZE'] = 10
base_quantifier = ACC(LogisticRegression())

View File

@ -26,7 +26,7 @@ class ModselTestCase(unittest.TestCase):
app = APP(validation, sample_size=100, random_state=1)
q = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, verbose=True, n_jobs=-1
).fit(training)
).fit(*training.Xy)
print('best params', q.best_params_)
print('best score', q.best_score_)
@ -51,7 +51,7 @@ class ModselTestCase(unittest.TestCase):
tinit = time.time()
modsel = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=1, verbose=True
).fit(training)
).fit(*training.Xy)
tend_seq = time.time()-tinit
best_c_seq = modsel.best_params_['classifier__C']
print(f'[done] took {tend_seq:.2f}s best C = {best_c_seq}')
@ -60,7 +60,7 @@ class ModselTestCase(unittest.TestCase):
tinit = time.time()
modsel = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=-1, verbose=True
).fit(training)
).fit(*training.Xy)
tend_par = time.time() - tinit
best_c_par = modsel.best_params_['classifier__C']
print(f'[done] took {tend_par:.2f}s best C = {best_c_par}')
@ -90,7 +90,7 @@ class ModselTestCase(unittest.TestCase):
q, param_grid, protocol=app, timeout=3, n_jobs=-1, verbose=True, raise_errors=True
)
with self.assertRaises(TimeoutError):
modsel.fit(training)
modsel.fit(*training.Xy)
print('Expecting ValueError to be raised')
modsel = GridSearchQ(
@ -99,7 +99,7 @@ class ModselTestCase(unittest.TestCase):
with self.assertRaises(ValueError):
# this exception is not raised because of the timeout, but because no combination of hyperparams
# succeeded (in this case, a ValueError is raised, regardless of "raise_errors")
modsel.fit(training)
modsel.fit(*training.Xy)
if __name__ == '__main__':

View File

@ -71,7 +71,7 @@ class TestProtocols(unittest.TestCase):
# surprisingly enough, for some n_prevalences the test fails, even though
# everything is correct. The problem is that in function APP.prevalence_grid()
# there is sometimes one rounding error that gets cumulated and
# surpasses 1.0 (by a very small float value, 0.0000000000002 or sthe like)
# surpasses 1.0 (by a very small float value, 0.0000000000002 or the like)
# so these tuples are mistakenly removed... I have tried with np.isclose, and
# other workarounds, but it eventually happens that there is some negative probability
# in the sampling function...
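The kind of cumulated rounding the comment describes is easy to reproduce with plain floats (values chosen only for illustration, unrelated to the actual grid):

# three perfectly valid prevalence values whose float sum overshoots the exact total
print(0.1 + 0.2 + 0.3)          # 0.6000000000000001, not 0.6
print(0.1 + 0.2 + 0.3 <= 0.6)   # False, which is how otherwise-valid grid tuples get discarded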

View File

@ -13,17 +13,18 @@ class TestReplicability(unittest.TestCase):
def test_prediction_replicability(self):
dataset = qp.datasets.fetch_UCIBinaryDataset('yeast')
train, test = dataset.train_test
with qp.util.temp_seed(0):
lr = LogisticRegression(random_state=0, max_iter=10000)
pacc = PACC(lr)
prev = pacc.fit(dataset.training).predict(dataset.test.X)
prev = pacc.fit(*train.Xy).predict(test.X)
str_prev1 = strprev(prev, prec=5)
with qp.util.temp_seed(0):
lr = LogisticRegression(random_state=0, max_iter=10000)
pacc = PACC(lr)
prev2 = pacc.fit(dataset.training).predict(dataset.test.X)
prev2 = pacc.fit(*train.Xy).predict(test.X)
str_prev2 = strprev(prev2, prec=5)
self.assertEqual(str_prev1, str_prev2)
@ -83,18 +84,18 @@ class TestReplicability(unittest.TestCase):
test = test.sampling(500, *[0.1, 0.0, 0.1, 0.1, 0.2, 0.5, 0.0])
with qp.util.temp_seed(10):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
pacc = PACC(LogisticRegression(), val_split=.5, n_jobs=2)
pacc.fit(*train.Xy)
prev1 = F.strprev(pacc.predict(test.instances))
with qp.util.temp_seed(0):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
pacc = PACC(LogisticRegression(), val_split=.5, n_jobs=2)
pacc.fit(*train.Xy)
prev2 = F.strprev(pacc.predict(test.instances))
with qp.util.temp_seed(0):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
pacc = PACC(LogisticRegression(), val_split=.5, n_jobs=2)
pacc.fit(*train.Xy)
prev3 = F.strprev(pacc.predict(test.instances))
print(prev1)