From 25f1cc29a3f5d4d79eda20e08e3eeb5ea0a9caf0 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Sun, 12 Nov 2023 13:04:19 +0100 Subject: [PATCH 01/22] refactoring aggregative quantifiers --- quapy/classification/calibration.py | 4 +- quapy/method/aggregative.py | 156 +++++++++++++++++++++------- 2 files changed, 119 insertions(+), 41 deletions(-) diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py index a3f1543..3763b64 100644 --- a/quapy/classification/calibration.py +++ b/quapy/classification/calibration.py @@ -59,7 +59,7 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi elif isinstance(k, float): if not (0 < k < 1): raise ValueError('wrong value for val_split: the proportion of validation documents must be in (0,1)') - return self.fit_cv(X, y) + return self.fit_tr_val(X, y) def fit_cv(self, X, y): """ @@ -94,7 +94,7 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi self.classifier.fit(Xtr, ytr) posteriors = self.classifier.predict_proba(Xva) nclasses = len(np.unique(yva)) - self.calibrator = self.calibrator(posteriors, np.eye(nclasses)[yva], posterior_supplied=True) + self.calibration_function = self.calibrator(posteriors, np.eye(nclasses)[yva], posterior_supplied=True) return self def predict(self, X): diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 232a92b..a056543 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1,4 +1,4 @@ -from abc import abstractmethod +from abc import ABC, abstractmethod from copy import deepcopy from typing import Callable, Union import numpy as np @@ -19,25 +19,55 @@ from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric # Abstract classes # ------------------------------------ -class AggregativeQuantifier(BaseQuantifier): +class AggregativeQuantifier(ABC, BaseQuantifier): """ Abstract class for quantification methods that base their estimations on the aggregation of classification - results. Aggregative Quantifiers thus implement a :meth:`classify` method and maintain a :attr:`classifier` - attribute. Subclasses of this abstract class must implement the method :meth:`aggregate` which computes the - aggregation of label predictions. The method :meth:`quantify` comes with a default implementation based on - :meth:`classify` and :meth:`aggregate`. + results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions + and aggregating them. For this reason, the training phase is implemented by :meth:`classification_fit` followed + by :meth:`aggregation_fit`, while the testing phase is implemented by :meth:`classify` followed by + :meth:`aggregate`. Subclasses of this abstract class must provide implementations for these methods. + Aggregative quantifiers also maintain a :attr:`classifier` attribute. + + The method :meth:`fit` comes with a default implementation based on :meth:`classification_fit` + and :meth:`aggregation_fit`. + + The method :meth:`quantify` comes with a default implementation based on :meth:`classify` + and :meth:`aggregate`. """ - @abstractmethod def fit(self, data: LabelledCollection, fit_classifier=True): """ - Trains the aggregative quantifier + Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function. :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data - :param fit_classifier: whether or not to train the learner (default is True). 
Set to False if the + :param fit_classifier: whether to train the learner (default is True). Set to False if the learner has been trained outside the quantifier. :return: self """ + classif_predictions = self.classification_fit(data, fit_classifier) + self.aggregation_fit(classif_predictions) + return self + + @abstractmethod + def classification_fit(self, data: LabelledCollection, fit_classifier=True): + """ + Trains the classifier if requested (`fit_classifier=True`) and generate the necessary predictions to + train the aggregation function. + + :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data + :param fit_classifier: whether to train the learner (default is True). Set to False if the + learner has been trained outside the quantifier. + """ + ... + + @abstractmethod + def aggregation_fit(self, classif_predictions): + """ + Trains the aggregation function. + + :param classif_predictions: typically an `ndarray` containing the label predictions, but could be a + tuple containing any information needed for fitting the aggregation function + """ ... @property @@ -101,7 +131,7 @@ class AggregativeQuantifier(BaseQuantifier): return self.classifier.classes_ -class AggregativeProbabilisticQuantifier(AggregativeQuantifier): +class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): """ Abstract class for quantification methods that base their estimations on the aggregation of posterior probabilities as returned by a probabilistic classifier. Aggregative Probabilistic Quantifiers thus extend Aggregative @@ -227,9 +257,9 @@ class CC(AggregativeQuantifier): def __init__(self, classifier: BaseEstimator): self.classifier = classifier - def fit(self, data: LabelledCollection, fit_classifier=True): + def classification_fit(self, data: LabelledCollection, fit_classifier=True): """ - Trains the Classify & Count method unless `fit_classifier` is False, in which case, the classifier is assumed to + Trains the classifier unless `fit_classifier` is False, in which case, the classifier is assumed to be already fit and there is nothing else to do. :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data @@ -237,7 +267,15 @@ class CC(AggregativeQuantifier): :return: self """ self.classifier, _ = _training_helper(self.classifier, data, fit_classifier) - return self + return None + + def aggregation_fit(self, classif_predictions: np.ndarray): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + pass def aggregate(self, classif_predictions: np.ndarray): """ @@ -269,9 +307,10 @@ class ACC(AggregativeQuantifier): self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): + def classification_fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ - Trains a ACC quantifier. + Trains the classifier and generates, optionally through a cross-validation procedure, the predictions + needed for estimating the misclassification rates matrix. 
:param data: the training set :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit) @@ -281,18 +320,24 @@ class ACC(AggregativeQuantifier): cross validation to estimate the parameters :return: self """ - if val_split is None: val_split = self.val_split - self.classifier, y, y_, classes, class_count = cross_generate_predictions( + self.classifier, true_labels, pred_labels, classes, class_count = cross_generate_predictions( data, self.classifier, val_split, probabilistic=False, fit_classifier=fit_classifier, n_jobs=self.n_jobs ) - self.cc = CC(self.classifier) - self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, y, y_) + return (true_labels, pred_labels) - return self + def aggregation_fit(self, classif_predictions): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + true_labels, pred_labels = classif_predictions + self.cc = CC(self.classifier) + self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, pred_labels) @classmethod def getPteCondEstim(cls, classes, y, y_): @@ -348,10 +393,18 @@ class PCC(AggregativeProbabilisticQuantifier): def __init__(self, classifier: BaseEstimator): self.classifier = classifier - def fit(self, data: LabelledCollection, fit_classifier=True): + def classification_fit(self, data: LabelledCollection, fit_classifier=True): self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True) return self + def aggregation_fit(self, classif_predictions: np.ndarray): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + pass + def aggregate(self, classif_posteriors): return F.prevalence_from_probabilities(classif_posteriors, binarize=False) @@ -376,30 +429,37 @@ class PACC(AggregativeProbabilisticQuantifier): self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): + def classification_fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ - Trains a PACC quantifier. + Trains the soft classifier and generates, optionally through a cross-validation procedure, the posterior + probabilities needed for estimating the misclassification rates matrix. 
:param data: the training set :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit) :param val_split: either a float in (0,1) indicating the proportion of training instances to use for - validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection - indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV - to estimate the parameters + validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection + indicating the validation set itself, or an int indicating the number `k` of folds to be used in `k`-fold + cross validation to estimate the parameters :return: self """ - if val_split is None: val_split = self.val_split - self.classifier, y, y_, classes, class_count = cross_generate_predictions( + self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs ) - self.pcc = PCC(self.classifier) - self.Pte_cond_estim_ = self.getPteCondEstim(classes, y, y_) + return (true_labels, posteriors) - return self + def aggregation_fit(self, classif_predictions): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + true_labels, posteriors = classif_predictions + self.pcc = PCC(self.classifier) + self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, posteriors) @classmethod def getPteCondEstim(cls, classes, y, y_): @@ -449,7 +509,13 @@ class EMQ(AggregativeProbabilisticQuantifier): self.exact_train_prev = exact_train_prev self.recalib = recalib - def fit(self, data: LabelledCollection, fit_classifier=True): + def classification_fit(self, data: LabelledCollection, fit_classifier=True): + self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( + data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs + ) + + return (true_labels, posteriors) + if self.recalib is not None: if self.recalib == 'nbvs': self.classifier = NBVSCalibration(self.non_calibrated) @@ -477,7 +543,15 @@ class EMQ(AggregativeProbabilisticQuantifier): nfolds=3, random_state=0 ) - return self + return None + + def aggregation_fit(self, classif_predictions: np.ndarray): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + pass def aggregate(self, classif_posteriors, epsilon=EPSILON): priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon) @@ -768,7 +842,7 @@ class DMy(AggregativeProbabilisticQuantifier): distributions = np.cumsum(distributions, axis=1) return distributions - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + def classification_fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): """ Trains the classifier (if requested) and generates the validation distributions out of the training data. 
The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of @@ -787,15 +861,19 @@ class DMy(AggregativeProbabilisticQuantifier): if val_split is None: val_split = self.val_split - self.classifier, y, posteriors, classes, class_count = cross_generate_predictions( + self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs ) - self.validation_distribution = np.asarray( - [self.__get_distributions(posteriors[y==cat]) for cat in range(data.n_classes)] - ) + return (true_labels, posteriors) - return self + def aggregation_fit(self, classif_predictions): + true_labels, posteriors = classif_predictions + n_classes = len(self.classifier.classes_) + + self.validation_distribution = np.asarray( + [self.__get_distributions(posteriors[true_labels == cat]) for cat in range(n_classes)] + ) def aggregate(self, posteriors: np.ndarray): """ From 0a6185d908d7828bc62503ce26ec4caeae936409 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Sun, 12 Nov 2023 14:45:03 +0100 Subject: [PATCH 02/22] refactoring the aggregative quantifiers --- quapy/method/aggregative.py | 257 ++++++++++++------------------------ quapy/method/base.py | 2 +- 2 files changed, 85 insertions(+), 174 deletions(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index a056543..8633039 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -44,12 +44,11 @@ class AggregativeQuantifier(ABC, BaseQuantifier): learner has been trained outside the quantifier. :return: self """ - classif_predictions = self.classification_fit(data, fit_classifier) + classif_predictions = self.classifier_fit_predict(data, fit_classifier) self.aggregation_fit(classif_predictions) return self - @abstractmethod - def classification_fit(self, data: LabelledCollection, fit_classifier=True): + def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True, predict_on=None): """ Trains the classifier if requested (`fit_classifier=True`) and generate the necessary predictions to train the aggregation function. @@ -57,11 +56,62 @@ class AggregativeQuantifier(ABC, BaseQuantifier): :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data :param fit_classifier: whether to train the learner (default is True). Set to False if the learner has been trained outside the quantifier. + :param predict_on: specifies the set on which predictions need to be issued. This parameter can + be specified as None (default) to indicate no prediction is needed; a float in (0, 1) to + indicate the proportion of instances to be used for predictions (the remainder is used for + training); an integer >1 to indicate that the predictions must be generated via k-fold + cross-validation, using this integer as k; or the data sample itself on which to generate + the predictions. """ - ... + assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean' + + self.__check_classifier() + + if predict_on is None: + if fit_classifier: + self.classifier.fit(*data.Xy) + predictions = None + + elif isinstance(predict_on, float): + if fit_classifier: + if not (0. 
< predict_on < 1.): + raise ValueError(f'proportion {predict_on=} out of range, must be in (0,1)') + train, val = data.split_stratified(train_prop=(1 - predict_on)) + self.classifier.fit(*train.Xy) + predictions = (self.classify(val.X), val.y) + else: + raise ValueError(f'wrong type for predict_on: since fit_classifier=False, ' + f'the set on which predictions have to be issued must be ' + f'explicitly indicated') + + elif isinstance(predict_on, LabelledCollection): + if fit_classifier: + self.classifier.fit(*data.Xy) + predictions = (self.classify(predict_on.X), predict_on.y) + + elif isinstance(predict_on, int): + if fit_classifier: + if not predict_on > 1: + raise ValueError(f'invalid value {predict_on} in fit. ' + f'Specify a integer >1 for kFCV estimation.') + predictions = cross_val_predict( + classifier, *data.Xy, cv=predict_on, n_jobs=self.n_jobs, method=self.__classifier_method()) + self.classifier.fit(*data.Xy) + else: + raise ValueError(f'wrong type for predict_on: since fit_classifier=False, ' + f'the set on which predictions have to be issued must be ' + f'explicitly indicated') + + else: + raise ValueError( + f'error: param "predict_on" ({type(predict_on)}) not understood; ' + f'use either a float indicating the split proportion, or a ' + f'tuple (X,y) indicating the validation partition') + + return predictions @abstractmethod - def aggregation_fit(self, classif_predictions): + def aggregation_fit(self, classif_predictions: LabelledCollection): """ Trains the aggregation function. @@ -99,6 +149,13 @@ class AggregativeQuantifier(ABC, BaseQuantifier): """ return self.classifier.predict(instances) + @property + def __classifier_method(self): + return 'predict' + + def __check_classifier(self, adapt_if_necessary=False): + assert hasattr(self.classifier, 'predict') + def quantify(self, instances): """ Generate class prevalence estimates for the sample's instances by aggregating the label predictions generated @@ -142,106 +199,20 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): def classify(self, instances): return self.classifier.predict_proba(instances) + @property + def __classifier_method(self): + return 'predict_proba' -# Helper -# ------------------------------------ -def _ensure_probabilistic(classifier): - if not hasattr(classifier, 'predict_proba'): - print(f'The learner {classifier.__class__.__name__} does not seem to be probabilistic. ' - f'The learner will be calibrated.') - classifier = CalibratedClassifierCV(classifier, cv=5) - return classifier - - -def _training_helper(classifier, - data: LabelledCollection, - fit_classifier: bool = True, - ensure_probabilistic=False, - val_split: Union[LabelledCollection, float] = None): - """ - Training procedure common to all Aggregative Quantifiers. - - :param classifier: the learner to be fit - :param data: the data on which to fit the learner. If requested, the data will be split before fitting the learner. 
- :param fit_classifier: whether or not to fit the learner (if False, then bypasses any action) - :param ensure_probabilistic: if True, guarantees that the resulting classifier implements predict_proba (if the - learner is not probabilistic, then a CalibratedCV instance of it is trained) - :param val_split: if specified as a float, indicates the proportion of training instances that will define the - validation split (e.g., 0.3 for using 30% of the training set as validation data); if specified as a - LabelledCollection, represents the validation split itself - :return: the learner trained on the training set, and the unused data (a _LabelledCollection_ if train_val_split>0 - or None otherwise) to be used as a validation set for any subsequent parameter fitting - """ - if fit_classifier: - if ensure_probabilistic: - classifier = _ensure_probabilistic(classifier) - if val_split is not None: - if isinstance(val_split, float): - if not (0 < val_split < 1): - raise ValueError(f'train/val split {val_split} out of range, must be in (0,1)') - train, unused = data.split_stratified(train_prop=1 - val_split) - elif isinstance(val_split, LabelledCollection): - train = data - unused = val_split + def __check_classifier(self, adapt_if_necessary=False): + if not hasattr(self.classifier, 'predict_proba'): + if adapt_if_necessary: + print(f'warning: The learner {self.classifier.__class__.__name__} does not seem to be ' + f'probabilistic. The learner will be calibrated (using CalibratedClassifierCV).') + self.classifier = CalibratedClassifierCV(self.classifier, cv=5) else: - raise ValueError( - f'param "val_split" ({type(val_split)}) not understood; use either a float indicating the split ' - 'proportion, or a LabelledCollection indicating the validation split') - else: - train, unused = data, None - - if isinstance(classifier, BaseQuantifier): - classifier.fit(train) - else: - classifier.fit(*train.Xy) - else: - if ensure_probabilistic: - if not hasattr(classifier, 'predict_proba'): - raise AssertionError('error: the learner cannot be calibrated since fit_classifier is set to False') - unused = None - if isinstance(val_split, LabelledCollection): - unused = val_split - - return classifier, unused - - -def cross_generate_predictions( - data, - classifier, - val_split, - probabilistic, - fit_classifier, - n_jobs -): - - n_jobs = qp._get_njobs(n_jobs) - - if isinstance(val_split, int): - assert fit_classifier == True, \ - 'the parameters for the adjustment cannot be estimated with kFCV with fit_classifier=False' - - if probabilistic: - classifier = _ensure_probabilistic(classifier) - predict = 'predict_proba' - else: - predict = 'predict' - y_pred = cross_val_predict(classifier, *data.Xy, cv=val_split, n_jobs=n_jobs, method=predict) - class_count = data.counts() - - # fit the learner on all data - classifier.fit(*data.Xy) - y = data.y - classes = data.classes_ - else: - classifier, val_data = _training_helper( - classifier, data, fit_classifier, ensure_probabilistic=probabilistic, val_split=val_split - ) - y_pred = classifier.predict_proba(val_data.instances) if probabilistic else classifier.predict(val_data.instances) - y = val_data.labels - classes = val_data.classes_ - class_count = val_data.counts() - - return classifier, y, y_pred, classes, class_count + raise AssertionError(f'error: The learner {self.classifier.__class__.__name__} does not ' + f'seem to be probabilistic. 
The learner cannot be calibrated since ' + f'fit_classifier is set to False') # Methods @@ -257,19 +228,7 @@ class CC(AggregativeQuantifier): def __init__(self, classifier: BaseEstimator): self.classifier = classifier - def classification_fit(self, data: LabelledCollection, fit_classifier=True): - """ - Trains the classifier unless `fit_classifier` is False, in which case, the classifier is assumed to - be already fit and there is nothing else to do. - - :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data - :param fit_classifier: if False, the classifier is assumed to be fit - :return: self - """ - self.classifier, _ = _training_helper(self.classifier, data, fit_classifier) - return None - - def aggregation_fit(self, classif_predictions: np.ndarray): + def aggregation_fit(self, classif_predictions: LabelledCollection): """ Nothing to do here! @@ -307,33 +266,11 @@ class ACC(AggregativeQuantifier): self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def classification_fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): + def aggregation_fit(self, classif_predictions: LabelledCollection): """ - Trains the classifier and generates, optionally through a cross-validation procedure, the predictions - needed for estimating the misclassification rates matrix. + Estimates the misclassification rates. - :param data: the training set - :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit) - :param val_split: either a float in (0,1) indicating the proportion of training instances to use for - validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection - indicating the validation set itself, or an int indicating the number `k` of folds to be used in `k`-fold - cross validation to estimate the parameters - :return: self - """ - if val_split is None: - val_split = self.val_split - - self.classifier, true_labels, pred_labels, classes, class_count = cross_generate_predictions( - data, self.classifier, val_split, probabilistic=False, fit_classifier=fit_classifier, n_jobs=self.n_jobs - ) - - return (true_labels, pred_labels) - - def aggregation_fit(self, classif_predictions): - """ - Nothing to do here! - - :param classif_predictions: this is actually None + :param classif_predictions: classifier predictions with true labels """ true_labels, pred_labels = classif_predictions self.cc = CC(self.classifier) @@ -393,11 +330,7 @@ class PCC(AggregativeProbabilisticQuantifier): def __init__(self, classifier: BaseEstimator): self.classifier = classifier - def classification_fit(self, data: LabelledCollection, fit_classifier=True): - self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True) - return self - - def aggregation_fit(self, classif_predictions: np.ndarray): + def aggregation_fit(self, classif_predictions: LabelledCollection): """ Nothing to do here! 
@@ -429,33 +362,11 @@ class PACC(AggregativeProbabilisticQuantifier): self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def classification_fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): + def aggregation_fit(self, classif_predictions: LabelledCollection): """ - Trains the soft classifier and generates, optionally through a cross-validation procedure, the posterior - probabilities needed for estimating the misclassification rates matrix. + Estimates the misclassification rates - :param data: the training set - :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit) - :param val_split: either a float in (0,1) indicating the proportion of training instances to use for - validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection - indicating the validation set itself, or an int indicating the number `k` of folds to be used in `k`-fold - cross validation to estimate the parameters - :return: self - """ - if val_split is None: - val_split = self.val_split - - self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( - data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs - ) - - return (true_labels, posteriors) - - def aggregation_fit(self, classif_predictions): - """ - Nothing to do here! - - :param classif_predictions: this is actually None + :param classif_predictions: classifier predictions with true labels """ true_labels, posteriors = classif_predictions self.pcc = PCC(self.classifier) @@ -509,7 +420,7 @@ class EMQ(AggregativeProbabilisticQuantifier): self.exact_train_prev = exact_train_prev self.recalib = recalib - def classification_fit(self, data: LabelledCollection, fit_classifier=True): + def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True): self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs ) @@ -842,7 +753,7 @@ class DMy(AggregativeProbabilisticQuantifier): distributions = np.cumsum(distributions, axis=1) return distributions - def classification_fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): """ Trains the classifier (if requested) and generates the validation distributions out of the training data. The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of diff --git a/quapy/method/base.py b/quapy/method/base.py index e0363f1..f34acf6 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -63,7 +63,7 @@ def newOneVsAll(binary_quantifier, n_jobs=None): return OneVsAllGeneric(binary_quantifier, n_jobs) -class OneVsAllGeneric(OneVsAll,BaseQuantifier): +class OneVsAllGeneric(OneVsAll, BaseQuantifier): """ Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1. 
From 44bfc7921feb58d47a9cd5e3999ff52808b6bea6 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 13 Nov 2023 09:57:34 +0100 Subject: [PATCH 03/22] refactoring agg quantifiers --- quapy/method/aggregative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 8633039..4cbe728 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -65,7 +65,7 @@ class AggregativeQuantifier(ABC, BaseQuantifier): """ assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean' - self.__check_classifier() + self.__check_classifier(adapt_if_necessary=(self.__classifier_method=='predict_proba')) if predict_on is None: if fit_classifier: From c9c4511c0df2d0f5e3b75d31b002936319f99ee9 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 13 Nov 2023 12:42:57 +0100 Subject: [PATCH 04/22] hierarchical class problem? --- quapy/method/aggregative.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 4cbe728..ad96e53 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -7,6 +7,8 @@ from sklearn.base import BaseEstimator from sklearn.calibration import CalibratedClassifierCV from sklearn.metrics import confusion_matrix from sklearn.model_selection import cross_val_predict +from typing_extensions import override + import quapy as qp import quapy.functional as F from functional import get_divergence @@ -19,7 +21,7 @@ from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric # Abstract classes # ------------------------------------ -class AggregativeQuantifier(ABC, BaseQuantifier): +class AggregativeQuantifier(BaseQuantifier, ABC): """ Abstract class for quantification methods that base their estimations on the aggregation of classification results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions @@ -65,7 +67,8 @@ class AggregativeQuantifier(ABC, BaseQuantifier): """ assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean' - self.__check_classifier(adapt_if_necessary=(self.__classifier_method=='predict_proba')) + print(type(self)) + self.__check_classifier(adapt_if_necessary=(self.__classifier_method()=='predict_proba')) if predict_on is None: if fit_classifier: @@ -149,12 +152,12 @@ class AggregativeQuantifier(ABC, BaseQuantifier): """ return self.classifier.predict(instances) - @property def __classifier_method(self): + print('using predict') return 'predict' def __check_classifier(self, adapt_if_necessary=False): - assert hasattr(self.classifier, 'predict') + assert hasattr(self.classifier, self.__classifier_method()) def quantify(self, instances): """ @@ -199,12 +202,12 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): def classify(self, instances): return self.classifier.predict_proba(instances) - @property def __classifier_method(self): + print('using predict_proba') return 'predict_proba' def __check_classifier(self, adapt_if_necessary=False): - if not hasattr(self.classifier, 'predict_proba'): + if not hasattr(self.classifier, self.__check_classifier()): if adapt_if_necessary: print(f'warning: The learner {self.classifier.__class__.__name__} does not seem to be ' f'probabilistic. 
The learner will be calibrated (using CalibratedClassifierCV).') From c2544b50ce4fabc6beff7f2ae039aac552dbe478 Mon Sep 17 00:00:00 2001 From: Andrea Esuli Date: Mon, 13 Nov 2023 14:45:34 +0100 Subject: [PATCH 05/22] Removed private method --- quapy/method/aggregative.py | 13 ++++++------- quapy/method/non_aggregative.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index ad96e53..1b8d1e6 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -7,11 +7,10 @@ from sklearn.base import BaseEstimator from sklearn.calibration import CalibratedClassifierCV from sklearn.metrics import confusion_matrix from sklearn.model_selection import cross_val_predict -from typing_extensions import override import quapy as qp import quapy.functional as F -from functional import get_divergence +from quapy.functional import get_divergence from quapy.classification.calibration import NBVSCalibration, BCTSCalibration, TSCalibration, VSCalibration from quapy.classification.svmperf import SVMperf from quapy.data import LabelledCollection @@ -68,7 +67,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean' print(type(self)) - self.__check_classifier(adapt_if_necessary=(self.__classifier_method()=='predict_proba')) + self.__check_classifier(adapt_if_necessary=(self._classifier_method()=='predict_proba')) if predict_on is None: if fit_classifier: @@ -98,7 +97,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): raise ValueError(f'invalid value {predict_on} in fit. ' f'Specify a integer >1 for kFCV estimation.') predictions = cross_val_predict( - classifier, *data.Xy, cv=predict_on, n_jobs=self.n_jobs, method=self.__classifier_method()) + classifier, *data.Xy, cv=predict_on, n_jobs=self.n_jobs, method=self._classifier_method()) self.classifier.fit(*data.Xy) else: raise ValueError(f'wrong type for predict_on: since fit_classifier=False, ' @@ -152,12 +151,12 @@ class AggregativeQuantifier(BaseQuantifier, ABC): """ return self.classifier.predict(instances) - def __classifier_method(self): + def _classifier_method(self): print('using predict') return 'predict' def __check_classifier(self, adapt_if_necessary=False): - assert hasattr(self.classifier, self.__classifier_method()) + assert hasattr(self.classifier, self._classifier_method()) def quantify(self, instances): """ @@ -202,7 +201,7 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): def classify(self, instances): return self.classifier.predict_proba(instances) - def __classifier_method(self): + def _classifier_method(self): print('using predict_proba') return 'predict_proba' diff --git a/quapy/method/non_aggregative.py b/quapy/method/non_aggregative.py index 87e59fb..6048bf6 100644 --- a/quapy/method/non_aggregative.py +++ b/quapy/method/non_aggregative.py @@ -1,7 +1,7 @@ from typing import Union, Callable import numpy as np -from functional import get_divergence +from quapy.functional import get_divergence from quapy.data import LabelledCollection from quapy.method.base import BaseQuantifier, BinaryQuantifier import quapy.functional as F From 173db83c28d84586bbb767b8313be229d8ee17e0 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 13 Nov 2023 17:03:24 +0100 Subject: [PATCH 06/22] solved __ issue in hierarchical classes --- quapy/method/aggregative.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 1b8d1e6..78ff40b 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -67,7 +67,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean' print(type(self)) - self.__check_classifier(adapt_if_necessary=(self._classifier_method()=='predict_proba')) + self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba')) if predict_on is None: if fit_classifier: @@ -155,7 +155,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): print('using predict') return 'predict' - def __check_classifier(self, adapt_if_necessary=False): + def _check_classifier(self, adapt_if_necessary=False): assert hasattr(self.classifier, self._classifier_method()) def quantify(self, instances): @@ -205,8 +205,8 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): print('using predict_proba') return 'predict_proba' - def __check_classifier(self, adapt_if_necessary=False): - if not hasattr(self.classifier, self.__check_classifier()): + def _check_classifier(self, adapt_if_necessary=False): + if not hasattr(self.classifier, self._classifier_method()): if adapt_if_necessary: print(f'warning: The learner {self.classifier.__class__.__name__} does not seem to be ' f'probabilistic. The learner will be calibrated (using CalibratedClassifierCV).') @@ -274,7 +274,7 @@ class ACC(AggregativeQuantifier): :param classif_predictions: classifier predictions with true labels """ - true_labels, pred_labels = classif_predictions + pred_labels, true_labels = classif_predictions.Xy self.cc = CC(self.classifier) self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, pred_labels) From e870d798b7150bc926ad88c296e49a89bfa1cb35 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 15 Nov 2023 10:55:13 +0100 Subject: [PATCH 07/22] fango --- examples/custom_quantifier.py | 4 +- quapy/classification/calibration.py | 3 +- quapy/method/aggregative.py | 534 +++++++++++++++++++++------- quapy/method/meta.py | 130 ++++++- quapy/method/neural.py | 2 +- quapy/model_selection.py | 51 ++- quapy/tests/test_hierarchy.py | 4 +- quapy/util.py | 7 +- 8 files changed, 571 insertions(+), 164 deletions(-) diff --git a/examples/custom_quantifier.py b/examples/custom_quantifier.py index 31a69cd..fa014de 100644 --- a/examples/custom_quantifier.py +++ b/examples/custom_quantifier.py @@ -2,7 +2,7 @@ import quapy as qp from quapy.data import LabelledCollection from quapy.method.base import BinaryQuantifier from quapy.model_selection import GridSearchQ -from quapy.method.aggregative import AggregativeProbabilisticQuantifier +from quapy.method.aggregative import AggregativeSoftQuantifier from quapy.protocol import APP import numpy as np from sklearn.linear_model import LogisticRegression @@ -15,7 +15,7 @@ from sklearn.linear_model import LogisticRegression # internal hyperparameter (let say, alpha) which is the decision threshold. Let's also assume the quantifier # is binary, for simplicity. 
-class MyQuantifier(AggregativeProbabilisticQuantifier, BinaryQuantifier): +class MyQuantifier(AggregativeSoftQuantifier, BinaryQuantifier): def __init__(self, classifier, alpha=0.5): self.alpha = alpha # aggregative quantifiers have an internal self.classifier attribute diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py index 3763b64..0f5e9f7 100644 --- a/quapy/classification/calibration.py +++ b/quapy/classification/calibration.py @@ -24,7 +24,8 @@ class RecalibratedProbabilisticClassifier: class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabilisticClassifier): """ Applies a (re)calibration method from `abstention.calibration`, as defined in - `Alexandari et al. paper `_: + `Alexandari et al. paper `_. + :param classifier: a scikit-learn probabilistic classifier :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 78ff40b..a8a7bb2 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from copy import deepcopy from typing import Callable, Union import numpy as np +from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling from scipy import optimize from sklearn.base import BaseEstimator from sklearn.calibration import CalibratedClassifierCV @@ -46,7 +47,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): :return: self """ classif_predictions = self.classifier_fit_predict(data, fit_classifier) - self.aggregation_fit(classif_predictions) + self.aggregation_fit(classif_predictions, data) return self def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True, predict_on=None): @@ -66,7 +67,6 @@ class AggregativeQuantifier(BaseQuantifier, ABC): """ assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean' - print(type(self)) self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba')) if predict_on is None: @@ -80,7 +80,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): raise ValueError(f'proportion {predict_on=} out of range, must be in (0,1)') train, val = data.split_stratified(train_prop=(1 - predict_on)) self.classifier.fit(*train.Xy) - predictions = (self.classify(val.X), val.y) + predictions = LabelledCollection(self.classify(val.X), val.y, classes=data.classes_) else: raise ValueError(f'wrong type for predict_on: since fit_classifier=False, ' f'the set on which predictions have to be issued must be ' @@ -89,15 +89,17 @@ class AggregativeQuantifier(BaseQuantifier, ABC): elif isinstance(predict_on, LabelledCollection): if fit_classifier: self.classifier.fit(*data.Xy) - predictions = (self.classify(predict_on.X), predict_on.y) + predictions = LabelledCollection(self.classify(predict_on.X), predict_on.y, classes=predict_on.classes_) elif isinstance(predict_on, int): if fit_classifier: - if not predict_on > 1: + if predict_on <= 1: raise ValueError(f'invalid value {predict_on} in fit. 
' f'Specify a integer >1 for kFCV estimation.') + else: predictions = cross_val_predict( - classifier, *data.Xy, cv=predict_on, n_jobs=self.n_jobs, method=self._classifier_method()) + self.classifier, *data.Xy, cv=predict_on, n_jobs=self.n_jobs, method=self._classifier_method()) + predictions = LabelledCollection(predictions, data.y, classes=data.classes_) self.classifier.fit(*data.Xy) else: raise ValueError(f'wrong type for predict_on: since fit_classifier=False, ' @@ -113,12 +115,13 @@ class AggregativeQuantifier(BaseQuantifier, ABC): return predictions @abstractmethod - def aggregation_fit(self, classif_predictions: LabelledCollection): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Trains the aggregation function. - :param classif_predictions: typically an `ndarray` containing the label predictions, but could be a - tuple containing any information needed for fitting the aggregation function + :param classif_predictions: a LabelledCollection containing the label predictions issued + by the classifier + :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data """ ... @@ -140,23 +143,36 @@ class AggregativeQuantifier(BaseQuantifier, ABC): """ self.classifier_ = classifier + @abstractmethod def classify(self, instances): """ Provides the label predictions for the given instances. The predictions should respect the format expected by - :meth:`aggregate`, i.e., posterior probabilities for probabilistic quantifiers, or crisp predictions for + :meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for non-probabilistic quantifiers - :param instances: array-like + :param instances: array-like of shape `(n_instances, n_features,)` :return: np.ndarray of shape `(n_instances,)` with label predictions """ - return self.classifier.predict(instances) + ... + @abstractmethod def _classifier_method(self): - print('using predict') - return 'predict' + """ + Name of the method that must be used for issuing label predictions. + :return: string + """ + ... + + @abstractmethod def _check_classifier(self, adapt_if_necessary=False): - assert hasattr(self.classifier, self._classifier_method()) + """ + Guarantees that the underlying classifier implements the method required for issuing predictions, i.e., + the method indicated by the :meth:`_classifier_method` + + :param adapt_if_necessary: if True, the method will try to comply with the required specifications + """ + ... def quantify(self, instances): """ @@ -190,22 +206,77 @@ class AggregativeQuantifier(BaseQuantifier, ABC): return self.classifier.classes_ -class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): +class AggregativeCrispQuantifier(AggregativeQuantifier, ABC): """ - Abstract class for quantification methods that base their estimations on the aggregation of posterior probabilities - as returned by a probabilistic classifier. Aggregative Probabilistic Quantifiers thus extend Aggregative - Quantifiers by implementing a _posterior_probabilities_ method returning values in [0,1] -- the posterior - probabilities. + Abstract class for quantification methods that base their estimations on the aggregation of crips decisions + as returned by a hard classifier. Aggregative crisp quantifiers thus extend Aggregative + Quantifiers by implementing specifications about crisp predictions. """ def classify(self, instances): + """ + Provides the label (crisp) predictions for the given instances. 
+ + :param instances: array-like of shape `(n_instances, n_dimensions,)` + :return: np.ndarray of shape `(n_instances,)` with label predictions + """ + return self.classifier.predict(instances) + + def _classifier_method(self): + """ + Name of the method that must be used for issuing label predictions. + + :return: the string "predict", i.e., the standard method name for scikit-learn hard predictions + """ + print('using predict') + return 'predict' + + def _check_classifier(self, adapt_if_necessary=False): + """ + Guarantees that the underlying classifier implements the method indicated by the :meth:`_classifier_method` + + :param adapt_if_necessary: unused, added for compatibility + """ + assert hasattr(self.classifier, self._classifier_method()), \ + f"the method does not implement the required {self._classifier_method()} method" + + +class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): + """ + Abstract class for quantification methods that base their estimations on the aggregation of posterior + probabilities as returned by a probabilistic classifier. + Aggregative soft quantifiers thus extend Aggregative Quantifiers by implementing specifications + about soft predictions. + """ + + def classify(self, instances): + """ + Provides the posterior probabilities for the given instances. + + :param instances: array-like of shape `(n_instances, n_dimensions,)` + :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities + """ return self.classifier.predict_proba(instances) def _classifier_method(self): + """ + Name of the method that must be used for issuing label predictions. + + :return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions + """ print('using predict_proba') return 'predict_proba' def _check_classifier(self, adapt_if_necessary=False): + """ + Guarantees that the underlying classifier implements the method indicated by the :meth:`_classifier_method`. + In case it does not, the classifier is calibrated (by means of the Platt's calibration method implemented by + scikit-learn in CalibratedClassifierCV, with cv=5). This calibration is only allowed if `adapt_if_necessary` + is set to True. If otherwise (i.e., the classifier is not probabilistic, and `adapt_if_necessary` is set + to False), an exception will be raised. + + :param adapt_if_necessary: a hard classifier is turned into a soft classifier if `adapt_if_necessary==True` + """ if not hasattr(self.classifier, self._classifier_method()): if adapt_if_necessary: print(f'warning: The learner {self.classifier.__class__.__name__} does not seem to be ' @@ -217,9 +288,42 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier, ABC): f'fit_classifier is set to False') + +class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier): + """ + Abstract class for quantification methods that carry out an adjustment (or correction) that requires, + at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that + is not the training set. There are three ways in which this distinction can be made, depending on how + the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion + of training instances that should be devoted to validate, or (ii) an integer indicating the + number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to + use for validation. 
+ """ + + @property + def val_split(self): + return self.val_split_ + + @val_split.setter + def val_split(self, val_split): + if isinstance(val_split, LabelledCollection): + print('warning: setting val_split with a LabelledCollection will be inefficient in' + 'model selection. Rather pass the LabelledCollection at fit time') + self.val_split_ = val_split + + def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None): + print('method from CorrectionbasedAggregativeQuantifier') + if predict_on is None: + predict_on = self.val_split + classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on) + self.aggregation_fit(classif_predictions, data) + return self + + + # Methods # ------------------------------------ -class CC(AggregativeQuantifier): +class CC(AggregativeCrispQuantifier): """ The most basic Quantification method. One that simply classifies all instances and counts how many have been attributed to each of the classes in order to compute class prevalence estimates. @@ -230,7 +334,7 @@ class CC(AggregativeQuantifier): def __init__(self, classifier: BaseEstimator): self.classifier = classifier - def aggregation_fit(self, classif_predictions: LabelledCollection): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Nothing to do here! @@ -248,19 +352,21 @@ class CC(AggregativeQuantifier): return F.prevalence_from_labels(classif_predictions, self.classes_) -class ACC(AggregativeQuantifier): +class ACC(AggregativeCrispQuantifier, CorrectionbasedAggregativeQuantifier): """ `Adjusted Classify & Count `_, the "adjusted" variant of :class:`CC`, that corrects the predictions of CC according to the `misclassification rates`. :param classifier: a sklearn's Estimator that generates a classifier - :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the - misclassification rates are to be estimated. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of - validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a - :class:`quapy.data.base.LabelledCollection` (the split itself). + :param val_split: specifies the data used for generating classifier predictions. This specification + can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to + be extracted from the training set (default 0.4); or as an integer, indicating that the predictions + are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value + for `k`); or as a collection defining the specific set of data to use for validation. + Alternatively, this set can be specified at fit time by indicating the exact set of data + on which the predictions are to be generated. + :param n_jobs: number of parallel workers """ def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): @@ -268,7 +374,7 @@ class ACC(AggregativeQuantifier): self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def aggregation_fit(self, classif_predictions: LabelledCollection): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Estimates the misclassification rates. 
@@ -292,9 +398,6 @@ class ACC(AggregativeQuantifier): conf[:, i] /= class_counts[i] return conf - def classify(self, data): - return self.cc.classify(data) - def aggregate(self, classif_predictions): prevs_estim = self.cc.aggregate(classif_predictions) return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim) @@ -321,7 +424,7 @@ class ACC(AggregativeQuantifier): return adjusted_prevs -class PCC(AggregativeProbabilisticQuantifier): +class PCC(AggregativeSoftQuantifier): """ `Probabilistic Classify & Count `_, the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier. @@ -332,7 +435,7 @@ class PCC(AggregativeProbabilisticQuantifier): def __init__(self, classifier: BaseEstimator): self.classifier = classifier - def aggregation_fit(self, classif_predictions: LabelledCollection): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Nothing to do here! @@ -344,18 +447,18 @@ class PCC(AggregativeProbabilisticQuantifier): return F.prevalence_from_probabilities(classif_posteriors, binarize=False) -class PACC(AggregativeProbabilisticQuantifier): +class PACC(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): """ `Probabilistic Adjusted Classify & Count `_, the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier. :param classifier: a sklearn's Estimator that generates a classifier - :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the - misclassification rates are to be estimated. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of - validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a - :class:`quapy.data.base.LabelledCollection` (the split itself). + :param val_split: specifies the data used for generating classifier predictions. This specification + can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to + be extracted from the training set (default 0.4); or as an integer, indicating that the predictions + are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value + for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data + on which the predictions are to be generated. 
:param n_jobs: number of parallel workers """ @@ -364,16 +467,20 @@ class PACC(AggregativeProbabilisticQuantifier): self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def aggregation_fit(self, classif_predictions: LabelledCollection): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Estimates the misclassification rates - :param classif_predictions: classifier predictions with true labels + :param classif_predictions: classifier soft predictions with true labels """ - true_labels, posteriors = classif_predictions + posteriors, true_labels = classif_predictions.Xy self.pcc = PCC(self.classifier) self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, posteriors) + def aggregate(self, classif_posteriors): + prevs_estim = self.pcc.aggregate(classif_posteriors) + return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim) + @classmethod def getPteCondEstim(cls, classes, y, y_): # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a @@ -387,15 +494,8 @@ class PACC(AggregativeProbabilisticQuantifier): return confusion.T - def aggregate(self, classif_posteriors): - prevs_estim = self.pcc.aggregate(classif_posteriors) - return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim) - def classify(self, data): - return self.pcc.classify(data) - - -class EMQ(AggregativeProbabilisticQuantifier): +class EMQ(AggregativeSoftQuantifier): """ `Expectation Maximization for Quantification `_ (EMQ), aka `Saerens-Latinne-Decaestecker` (SLD) algorithm. @@ -404,74 +504,30 @@ class EMQ(AggregativeProbabilisticQuantifier): maximum-likelihood estimation, in a mutually recursive way, until convergence. :param classifier: a sklearn's Estimator that generates a classifier - :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence; - or set to False for computing the training prevalence as an estimate, akin to PCC, i.e., as the expected - value of the posterior probabilities of the training instances as suggested in - `Alexandari et al. paper `_: - :param recalib: a string indicating the method of recalibration. Available choices include "nbvs" (No-Bias Vector - Scaling), "bcts" (Bias-Corrected Temperature Scaling), "ts" (Temperature Scaling), and "vs" (Vector Scaling). - The default value is None, indicating no recalibration. 
""" MAX_ITER = 1000 EPSILON = 1e-4 - def __init__(self, classifier: BaseEstimator, exact_train_prev=True, recalib=None): + def __init__(self, classifier: BaseEstimator): self.classifier = classifier - self.non_calibrated = classifier - self.exact_train_prev = exact_train_prev - self.recalib = recalib - def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True): - self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( - data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs - ) - - return (true_labels, posteriors) - - if self.recalib is not None: - if self.recalib == 'nbvs': - self.classifier = NBVSCalibration(self.non_calibrated) - elif self.recalib == 'bcts': - self.classifier = BCTSCalibration(self.non_calibrated) - elif self.recalib == 'ts': - self.classifier = TSCalibration(self.non_calibrated) - elif self.recalib == 'vs': - self.classifier = VSCalibration(self.non_calibrated) - elif self.recalib == 'platt': - self.classifier = CalibratedClassifierCV(self.classifier, ensemble=False) - else: - raise ValueError('invalid param argument for recalibration method; available ones are ' - '"nbvs", "bcts", "ts", and "vs".') - self.recalib = None - else: - self.classifier = self.non_calibrated - self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True) - if self.exact_train_prev: - self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) - else: - self.train_prevalence = qp.model_selection.cross_val_predict( - quantifier=PCC(deepcopy(self.classifier)), - data=data, - nfolds=3, - random_state=0 - ) - return None - - def aggregation_fit(self, classif_predictions: np.ndarray): - """ - Nothing to do here! - - :param classif_predictions: this is actually None - """ - pass + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + self.train_prevalence = data.prevalence() def aggregate(self, classif_posteriors, epsilon=EPSILON): priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon) return priors def predict_proba(self, instances, epsilon=EPSILON): - classif_posteriors = self.classifier.predict_proba(instances) + """ + Returns the posterior probabilities updated by the EM algorithm. + + :param instances: np.ndarray of shape `(n_instances, n_dimensions)` + :param epsilon: error tolerance + :return: np.ndarray of shape `(n_instances, n_classes)` + """ + classif_posteriors = self.classify(instances) priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon) return posteriors @@ -514,7 +570,94 @@ class EMQ(AggregativeProbabilisticQuantifier): return qs, ps -class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): +class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): + """ + `Expectation Maximization for Quantification `_ (EMQ), + aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by + `Alexandari et al. paper `_. + + These heuristics consist of using, as the training prevalence, an estimate of it obtained via k-fold cross + validation (instead of the true training prevalence), and to recalibrate the posterior probabilities of + the classifier. + + :param classifier: a sklearn's Estimator that generates a classifier + :param val_split: specifies the data used for generating classifier predictions. 
This specification + can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to + be extracted from the training set (default 0.4); or as an integer, indicating that the predictions + are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value + for `k`); or as a collection defining the specific set of data to use for validation. + Alternatively, this set can be specified at fit time by indicating the exact set of data + on which the predictions are to be generated. + :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence; + or set to False for computing the training prevalence as an estimate of it, i.e., as the expected + value of the posterior probabilities of the training instances + :param recalib: a string indicating the method of recalibration. + Available choices include "nbvs" (No-Bias Vector Scaling), "bcts" (Bias-Corrected Temperature Scaling, + default), "ts" (Temperature Scaling), and "vs" (Vector Scaling). + :param n_jobs: number of parallel workers + """ + + MAX_ITER = 1000 + EPSILON = 1e-4 + + def __init__(self, classifier: BaseEstimator, val_split=5, exact_train_prev=False, recalib='bcts', n_jobs=None): + self.classifier = classifier + self.val_split = val_split + self.exact_train_prev = exact_train_prev + self.recalib = recalib + self.n_jobs = n_jobs + + def classify(self, instances): + """ + Provides the posterior probabilities for the given instances. If the classifier is + recalibrated, then these posteriors will be recalibrated accordingly. + + :param instances: array-like of shape `(n_instances, n_dimensions,)` + :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities + """ + posteriors = self.classifier.predict_proba(instances) + if hasattr(self, 'calibration_function') and self.calibration_function is not None: + posteriors = self.calibration_function(posteriors) + return posteriors + + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + if self.recalib is not None: + P, y = classif_predictions.Xy + if self.recalib == 'nbvs': + calibrator = NoBiasVectorScaling() + elif self.recalib == 'bcts': + calibrator = TempScaling(bias_positions='all') + elif self.recalib == 'ts': + calibrator = TempScaling() + elif self.recalib == 'vs': + calibrator = VectorScaling() + else: + raise ValueError('invalid param argument for recalibration method; available ones are ' + '"nbvs", "bcts", "ts", and "vs".') + + self.calibration_function = calibrator(P, np.eye(data.n_classes)[y], posterior_supplied=True) + + if self.exact_train_prev: + self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) + else: + if self.recalib is not None: + train_posteriors = self.classify(data.X) + else: + train_posteriors = classif_predictions.X + + self.train_prevalence = np.mean(train_posteriors, axis=0) + + def aggregate(self, classif_posteriors, epsilon=EPSILON): + priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon) + return priors + + def predict_proba(self, instances, epsilon=EPSILON): + classif_posteriors = self.classify(instances) + priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon) + return posteriors + + +class HDy(AggregativeSoftQuantifier, BinaryQuantifier, CorrectionbasedAggregativeQuantifier): """ `Hellinger Distance y `_ (HDy). 
HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of @@ -533,7 +676,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): self.classifier = classifier self.val_split = val_split - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Trains a HDy quantifier. @@ -544,22 +687,23 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): :class:`quapy.data.base.LabelledCollection` indicating the validation set itself :return: self """ - if val_split is None: - val_split = self.val_split self._check_binary(data, self.__class__.__name__) - self.classifier, validation = _training_helper( - self.classifier, data, fit_classifier, ensure_probabilistic=True, val_split=val_split) - Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == self.classifier.classes_[1]] - self.Pxy0 = Px[validation.labels == self.classifier.classes_[0]] + P, y = classif_predictions.Xy + Px = P[:, 1] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.classifier.classes_[1]] + self.Pxy0 = Px[y == self.classifier.classes_[0]] + # pre-compute the histogram for positive and negative examples self.bins = np.linspace(10, 110, 11, dtype=int) # [10, 20, 30, ..., 100, 110] + def hist(P, bins): h = np.histogram(P, bins=bins, range=(0, 1), density=True)[0] return h / h.sum() + self.Pxy1_density = {bins: hist(self.Pxy1, bins) for bins in self.bins} self.Pxy0_density = {bins: hist(self.Pxy0, bins) for bins in self.bins} + return self def aggregate(self, classif_posteriors): @@ -583,7 +727,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): # at small steps (modern implementations resort to an optimization procedure, # see class DistributionMatching) prev_selected, min_dist = None, None - for prev in F.prevalence_linspace(n_prevalences=100, repeats=1, smooth_limits_epsilon=0.0): + for prev in F.prevalence_linspace(n_prevalences=101, repeats=1, smooth_limits_epsilon=0.0): Px_train = prev * Pxy1_density + (1 - prev) * Pxy0_density hdy = F.HellingerDistance(Px_train, Px_test) if prev_selected is None or hdy < min_dist: @@ -594,7 +738,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): +class DyS(AggregativeSoftQuantifier, BinaryQuantifier): """ `DyS framework `_ (DyS). DyS is a generalization of HDy method, using a Ternary Search in order to find the prevalence that @@ -661,7 +805,7 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): +class SMM(AggregativeSoftQuantifier, BinaryQuantifier): """ `SMM method `_ (SMM). SMM is a simplification of matching distribution methods where the representation of the examples @@ -700,7 +844,7 @@ class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -class DMy(AggregativeProbabilisticQuantifier): +class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): """ Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior probabilities. 
This implementation takes the number of bins, the divergence, and the possibility to work on CDF @@ -736,7 +880,7 @@ class DMy(AggregativeProbabilisticQuantifier): from quapy.method.meta import MedianEstimator hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD') - hdy = MedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs) + hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs) return hdy def __get_distributions(self, posteriors): @@ -755,7 +899,7 @@ class DMy(AggregativeProbabilisticQuantifier): distributions = np.cumsum(distributions, axis=1) return distributions - def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ Trains the classifier (if requested) and generates the validation distributions out of the training data. The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of @@ -771,21 +915,13 @@ class DMy(AggregativeProbabilisticQuantifier): indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV to estimate the parameters """ - if val_split is None: - val_split = self.val_split - - self.classifier, true_labels, posteriors, classes, class_count = cross_generate_predictions( - data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs - ) - - return (true_labels, posteriors) - - def aggregation_fit(self, classif_predictions): - true_labels, posteriors = classif_predictions + posteriors, true_labels = classif_predictions.Xy n_classes = len(self.classifier.classes_) - self.validation_distribution = np.asarray( - [self.__get_distributions(posteriors[true_labels == cat]) for cat in range(n_classes)] + self.validation_distribution = qp.util.parallel( + func=self.__get_distributions, + args=[posteriors[true_labels==cat] for cat in range(n_classes)], + n_jobs=self.n_jobs ) def aggregate(self, posteriors: np.ndarray): @@ -1252,7 +1388,7 @@ class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier): """ classif_predictions = self._parallel(self._delayed_binary_classification, instances) - if isinstance(self.binary_quantifier, AggregativeProbabilisticQuantifier): + if isinstance(self.binary_quantifier, AggregativeSoftQuantifier): return np.swapaxes(classif_predictions, 0, 1) else: return classif_predictions.T @@ -1269,6 +1405,130 @@ class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier): return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1] +class AggregativeMedianEstimator(BinaryQuantifier): + """ + This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the + estimation returned by differently (hyper)parameterized base quantifiers. + The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, + i.e., in cases of binary quantification. 
+ + :param base_quantifier: the base, binary quantifier + :param random_state: a seed to be set before fitting any base quantifier (default None) + :param param_grid: the grid or parameters towards which the median will be computed + :param n_jobs: number of parllel workes + """ + def __init__(self, base_quantifier: AggregativeQuantifier, param_grid: dict, random_state=None, n_jobs=None): + self.base_quantifier = base_quantifier + self.param_grid = param_grid + self.random_state = random_state + self.n_jobs = qp._get_njobs(n_jobs) + + def get_params(self, deep=True): + return self.base_quantifier.get_params(deep) + + def set_params(self, **params): + self.base_quantifier.set_params(**params) + + def _delayed_fit(self, args): + with qp.util.temp_seed(self.random_state): + params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**params) + model.fit(training) + return model + + def _delayed_fit_classifier(self, args): + with qp.util.temp_seed(self.random_state): + print('enter job') + cls_params, training, kwargs = args + model = deepcopy(self.base_quantifier) + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(training, **kwargs) + print('exit job') + return (model, predictions) + + def _delayed_fit_aggregation(self, args): + with qp.util.temp_seed(self.random_state): + print('\tenter job') + ((model, predictions), q_params), training = args + model = deepcopy(model) + print('fitaggr', model, predictions, len(predictions), print(self.training)) + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + print('\texit job') + return model + + + def fit(self, training: LabelledCollection, **kwargs): + import itertools + + self._check_binary(training, self.__class__.__name__) + + if isinstance(self.base_quantifier, AggregativeQuantifier): + cls_configs, q_configs = qp.model_selection.group_params(self.param_grid) + + if len(cls_configs) > 1: + models_preds = qp.util.parallel( + self._delayed_fit_classifier, + ((params, training, kwargs) for params in cls_configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + else: + print('only 1') + model = self.base_quantifier + model.set_params(**cls_configs[0]) + predictions = model.classifier_fit_predict(training, **kwargs) + models_preds = [(model, predictions)] + + self.training = training + + self.models = [] + print('WITHOUT PARALLEL JOBS') + for ((model, predictions), q_params) in itertools.product(models_preds, q_configs): + print('\tenter job') + model = deepcopy(model) + print('fitaggr', model, predictions, len(predictions), print(self.training)) + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + self.models.append(model) + print('\texit job') + + + # self.models = qp.util.parallel( + # self._delayed_fit_aggregation, + # ((setup, training) for setup in itertools.product(models_preds, q_configs)), + # seed=qp.environ.get('_R_SEED', None), + # n_jobs=self.n_jobs, + # asarray=False + # ) + else: + configs = qp.model_selection.expand_grid(self.param_grid) + self.models = qp.util.parallel( + self._delayed_fit, + ((params, training) for params in configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + return self + + def _delayed_predict(self, args): + model, instances = args + return model.quantify(instances) + + def quantify(self, instances): + prev_preds = qp.util.parallel( + self._delayed_predict, + ((model, instances) for model in self.models), + 
seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + prev_preds = np.asarray(prev_preds) + return np.median(prev_preds, axis=0) + #--------------------------------------------------------------- # aliases #--------------------------------------------------------------- diff --git a/quapy/method/meta.py b/quapy/method/meta.py index 7f111c0..d29433e 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -12,7 +12,7 @@ from quapy import functional as F from quapy.data import LabelledCollection from quapy.model_selection import GridSearchQ from quapy.method.base import BaseQuantifier, BinaryQuantifier -from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ +from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ, AggregativeQuantifier try: from . import neural @@ -26,6 +26,65 @@ else: QuaNet = "QuaNet is not available due to missing torch package" +class MedianEstimator2(BinaryQuantifier): + """ + This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the + estimation returned by differently (hyper)parameterized base quantifiers. + The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, + i.e., in cases of binary quantification. + + :param base_quantifier: the base, binary quantifier + :param random_state: a seed to be set before fitting any base quantifier (default None) + :param param_grid: the grid or parameters towards which the median will be computed + :param n_jobs: number of parllel workes + """ + def __init__(self, base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None): + self.base_quantifier = base_quantifier + self.param_grid = param_grid + self.random_state = random_state + self.n_jobs = qp._get_njobs(n_jobs) + + def get_params(self, deep=True): + return self.base_quantifier.get_params(deep) + + def set_params(self, **params): + self.base_quantifier.set_params(**params) + + def _delayed_fit(self, args): + with qp.util.temp_seed(self.random_state): + params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**params) + model.fit(training) + return model + + def fit(self, training: LabelledCollection): + self._check_binary(training, self.__class__.__name__) + + configs = qp.model_selection.expand_grid(self.param_grid) + self.models = qp.util.parallel( + self._delayed_fit, + ((params, training) for params in configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + return self + + def _delayed_predict(self, args): + model, instances = args + return model.quantify(instances) + + def quantify(self, instances): + prev_preds = qp.util.parallel( + self._delayed_predict, + ((model, instances) for model in self.models), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + prev_preds = np.asarray(prev_preds) + return np.median(prev_preds, axis=0) + + class MedianEstimator(BinaryQuantifier): """ This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the @@ -58,17 +117,64 @@ class MedianEstimator(BinaryQuantifier): model.fit(training) return model + def _delayed_fit_classifier(self, args): + with qp.util.temp_seed(self.random_state): + print('enter job') + cls_params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(training, predict_on=model.val_split) + print('exit job') + return (model, predictions) + + def _delayed_fit_aggregation(self, args): + 
with qp.util.temp_seed(self.random_state): + print('\tenter job') + ((model, predictions), q_params), training = args + model = deepcopy(model) + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + print('\texit job') + return model + + def fit(self, training: LabelledCollection): self._check_binary(training, self.__class__.__name__) - params_keys = list(self.param_grid.keys()) - params_values = list(self.param_grid.values()) - hyper = [dict({k: val[i] for i, k in enumerate(params_keys)}) for val in itertools.product(*params_values)] - self.models = qp.util.parallel( - self._delayed_fit, - ((params, training) for params in hyper), - seed=qp.environ.get('_R_SEED', None), - n_jobs=self.n_jobs - ) + + if isinstance(self.base_quantifier, AggregativeQuantifier): + cls_configs, q_configs = qp.model_selection.group_params(self.param_grid) + + if len(cls_configs) > 1: + models_preds = qp.util.parallel( + self._delayed_fit_classifier, + ((params, training) for params in cls_configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + else: + print('only 1') + model = self.base_quantifier + model.set_params(**cls_configs[0]) + predictions = model.classifier_fit_predict(training, predict_on=model.val_split) + models_preds = [(model, predictions)] + + self.models = qp.util.parallel( + self._delayed_fit_aggregation, + ((setup, training) for setup in itertools.product(models_preds, q_configs)), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + else: + configs = qp.model_selection.expand_grid(self.param_grid) + self.models = qp.util.parallel( + self._delayed_fit, + ((params, training) for params in configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) return self def _delayed_predict(self, args): @@ -80,13 +186,13 @@ class MedianEstimator(BinaryQuantifier): self._delayed_predict, ((model, instances) for model in self.models), seed=qp.environ.get('_R_SEED', None), - n_jobs=self.n_jobs + n_jobs=self.n_jobs, + asarray=False ) prev_preds = np.asarray(prev_preds) return np.median(prev_preds, axis=0) - class Ensemble(BaseQuantifier): VALID_POLICIES = {'ave', 'ptr', 'ds'} | qp.error.QUANTIFICATION_ERROR_NAMES diff --git a/quapy/method/neural.py b/quapy/method/neural.py index 2478055..330ac60 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -194,7 +194,7 @@ class QuaNetTrainer(BaseQuantifier): label_predictions = np.argmax(posteriors, axis=-1) prevs_estim = [] for quantifier in self.quantifiers.values(): - predictions = posteriors if isinstance(quantifier, AggregativeProbabilisticQuantifier) else label_predictions + predictions = posteriors if isinstance(quantifier, AggregativeSoftQuantifier) else label_predictions prevs_estim.extend(quantifier.aggregate(predictions)) # there is no real need for adding static estims like the TPR or FPR from training since those are constant diff --git a/quapy/model_selection.py b/quapy/model_selection.py index f02d9dc..2378777 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -76,8 +76,6 @@ class GridSearchQ(BaseQuantifier): :param training: the training set on which to optimize the hyperparameters :return: self """ - params_keys = list(self.param_grid.keys()) - params_values = list(self.param_grid.values()) protocol = self.protocol @@ -86,12 +84,13 @@ class GridSearchQ(BaseQuantifier): tinit = time() - hyper = [dict({k: val[i] for i, k in enumerate(params_keys)}) for val in itertools.product(*params_values)] 
+ configs = expand_grid(self.param_grid) + self._sout(f'starting model selection with {self.n_jobs =}') - #pass a seed to parallel so it is set in clild processes + #pass a seed to parallel so it is set in child processes scores = qp.util.parallel( self._delayed_eval, - ((params, training) for params in hyper), + ((params, training) for params in configs), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs ) @@ -204,8 +203,6 @@ class GridSearchQ(BaseQuantifier): raise ValueError('best_model called before fit') - - def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0): """ Akin to `scikit-learn's cross_val_predict `_ @@ -229,3 +226,43 @@ def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfol return total_prev +def expand_grid(param_grid: dict): + """ + Expands a param_grid dictionary as a list of configurations. + Example: + + >>> combinations = expand_grid({'A': [1, 10, 100], 'B': [True, False]}) + >>> print(combinations) + >>> [{'A': 1, 'B': True}, {'A': 1, 'B': False}, {'A': 10, 'B': True}, {'A': 10, 'B': False}, {'A': 100, 'B': True}, {'A': 100, 'B': False}] + + :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range + to explore for that hyper-parameter + :return: a list of configurations, i.e., combinations of hyper-parameter assignments in the grid. + """ + params_keys = list(param_grid.keys()) + params_values = list(param_grid.values()) + configs = [{k: combs[i] for i, k in enumerate(params_keys)} for combs in itertools.product(*params_values)] + return configs + + +def group_params(param_grid: dict): + """ + Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific + hyper-parameters, and another for que quantifier-specific hyper-parameters + + :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range + to explore for that hyper-parameter + :return: two expanded grids of configurations, one for the classifier, another for the quantifier + """ + classifier_params, quantifier_params = {}, {} + for key, values in param_grid.items(): + if key.startswith('classifier__') or key == 'val_split': + classifier_params[key] = values + else: + quantifier_params[key] = values + + classifier_configs = expand_grid(classifier_params) + quantifier_configs = expand_grid(quantifier_params) + + return classifier_configs, quantifier_configs + diff --git a/quapy/tests/test_hierarchy.py b/quapy/tests/test_hierarchy.py index 2ea3af5..b0842e5 100644 --- a/quapy/tests/test_hierarchy.py +++ b/quapy/tests/test_hierarchy.py @@ -22,9 +22,9 @@ class HierarchyTestCase(unittest.TestCase): def test_probabilistic(self): lr = LogisticRegression() for m in [CC(lr), ACC(lr)]: - self.assertEqual(isinstance(m, AggregativeProbabilisticQuantifier), False) + self.assertEqual(isinstance(m, AggregativeSoftQuantifier), False) for m in [PCC(lr), PACC(lr)]: - self.assertEqual(isinstance(m, AggregativeProbabilisticQuantifier), True) + self.assertEqual(isinstance(m, AggregativeSoftQuantifier), True) if __name__ == '__main__': diff --git a/quapy/util.py b/quapy/util.py index 733fbb8..c1d9f0d 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -38,7 +38,7 @@ def map_parallel(func, args, n_jobs): return list(itertools.chain.from_iterable(results)) -def parallel(func, args, n_jobs, seed=None): +def parallel(func, args, n_jobs, seed=None, asarray=True): """ A wrapper of multiprocessing: @@ -58,9 +58,12 @@ def 
parallel(func, args, n_jobs, seed=None): stack.enter_context(qp.util.temp_seed(seed)) return func(*args) - return Parallel(n_jobs=n_jobs)( + out = Parallel(n_jobs=n_jobs)( delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args) ) + if asarray: + out = np.asarray(out) + return out @contextlib.contextmanager From 513c78f1f30e97c8ff59c76527ea3bef02f9571a Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 16 Nov 2023 14:29:34 +0100 Subject: [PATCH 08/22] model seletion in two levels, classifier oriented and quantifier oriented --- examples/model_selection.py | 67 ++++++----- quapy/method/aggregative.py | 154 +++++++++++++------------ quapy/model_selection.py | 218 +++++++++++++++++++++++++++++------- quapy/util.py | 4 +- 4 files changed, 296 insertions(+), 147 deletions(-) diff --git a/examples/model_selection.py b/examples/model_selection.py index ae7fb6a..df051a0 100644 --- a/examples/model_selection.py +++ b/examples/model_selection.py @@ -2,7 +2,9 @@ import quapy as qp from quapy.protocol import APP from quapy.method.aggregative import DMy from sklearn.linear_model import LogisticRegression +from examples.comparing_gridsearch import OLD_GridSearchQ import numpy as np +from time import time """ In this example, we show how to perform model selection on a DistributionMatching quantifier. @@ -15,35 +17,44 @@ qp.environ['N_JOBS'] = -1 training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test -# The model will be returned by the fit method of GridSearchQ. -# Every combination of hyper-parameters will be evaluated by confronting the -# quantifier thus configured against a series of samples generated by means -# of a sample generation protocol. For this example, we will use the -# artificial-prevalence protocol (APP), that generates samples with prevalence -# values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]). -# We devote 30% of the dataset for this exploration. -training, validation = training.split_stratified(train_prop=0.7) -protocol = APP(validation) +with qp.util.temp_seed(0): -# We will explore a classification-dependent hyper-parameter (e.g., the 'C' -# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter -# (e.g., the number of bins in a DistributionMatching quantifier. -# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__" -# in order to let the quantifier know this hyper-parameter belongs to its underlying -# classifier. -param_grid = { - 'classifier__C': np.logspace(-3,3,7), - 'nbins': [8, 16, 32, 64], -} + # The model will be returned by the fit method of GridSearchQ. + # Every combination of hyper-parameters will be evaluated by confronting the + # quantifier thus configured against a series of samples generated by means + # of a sample generation protocol. For this example, we will use the + # artificial-prevalence protocol (APP), that generates samples with prevalence + # values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]). + # We devote 30% of the dataset for this exploration. 
+ training, validation = training.split_stratified(train_prop=0.7) + protocol = APP(validation) -model = qp.model_selection.GridSearchQ( - model=model, - param_grid=param_grid, - protocol=protocol, - error='mae', # the error to optimize is the MAE (a quantification-oriented loss) - refit=True, # retrain on the whole labelled set once done - verbose=True # show information as the process goes on -).fit(training) + # We will explore a classification-dependent hyper-parameter (e.g., the 'C' + # hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter + # (e.g., the number of bins in a DistributionMatching quantifier. + # Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__" + # in order to let the quantifier know this hyper-parameter belongs to its underlying + # classifier. + param_grid = { + 'classifier__C': np.logspace(-3,3,7), + 'classifier__class_weight': ['balanced', None], + 'nbins': [8, 16, 32, 64], + } + + tinit = time() + + + # model = OLD_GridSearchQ( + model = qp.model_selection.GridSearchQ( + model=model, + param_grid=param_grid, + protocol=protocol, + error='mae', # the error to optimize is the MAE (a quantification-oriented loss) + refit=False, # retrain on the whole labelled set once done + verbose=True # show information as the process goes on + ).fit(training) + +tend = time() print(f'model selection ended: best hyper-parameters={model.best_params_}') model = model.best_model_ @@ -53,5 +64,5 @@ model = model.best_model_ mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae') print(f'MAE={mae_score:.5f}') - +print(f'model selection took {tend-tinit}s') diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index a8a7bb2..f3779be 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -37,7 +37,20 @@ class AggregativeQuantifier(BaseQuantifier, ABC): and :meth:`aggregate`. """ - def fit(self, data: LabelledCollection, fit_classifier=True): + val_split_ = None + + @property + def val_split(self): + return self.val_split_ + + @val_split.setter + def val_split(self, val_split): + if isinstance(val_split, LabelledCollection): + print('warning: setting val_split with a LabelledCollection will be inefficient in' + 'model selection. Rather pass the LabelledCollection at fit time') + self.val_split_ = val_split + + def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None): """ Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function. @@ -46,7 +59,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): learner has been trained outside the quantifier. 
:return: self """ - classif_predictions = self.classifier_fit_predict(data, fit_classifier) + classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split) self.aggregation_fit(classif_predictions, data) return self @@ -69,6 +82,9 @@ class AggregativeQuantifier(BaseQuantifier, ABC): self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba')) + if predict_on is None: + predict_on = self.val_split + if predict_on is None: if fit_classifier: self.classifier.fit(*data.Xy) @@ -228,7 +244,6 @@ class AggregativeCrispQuantifier(AggregativeQuantifier, ABC): :return: the string "predict", i.e., the standard method name for scikit-learn hard predictions """ - print('using predict') return 'predict' def _check_classifier(self, adapt_if_necessary=False): @@ -264,7 +279,6 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): :return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions """ - print('using predict_proba') return 'predict_proba' def _check_classifier(self, adapt_if_necessary=False): @@ -289,35 +303,35 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): -class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier): - """ - Abstract class for quantification methods that carry out an adjustment (or correction) that requires, - at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that - is not the training set. There are three ways in which this distinction can be made, depending on how - the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion - of training instances that should be devoted to validate, or (ii) an integer indicating the - number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to - use for validation. - """ - - @property - def val_split(self): - return self.val_split_ - - @val_split.setter - def val_split(self, val_split): - if isinstance(val_split, LabelledCollection): - print('warning: setting val_split with a LabelledCollection will be inefficient in' - 'model selection. Rather pass the LabelledCollection at fit time') - self.val_split_ = val_split - - def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None): - print('method from CorrectionbasedAggregativeQuantifier') - if predict_on is None: - predict_on = self.val_split - classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on) - self.aggregation_fit(classif_predictions, data) - return self +# class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier): +# """ +# Abstract class for quantification methods that carry out an adjustment (or correction) that requires, +# at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that +# is not the training set. There are three ways in which this distinction can be made, depending on how +# the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion +# of training instances that should be devoted to validate, or (ii) an integer indicating the +# number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to +# use for validation. 
+# """ +# +# @property +# def val_split(self): +# return self.val_split_ +# +# @val_split.setter +# def val_split(self, val_split): +# if isinstance(val_split, LabelledCollection): +# print('warning: setting val_split with a LabelledCollection will be inefficient in' +# 'model selection. Rather pass the LabelledCollection at fit time') +# self.val_split_ = val_split +# +# def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None): +# print('method from CorrectionbasedAggregativeQuantifier') +# if predict_on is None: +# predict_on = self.val_split +# classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on) +# self.aggregation_fit(classif_predictions, data) +# return self @@ -352,7 +366,7 @@ class CC(AggregativeCrispQuantifier): return F.prevalence_from_labels(classif_predictions, self.classes_) -class ACC(AggregativeCrispQuantifier, CorrectionbasedAggregativeQuantifier): +class ACC(AggregativeCrispQuantifier): """ `Adjusted Classify & Count `_, the "adjusted" variant of :class:`CC`, that corrects the predictions of CC @@ -447,7 +461,7 @@ class PCC(AggregativeSoftQuantifier): return F.prevalence_from_probabilities(classif_posteriors, binarize=False) -class PACC(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): +class PACC(AggregativeSoftQuantifier): """ `Probabilistic Adjusted Classify & Count `_, the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier. @@ -570,7 +584,7 @@ class EMQ(AggregativeSoftQuantifier): return qs, ps -class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): +class EMQrecalib(AggregativeSoftQuantifier): """ `Expectation Maximization for Quantification `_ (EMQ), aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by @@ -657,7 +671,7 @@ class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier return posteriors -class HDy(AggregativeSoftQuantifier, BinaryQuantifier, CorrectionbasedAggregativeQuantifier): +class HDy(AggregativeSoftQuantifier, BinaryQuantifier): """ `Hellinger Distance y `_ (HDy). HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of @@ -844,7 +858,7 @@ class SMM(AggregativeSoftQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): +class DMy(AggregativeSoftQuantifier): """ Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior probabilities. 
This implementation takes the number of bins, the divergence, and the possibility to work on CDF @@ -865,7 +879,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): :param n_jobs: number of parallel workers (default None) """ - def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD', + def __init__(self, classifier, val_split=5, nbins=8, divergence: Union[str, Callable]='HD', cdf=False, search='optim_minimize', n_jobs=None): self.classifier = classifier self.val_split = val_split @@ -875,15 +889,15 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): self.search = search self.n_jobs = n_jobs - @classmethod - def HDy(cls, classifier, val_split=0.4, n_jobs=None): - from quapy.method.meta import MedianEstimator + # @classmethod + # def HDy(cls, classifier, val_split=0.4, n_jobs=None): + # from quapy.method.meta import MedianEstimator + # + # hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD') + # hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs) + # return hdy - hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD') - hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs) - return hdy - - def __get_distributions(self, posteriors): + def _get_distributions(self, posteriors): histograms = [] post_dims = posteriors.shape[1] if post_dims == 2: @@ -919,9 +933,10 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): n_classes = len(self.classifier.classes_) self.validation_distribution = qp.util.parallel( - func=self.__get_distributions, + func=self._get_distributions, args=[posteriors[true_labels==cat] for cat in range(n_classes)], - n_jobs=self.n_jobs + n_jobs=self.n_jobs, + backend='threading' ) def aggregate(self, posteriors: np.ndarray): @@ -935,7 +950,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): :param posteriors: posterior probabilities of the instances in the sample :return: a vector of class prevalence estimates """ - test_distribution = self.__get_distributions(posteriors) + test_distribution = self._get_distributions(posteriors) divergence = get_divergence(self.divergence) n_classes, n_channels, nbins = self.validation_distribution.shape def loss(prev): @@ -1449,13 +1464,10 @@ class AggregativeMedianEstimator(BinaryQuantifier): def _delayed_fit_aggregation(self, args): with qp.util.temp_seed(self.random_state): - print('\tenter job') ((model, predictions), q_params), training = args model = deepcopy(model) - print('fitaggr', model, predictions, len(predictions), print(self.training)) model.set_params(**q_params) model.aggregation_fit(predictions, training) - print('\texit job') return model @@ -1473,7 +1485,8 @@ class AggregativeMedianEstimator(BinaryQuantifier): ((params, training, kwargs) for params in cls_configs), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs, - asarray=False + asarray=False, + backend='threading' ) else: print('only 1') @@ -1482,27 +1495,13 @@ class AggregativeMedianEstimator(BinaryQuantifier): predictions = model.classifier_fit_predict(training, **kwargs) models_preds = [(model, predictions)] - self.training = training - - self.models = [] - print('WITHOUT PARALLEL JOBS') - for ((model, predictions), q_params) in itertools.product(models_preds, q_configs): - print('\tenter job') - model = deepcopy(model) 
- print('fitaggr', model, predictions, len(predictions), print(self.training)) - model.set_params(**q_params) - model.aggregation_fit(predictions, training) - self.models.append(model) - print('\texit job') - - - # self.models = qp.util.parallel( - # self._delayed_fit_aggregation, - # ((setup, training) for setup in itertools.product(models_preds, q_configs)), - # seed=qp.environ.get('_R_SEED', None), - # n_jobs=self.n_jobs, - # asarray=False - # ) + self.models = qp.util.parallel( + self._delayed_fit_aggregation, + ((setup, training) for setup in itertools.product(models_preds, q_configs)), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + backend='threading' + ) else: configs = qp.model_selection.expand_grid(self.param_grid) self.models = qp.util.parallel( @@ -1510,7 +1509,7 @@ class AggregativeMedianEstimator(BinaryQuantifier): ((params, training) for params in configs), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs, - asarray=False + backend='threading' ) return self @@ -1524,9 +1523,8 @@ class AggregativeMedianEstimator(BinaryQuantifier): ((model, instances) for model in self.models), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs, - asarray=False + backend='threading' ) - prev_preds = np.asarray(prev_preds) return np.median(prev_preds, axis=0) #--------------------------------------------------------------- diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 2378777..5448d4d 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -1,6 +1,7 @@ import itertools import signal from copy import deepcopy +from enum import Enum from typing import Union, Callable import numpy as np @@ -10,10 +11,16 @@ import quapy as qp from quapy import evaluation from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol from quapy.data.base import LabelledCollection -from quapy.method.aggregative import BaseQuantifier +from quapy.method.aggregative import BaseQuantifier, AggregativeQuantifier from time import time +class Status(Enum): + SUCCESS = 1 + TIMEOUT = 2 + INVALID = 3 + ERROR = 4 + class GridSearchQ(BaseQuantifier): """Grid Search optimization targeting a quantification-oriented metric. @@ -69,21 +76,7 @@ class GridSearchQ(BaseQuantifier): raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') - def fit(self, training: LabelledCollection): - """ Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing - the error metric. 
- - :param training: the training set on which to optimize the hyperparameters - :return: self - """ - - protocol = self.protocol - - self.param_scores_ = {} - self.best_score_ = None - - tinit = time() - + def _fit_nonaggregative(self, training): configs = expand_grid(self.param_grid) self._sout(f'starting model selection with {self.n_jobs =}') @@ -94,34 +87,106 @@ class GridSearchQ(BaseQuantifier): seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs ) + return scores - for params, score, model in scores: - if score is not None: - if self.best_score_ is None or score < self.best_score_: - self.best_score_ = score - self.best_params_ = params - self.best_model_ = model - self.param_scores_[str(params)] = score - else: - self.param_scores_[str(params)] = 'timeout' + def _delayed_fit_classifier(self, args): + cls_params, training = args + model = deepcopy(self.model) + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(training) + return (model, predictions, cls_params) - tend = time()-tinit + def _eval_aggregative(self, args): + ((model, predictions, cls_params), q_params), training = args + model = deepcopy(model) + # overrides default parameters with the parameters being explored at this iteration + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + params = {**cls_params, **q_params} + return model, params - if self.best_score_ is None: - raise TimeoutError('no combination of hyperparameters seem to work') + def _delayed_evaluation__(self, args): - self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) ' - f'[took {tend:.4f}s]') + exit_status = Status.SUCCESS - if self.refit: - if isinstance(protocol, OnLabelledCollectionProtocol): - self._sout(f'refitting on the whole development set') - self.best_model_.fit(training + protocol.get_labelled_collection()) - else: - raise RuntimeWarning(f'"refit" was requested, but the protocol does not ' - f'implement the {OnLabelledCollectionProtocol.__name__} interface') + tinit = time() + if self.timeout > 0: + def handler(signum, frame): + raise TimeoutError() - return self + signal.signal(signal.SIGALRM, handler) + signal.alarm(self.timeout) + + try: + model, params = self._eval_aggregative(args) + + score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + + ttime = time() - tinit + self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]') + + if self.timeout > 0: + signal.alarm(0) + + except TimeoutError: + self._sout(f'timeout ({self.timeout}s) reached for config {params}') + score = None + exit_status = Status.TIMEOUT + + except ValueError as e: + self._sout(f'the combination of hyperparameters {params} is invalid') + score = None + exit_status = Status.INVALID + + except Exception as e: + self._sout(f'something went wrong for config {params}; skipping:') + self._sout(f'\tException: {e}') + score = None + exit_status = Status.ERROR + + + return params, score, model, exit_status + + # def _delayed_fit_aggregation_and_eval(self, args): + # + # ((model, predictions, cls_params), q_params), training = args + # exit_status = Status.SUCCESS + # + # tinit = time() + # if self.timeout > 0: + # def handler(signum, frame): + # raise TimeoutError() + # signal.signal(signal.SIGALRM, handler) + # signal.alarm(self.timeout) + # + # try: + # model = deepcopy(model) + # # overrides default parameters with the parameters being explored at this iteration + # model.set_params(**q_params) + # 
model.aggregation_fit(predictions, training) + # score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + # + # ttime = time() - tinit + # self._sout(f'hyperparams=[cls:{cls_params}, q:{q_params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]') + # + # if self.timeout > 0: + # signal.alarm(0) + # except TimeoutError: + # self._sout(f'timeout ({self.timeout}s) reached for config {q_params}') + # score = None + # exit_status = Status.TIMEOUT + # except ValueError as e: + # self._sout(f'the combination of hyperparameters {q_params} is invalid') + # score = None + # exit_status = Status.INVALID + # except Exception as e: + # self._sout(f'something went wrong for config {q_params}; skipping:') + # self._sout(f'\tException: {e}') + # score = None + # exit_status = Status.ERROR + # + # params = {**cls_params, **q_params} + # return params, score, model, exit_status def _delayed_eval(self, args): params, training = args @@ -163,8 +228,83 @@ class GridSearchQ(BaseQuantifier): self._sout(f'\tException: {e}') score = None - return params, score, model + return params, score, model, status + def _fit_aggregative(self, training): + + # break down the set of hyperparameters into two: classifier-specific, quantifier-specific + cls_configs, q_configs = group_params(self.param_grid) + + # train all classifiers and get the predictions + models_preds_clsconfigs = qp.util.parallel( + self._delayed_fit_classifier, + ((params, training) for params in cls_configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False, + ) + + # explore the quantifier-specific hyperparameters for each training configuration + scores = qp.util.parallel( + self._delayed_fit_aggregation_and_eval, + ((setup, training) for setup in itertools.product(models_preds_clsconfigs, q_configs)), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + + return scores + + + def fit(self, training: LabelledCollection): + """ Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing + the error metric. 
+ + :param training: the training set on which to optimize the hyperparameters + :return: self + """ + + if self.refit and not isinstance(self.protocol, OnLabelledCollectionProtocol): + raise RuntimeWarning(f'"refit" was requested, but the protocol does not ' + f'implement the {OnLabelledCollectionProtocol.__name__} interface') + + tinit = time() + + if isinstance(self.model, AggregativeQuantifier): + self.results = self._fit_aggregative(training) + else: + self.results = self._fit_nonaggregative(training) + + self.param_scores_ = {} + self.best_score_ = None + for params, score, model in self.results: + if score is not None: + if self.best_score_ is None or score < self.best_score_: + self.best_score_ = score + self.best_params_ = params + self.best_model_ = model + self.param_scores_[str(params)] = score + else: + self.param_scores_[str(params)] = 'timeout' + + tend = time()-tinit + + if self.best_score_ is None: + raise TimeoutError('no combination of hyperparameters seem to work') + + self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) ' + f'[took {tend:.4f}s]') + + if self.refit: + if isinstance(self.protocol, OnLabelledCollectionProtocol): + tinit = time() + self._sout(f'refitting on the whole development set') + self.best_model_.fit(training + self.protocol.get_labelled_collection()) + tend = time() - tinit + self.refit_time_ = tend + else: + raise RuntimeWarning(f'the model cannot be refit on the whole dataset') + + return self def quantify(self, instances): """Estimate class prevalence values using the best model found after calling the :meth:`fit` method. diff --git a/quapy/util.py b/quapy/util.py index c1d9f0d..51c2a41 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -38,7 +38,7 @@ def map_parallel(func, args, n_jobs): return list(itertools.chain.from_iterable(results)) -def parallel(func, args, n_jobs, seed=None, asarray=True): +def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'): """ A wrapper of multiprocessing: @@ -58,7 +58,7 @@ def parallel(func, args, n_jobs, seed=None, asarray=True): stack.enter_context(qp.util.temp_seed(seed)) return func(*args) - out = Parallel(n_jobs=n_jobs)( + out = Parallel(n_jobs=n_jobs, backend=backend)( delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args) ) if asarray: From f785a4eeef1b34103cdb9b401f86b894ecc5ff54 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 16 Nov 2023 19:56:30 +0100 Subject: [PATCH 09/22] model selection with error handling --- examples/model_selection.py | 3 +- quapy/model_selection.py | 322 ++++++++++++++++-------------------- 2 files changed, 146 insertions(+), 179 deletions(-) diff --git a/examples/model_selection.py b/examples/model_selection.py index df051a0..141cf91 100644 --- a/examples/model_selection.py +++ b/examples/model_selection.py @@ -1,4 +1,5 @@ import quapy as qp +from quapy.method.non_aggregative import DMx from quapy.protocol import APP from quapy.method.aggregative import DMy from sklearn.linear_model import LogisticRegression @@ -38,7 +39,7 @@ with qp.util.temp_seed(0): param_grid = { 'classifier__C': np.logspace(-3,3,7), 'classifier__class_weight': ['balanced', None], - 'nbins': [8, 16, 32, 64], + 'nbins': [8, 16, 32, 64, 'poooo'], } tinit = time() diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 5448d4d..9bd0985 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -3,6 +3,7 @@ import signal from copy import deepcopy from enum import 
Enum from typing import Union, Callable +from functools import wraps import numpy as np from sklearn import clone @@ -21,6 +22,56 @@ class Status(Enum): INVALID = 3 ERROR = 4 +def check_status(func): + @wraps(func) + def wrapper(*args, **kwargs): + obj = args[0] + tinit = time() + + job_descriptor = dict(args[1]) + params = {**job_descriptor.get('cls-params', {}), **job_descriptor.get('q-params', {})} + + if obj.timeout > 0: + def handler(signum, frame): + raise TimeoutError() + + signal.signal(signal.SIGALRM, handler) + signal.alarm(obj.timeout) + + try: + job_descriptor = func(*args, **kwargs) + + ttime = time() - tinit + + score = job_descriptor.get('score', None) + if score is not None: + obj._sout(f'hyperparams=[{params}]\t got {obj.error.__name__} = {score:.5f} [took {ttime:.4f}s]') + + if obj.timeout > 0: + signal.alarm(0) + + exit_status = Status.SUCCESS + + except TimeoutError: + obj._sout(f'timeout ({obj.timeout}s) reached for config {params}') + exit_status = Status.TIMEOUT + + except ValueError as e: + obj._sout(f'the combination of hyperparameters {params} is invalid') + obj._sout(f'\tException: {e}') + exit_status = Status.INVALID + + except Exception as e: + obj._sout(f'something went wrong for config {params}; skipping:') + obj._sout(f'\tException: {e}') + exit_status = Status.ERROR + + job_descriptor['status'] = exit_status + job_descriptor['params'] = params + return job_descriptor + return wrapper + + class GridSearchQ(BaseQuantifier): """Grid Search optimization targeting a quantification-oriented metric. @@ -76,184 +127,97 @@ class GridSearchQ(BaseQuantifier): raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') - def _fit_nonaggregative(self, training): + @check_status + def _prepare_classifier(self, args): + cls_params = args['cls-params'] + training = args['training'] + model = deepcopy(self.model) + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(training) + return {'model': model, 'predictions': predictions, 'cls-params': cls_params} + + @check_status + def _prepare_aggregation(self, args): + # (partial_setup, q_params), training = args + model = args['model'] + predictions = args['predictions'] + cls_params = args['cls-params'] + q_params = args['q-params'] + training = args['training'] + + params = {**cls_params, **q_params} + + model = deepcopy(model) + # overrides default parameters with the parameters being explored at this iteration + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + + return {'model': model, 'cls-params':cls_params, 'q-params': q_params, 'params': params, 'score': score} + + @check_status + def _prepare_model(self, args): + params, training = args + model = deepcopy(self.model) + # overrides default parameters with the parameters being explored at this iteration + model.set_params(**params) + model.fit(training) + score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + return {'model': model, 'params': params, 'score': score} + + + def _compute_scores_aggregative(self, training): + + # break down the set of hyperparameters into two: classifier-specific, quantifier-specific + cls_configs, q_configs = group_params(self.param_grid) + + # train all classifiers and get the predictions + partial_setups = qp.util.parallel( + self._prepare_classifier, + 
({'cls-params':params, 'training':training} for params in cls_configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False, + ) + + # filter out classifier configurations that yield any error + for setup in partial_setups: + if setup['status'] != Status.SUCCESS: + self._sout(f'-> classifier hyperparemters {setup["params"]} caused ' + f'error {setup["status"]} and will be ignored') + + partial_setups = [setup for setup in partial_setups if setup['status']==Status.SUCCESS] + + if len(partial_setups) == 0: + raise ValueError('No valid configuration found for the classifier.') + + # explore the quantifier-specific hyperparameters for each training configuration + scores = qp.util.parallel( + self._prepare_aggregation, + ({'q-params': setup[1], 'training': training, **setup[0]} for setup in itertools.product(partial_setups, q_configs)), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + + return scores + + def _compute_scores_nonaggregative(self, training): configs = expand_grid(self.param_grid) - self._sout(f'starting model selection with {self.n_jobs =}') - #pass a seed to parallel so it is set in child processes + # pass a seed to parallel, so it is set in child processes scores = qp.util.parallel( - self._delayed_eval, + self._prepare_model, ((params, training) for params in configs), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs ) return scores - def _delayed_fit_classifier(self, args): - cls_params, training = args - model = deepcopy(self.model) - model.set_params(**cls_params) - predictions = model.classifier_fit_predict(training) - return (model, predictions, cls_params) - - def _eval_aggregative(self, args): - ((model, predictions, cls_params), q_params), training = args - model = deepcopy(model) - # overrides default parameters with the parameters being explored at this iteration - model.set_params(**q_params) - model.aggregation_fit(predictions, training) - params = {**cls_params, **q_params} - return model, params - - def _delayed_evaluation__(self, args): - - exit_status = Status.SUCCESS - - tinit = time() - if self.timeout > 0: - def handler(signum, frame): - raise TimeoutError() - - signal.signal(signal.SIGALRM, handler) - signal.alarm(self.timeout) - - try: - model, params = self._eval_aggregative(args) - - score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) - - ttime = time() - tinit - self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]') - - if self.timeout > 0: - signal.alarm(0) - - except TimeoutError: - self._sout(f'timeout ({self.timeout}s) reached for config {params}') - score = None - exit_status = Status.TIMEOUT - - except ValueError as e: - self._sout(f'the combination of hyperparameters {params} is invalid') - score = None - exit_status = Status.INVALID - - except Exception as e: - self._sout(f'something went wrong for config {params}; skipping:') - self._sout(f'\tException: {e}') - score = None - exit_status = Status.ERROR - - - return params, score, model, exit_status - - # def _delayed_fit_aggregation_and_eval(self, args): - # - # ((model, predictions, cls_params), q_params), training = args - # exit_status = Status.SUCCESS - # - # tinit = time() - # if self.timeout > 0: - # def handler(signum, frame): - # raise TimeoutError() - # signal.signal(signal.SIGALRM, handler) - # signal.alarm(self.timeout) - # - # try: - # model = deepcopy(model) - # # overrides default parameters with the parameters being explored at this iteration - # 
model.set_params(**q_params) - # model.aggregation_fit(predictions, training) - # score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) - # - # ttime = time() - tinit - # self._sout(f'hyperparams=[cls:{cls_params}, q:{q_params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]') - # - # if self.timeout > 0: - # signal.alarm(0) - # except TimeoutError: - # self._sout(f'timeout ({self.timeout}s) reached for config {q_params}') - # score = None - # exit_status = Status.TIMEOUT - # except ValueError as e: - # self._sout(f'the combination of hyperparameters {q_params} is invalid') - # score = None - # exit_status = Status.INVALID - # except Exception as e: - # self._sout(f'something went wrong for config {q_params}; skipping:') - # self._sout(f'\tException: {e}') - # score = None - # exit_status = Status.ERROR - # - # params = {**cls_params, **q_params} - # return params, score, model, exit_status - - def _delayed_eval(self, args): - params, training = args - - protocol = self.protocol - error = self.error - - if self.timeout > 0: - def handler(signum, frame): - raise TimeoutError() - - signal.signal(signal.SIGALRM, handler) - - tinit = time() - - if self.timeout > 0: - signal.alarm(self.timeout) - - try: - model = deepcopy(self.model) - # overrides default parameters with the parameters being explored at this iteration - model.set_params(**params) - model.fit(training) - score = evaluation.evaluate(model, protocol=protocol, error_metric=error) - - ttime = time()-tinit - self._sout(f'hyperparams={params}\t got {error.__name__} score {score:.5f} [took {ttime:.4f}s]') - - if self.timeout > 0: - signal.alarm(0) - except TimeoutError: - self._sout(f'timeout ({self.timeout}s) reached for config {params}') - score = None - except ValueError as e: - self._sout(f'the combination of hyperparameters {params} is invalid') - raise e - except Exception as e: - self._sout(f'something went wrong for config {params}; skipping:') - self._sout(f'\tException: {e}') - score = None - - return params, score, model, status - - def _fit_aggregative(self, training): - - # break down the set of hyperparameters into two: classifier-specific, quantifier-specific - cls_configs, q_configs = group_params(self.param_grid) - - # train all classifiers and get the predictions - models_preds_clsconfigs = qp.util.parallel( - self._delayed_fit_classifier, - ((params, training) for params in cls_configs), - seed=qp.environ.get('_R_SEED', None), - n_jobs=self.n_jobs, - asarray=False, - ) - - # explore the quantifier-specific hyperparameters for each training configuration - scores = qp.util.parallel( - self._delayed_fit_aggregation_and_eval, - ((setup, training) for setup in itertools.product(models_preds_clsconfigs, q_configs)), - seed=qp.environ.get('_R_SEED', None), - n_jobs=self.n_jobs - ) - - return scores - + def _compute_scores(self, training): + if isinstance(self.model, AggregativeQuantifier): + return self._compute_scores_aggregative(training) + else: + return self._compute_scores_nonaggregative(training) def fit(self, training: LabelledCollection): """ Learning routine. 
Fits methods with all combinations of hyperparameters and selects the one minimizing @@ -264,27 +228,29 @@ class GridSearchQ(BaseQuantifier): """ if self.refit and not isinstance(self.protocol, OnLabelledCollectionProtocol): - raise RuntimeWarning(f'"refit" was requested, but the protocol does not ' - f'implement the {OnLabelledCollectionProtocol.__name__} interface') + raise RuntimeWarning( + f'"refit" was requested, but the protocol does not implement ' + f'the {OnLabelledCollectionProtocol.__name__} interface' + ) tinit = time() - if isinstance(self.model, AggregativeQuantifier): - self.results = self._fit_aggregative(training) - else: - self.results = self._fit_nonaggregative(training) + self._sout(f'starting model selection with n_jobs={self.n_jobs}') + results = self._compute_scores(training) self.param_scores_ = {} self.best_score_ = None - for params, score, model in self.results: + for job_result in results: + score = job_result.get('score', None) + params = job_result['params'] if score is not None: if self.best_score_ is None or score < self.best_score_: self.best_score_ = score self.best_params_ = params - self.best_model_ = model + self.best_model_ = job_result['model'] self.param_scores_[str(params)] = score else: - self.param_scores_[str(params)] = 'timeout' + self.param_scores_[str(params)] = job_result['status'] tend = time()-tinit From 6663b4c91d01c0edb5735da637cb2416433d031c Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 20 Nov 2023 22:05:26 +0100 Subject: [PATCH 10/22] context timeout --- quapy/model_selection.py | 54 ++++++++++++++++++++++++++++++++-------- quapy/util.py | 34 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 9bd0985..6637d62 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -13,6 +13,7 @@ from quapy import evaluation from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol from quapy.data.base import LabelledCollection from quapy.method.aggregative import BaseQuantifier, AggregativeQuantifier +from quapy.util import timeout from time import time @@ -127,7 +128,6 @@ class GridSearchQ(BaseQuantifier): raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') - @check_status def _prepare_classifier(self, args): cls_params = args['cls-params'] training = args['training'] @@ -136,9 +136,8 @@ class GridSearchQ(BaseQuantifier): predictions = model.classifier_fit_predict(training) return {'model': model, 'predictions': predictions, 'cls-params': cls_params} - @check_status def _prepare_aggregation(self, args): - # (partial_setup, q_params), training = args + model = args['model'] predictions = args['predictions'] cls_params = args['cls-params'] @@ -147,15 +146,32 @@ class GridSearchQ(BaseQuantifier): params = {**cls_params, **q_params} - model = deepcopy(model) - # overrides default parameters with the parameters being explored at this iteration - model.set_params(**q_params) - model.aggregation_fit(predictions, training) - score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + def job(model): + tinit = time() + model = deepcopy(model) + # overrides default parameters with the parameters being explored at this iteration + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + score = evaluation.evaluate(model, protocol=self.protocol, 
error_metric=self.error) + ttime = time()-tinit + + return { + 'model': model, + 'cls-params':cls_params, + 'q-params': q_params, + 'params': params, + 'score': score, + 'ttime':ttime + } + + out, status = self._error_handler(job, args) + if status == Status.SUCCESS: + self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} = {out["score"]:.5f} [took {out["time"]:.4f}s]') + elif status == Status.INVALID: + self._sout(f'the combination of hyperparameters {params} is invalid') + elif status == Status. - return {'model': model, 'cls-params':cls_params, 'q-params': q_params, 'params': params, 'score': score} - @check_status def _prepare_model(self, args): params, training = args model = deepcopy(self.model) @@ -309,6 +325,24 @@ class GridSearchQ(BaseQuantifier): raise ValueError('best_model called before fit') + def _error_handler(self, func, *args, **kwargs): + + try: + with timeout(self.timeout): + output = func(*args, **kwargs) + return output, Status.SUCCESS + + except TimeoutError: + return None, Status.TIMEOUT + + except ValueError: + return None, Status.INVALID + + except Exception: + return None, Status.ERROR + + + def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0): """ Akin to `scikit-learn's cross_val_predict `_ diff --git a/quapy/util.py b/quapy/util.py index 51c2a41..de5c131 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -10,6 +10,8 @@ import quapy as qp import numpy as np from joblib import Parallel, delayed +from time import time +import signal def _get_parallel_slices(n_tasks, n_jobs): @@ -257,3 +259,35 @@ class EarlyStop: if self.patience <= 0: self.STOP = True + +@contextlib.contextmanager +def timeout(seconds): + """ + Opens a context that will launch an exception if not closed after a given number of seconds + + >>> def func(start_msg, end_msg): + >>> print(start_msg) + >>> sleep(2) + >>> print(end_msg) + >>> + >>> with timeout(1): + >>> func('begin function', 'end function') + >>> Out[] + >>> begin function + >>> TimeoutError + + + :param seconds: number of seconds, set to <=0 to ignore the timer + """ + if seconds > 0: + def handler(signum, frame): + raise TimeoutError() + + signal.signal(signal.SIGALRM, handler) + signal.alarm(seconds) + + yield + + if seconds > 0: + signal.alarm(0) + From eb9a3dde2a77f280eb95a1c07c9503660971cb64 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Tue, 21 Nov 2023 18:59:36 +0100 Subject: [PATCH 11/22] grid search almost complete --- examples/model_selection.py | 12 +- quapy/model_selection.py | 260 +++++++++++++++++------------------- 2 files changed, 131 insertions(+), 141 deletions(-) diff --git a/examples/model_selection.py b/examples/model_selection.py index 141cf91..559de27 100644 --- a/examples/model_selection.py +++ b/examples/model_selection.py @@ -3,7 +3,7 @@ from quapy.method.non_aggregative import DMx from quapy.protocol import APP from quapy.method.aggregative import DMy from sklearn.linear_model import LogisticRegression -from examples.comparing_gridsearch import OLD_GridSearchQ +#from examples.comparing_gridsearch import OLD_GridSearchQ import numpy as np from time import time @@ -37,14 +37,13 @@ with qp.util.temp_seed(0): # in order to let the quantifier know this hyper-parameter belongs to its underlying # classifier. 
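# --- editorial aside (not part of the original patch) --------------------------------------
# The 'classifier__' prefix mentioned above follows the sklearn convention for nested
# parameters: GridSearchQ forwards prefixed keys to the wrapped estimator via set_params,
# while un-prefixed keys (e.g. 'nbins') are treated as quantifier hyper-parameters. The
# clearly invalid values introduced in the grid below ('ch' for class_weight, 'po' for nbins)
# appear to be deliberate, so as to exercise the new error-handling path (with
# raise_errors=False such configurations are marked as failed rather than aborting the
# search). A minimal, hypothetical sketch of the parameter routing, assuming a DMy quantifier
# wrapping a LogisticRegression as in this example:
#
#   quantifier = DMy(LogisticRegression())
#   quantifier.set_params(**{'classifier__C': 10.0, 'nbins': 16})
#   assert quantifier.classifier.C == 10.0 and quantifier.nbins == 16
# --------------------------------------------------------------------------------------------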
param_grid = { - 'classifier__C': np.logspace(-3,3,7), - 'classifier__class_weight': ['balanced', None], - 'nbins': [8, 16, 32, 64, 'poooo'], + 'classifier__C': np.logspace(-2, 2, 5), + 'classifier__class_weight': ['balanced', None, 'ch'], + 'nbins': [8, 16, 32, 64, 'po'], } tinit = time() - # model = OLD_GridSearchQ( model = qp.model_selection.GridSearchQ( model=model, @@ -52,6 +51,7 @@ with qp.util.temp_seed(0): protocol=protocol, error='mae', # the error to optimize is the MAE (a quantification-oriented loss) refit=False, # retrain on the whole labelled set once done + raise_errors=False, verbose=True # show information as the process goes on ).fit(training) @@ -65,5 +65,5 @@ model = model.best_model_ mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae') print(f'MAE={mae_score:.5f}') -print(f'model selection took {tend-tinit}s') +print(f'model selection took {tend-tinit:.1f}s') diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 6637d62..9017b99 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -23,54 +23,24 @@ class Status(Enum): INVALID = 3 ERROR = 4 -def check_status(func): - @wraps(func) - def wrapper(*args, **kwargs): - obj = args[0] - tinit = time() - job_descriptor = dict(args[1]) - params = {**job_descriptor.get('cls-params', {}), **job_descriptor.get('q-params', {})} +class ConfigStatus: + def __init__(self, params, status, msg=''): + self.params = params + self.status = status + self.msg = msg - if obj.timeout > 0: - def handler(signum, frame): - raise TimeoutError() + def __str__(self): + return f':params:{self.params} :status:{self.status} ' + self.msg - signal.signal(signal.SIGALRM, handler) - signal.alarm(obj.timeout) + def __repr__(self): + return str(self) - try: - job_descriptor = func(*args, **kwargs) + def success(self): + return self.status == Status.SUCCESS - ttime = time() - tinit - - score = job_descriptor.get('score', None) - if score is not None: - obj._sout(f'hyperparams=[{params}]\t got {obj.error.__name__} = {score:.5f} [took {ttime:.4f}s]') - - if obj.timeout > 0: - signal.alarm(0) - - exit_status = Status.SUCCESS - - except TimeoutError: - obj._sout(f'timeout ({obj.timeout}s) reached for config {params}') - exit_status = Status.TIMEOUT - - except ValueError as e: - obj._sout(f'the combination of hyperparameters {params} is invalid') - obj._sout(f'\tException: {e}') - exit_status = Status.INVALID - - except Exception as e: - obj._sout(f'something went wrong for config {params}; skipping:') - obj._sout(f'\tException: {e}') - exit_status = Status.ERROR - - job_descriptor['status'] = exit_status - job_descriptor['params'] = params - return job_descriptor - return wrapper + def failed(self): + return self.status != Status.SUCCESS class GridSearchQ(BaseQuantifier): @@ -85,11 +55,14 @@ class GridSearchQ(BaseQuantifier): :param protocol: a sample generation protocol, an instance of :class:`quapy.protocol.AbstractProtocol` :param error: an error function (callable) or a string indicating the name of an error function (valid ones are those in :class:`quapy.error.QUANTIFICATION_ERROR` - :param refit: whether or not to refit the model on the whole labelled collection (training+validation) with + :param refit: whether to refit the model on the whole labelled collection (training+validation) with the best chosen hyperparameter combination. Ignored if protocol='gen' :param timeout: establishes a timer (in seconds) for each of the hyperparameters configurations being tested. 
Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set. + :param raise_errors: boolean, if True then raises an exception when a param combination yields any error, if + otherwise is False (default), then the combination is marked with an error status, but the process goes on. + However, if no configuration yields a valid model, then a ValueError exception will be raised. :param verbose: set to True to get information through the stdout """ @@ -101,6 +74,7 @@ class GridSearchQ(BaseQuantifier): refit=True, timeout=-1, n_jobs=None, + raise_errors=False, verbose=False): self.model = model @@ -109,6 +83,7 @@ class GridSearchQ(BaseQuantifier): self.refit = refit self.timeout = timeout self.n_jobs = qp._get_njobs(n_jobs) + self.raise_errors = raise_errors self.verbose = verbose self.__check_error(error) assert isinstance(protocol, AbstractProtocol), 'unknown protocol' @@ -128,112 +103,97 @@ class GridSearchQ(BaseQuantifier): raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') - def _prepare_classifier(self, args): - cls_params = args['cls-params'] - training = args['training'] + def _prepare_classifier(self, cls_params): model = deepcopy(self.model) - model.set_params(**cls_params) - predictions = model.classifier_fit_predict(training) - return {'model': model, 'predictions': predictions, 'cls-params': cls_params} + + def job(cls_params): + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(self._training) + return predictions + + predictions, status, took = self._error_handler(job, cls_params) + self._sout(f'[classifier fit] hyperparams={cls_params} status={status} [took {took:.3f}s]') + return model, predictions, status, took def _prepare_aggregation(self, args): - - model = args['model'] - predictions = args['predictions'] - cls_params = args['cls-params'] - q_params = args['q-params'] - training = args['training'] - + model, predictions, cls_took, cls_params, q_params = args + model = deepcopy(model) params = {**cls_params, **q_params} - def job(model): - tinit = time() - model = deepcopy(model) - # overrides default parameters with the parameters being explored at this iteration + def job(q_params): model.set_params(**q_params) - model.aggregation_fit(predictions, training) + model.aggregation_fit(predictions, self._training) score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) - ttime = time()-tinit + return score - return { - 'model': model, - 'cls-params':cls_params, - 'q-params': q_params, - 'params': params, - 'score': score, - 'ttime':ttime - } + score, status, aggr_took = self._error_handler(job, q_params) + self._print_status(params, score, status, aggr_took) + return model, params, score, status, (cls_took+aggr_took) - out, status = self._error_handler(job, args) - if status == Status.SUCCESS: - self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} = {out["score"]:.5f} [took {out["time"]:.4f}s]') - elif status == Status.INVALID: - self._sout(f'the combination of hyperparameters {params} is invalid') - elif status == Status. 
- - - def _prepare_model(self, args): - params, training = args + def _prepare_nonaggr_model(self, params): model = deepcopy(self.model) - # overrides default parameters with the parameters being explored at this iteration - model.set_params(**params) - model.fit(training) - score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) - return {'model': model, 'params': params, 'score': score} + def job(params): + model.set_params(**params) + model.fit(self._training) + score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + return score + + score, status, took = self._error_handler(job, params) + self._print_status(params, score, status, took) + return model, params, score, status, took def _compute_scores_aggregative(self, training): - # break down the set of hyperparameters into two: classifier-specific, quantifier-specific cls_configs, q_configs = group_params(self.param_grid) # train all classifiers and get the predictions - partial_setups = qp.util.parallel( + self._training = training + cls_outs = qp.util.parallel( self._prepare_classifier, - ({'cls-params':params, 'training':training} for params in cls_configs), - seed=qp.environ.get('_R_SEED', None), - n_jobs=self.n_jobs, - asarray=False, - ) - - # filter out classifier configurations that yield any error - for setup in partial_setups: - if setup['status'] != Status.SUCCESS: - self._sout(f'-> classifier hyperparemters {setup["params"]} caused ' - f'error {setup["status"]} and will be ignored') - - partial_setups = [setup for setup in partial_setups if setup['status']==Status.SUCCESS] - - if len(partial_setups) == 0: - raise ValueError('No valid configuration found for the classifier.') - - # explore the quantifier-specific hyperparameters for each training configuration - scores = qp.util.parallel( - self._prepare_aggregation, - ({'q-params': setup[1], 'training': training, **setup[0]} for setup in itertools.product(partial_setups, q_configs)), + cls_configs, seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs ) - return scores + # filter out classifier configurations that yielded any error + success_outs = [] + for (model, predictions, status, took), cls_config in zip(cls_outs, cls_configs): + if status.success(): + success_outs.append((model, predictions, took, cls_config)) + else: + self.error_collector.append(status) + + if len(success_outs) == 0: + raise ValueError('No valid configuration found for the classifier!') + + # explore the quantifier-specific hyperparameters for each valid training configuration + aggr_configs = [(*out, q_config) for out, q_config in itertools.product(success_outs, q_configs)] + aggr_outs = qp.util.parallel( + self._prepare_aggregation, + aggr_configs, + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + + return aggr_outs def _compute_scores_nonaggregative(self, training): configs = expand_grid(self.param_grid) - - # pass a seed to parallel, so it is set in child processes + self._training = training scores = qp.util.parallel( - self._prepare_model, - ((params, training) for params in configs), + self._prepare_nonaggr_model, + configs, seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs ) return scores - def _compute_scores(self, training): - if isinstance(self.model, AggregativeQuantifier): - return self._compute_scores_aggregative(training) + def _print_status(self, params, score, status, took): + if status.success(): + self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} = {score:.5f} [took {took:.3f}s]') 
else: - return self._compute_scores_nonaggregative(training) + self._sout(f'error={status}') def fit(self, training: LabelledCollection): """ Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing @@ -251,31 +211,41 @@ class GridSearchQ(BaseQuantifier): tinit = time() + self.error_collector = [] + self._sout(f'starting model selection with n_jobs={self.n_jobs}') - results = self._compute_scores(training) + if isinstance(self.model, AggregativeQuantifier): + results = self._compute_scores_aggregative(training) + else: + results = self._compute_scores_nonaggregative(training) self.param_scores_ = {} self.best_score_ = None - for job_result in results: - score = job_result.get('score', None) - params = job_result['params'] - if score is not None: + for model, params, score, status, took in results: + if status.success(): if self.best_score_ is None or score < self.best_score_: self.best_score_ = score self.best_params_ = params - self.best_model_ = job_result['model'] + self.best_model_ = model self.param_scores_[str(params)] = score else: - self.param_scores_[str(params)] = job_result['status'] + self.param_scores_[str(params)] = status.status + self.error_collector.append(status) tend = time()-tinit if self.best_score_ is None: - raise TimeoutError('no combination of hyperparameters seem to work') + raise ValueError('no combination of hyperparameters seemed to work') self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) ' f'[took {tend:.4f}s]') + no_errors = len(self.error_collector) + if no_errors>0: + self._sout(f'warning: {no_errors} errors found') + for err in self.error_collector: + self._sout(f'\t{str(err)}') + if self.refit: if isinstance(self.protocol, OnLabelledCollectionProtocol): tinit = time() @@ -284,6 +254,7 @@ class GridSearchQ(BaseQuantifier): tend = time() - tinit self.refit_time_ = tend else: + # already checked raise RuntimeWarning(f'the model cannot be refit on the whole dataset') return self @@ -324,23 +295,42 @@ class GridSearchQ(BaseQuantifier): return self.best_model_ raise ValueError('best_model called before fit') + def _error_handler(self, func, params): + """ + Endorses one job with two returned values: the status, and the time of execution - def _error_handler(self, func, *args, **kwargs): + :param func: the function to be called + :param params: parameters of the function + :return: `tuple(out, status, time)` where `out` is the function output, + `status` is an enum value from `Status`, and `time` is the time it + took to complete the call + """ + + output = None + + def _handle(status, exception): + if self.raise_errors: + raise exception + else: + return ConfigStatus(params, status, str(e)) try: with timeout(self.timeout): - output = func(*args, **kwargs) - return output, Status.SUCCESS + tinit = time() + output = func(params) + status = ConfigStatus(params, Status.SUCCESS) - except TimeoutError: - return None, Status.TIMEOUT + except TimeoutError as e: + status = _handle(Status.TIMEOUT, str(e)) - except ValueError: - return None, Status.INVALID + except ValueError as e: + status = _handle(Status.INVALID, str(e)) - except Exception: - return None, Status.ERROR + except Exception as e: + status = _handle(Status.ERROR, str(e)) + took = time() - tinit + return output, status, took def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0): From 5caf555d65e37c0e6a1040be296c75a96d4eaa7b Mon Sep 17 00:00:00 2001 From: Alejandro 
Moreo Date: Mon, 18 Dec 2023 10:24:36 +0100 Subject: [PATCH 12/22] mergin --- examples/model_selection.py | 13 +- quapy/method/kdey.py | 234 ++++++++++++++++++++++++++++++++++++ 2 files changed, 241 insertions(+), 6 deletions(-) create mode 100644 quapy/method/kdey.py diff --git a/examples/model_selection.py b/examples/model_selection.py index 141cf91..50460fe 100644 --- a/examples/model_selection.py +++ b/examples/model_selection.py @@ -1,4 +1,5 @@ import quapy as qp +from method.kdey import KDEyML from quapy.method.non_aggregative import DMx from quapy.protocol import APP from quapy.method.aggregative import DMy @@ -11,12 +12,13 @@ from time import time In this example, we show how to perform model selection on a DistributionMatching quantifier. """ -model = DMy(LogisticRegression()) +model = KDEyML(LogisticRegression()) qp.environ['SAMPLE_SIZE'] = 100 qp.environ['N_JOBS'] = -1 -training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test +# training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test +training, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test with qp.util.temp_seed(0): @@ -39,14 +41,13 @@ with qp.util.temp_seed(0): param_grid = { 'classifier__C': np.logspace(-3,3,7), 'classifier__class_weight': ['balanced', None], - 'nbins': [8, 16, 32, 64, 'poooo'], + 'bandwidth': np.linspace(0.01, 0.2, 20), } tinit = time() - - # model = OLD_GridSearchQ( - model = qp.model_selection.GridSearchQ( + model = OLD_GridSearchQ( + # model = qp.model_selection.GridSearchQ( model=model, param_grid=param_grid, protocol=protocol, diff --git a/quapy/method/kdey.py b/quapy/method/kdey.py new file mode 100644 index 0000000..c6f9794 --- /dev/null +++ b/quapy/method/kdey.py @@ -0,0 +1,234 @@ +from typing import Union +import numpy as np +from sklearn.base import BaseEstimator +from sklearn.neighbors import KernelDensity + +import quapy as qp +from quapy.data import LabelledCollection +from quapy.method.aggregative import AggregativeProbabilisticQuantifier, cross_generate_predictions +import quapy.functional as F + +from sklearn.metrics.pairwise import rbf_kernel + + +class KDEBase: + + BANDWIDTH_METHOD = ['scott', 'silverman'] + + @classmethod + def _check_bandwidth(cls, bandwidth): + assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \ + f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values' + if isinstance(bandwidth, float): + assert 0 < bandwidth < 1, "the bandwith for KDEy should be in (0,1), since this method models the unit simplex" + + def get_kde_function(self, X, bandwidth): + return KernelDensity(bandwidth=bandwidth).fit(X) + + def pdf(self, kde, X): + return np.exp(kde.score_samples(X)) + + def get_mixture_components(self, X, y, n_classes, bandwidth): + return [self.get_kde_function(X[y == cat], bandwidth) for cat in range(n_classes)] + + + +class KDEyML(AggregativeProbabilisticQuantifier, KDEBase): + + def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=0): + self._check_bandwidth(bandwidth) + self.classifier = classifier + self.val_split = val_split + self.bandwidth = bandwidth + self.n_jobs = n_jobs + self.random_state=random_state + + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + if val_split is None: + val_split = self.val_split + + self.classifier, y, posteriors, _, _ = cross_generate_predictions( + data, self.classifier, val_split, 
probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs + ) + + self.mix_densities = self.get_mixture_components(posteriors, y, data.n_classes, self.bandwidth) + + return self + + def aggregate(self, posteriors: np.ndarray): + """ + Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood + of the data (i.e., that minimizes the negative log-likelihood) + + :param posteriors: instances in the sample converted into posterior probabilities + :return: a vector of class prevalence estimates + """ + np.random.RandomState(self.random_state) + epsilon = 1e-10 + n_classes = len(self.mix_densities) + test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities] + + def neg_loglikelihood(prev): + test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities)) + test_loglikelihood = np.log(test_mixture_likelihood + epsilon) + return -np.sum(test_loglikelihood) + + return F.optim_minimize(neg_loglikelihood, n_classes) + + +class KDEyHD(AggregativeProbabilisticQuantifier, KDEBase): + + def __init__(self, classifier: BaseEstimator, val_split=10, divergence: str='HD', + bandwidth=0.1, n_jobs=None, random_state=0, montecarlo_trials=10000): + + self._check_bandwidth(bandwidth) + self.classifier = classifier + self.val_split = val_split + self.divergence = divergence + self.bandwidth = bandwidth + self.n_jobs = n_jobs + self.random_state=random_state + self.montecarlo_trials = montecarlo_trials + + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + if val_split is None: + val_split = self.val_split + + self.classifier, y, posteriors, _, _ = cross_generate_predictions( + data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs + ) + + self.mix_densities = self.get_mixture_components(posteriors, y, data.n_classes, self.bandwidth) + + N = self.montecarlo_trials + rs = self.random_state + n = data.n_classes + self.reference_samples = np.vstack([kde_i.sample(N//n, random_state=rs) for kde_i in self.mix_densities]) + self.reference_classwise_densities = np.asarray([self.pdf(kde_j, self.reference_samples) for kde_j in self.mix_densities]) + self.reference_density = np.mean(self.reference_classwise_densities, axis=0) # equiv. to (uniform @ self.reference_classwise_densities) + + return self + + def aggregate(self, posteriors: np.ndarray): + # we retain all n*N examples (sampled from a mixture with uniform parameter), and then + # apply importance sampling (IS). 
In this version we compute D(p_alpha||q) with IS + n_classes = len(self.mix_densities) + + test_kde = self.get_kde_function(posteriors, self.bandwidth) + test_densities = self.pdf(test_kde, self.reference_samples) + + def f_squared_hellinger(u): + return (np.sqrt(u)-1)**2 + + # todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway + if self.divergence.lower() == 'hd': + f = f_squared_hellinger + else: + raise ValueError('only squared HD is currently implemented') + + epsilon = 1e-10 + qs = test_densities + epsilon + rs = self.reference_density + epsilon + iw = qs/rs #importance weights + p_class = self.reference_classwise_densities + epsilon + fracs = p_class/qs + + def divergence(prev): + # ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs + ps_div_qs = prev @ fracs + return np.mean( f(ps_div_qs) * iw ) + + return F.optim_minimize(divergence, n_classes) + + +class KDEyCS(AggregativeProbabilisticQuantifier): + + def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=0): + KDEBase._check_bandwidth(bandwidth) + self.classifier = classifier + self.val_split = val_split + self.bandwidth = bandwidth + self.n_jobs = n_jobs + self.random_state=random_state + + def gram_matrix_mix_sum(self, X, Y=None): + # this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y)) + # to contain pairwise evaluations of N(x|mu,Sigma1+Sigma2) with mu=y and Sigma1 and Sigma2 are + # two "scalar matrices" (h^2)*I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth) + h = self.bandwidth + variance = 2 * (h**2) + nD = X.shape[1] + gamma = 1/(2*variance) + norm_factor = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD))) + gram = norm_factor * rbf_kernel(X, Y, gamma=gamma) + return gram.sum() + + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): + if val_split is None: + val_split = self.val_split + + self.classifier, y, posteriors, _, _ = cross_generate_predictions( + data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs + ) + + assert all(sorted(np.unique(y)) == np.arange(data.n_classes)), \ + 'label name gaps not allowed in current implementation' + + n = data.n_classes + P = posteriors + + # counts_inv keeps track of the relative weight of each datapoint within its class + # (i.e., the weight in its KDE model) + counts_inv = 1 / (data.counts()) + + # tr_tr_sums corresponds to symbol \overline{B} in the paper + tr_tr_sums = np.zeros(shape=(n,n), dtype=float) + for i in range(n): + for j in range(n): + if i > j: + tr_tr_sums[i,j] = tr_tr_sums[j,i] + else: + block = self.gram_matrix_mix_sum(P[y == i], P[y == j] if i!=j else None) + tr_tr_sums[i, j] = block + + # keep track of these data structures for the test phase + self.Ptr = P + self.ytr = y + self.tr_tr_sums = tr_tr_sums + self.counts_inv = counts_inv + + return self + + + def aggregate(self, posteriors: np.ndarray): + Ptr = self.Ptr + Pte = posteriors + y = self.ytr + tr_tr_sums = self.tr_tr_sums + + M, nD = Pte.shape + Minv = (1/M) # t in the paper + n = Ptr.shape[1] + + + # becomes a constant that does not affect the optimization, no need to compute it + # partC = 0.5*np.log(self.gram_matrix_mix_sum(Pte) * Kinv * Kinv) + + # tr_te_sums corresponds to \overline{a}*(1/Li)*(1/M) in the paper (note the constants + # are already aggregated to tr_te_sums, so these multiplications are not carried out + 
# at each iteration of the optimization phase) + tr_te_sums = np.zeros(shape=n, dtype=float) + for i in range(n): + tr_te_sums[i] = self.gram_matrix_mix_sum(Ptr[y==i], Pte) + + def divergence(alpha): + # called \overline{r} in the paper + alpha_ratio = alpha * self.counts_inv + + # recal that tr_te_sums already accounts for the constant terms (1/Li)*(1/M) + partA = -np.log((alpha_ratio @ tr_te_sums) * Minv) + partB = 0.5 * np.log(alpha_ratio @ tr_tr_sums @ alpha_ratio) + return partA + partB #+ partC + + return F.optim_minimize(divergence, n) + From b882c234770af43cc6a1dc57ad6acedd7dae5f19 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 18 Dec 2023 15:43:36 +0100 Subject: [PATCH 13/22] kdey within the new grid search --- examples/model_selection.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/model_selection.py b/examples/model_selection.py index 3145005..485acd8 100644 --- a/examples/model_selection.py +++ b/examples/model_selection.py @@ -1,7 +1,7 @@ import quapy as qp from method.kdey import KDEyML from quapy.method.non_aggregative import DMx -from quapy.protocol import APP +from quapy.protocol import APP, UPP from quapy.method.aggregative import DMy from sklearn.linear_model import LogisticRegression from examples.comparing_gridsearch import OLD_GridSearchQ @@ -18,7 +18,7 @@ qp.environ['SAMPLE_SIZE'] = 100 qp.environ['N_JOBS'] = -1 # training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test -training, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test +training, test = qp.datasets.fetch_UCIMulticlassDataset('letter').train_test with qp.util.temp_seed(0): @@ -30,7 +30,7 @@ with qp.util.temp_seed(0): # values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]). # We devote 30% of the dataset for this exploration. 
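# --- editorial aside (not part of the original patch) --------------------------------------
# The hunk below replaces APP (the artificial-prevalence protocol, a grid of prevalence
# vectors over the simplex) with UPP, which draws prevalence vectors uniformly at random from
# the simplex; with the many classes of the 'letter' dataset an exhaustive APP grid becomes
# impractical. A hypothetical sketch of how either protocol could be instantiated on the
# validation split created just after this comment:
#
#   from quapy.protocol import APP, UPP
#   protocol = UPP(validation, repeats=100)                      # random uniform prevalences
#   # protocol = APP(validation, n_prevalences=21, repeats=10)   # grid-based alternative
# --------------------------------------------------------------------------------------------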
training, validation = training.split_stratified(train_prop=0.7) - protocol = APP(validation) + protocol = UPP(validation) # We will explore a classification-dependent hyper-parameter (e.g., the 'C' # hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter @@ -53,7 +53,7 @@ with qp.util.temp_seed(0): protocol=protocol, error='mae', # the error to optimize is the MAE (a quantification-oriented loss) refit=False, # retrain on the whole labelled set once done - raise_errors=False, + # raise_errors=False, verbose=True # show information as the process goes on ).fit(training) @@ -64,7 +64,7 @@ model = model.best_model_ # evaluation in terms of MAE # we use the same evaluation protocol (APP) on the test set -mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae') +mae_score = qp.evaluation.evaluate(model, protocol=UPP(test), error_metric='mae') print(f'MAE={mae_score:.5f}') print(f'model selection took {tend-tinit:.1f}s') From 2d12ce12b94f1f325062bf74f8f5a78e59f05643 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 18 Dec 2023 17:15:53 +0100 Subject: [PATCH 14/22] bugfix in APP --- examples/model_selection.py | 4 ++-- quapy/protocol.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/model_selection.py b/examples/model_selection.py index 485acd8..4e52784 100644 --- a/examples/model_selection.py +++ b/examples/model_selection.py @@ -46,8 +46,8 @@ with qp.util.temp_seed(0): tinit = time() - model = OLD_GridSearchQ( - # model = qp.model_selection.GridSearchQ( + # model = OLD_GridSearchQ( + model = qp.model_selection.GridSearchQ( model=model, param_grid=param_grid, protocol=protocol, diff --git a/quapy/protocol.py b/quapy/protocol.py index 7d7d1df..36362a9 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -257,8 +257,9 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ dimensions = self.data.n_classes s = F.prevalence_linspace(self.n_prevalences, repeats=1, smooth_limits_epsilon=self.smooth_limits_epsilon) + eps = (s[1]-s[0])/2 # handling floating rounding s = [s] * (dimensions - 1) - prevs = [p for p in itertools.product(*s, repeat=1) if (sum(p) <= 1.0)] + prevs = [p for p in itertools.product(*s, repeat=1) if (sum(p) < (1.+eps))] prevs = np.asarray(prevs).reshape(len(prevs), -1) if self.repeats > 1: prevs = np.repeat(prevs, self.repeats, axis=0) From 6d53b68d7ffd77b4424dd4a7ffdcb7d8a7d34d96 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 10 Jan 2024 15:39:27 +0100 Subject: [PATCH 15/22] refactoring aggregative --- .gitignore | 28 +++++ quapy/method/aggregative.py | 198 +++++++++++++++--------------------- 2 files changed, 108 insertions(+), 118 deletions(-) diff --git a/.gitignore b/.gitignore index b9703a3..f3a0a16 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,31 @@ dmypy.json .pyre/ *__pycache__* +*.pdf +*.zip +*.png +*.csv +*.pkl +*.dataframe + + +# other projects +LeQua2022 +MultiLabel +NewMethods +Ordinal +Retrieval +eDiscovery +poster-cikm +slides-cikm +slides-short-cikm +quick_experiment +svm_perf_quantification/svm_struct +svm_perf_quantification/svm_light +TweetSentQuant + + + + + + diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index f3779be..15e1463 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -302,36 +302,20 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): f'fit_classifier is set to False') +class BinaryAggregativeQuantifier(AggregativeQuantifier, 
BinaryQuantifier): + + @property + def pos_label(self): + return self.classifier.classes_[1] -# class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier): -# """ -# Abstract class for quantification methods that carry out an adjustment (or correction) that requires, -# at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that -# is not the training set. There are three ways in which this distinction can be made, depending on how -# the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion -# of training instances that should be devoted to validate, or (ii) an integer indicating the -# number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to -# use for validation. -# """ -# -# @property -# def val_split(self): -# return self.val_split_ -# -# @val_split.setter -# def val_split(self, val_split): -# if isinstance(val_split, LabelledCollection): -# print('warning: setting val_split with a LabelledCollection will be inefficient in' -# 'model selection. Rather pass the LabelledCollection at fit time') -# self.val_split_ = val_split -# -# def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None): -# print('method from CorrectionbasedAggregativeQuantifier') -# if predict_on is None: -# predict_on = self.val_split -# classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on) -# self.aggregation_fit(classif_predictions, data) -# return self + @property + def neg_label(self): + return self.classifier.classes_[0] + + def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None): + self._check_binary(data, self.__class__.__name__) + return super().fit(data, fit_classifier, val_split) + @@ -383,7 +367,7 @@ class ACC(AggregativeCrispQuantifier): :param n_jobs: number of parallel workers """ - def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): + def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None): self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) @@ -476,7 +460,7 @@ class PACC(AggregativeSoftQuantifier): :param n_jobs: number of parallel workers """ - def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): + def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None): self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) @@ -599,7 +583,7 @@ class EMQrecalib(AggregativeSoftQuantifier): can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set (default 0.4); or as an integer, indicating that the predictions are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value - for `k`); or as a collection defining the specific set of data to use for validation. + for `k`, default 5); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated. :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence; @@ -671,7 +655,7 @@ class EMQrecalib(AggregativeSoftQuantifier): return posteriors -class HDy(AggregativeSoftQuantifier, BinaryQuantifier): +class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): """ `Hellinger Distance y `_ (HDy). 
HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of @@ -683,10 +667,10 @@ class HDy(AggregativeSoftQuantifier, BinaryQuantifier): :param classifier: a sklearn's Estimator that generates a binary classifier :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out - validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself). + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).. """ - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): self.classifier = classifier self.val_split = val_split @@ -701,12 +685,10 @@ class HDy(AggregativeSoftQuantifier, BinaryQuantifier): :class:`quapy.data.base.LabelledCollection` indicating the validation set itself :return: self """ - - self._check_binary(data, self.__class__.__name__) P, y = classif_predictions.Xy - Px = P[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[y == self.classifier.classes_[1]] - self.Pxy0 = Px[y == self.classifier.classes_[0]] + Px = P[:, self.pos_label] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.pos_label] + self.Pxy0 = Px[y == self.neg_label] # pre-compute the histogram for positive and negative examples self.bins = np.linspace(10, 110, 11, dtype=int) # [10, 20, 30, ..., 100, 110] @@ -725,7 +707,7 @@ class HDy(AggregativeSoftQuantifier, BinaryQuantifier): # and the final estimated a priori probability was taken as the median of these 11 estimates." # (González-Castro, et al., 2013). - Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) + Px = classif_posteriors[:, self.pos_label] # takes only the P(y=+1|x) prev_estimations = [] # for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110] @@ -752,7 +734,7 @@ class HDy(AggregativeSoftQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -class DyS(AggregativeSoftQuantifier, BinaryQuantifier): +class DyS(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): """ `DyS framework `_ (DyS). DyS is a generalization of HDy method, using a Ternary Search in order to find the prevalence that @@ -761,14 +743,14 @@ class DyS(AggregativeSoftQuantifier, BinaryQuantifier): :param classifier: a sklearn's Estimator that generates a binary classifier :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out - validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself). + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).. :param n_bins: an int with the number of bins to use to compute the histograms. :param divergence: a str indicating the name of divergence (currently supported ones are "HD" or "topsoe"), or a callable function computes the divergence between two distributions (two equally sized arrays). :param tol: a float with the tolerance for the ternary search algorithm. 
""" - def __init__(self, classifier: BaseEstimator, val_split=0.4, n_bins=8, divergence: Union[str, Callable]= 'HD', tol=1e-05): + def __init__(self, classifier: BaseEstimator, val_split=5, n_bins=8, divergence: Union[str, Callable]= 'HD', tol=1e-05): self.classifier = classifier self.val_split = val_split self.tol = tol @@ -791,22 +773,17 @@ class DyS(AggregativeSoftQuantifier, BinaryQuantifier): # Left and right are the current bounds; the maximum is between them return (left + right) / 2 - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): - if val_split is None: - val_split = self.val_split - - self._check_binary(data, self.__class__.__name__) - self.classifier, validation = _training_helper( - self.classifier, data, fit_classifier, ensure_probabilistic=True, val_split=val_split) - Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == self.classifier.classes_[1]] - self.Pxy0 = Px[validation.labels == self.classifier.classes_[0]] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + Px, y = classif_predictions.Xy + Px = Px[:, self.pos_label] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.pos_label] + self.Pxy0 = Px[y == self.neg_label] self.Pxy1_density = np.histogram(self.Pxy1, bins=self.n_bins, range=(0, 1), density=True)[0] self.Pxy0_density = np.histogram(self.Pxy0, bins=self.n_bins, range=(0, 1), density=True)[0] return self def aggregate(self, classif_posteriors): - Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) + Px = classif_posteriors[:, self.pos_label] # takes only the P(y=+1|x) Px_test = np.histogram(Px, bins=self.n_bins, range=(0, 1), density=True)[0] divergence = get_divergence(self.divergence) @@ -819,37 +796,32 @@ class DyS(AggregativeSoftQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -class SMM(AggregativeSoftQuantifier, BinaryQuantifier): +class SMM(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): """ `SMM method `_ (SMM). SMM is a simplification of matching distribution methods where the representation of the examples - is created using the mean instead of a histogram. + is created using the mean instead of a histogram (conceptually equivalent to PACC). :param classifier: a sklearn's Estimator that generates a binary classifier. :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out - validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself). + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).. 
""" - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): self.classifier = classifier self.val_split = val_split - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): - if val_split is None: - val_split = self.val_split - - self._check_binary(data, self.__class__.__name__) - self.classifier, validation = _training_helper( - self.classifier, data, fit_classifier, ensure_probabilistic=True, val_split=val_split) - Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == self.classifier.classes_[1]] - self.Pxy0 = Px[validation.labels == self.classifier.classes_[0]] - self.Pxy1_mean = np.mean(self.Pxy1) - self.Pxy0_mean = np.mean(self.Pxy0) + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + Px, y = classif_predictions.Xy + Px = Px[:, self.pos_label] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.pos_label] + self.Pxy0 = Px[y == self.neg_label] + self.Pxy1_mean = np.mean(self.Pxy1) # equiv. TPR + self.Pxy0_mean = np.mean(self.Pxy0) # equiv. FPR return self def aggregate(self, classif_posteriors): - Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) + Px = classif_posteriors[:, self.pos_label] # takes only the P(y=+1|x) Px_mean = np.mean(Px) class1_prev = (Px_mean - self.Pxy0_mean)/(self.Pxy1_mean - self.Pxy0_mean) @@ -867,9 +839,9 @@ class DMy(AggregativeSoftQuantifier): :param classifier: a `sklearn`'s Estimator that generates a probabilistic classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the validation distribution. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the validation distribution should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). 
:param nbins: number of bins used to discretize the distributions (default 8) :param divergence: a string representing a divergence measure (currently, "HD" and "topsoe" are implemented) @@ -890,7 +862,7 @@ class DMy(AggregativeSoftQuantifier): self.n_jobs = n_jobs # @classmethod - # def HDy(cls, classifier, val_split=0.4, n_jobs=None): + # def HDy(cls, classifier, val_split=5, n_jobs=None): # from quapy.method.meta import MedianEstimator # # hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD') @@ -1114,7 +1086,7 @@ def newSVMRAE(svmperf_base=None, C=1): return newELM(svmperf_base, loss='mrae', C=C) -class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): +class ThresholdOptimization(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): """ Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by `Forman 2006 `_ and @@ -1127,31 +1099,20 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): + def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None): self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): - self._check_binary(data, "Threshold Optimization") - - if val_split is None: - val_split = self.val_split - - self.classifier, y, y_, classes, class_count = cross_generate_predictions( - data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs - ) - - self.cc = CC(self.classifier) - - self.tpr, self.fpr = self._optimize_threshold(y, y_) - + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + P, y = classif_predictions.Xy + self.tpr, self.fpr, self.threshold = self._optimize_threshold(y, P) return self @abstractmethod @@ -1173,14 +1134,15 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): :param y: predicted labels for the validation set (or for the training set via `k`-fold cross validation) :param probabilities: array-like with the posterior probabilities - :return: best `tpr` and `fpr` according to `_condition` + :return: best `tpr` and `fpr` and `threshold` according to `_condition` """ best_candidate_threshold_score = None best_tpr = 0 best_fpr = 0 - candidate_thresholds = np.unique(probabilities[:, 1]) + candidate_thresholds = np.unique(probabilities[:, self.pos_label]) for candidate_threshold in candidate_thresholds: - y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]] + y_ = self.classes_[1*(probabilities[:,1]>candidate_threshold)] + #y_ = [self.pos_label if p > candidate_threshold else self.neg_label for p in probabilities[:, 1]] TP, FP, FN, TN = self._compute_table(y, y_) tpr = self._compute_tpr(TP, FP) fpr = self._compute_fpr(FP, TN) @@ -1190,15 +1152,15 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): best_tpr = tpr best_fpr = fpr - return best_tpr, best_fpr + return best_tpr, best_fpr, best_candidate_threshold_score def aggregate(self, classif_predictions): - prevs_estim = self.cc.aggregate(classif_predictions) - if self.tpr - self.fpr == 0: - return prevs_estim - adjusted_prevs_estim = np.clip((prevs_estim[1] - self.fpr) / (self.tpr - self.fpr), 0, 1) - adjusted_prevs_estim = np.array((1 - adjusted_prevs_estim, adjusted_prevs_estim)) - return adjusted_prevs_estim + class_scores = classif_predictions[:, self.pos_label] + prev_estim = np.mean(class_scores > self.threshold) + if self.tpr - self.fpr != 0: + prevs_estim = np.clip((prev_estim - self.fpr) / (self.tpr - self.fpr), 0, 1) + prevs_estim = np.array((1 - prevs_estim, prevs_estim)) + return prevs_estim def _compute_table(self, y, y_): TP = np.logical_and(y == y_, y == self.classes_[1]).sum() @@ -1229,13 +1191,13 @@ class T50(ThresholdOptimization): :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. 
- This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) def _condition(self, tpr, fpr) -> float: @@ -1253,13 +1215,13 @@ class MAX(ThresholdOptimization): :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) def _condition(self, tpr, fpr) -> float: @@ -1278,13 +1240,13 @@ class X(ThresholdOptimization): :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) def _condition(self, tpr, fpr) -> float: @@ -1302,12 +1264,12 @@ class MS(ThresholdOptimization): :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. 
- This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) def _condition(self, tpr, fpr) -> float: @@ -1339,12 +1301,12 @@ class MS2(MS): :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. - This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data, or as an integer, indicating that the misclassification rates should be estimated via - `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, classifier: BaseEstimator, val_split=0.4): + def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) def _optimize_threshold(self, y, probabilities): From 896fa042d6c62181ba72f2065ea7ee86b55b43c1 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 17 Jan 2024 09:33:39 +0100 Subject: [PATCH 16/22] fixing threshold optimization-based techniques --- .gitignore | 1 + ..._checking_optim_threshold_modifications.py | 136 +++++++++++++ quapy/functional.py | 17 ++ quapy/method/aggregative.py | 190 ++++++++---------- 4 files changed, 241 insertions(+), 103 deletions(-) create mode 100644 examples/_uci_experiments_checking_optim_threshold_modifications.py diff --git a/.gitignore b/.gitignore index f3a0a16..8eaff3e 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,4 @@ TweetSentQuant +*.png diff --git a/examples/_uci_experiments_checking_optim_threshold_modifications.py b/examples/_uci_experiments_checking_optim_threshold_modifications.py new file mode 100644 index 0000000..51e5912 --- /dev/null +++ b/examples/_uci_experiments_checking_optim_threshold_modifications.py @@ -0,0 +1,136 @@ +from copy import deepcopy + +import quapy as qp +from sklearn.calibration import CalibratedClassifierCV +from sklearn.linear_model import LogisticRegression +from quapy.classification.methods import LowRankLogisticRegression +from quapy.method.meta import QuaNet +from quapy.protocol import APP +from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, HDy, newSVMAE, T50, X +from quapy.method.meta import EHDy +import numpy as np +import os +import pickle +import itertools +import argparse +import torch +import shutil + + +N_JOBS = -1 +CUDA_N_JOBS = 2 +ENSEMBLE_N_JOBS = -1 + +qp.environ['SAMPLE_SIZE'] = 100 + + +def newLR(): + return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1) + + +def calibratedLR(): + return 
CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)) + + +__C_range = np.logspace(-3, 3, 7) +lr_params = {'classifier__C': __C_range, 'classifier__class_weight': [None, 'balanced']} +svmperf_params = {'classifier__C': __C_range} + + +def quantification_models(): + yield 'acc', ACC(newLR()), lr_params + yield 'T50', T50(newLR()), lr_params + yield 'X', X(newLR()), lr_params + yield 'MAX', MAX(newLR()), lr_params + yield 'MS', MS(newLR()), lr_params + yield 'MS2', MS2(newLR()), lr_params + + +def evaluate_experiment(true_prevalences, estim_prevalences): + print('\nEvaluation Metrics:\n' + '=' * 22) + for eval_measure in [qp.error.mae, qp.error.mrae]: + err = eval_measure(true_prevalences, estim_prevalences) + print(f'\t{eval_measure.__name__}={err:.4f}') + print() + + +def result_path(path, dataset_name, model_name, run, optim_loss): + return os.path.join(path, f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl') + + +def is_already_computed(dataset_name, model_name, run, optim_loss): + return os.path.exists(result_path(args.results, dataset_name, model_name, run, optim_loss)) + + +def save_results(dataset_name, model_name, run, optim_loss, *results): + rpath = result_path(args.results, dataset_name, model_name, run, optim_loss) + qp.util.create_parent_dir(rpath) + with open(rpath, 'wb') as foo: + pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL) + + +def run(experiment): + optim_loss, dataset_name, (model_name, model, hyperparams) = experiment + if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return + + collection = qp.datasets.fetch_UCILabelledCollection(dataset_name) + for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=1)): + if is_already_computed(dataset_name, model_name, run=run, optim_loss=optim_loss): + print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5 already computed.') + continue + + print(f'running dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5') + # model selection (hyperparameter optimization for a quantification-oriented loss) + train, test = data.train_test + train, val = train.split_stratified() + if hyperparams is not None: + model_selection = qp.model_selection.GridSearchQ( + deepcopy(model), + param_grid=hyperparams, + protocol=APP(val, n_prevalences=21, repeats=25), + error=optim_loss, + refit=True, + timeout=60*60, + verbose=True + ) + model_selection.fit(data.training) + model = model_selection.best_model() + best_params = model_selection.best_params_ + else: + model.fit(data.training) + best_params = {} + + # model evaluation + true_prevalences, estim_prevalences = qp.evaluation.prediction( + model, + protocol=APP(test, n_prevalences=21, repeats=100) + ) + test_true_prevalence = data.test.prevalence() + + evaluate_experiment(true_prevalences, estim_prevalences) + save_results(dataset_name, model_name, run, optim_loss, + true_prevalences, estim_prevalences, + data.training.prevalence(), test_true_prevalence, + best_params) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') + parser.add_argument('--results', metavar='RESULT_PATH', type=str, default='results_tmp', + help='path to the directory where to store the results') + parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='../svm_perf_quantification', + help='path to the directory with svmperf') + args = parser.parse_args() + + print(f'Result folder: 
{args.results}') + np.random.seed(0) + + qp.environ['SVMPERF_HOME'] = args.svmperfpath + + optim_losses = ['mae'] + datasets = qp.datasets.UCI_DATASETS + + models = quantification_models() + qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) + + shutil.rmtree(args.checkpointdir, ignore_errors=True) diff --git a/quapy/functional.py b/quapy/functional.py index e29466f..d39b306 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -66,6 +66,23 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False): return prevalences +def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False): + """ + Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two + values representing a binary distribution. + + :param positive_prevalence: prevalence for the positive class + :param clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution + is valid. If False, it then checks that the value is in the valid range, and raises an error if not. + :return: np.ndarray of shape `(2,)` + """ + if clip_if_necessary: + positive_prevalence = np.clip(positive_prevalence, 0, 1) + else: + assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class' + return np.asarray([1-positive_prevalence, positive_prevalence]) + + def HellingerDistance(P, Q) -> float: """ Computes the Hellingher Distance (HD) between (discretized) distributions `P` and `Q`. diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 15e1463..6696402 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -159,28 +159,25 @@ class AggregativeQuantifier(BaseQuantifier, ABC): """ self.classifier_ = classifier - @abstractmethod def classify(self, instances): """ Provides the label predictions for the given instances. The predictions should respect the format expected by :meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for - non-probabilistic quantifiers + non-probabilistic quantifiers. The default one is "decision_function". :param instances: array-like of shape `(n_instances, n_features,)` :return: np.ndarray of shape `(n_instances,)` with label predictions """ - ... + return getattr(self, self._classifier_method())(instances) - @abstractmethod def _classifier_method(self): """ - Name of the method that must be used for issuing label predictions. + Name of the method that must be used for issuing label predictions. The default one is "decision_function". :return: string """ - ... + return 'decision_function' - @abstractmethod def _check_classifier(self, adapt_if_necessary=False): """ Guarantees that the underlying classifier implements the method required for issuing predictions, i.e., @@ -188,7 +185,8 @@ class AggregativeQuantifier(BaseQuantifier, ABC): :param adapt_if_necessary: if True, the method will try to comply with the required specifications """ - ... + assert hasattr(self.classifier, self._classifier_method()), \ + f"the method does not implement the required {self._classifier_method()} method" def quantify(self, instances): """ @@ -229,32 +227,15 @@ class AggregativeCrispQuantifier(AggregativeQuantifier, ABC): Quantifiers by implementing specifications about crisp predictions. """ - def classify(self, instances): - """ - Provides the label (crisp) predictions for the given instances. 
- - :param instances: array-like of shape `(n_instances, n_dimensions,)` - :return: np.ndarray of shape `(n_instances,)` with label predictions - """ - return self.classifier.predict(instances) - def _classifier_method(self): """ - Name of the method that must be used for issuing label predictions. + Name of the method that must be used for issuing label predictions. For crisp quantifiers, the method + is 'predict', that returns an array of shape `(n_instances,)` of label predictions. :return: the string "predict", i.e., the standard method name for scikit-learn hard predictions """ return 'predict' - def _check_classifier(self, adapt_if_necessary=False): - """ - Guarantees that the underlying classifier implements the method indicated by the :meth:`_classifier_method` - - :param adapt_if_necessary: unused, added for compatibility - """ - assert hasattr(self.classifier, self._classifier_method()), \ - f"the method does not implement the required {self._classifier_method()} method" - class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): """ @@ -264,18 +245,11 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): about soft predictions. """ - def classify(self, instances): - """ - Provides the posterior probabilities for the given instances. - - :param instances: array-like of shape `(n_instances, n_dimensions,)` - :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities - """ - return self.classifier.predict_proba(instances) - def _classifier_method(self): """ - Name of the method that must be used for issuing label predictions. + Name of the method that must be used for issuing label predictions. For probabilistic quantifiers, the method + is 'predict_proba', that returns an array of shape `(n_instances, n_dimensions,)` with posterior + probabilities. 
:return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions """ @@ -731,7 +705,7 @@ class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): prev_estimations.append(prev_selected) class1_prev = np.median(prev_estimations) - return np.asarray([1 - class1_prev, class1_prev]) + return F.as_binary_prevalence(class1_prev) class DyS(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): @@ -793,7 +767,7 @@ class DyS(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): return divergence(Px_train, Px_test) class1_prev = self._ternary_search(f=distribution_distance, left=0, right=1, tol=self.tol) - return np.asarray([1 - class1_prev, class1_prev]) + return F.as_binary_prevalence(class1_prev) class SMM(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): @@ -825,9 +799,7 @@ class SMM(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): Px_mean = np.mean(Px) class1_prev = (Px_mean - self.Pxy0_mean)/(self.Pxy1_mean - self.Pxy0_mean) - class1_prev = np.clip(class1_prev, 0, 1) - - return np.asarray([1 - class1_prev, class1_prev]) + return F.as_binary_prevalence(class1_prev, clip_if_necessary=True) class DMy(AggregativeSoftQuantifier): @@ -1086,7 +1058,7 @@ def newSVMRAE(svmperf_base=None, C=1): return newELM(svmperf_base, loss='mrae', C=C) -class ThresholdOptimization(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): +class ThresholdOptimization(BinaryAggregativeQuantifier): """ Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by `Forman 2006 `_ and @@ -1110,13 +1082,8 @@ class ThresholdOptimization(AggregativeSoftQuantifier, BinaryAggregativeQuantifi self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) - def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): - P, y = classif_predictions.Xy - self.tpr, self.fpr, self.threshold = self._optimize_threshold(y, P) - return self - @abstractmethod - def _condition(self, tpr, fpr) -> float: + def condition(self, tpr, fpr) -> float: """ Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized. @@ -1127,46 +1094,63 @@ class ThresholdOptimization(AggregativeSoftQuantifier, BinaryAggregativeQuantifi """ ... - def _optimize_threshold(self, y, probabilities): + def discard(self, tpr, fpr) -> bool: + """ + Indicates whether a combination of tpr and fpr should be discarded + + :param tpr: float, true positive rate + :param fpr: float, false positive rate + :return: true if the combination is to be discarded, false otherwise + """ + return (tpr + fpr) == 0 + + + def _eval_candidate_thresholds(self, decision_scores, y): """ Seeks for the best `tpr` and `fpr` according to the score obtained at different decision thresholds. The scoring function is implemented in function `_condition`. 
+ :param decision_scores: array-like with the classification scores :param y: predicted labels for the validation set (or for the training set via `k`-fold cross validation) - :param probabilities: array-like with the posterior probabilities :return: best `tpr` and `fpr` and `threshold` according to `_condition` """ - best_candidate_threshold_score = None - best_tpr = 0 - best_fpr = 0 - candidate_thresholds = np.unique(probabilities[:, self.pos_label]) + candidate_thresholds = np.unique(decision_scores) + + candidates = [] + scores = [] for candidate_threshold in candidate_thresholds: - y_ = self.classes_[1*(probabilities[:,1]>candidate_threshold)] - #y_ = [self.pos_label if p > candidate_threshold else self.neg_label for p in probabilities[:, 1]] + y_ = self.classes_[1 * (decision_scores > candidate_threshold)] TP, FP, FN, TN = self._compute_table(y, y_) tpr = self._compute_tpr(TP, FP) fpr = self._compute_fpr(FP, TN) - condition_score = self._condition(tpr, fpr) - if best_candidate_threshold_score is None or condition_score < best_candidate_threshold_score: - best_candidate_threshold_score = condition_score - best_tpr = tpr - best_fpr = fpr + if not self.discard(tpr, fpr): + candidate_score = self.condition(tpr, fpr) + candidates.append([tpr, fpr, candidate_threshold]) + scores.append(candidate_score) - return best_tpr, best_fpr, best_candidate_threshold_score + if len(candidates) == 0: + # if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard + # classify & count; this is akin to assign tpr=1, fpr=0, threshold=0 + tpr, fpr, threshold, score = 1, 0, 0, 0 + candidates.append([tpr, fpr, threshold, score]) - def aggregate(self, classif_predictions): - class_scores = classif_predictions[:, self.pos_label] - prev_estim = np.mean(class_scores > self.threshold) - if self.tpr - self.fpr != 0: - prevs_estim = np.clip((prev_estim - self.fpr) / (self.tpr - self.fpr), 0, 1) + candidates = np.asarray(candidates) + candidates = candidates[np.argsort(scores)] # sort candidates by candidate_score + + return candidates + + def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold): + prevs_estim = np.mean(classif_predictions > threshold) + if tpr - fpr != 0: + prevs_estim = np.clip((prevs_estim - fpr) / (tpr - fpr), 0, 1) prevs_estim = np.array((1 - prevs_estim, prevs_estim)) return prevs_estim def _compute_table(self, y, y_): - TP = np.logical_and(y == y_, y == self.classes_[1]).sum() - FP = np.logical_and(y != y_, y == self.classes_[0]).sum() - FN = np.logical_and(y != y_, y == self.classes_[1]).sum() - TN = np.logical_and(y == y_, y == self.classes_[0]).sum() + TP = np.logical_and(y == y_, y == self.pos_label).sum() + FP = np.logical_and(y != y_, y == self.neg_label).sum() + FN = np.logical_and(y != y_, y == self.pos_label).sum() + TN = np.logical_and(y == y_, y == self.neg_label).sum() return TP, FP, FN, TN def _compute_tpr(self, TP, FP): @@ -1179,13 +1163,23 @@ class ThresholdOptimization(AggregativeSoftQuantifier, BinaryAggregativeQuantifi return 0 return FP / (FP + TN) + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + # the standard behavior is to keep the best threshold only + decision_scores, y = classif_predictions.Xy + self.tpr, self.fpr, self.threshold = self._eval_candidate_thresholds(decision_scores, y)[0] + return self + + def aggregate(self, classif_predictions: np.ndarray): + # the standard behavior is to compute the adjusted count using the best threshold found + return 
self.aggregate_with_threshold(classif_predictions, self.tpr, self.fpr, self.threshold) + class T50(ThresholdOptimization): """ Threshold Optimization variant for :class:`ACC` as proposed by `Forman 2006 `_ and `Forman 2008 `_ that looks - for the threshold that makes `tpr` cosest to 0.5. + for the threshold that makes `tpr` closest to 0.5. The goal is to bring improved stability to the denominator of the adjustment. :param classifier: a sklearn's Estimator that generates a classifier @@ -1200,7 +1194,7 @@ class T50(ThresholdOptimization): def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) - def _condition(self, tpr, fpr) -> float: + def condition(self, tpr, fpr) -> float: return abs(tpr - 0.5) @@ -1224,7 +1218,7 @@ class MAX(ThresholdOptimization): def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) - def _condition(self, tpr, fpr) -> float: + def condition(self, tpr, fpr) -> float: # MAX strives to maximize (tpr - fpr), which is equivalent to minimize (fpr - tpr) return (fpr - tpr) @@ -1249,7 +1243,7 @@ class X(ThresholdOptimization): def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) - def _condition(self, tpr, fpr) -> float: + def condition(self, tpr, fpr) -> float: return abs(1 - (tpr + fpr)) @@ -1272,21 +1266,22 @@ class MS(ThresholdOptimization): def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) - def _condition(self, tpr, fpr) -> float: - pass + def condition(self, tpr, fpr) -> float: + return 1 - def _optimize_threshold(self, y, probabilities): - tprs = [] - fprs = [] - candidate_thresholds = np.unique(probabilities[:, 1]) - for candidate_threshold in candidate_thresholds: - y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]] - TP, FP, FN, TN = self._compute_table(y, y_) - tpr = self._compute_tpr(TP, FP) - fpr = self._compute_fpr(FP, TN) - tprs.append(tpr) - fprs.append(fpr) - return np.median(tprs), np.median(fprs) + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + # keeps all candidates + decision_scores, y = classif_predictions.Xy + self.tprs_fprs_thresholds = self._eval_candidate_thresholds(decision_scores, y) + return self + + def aggregate(self, classif_predictions: np.ndarray): + prevalences = [] + for tpr, fpr, threshold in self.tprs_fprs_thresholds: + pos_prev = self.aggregate_with_threshold(classif_predictions, tpr, fpr, threshold)[1] + prevalences.append(pos_prev) + median = np.median(prevalences) + return F.as_binary_prevalence(median) class MS2(MS): @@ -1309,19 +1304,8 @@ class MS2(MS): def __init__(self, classifier: BaseEstimator, val_split=5): super().__init__(classifier, val_split) - def _optimize_threshold(self, y, probabilities): - tprs = [0, 1] - fprs = [0, 1] - candidate_thresholds = np.unique(probabilities[:, 1]) - for candidate_threshold in candidate_thresholds: - y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]] - TP, FP, FN, TN = self._compute_table(y, y_) - tpr = self._compute_tpr(TP, FP) - fpr = self._compute_fpr(FP, TN) - if (tpr - fpr) > 0.25: - tprs.append(tpr) - fprs.append(fpr) - return np.median(tprs), np.median(fprs) + def discard(self, tpr, fpr) -> bool: + return (tpr-fpr) <= 0.25 class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier): From 9b2470c992e5a786c9d8ea98f08e8c6b5b03dd3f Mon Sep 17 
00:00:00 2001 From: Alex Moreo Date: Wed, 17 Jan 2024 19:15:50 +0100 Subject: [PATCH 17/22] testing optimization threshold variants, not working --- ..._checking_optim_threshold_modifications.py | 107 +++++++++--------- examples/uci_experiments.py | 2 +- quapy/method/aggregative.py | 6 +- quapy/model_selection.py | 2 +- 4 files changed, 56 insertions(+), 61 deletions(-) diff --git a/examples/_uci_experiments_checking_optim_threshold_modifications.py b/examples/_uci_experiments_checking_optim_threshold_modifications.py index 51e5912..e5ae184 100644 --- a/examples/_uci_experiments_checking_optim_threshold_modifications.py +++ b/examples/_uci_experiments_checking_optim_threshold_modifications.py @@ -13,13 +13,11 @@ import os import pickle import itertools import argparse -import torch -import shutil +from glob import glob +import pandas as pd N_JOBS = -1 -CUDA_N_JOBS = 2 -ENSEMBLE_N_JOBS = -1 qp.environ['SAMPLE_SIZE'] = 100 @@ -40,30 +38,23 @@ svmperf_params = {'classifier__C': __C_range} def quantification_models(): yield 'acc', ACC(newLR()), lr_params yield 'T50', T50(newLR()), lr_params - yield 'X', X(newLR()), lr_params - yield 'MAX', MAX(newLR()), lr_params + #yield 'X', X(newLR()), lr_params + #yield 'MAX', MAX(newLR()), lr_params yield 'MS', MS(newLR()), lr_params yield 'MS2', MS2(newLR()), lr_params -def evaluate_experiment(true_prevalences, estim_prevalences): - print('\nEvaluation Metrics:\n' + '=' * 22) - for eval_measure in [qp.error.mae, qp.error.mrae]: - err = eval_measure(true_prevalences, estim_prevalences) - print(f'\t{eval_measure.__name__}={err:.4f}') - print() + +def result_path(path, dataset_name, model_name, optim_loss): + return os.path.join(path, f'{dataset_name}-{model_name}-{optim_loss}.pkl') -def result_path(path, dataset_name, model_name, run, optim_loss): - return os.path.join(path, f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl') +def is_already_computed(dataset_name, model_name, optim_loss): + return os.path.exists(result_path(args.results, dataset_name, model_name, optim_loss)) -def is_already_computed(dataset_name, model_name, run, optim_loss): - return os.path.exists(result_path(args.results, dataset_name, model_name, run, optim_loss)) - - -def save_results(dataset_name, model_name, run, optim_loss, *results): - rpath = result_path(args.results, dataset_name, model_name, run, optim_loss) +def save_results(dataset_name, model_name, optim_loss, *results): + rpath = result_path(args.results, dataset_name, model_name, optim_loss) qp.util.create_parent_dir(rpath) with open(rpath, 'wb') as foo: pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL) @@ -73,45 +64,39 @@ def run(experiment): optim_loss, dataset_name, (model_name, model, hyperparams) = experiment if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return - collection = qp.datasets.fetch_UCILabelledCollection(dataset_name) - for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=1)): - if is_already_computed(dataset_name, model_name, run=run, optim_loss=optim_loss): - print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5 already computed.') - continue + if is_already_computed(dataset_name, model_name, optim_loss=optim_loss): + print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.') + return - print(f'running dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5') - # model selection (hyperparameter optimization for a quantification-oriented loss) - train, test = 
data.train_test - train, val = train.split_stratified() - if hyperparams is not None: - model_selection = qp.model_selection.GridSearchQ( - deepcopy(model), - param_grid=hyperparams, - protocol=APP(val, n_prevalences=21, repeats=25), - error=optim_loss, - refit=True, - timeout=60*60, - verbose=True - ) - model_selection.fit(data.training) - model = model_selection.best_model() - best_params = model_selection.best_params_ - else: - model.fit(data.training) - best_params = {} + dataset = qp.datasets.fetch_UCIDataset(dataset_name) - # model evaluation - true_prevalences, estim_prevalences = qp.evaluation.prediction( - model, - protocol=APP(test, n_prevalences=21, repeats=100) + print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}') + # model selection (hyperparameter optimization for a quantification-oriented loss) + train, test = dataset.train_test + train, val = train.split_stratified() + if hyperparams is not None: + model_selection = qp.model_selection.GridSearchQ( + deepcopy(model), + param_grid=hyperparams, + protocol=APP(val, n_prevalences=21, repeats=25), + error=optim_loss, + refit=True, + timeout=60*60, + verbose=True ) - test_true_prevalence = data.test.prevalence() + model_selection.fit(train) + model = model_selection.best_model() + else: + model.fit(dataset.training) - evaluate_experiment(true_prevalences, estim_prevalences) - save_results(dataset_name, model_name, run, optim_loss, - true_prevalences, estim_prevalences, - data.training.prevalence(), test_true_prevalence, - best_params) + # model evaluation + true_prevalences, estim_prevalences = qp.evaluation.prediction( + model, + protocol=APP(test, n_prevalences=21, repeats=100) + ) + + mae = qp.error.mae(true_prevalences, estim_prevalences) + save_results(dataset_name, model_name, optim_loss, mae) if __name__ == '__main__': @@ -133,4 +118,14 @@ if __name__ == '__main__': models = quantification_models() qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) - shutil.rmtree(args.checkpointdir, ignore_errors=True) + # open all results and show + df = pd.DataFrame(columns=('method', 'dataset', 'mae')) + for i, file in enumerate(glob(f'{args.results}/*.pkl')): + mae = float(pickle.load(open(file, 'rb'))[0]) + *dataset, method, _ = file.split('/')[-1].split('-') + dataset = '-'.join(dataset) + df.loc[i] = [method, dataset, mae] + + print(df.pivot_table(index='dataset', columns='method', values='mae')) + + diff --git a/examples/uci_experiments.py b/examples/uci_experiments.py index 2cf5bac..09efe5d 100644 --- a/examples/uci_experiments.py +++ b/examples/uci_experiments.py @@ -104,7 +104,7 @@ def run(experiment): timeout=60*60, verbose=True ) - model_selection.fit(data.training) + model_selection.fit(train) model = model_selection.best_model() best_params = model_selection.best_params_ else: diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 6696402..8053d47 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -168,7 +168,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): :param instances: array-like of shape `(n_instances, n_features,)` :return: np.ndarray of shape `(n_instances,)` with label predictions """ - return getattr(self, self._classifier_method())(instances) + return getattr(self.classifier, self._classifier_method())(instances) def _classifier_method(self): """ @@ -1142,8 +1142,8 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): def aggregate_with_threshold(self, classif_predictions, tpr, fpr, 
threshold): prevs_estim = np.mean(classif_predictions > threshold) if tpr - fpr != 0: - prevs_estim = np.clip((prevs_estim - fpr) / (tpr - fpr), 0, 1) - prevs_estim = np.array((1 - prevs_estim, prevs_estim)) + prevs_estim = (prevs_estim - fpr) / (tpr - fpr) + prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True) return prevs_estim def _compute_table(self, y, y_): diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 9017b99..307e7d3 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -112,7 +112,7 @@ class GridSearchQ(BaseQuantifier): return predictions predictions, status, took = self._error_handler(job, cls_params) - self._sout(f'[classifier fit] hyperparams={cls_params} status={status} [took {took:.3f}s]') + self._sout(f'[classifier fit] hyperparams={cls_params} [took {took:.3f}s]') return model, predictions, status, took def _prepare_aggregation(self, args): From c0d92a2083cc961f42702815278a5688d51eaab9 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 18 Jan 2024 18:22:22 +0100 Subject: [PATCH 18/22] optimization threshold variants fixed --- ..._checking_optim_threshold_modifications.py | 15 +++++++----- quapy/functional.py | 5 ++-- quapy/method/aggregative.py | 24 ++++++++++++------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/examples/_uci_experiments_checking_optim_threshold_modifications.py b/examples/_uci_experiments_checking_optim_threshold_modifications.py index e5ae184..79f7208 100644 --- a/examples/_uci_experiments_checking_optim_threshold_modifications.py +++ b/examples/_uci_experiments_checking_optim_threshold_modifications.py @@ -15,7 +15,7 @@ import itertools import argparse from glob import glob import pandas as pd - +from time import time N_JOBS = -1 @@ -38,10 +38,11 @@ svmperf_params = {'classifier__C': __C_range} def quantification_models(): yield 'acc', ACC(newLR()), lr_params yield 'T50', T50(newLR()), lr_params - #yield 'X', X(newLR()), lr_params - #yield 'MAX', MAX(newLR()), lr_params + yield 'X', X(newLR()), lr_params + yield 'MAX', MAX(newLR()), lr_params yield 'MS', MS(newLR()), lr_params - yield 'MS2', MS2(newLR()), lr_params + yield 'MS+', MS(newLR()), lr_params + # yield 'MS2', MS2(newLR()), lr_params @@ -115,8 +116,10 @@ if __name__ == '__main__': optim_losses = ['mae'] datasets = qp.datasets.UCI_DATASETS + tstart = time() models = quantification_models() qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) + tend = time() # open all results and show df = pd.DataFrame(columns=('method', 'dataset', 'mae')) @@ -126,6 +129,6 @@ if __name__ == '__main__': dataset = '-'.join(dataset) df.loc[i] = [method, dataset, mae] - print(df.pivot_table(index='dataset', columns='method', values='mae')) - + print(df.pivot_table(index='dataset', columns='method', values='mae', margins=True)) + print(f'took {(tend-tstart)}s') diff --git a/quapy/functional.py b/quapy/functional.py index d39b306..c6dc351 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -66,7 +66,7 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False): return prevalences -def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False): +def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary=False): """ Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two values representing a binary distribution. 
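A minimal usage sketch of the generalized helper (illustration only; it assumes a QuaPy build that already includes this patch series): a scalar produces a single `[1-p, p]` pair, while an array of positive-class prevalences produces one distribution per row — the `.T` added in the next hunk exists for exactly this reason, and the vectorized `aggregate_with_threshold` below relies on it.

    import numpy as np
    import quapy.functional as F

    # scalar input -> a single binary distribution [1-p, p]
    print(F.as_binary_prevalence(0.25))
    # [0.75 0.25]

    # array input -> one binary distribution per value; out-of-range values are clipped
    pos = np.asarray([0.1, 0.6, 1.3])
    print(F.as_binary_prevalence(pos, clip_if_necessary=True))
    # [[0.9 0.1]
    #  [0.4 0.6]
    #  [0.  1. ]]
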
@@ -80,7 +80,8 @@ def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False): positive_prevalence = np.clip(positive_prevalence, 0, 1) else: assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class' - return np.asarray([1-positive_prevalence, positive_prevalence]) + return np.asarray([1-positive_prevalence, positive_prevalence]).T + def HellingerDistance(P, Q) -> float: diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 8053d47..066f480 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1102,7 +1102,7 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): :param fpr: float, false positive rate :return: true if the combination is to be discarded, false otherwise """ - return (tpr + fpr) == 0 + return (tpr - fpr) == 0 def _eval_candidate_thresholds(self, decision_scores, y): @@ -1119,9 +1119,9 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): candidates = [] scores = [] for candidate_threshold in candidate_thresholds: - y_ = self.classes_[1 * (decision_scores > candidate_threshold)] + y_ = self.classes_[1 * (decision_scores >= candidate_threshold)] TP, FP, FN, TN = self._compute_table(y, y_) - tpr = self._compute_tpr(TP, FP) + tpr = self._compute_tpr(TP, FN) fpr = self._compute_fpr(FP, TN) if not self.discard(tpr, fpr): candidate_score = self.condition(tpr, fpr) @@ -1139,12 +1139,18 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): return candidates - def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold): - prevs_estim = np.mean(classif_predictions > threshold) - if tpr - fpr != 0: - prevs_estim = (prevs_estim - fpr) / (tpr - fpr) - prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True) - return prevs_estim + # def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold): + # prevs_estim = np.mean(classif_predictions >= threshold) + # if tpr - fpr != 0: + # prevs_estim = (prevs_estim - fpr) / (tpr - fpr) + # prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True) + # return prevs_estim + + def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds): + prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0) + prevs_estims = (prevs_estims - fprs) / (tprs - fprs) + prevs_estims = F.as_binary_prevalence(prevs_estims, clip_if_necessary=True) + return prevs_estims.squeeze() def _compute_table(self, y, y_): TP = np.logical_and(y == y_, y == self.pos_label).sum() From b68b58ad113bd9cb54c8aa73df2be7d35aaa4911 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 18 Jan 2024 18:26:40 +0100 Subject: [PATCH 19/22] fixed optimization threshold methods (again) --- quapy/method/aggregative.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 066f480..c6293c9 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1278,16 +1278,21 @@ class MS(ThresholdOptimization): def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): # keeps all candidates decision_scores, y = classif_predictions.Xy - self.tprs_fprs_thresholds = self._eval_candidate_thresholds(decision_scores, y) + tprs_fprs_thresholds = self._eval_candidate_thresholds(decision_scores, y) + self.tprs = tprs_fprs_thresholds[:, 0] + self.fprs = tprs_fprs_thresholds[:, 1] + self.thresholds = tprs_fprs_thresholds[:, 2] return self 
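# Illustrative sketch only (made-up numbers, not part of the commit): the per-threshold
# adjusted count that aggregate_with_threshold applies, and that MS.aggregate (just below)
# reduces with a median across the candidates stored in aggregation_fit.
import numpy as np

scores = np.asarray([0.1, 0.3, 0.55, 0.7, 0.9])  # hypothetical positive-class scores for a test sample
tprs = np.asarray([0.9, 0.8])                    # hypothetical rates estimated for two candidate thresholds
fprs = np.asarray([0.3, 0.1])
thresholds = np.asarray([0.4, 0.6])

raw = np.mean(scores[:, None] >= thresholds, axis=0)  # observed positive rates: [0.6, 0.4]
adjusted = (raw - fprs) / (tprs - fprs)               # adjusted counts: [0.5, 0.4286]
print(np.median(adjusted))                            # ~0.4643: the median-sweep estimate of the positive prevalence
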
def aggregate(self, classif_predictions: np.ndarray): - prevalences = [] - for tpr, fpr, threshold in self.tprs_fprs_thresholds: - pos_prev = self.aggregate_with_threshold(classif_predictions, tpr, fpr, threshold)[1] - prevalences.append(pos_prev) - median = np.median(prevalences) - return F.as_binary_prevalence(median) + prevalences = self.aggregate_with_threshold(classif_predictions, self.tprs, self.fprs, self.thresholds) + return np.median(prevalences, axis=0) + # prevalences = [] + # for tpr, fpr, threshold in self.tprs_fprs_thresholds: + # pos_prev = self.aggregate_with_threshold(classif_predictions, tpr, fpr, threshold)[1] + # prevalences.append(pos_prev) + # median = np.median(prevalences) + # return F.as_binary_prevalence(median) class MS2(MS): From 8d22ba39f41fb82d8d08c3a45504cc0ae2651b28 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 19 Jan 2024 18:11:22 +0100 Subject: [PATCH 20/22] method MS2 (Medium Sweep 2) fixed --- ..._checking_optim_threshold_modifications.py | 134 ------------------ quapy/method/aggregative.py | 22 +-- 2 files changed, 6 insertions(+), 150 deletions(-) delete mode 100644 examples/_uci_experiments_checking_optim_threshold_modifications.py diff --git a/examples/_uci_experiments_checking_optim_threshold_modifications.py b/examples/_uci_experiments_checking_optim_threshold_modifications.py deleted file mode 100644 index 79f7208..0000000 --- a/examples/_uci_experiments_checking_optim_threshold_modifications.py +++ /dev/null @@ -1,134 +0,0 @@ -from copy import deepcopy - -import quapy as qp -from sklearn.calibration import CalibratedClassifierCV -from sklearn.linear_model import LogisticRegression -from quapy.classification.methods import LowRankLogisticRegression -from quapy.method.meta import QuaNet -from quapy.protocol import APP -from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, HDy, newSVMAE, T50, X -from quapy.method.meta import EHDy -import numpy as np -import os -import pickle -import itertools -import argparse -from glob import glob -import pandas as pd -from time import time - -N_JOBS = -1 - -qp.environ['SAMPLE_SIZE'] = 100 - - -def newLR(): - return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1) - - -def calibratedLR(): - return CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)) - - -__C_range = np.logspace(-3, 3, 7) -lr_params = {'classifier__C': __C_range, 'classifier__class_weight': [None, 'balanced']} -svmperf_params = {'classifier__C': __C_range} - - -def quantification_models(): - yield 'acc', ACC(newLR()), lr_params - yield 'T50', T50(newLR()), lr_params - yield 'X', X(newLR()), lr_params - yield 'MAX', MAX(newLR()), lr_params - yield 'MS', MS(newLR()), lr_params - yield 'MS+', MS(newLR()), lr_params - # yield 'MS2', MS2(newLR()), lr_params - - - -def result_path(path, dataset_name, model_name, optim_loss): - return os.path.join(path, f'{dataset_name}-{model_name}-{optim_loss}.pkl') - - -def is_already_computed(dataset_name, model_name, optim_loss): - return os.path.exists(result_path(args.results, dataset_name, model_name, optim_loss)) - - -def save_results(dataset_name, model_name, optim_loss, *results): - rpath = result_path(args.results, dataset_name, model_name, optim_loss) - qp.util.create_parent_dir(rpath) - with open(rpath, 'wb') as foo: - pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL) - - -def run(experiment): - optim_loss, dataset_name, (model_name, model, hyperparams) = experiment - if dataset_name in ['acute.a', 'acute.b', 'iris.1']: 
return - - if is_already_computed(dataset_name, model_name, optim_loss=optim_loss): - print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.') - return - - dataset = qp.datasets.fetch_UCIDataset(dataset_name) - - print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}') - # model selection (hyperparameter optimization for a quantification-oriented loss) - train, test = dataset.train_test - train, val = train.split_stratified() - if hyperparams is not None: - model_selection = qp.model_selection.GridSearchQ( - deepcopy(model), - param_grid=hyperparams, - protocol=APP(val, n_prevalences=21, repeats=25), - error=optim_loss, - refit=True, - timeout=60*60, - verbose=True - ) - model_selection.fit(train) - model = model_selection.best_model() - else: - model.fit(dataset.training) - - # model evaluation - true_prevalences, estim_prevalences = qp.evaluation.prediction( - model, - protocol=APP(test, n_prevalences=21, repeats=100) - ) - - mae = qp.error.mae(true_prevalences, estim_prevalences) - save_results(dataset_name, model_name, optim_loss, mae) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') - parser.add_argument('--results', metavar='RESULT_PATH', type=str, default='results_tmp', - help='path to the directory where to store the results') - parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='../svm_perf_quantification', - help='path to the directory with svmperf') - args = parser.parse_args() - - print(f'Result folder: {args.results}') - np.random.seed(0) - - qp.environ['SVMPERF_HOME'] = args.svmperfpath - - optim_losses = ['mae'] - datasets = qp.datasets.UCI_DATASETS - - tstart = time() - models = quantification_models() - qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) - tend = time() - - # open all results and show - df = pd.DataFrame(columns=('method', 'dataset', 'mae')) - for i, file in enumerate(glob(f'{args.results}/*.pkl')): - mae = float(pickle.load(open(file, 'rb'))[0]) - *dataset, method, _ = file.split('/')[-1].split('-') - dataset = '-'.join(dataset) - df.loc[i] = [method, dataset, mae] - - print(df.pivot_table(index='dataset', columns='method', values='mae', margins=True)) - - print(f'took {(tend-tstart)}s') diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index c6293c9..c3e24db 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1131,21 +1131,15 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): if len(candidates) == 0: # if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard # classify & count; this is akin to assign tpr=1, fpr=0, threshold=0 - tpr, fpr, threshold, score = 1, 0, 0, 0 - candidates.append([tpr, fpr, threshold, score]) + tpr, fpr, threshold = 1, 0, 0 + candidates.append([tpr, fpr, threshold]) + scores.append(0) candidates = np.asarray(candidates) candidates = candidates[np.argsort(scores)] # sort candidates by candidate_score return candidates - # def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold): - # prevs_estim = np.mean(classif_predictions >= threshold) - # if tpr - fpr != 0: - # prevs_estim = (prevs_estim - fpr) / (tpr - fpr) - # prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True) - # return prevs_estim - def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds): 
prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0) prevs_estims = (prevs_estims - fprs) / (tprs - fprs) @@ -1286,13 +1280,9 @@ class MS(ThresholdOptimization): def aggregate(self, classif_predictions: np.ndarray): prevalences = self.aggregate_with_threshold(classif_predictions, self.tprs, self.fprs, self.thresholds) - return np.median(prevalences, axis=0) - # prevalences = [] - # for tpr, fpr, threshold in self.tprs_fprs_thresholds: - # pos_prev = self.aggregate_with_threshold(classif_predictions, tpr, fpr, threshold)[1] - # prevalences.append(pos_prev) - # median = np.median(prevalences) - # return F.as_binary_prevalence(median) + if prevalences.ndim==2: + prevalences = np.median(prevalences, axis=0) + return prevalences class MS2(MS): From 7137e7ac401798202439e3b741894738c18b853c Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 19 Jan 2024 18:18:38 +0100 Subject: [PATCH 21/22] updating change log file to give credit to T.Schumacher and colleagues for pointing out the errors in the threshold optimization methods --- quapy/CHANGE_LOG.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 1534038..43f4fdf 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -1,6 +1,9 @@ Change Log 0.1.8 ---------------- +- Fixed ThresholdOptimization methods (X, T50, MAX, MS and MS2). Thanks to Tobias Schumacher and colleagues for pointing + this out in Appendix A of "Schumacher, T., Strohmaier, M., & Lemmerich, F. (2021). A comparative evaluation of + quantification methods. arXiv:2103.03223v3 [cs.LG]" - Added HDx and DistributionMatchingX to non-aggregative quantifiers (see also the new example "comparing_HDy_HDx.py") - New UCI multiclass datasets added (thanks to Pablo González). The 5 UCI multiclass datasets are those corresponding to the following criteria: From ff00de18cbf0396c8d1ad2f12bb96bb3fe12fb2b Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 19 Jan 2024 18:24:38 +0100 Subject: [PATCH 22/22] updating documentation a bit --- quapy/method/aggregative.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index c3e24db..b7b7409 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1141,6 +1141,8 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): return candidates def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds): + # This function performs the adjusted count for given tpr, fpr, and threshold. 
+ # Note that, due to broadcasting, tprs, fprs, and thresholds could be arrays of length > 1 prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0) prevs_estims = (prevs_estims - fprs) / (tprs - fprs) prevs_estims = F.as_binary_prevalence(prevs_estims, clip_if_necessary=True) @@ -1164,8 +1166,8 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): return FP / (FP + TN) def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): - # the standard behavior is to keep the best threshold only decision_scores, y = classif_predictions.Xy + # the standard behavior is to keep the best threshold only self.tpr, self.fpr, self.threshold = self._eval_candidate_thresholds(decision_scores, y)[0] return self @@ -1270,8 +1272,8 @@ class MS(ThresholdOptimization): return 1 def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): - # keeps all candidates decision_scores, y = classif_predictions.Xy + # keeps all candidates tprs_fprs_thresholds = self._eval_candidate_thresholds(decision_scores, y) self.tprs = tprs_fprs_thresholds[:, 0] self.fprs = tprs_fprs_thresholds[:, 1]
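
As a quick way to see the net effect of this patch series, the fixed threshold-optimization quantifiers are used like any other aggregative method. The sketch below is illustrative only: it assumes QuaPy with these patches applied, and the dataset name is an assumption — any binary dataset listed in qp.datasets.UCI_DATASETS can replace it.

    import quapy as qp
    from sklearn.linear_model import LogisticRegression
    from quapy.method.aggregative import MS2   # T50, X, MAX and MS follow the same pattern

    # assumed dataset name; any binary UCI dataset from qp.datasets.UCI_DATASETS works here
    dataset = qp.datasets.fetch_UCIDataset('transfusion')
    train, test = dataset.train_test

    quantifier = MS2(LogisticRegression(max_iter=1000))
    quantifier.fit(train)                        # trains the classifier, then fits the aggregation function
    estim_prev = quantifier.quantify(test.instances)   # np.ndarray of shape (2,): [neg_prev, pos_prev]
    print('estimated:', estim_prev, 'true:', test.prevalence())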