@@ -50,6 +50,16 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
                  'model selection. Rather pass the LabelledCollection at fit time')
        self.val_split_ = val_split

    def _check_init_parameters(self):
        """
        Implements any check to be performed on the parameters of the init method before undertaking
        the training of the quantifier. This allows for a quick halt of the execution when the
        parameters are not valid.

        :return: Nothing. May raise an exception.
        """
        pass

    def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
        """
        Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
@@ -59,6 +69,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
            learner has been trained outside the quantifier.
        :return: self
        """
        self._check_init_parameters()
        classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
        self.aggregation_fit(classif_predictions, data)
        return self
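With this change, fit() invokes _check_init_parameters() before the (possibly expensive) classifier training, so subclasses can fail fast on invalid hyperparameters. A minimal, hypothetical subclass illustrating the hook (a sketch only, not part of this patch; the class and its alpha parameter are made up):

import numpy as np
from quapy.method.aggregative import AggregativeCrispQuantifier

class AlphaSmoothedCC(AggregativeCrispQuantifier):
    # hypothetical CC variant that smooths the counts with a factor alpha
    def __init__(self, classifier, val_split=5, alpha=0.05):
        self.classifier = classifier
        self.val_split = val_split
        self.alpha = alpha

    def _check_init_parameters(self):
        # runs at the very beginning of fit(), before the classifier is trained
        if not (0 <= self.alpha <= 1):
            raise ValueError('alpha must be in [0, 1]')

    def aggregation_fit(self, classif_predictions, data):
        self.n_classes_ = data.n_classes  # nothing else to fit for this toy method

    def aggregate(self, classif_predictions):
        counts = np.bincount(classif_predictions, minlength=self.n_classes_) + self.alpha
        return counts / counts.sum()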
@@ -113,8 +124,9 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
                    raise ValueError(f'invalid value {predict_on} in fit. '
                                     f'Specify an integer >1 for kFCV estimation.')
                else:
                    n_jobs = self.n_jobs if hasattr(self, 'n_jobs') else qp._get_njobs(None)
                    predictions = cross_val_predict(
                        self.classifier, *data.Xy, cv=predict_on, n_jobs=self.n_jobs, method=self._classifier_method())
                        self.classifier, *data.Xy, cv=predict_on, n_jobs=n_jobs, method=self._classifier_method())
                    predictions = LabelledCollection(predictions, data.y, classes=data.classes_)
                    self.classifier.fit(*data.Xy)
            else:
@@ -291,8 +303,6 @@ class BinaryAggregativeQuantifier(AggregativeQuantifier, BinaryQuantifier):
        return super().fit(data, fit_classifier, val_split)


# Methods
# ------------------------------------
class CC(AggregativeCrispQuantifier):
@@ -333,18 +343,28 @@ class ACC(AggregativeCrispQuantifier):
    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: specifies the data used for generating classifier predictions. This specification
        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
        be extracted from the training set (default 0.4); or as an integer, indicating that the predictions
        be extracted from the training set; or as an integer (default 5), indicating that the predictions
        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
        for `k`); or as a collection defining the specific set of data to use for validation.
        Alternatively, this set can be specified at fit time by indicating the exact set of data
        on which the predictions are to be generated.
    :param n_jobs: number of parallel workers
    :param solver: indicates the method to be used for obtaining the final estimates. The default choice
        is 'exact', which comes down to solving the system of linear equations `Ax=B` where `A` is a
        matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in
        binary) and `B` is the vector of prevalence values estimated via CC, as $x=A^{-1}B$. This solution
        might not exist for degenerate classifiers, in which case the method defaults to classify and count
        (i.e., does not attempt any adjustment).
        Another option is to search for the prevalence vector that minimizes the loss |Ax-B|. The latter is
        achieved by indicating solver='minimize'.
    """

    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None):
    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='exact'):
        self.classifier = classifier
        self.val_split = val_split
        self.n_jobs = qp._get_njobs(n_jobs)
        assert solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
        self.solver = solver

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        """
@@ -358,7 +378,7 @@ class ACC(AggregativeCrispQuantifier):

    @classmethod
    def getPteCondEstim(cls, classes, y, y_):
        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
        # estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a
        # document that belongs to yj ends up being classified as belonging to yi
        conf = confusion_matrix(y, y_, labels=classes).T
        conf = conf.astype(float)
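The hunk cuts off before the column-wise normalization; as a self-contained numeric check of what getPteCondEstim ends up returning (illustrative labels; the zero-count guard of the library code is omitted):

import numpy as np
from sklearn.metrics import confusion_matrix

y  = np.array([0, 0, 0, 0, 1, 1, 1, 1])   # true labels behind the validation predictions
y_ = np.array([0, 0, 0, 1, 1, 1, 1, 0])   # crisp predictions
conf = confusion_matrix(y, y_, labels=[0, 1]).T.astype(float)
conf /= conf.sum(axis=0, keepdims=True)    # column j becomes P(predicted=i | true=j)
print(conf)                                # [[0.75, 0.25], [0.25, 0.75]]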
@@ -372,10 +392,10 @@ class ACC(AggregativeCrispQuantifier):

    def aggregate(self, classif_predictions):
        prevs_estim = self.cc.aggregate(classif_predictions)
        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver)

    @classmethod
    def solve_adjustment(cls, PteCondEstim, prevs_estim):
    def solve_adjustment(cls, PteCondEstim, prevs_estim, solver='exact'):
        """
        Solves the linear system :math:`Ax = B` with :math:`A` = `PteCondEstim` and :math:`B` = `prevs_estim`
@@ -383,16 +403,24 @@ class ACC(AggregativeCrispQuantifier):
            of :math:`P(y_i|y_j)`, that is, the probability that an instance that belongs to :math:`y_j` ends up being
            classified as belonging to :math:`y_i`
        :param prevs_estim: a `np.ndarray` of shape `(n_classes,)` with the class prevalence estimates
        :param solver: indicates the method to use for solving the system of linear equations. Valid options are
            'exact' (tries to solve the system --may fail if the misclassification matrix has rank < n_classes) or
            'minimize' (minimizes a norm --always exists).
        :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates
        """
        A = PteCondEstim
        B = prevs_estim
        try:
            adjusted_prevs = np.linalg.solve(A, B)
            adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
            adjusted_prevs /= adjusted_prevs.sum()
        except np.linalg.LinAlgError:
            adjusted_prevs = prevs_estim  # no way to adjust them!
        if solver == 'exact':
            try:
                adjusted_prevs = np.linalg.solve(A, B)
                adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
                adjusted_prevs /= adjusted_prevs.sum()
            except np.linalg.LinAlgError:
                adjusted_prevs = prevs_estim  # no way to adjust them!
        elif solver == 'minimize':
            def loss(prev):
                return np.linalg.norm(A@prev - B)
            return F.optim_minimize(loss, n_classes=A.shape[0])
        return adjusted_prevs
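As a self-contained numpy illustration of what the two solvers compute, on made-up numbers (scipy's minimize stands in here for F.optim_minimize, the routine actually used above):

import numpy as np
from scipy.optimize import minimize

A = np.array([[0.9, 0.2],    # P(predicted=0|true=0), P(predicted=0|true=1)
              [0.1, 0.8]])   # P(predicted=1|true=0), P(predicted=1|true=1)
B = np.array([0.34, 0.66])   # prevalence estimated by classify & count

# solver='exact': invert the system Ax = B, then clip and renormalize
x_exact = np.linalg.solve(A, B)
x_exact = np.clip(x_exact, 0, 1)
x_exact /= x_exact.sum()

# solver='minimize': search the probability simplex for argmin ||Ax - B||
cons = ({'type': 'eq', 'fun': lambda x: x.sum() - 1},)
res = minimize(lambda x: np.linalg.norm(A @ x - B), x0=np.full(2, 0.5),
               bounds=[(0, 1)] * 2, constraints=cons)

print(x_exact, res.x)        # both are close to [0.2, 0.8] for this well-conditioned A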
@@ -427,7 +455,7 @@ class PACC(AggregativeSoftQuantifier):
    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: specifies the data used for generating classifier predictions. This specification
        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
        be extracted from the training set (default 0.4); or as an integer, indicating that the predictions
        be extracted from the training set; or as an integer (default 5), indicating that the predictions
        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
        for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data
        on which the predictions are to be generated.
@@ -455,7 +483,7 @@ class PACC(AggregativeSoftQuantifier):

    @classmethod
    def getPteCondEstim(cls, classes, y, y_):
        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
        # estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a
        # document that belongs to yj ends up being classified as belonging to yi
        n_classes = len(classes)
        confusion = np.eye(n_classes)
@@ -475,17 +503,100 @@ class EMQ(AggregativeSoftQuantifier):
    probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via
    maximum-likelihood estimation, in a mutually recursive way, until convergence.

    This implementation also gives access to the heuristics proposed by `Alexandari et al. paper
    <http://proceedings.mlr.press/v119/alexandari20a.html>`_. These heuristics consist of using, as the training
    prevalence, an estimate of it obtained via k-fold cross validation (instead of the true training prevalence),
    and to recalibrate the posterior probabilities of the classifier.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: specifies the data used for generating classifier predictions. This specification
        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
        be extracted from the training set; or as an integer, indicating that the predictions
        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
        for `k`, default 5); or as a collection defining the specific set of data to use for validation.
        Alternatively, this set can be specified at fit time by indicating the exact set of data
        on which the predictions are to be generated. This hyperparameter is only meant to be used when the
        heuristics are to be applied, i.e., if a recalibration is required. The default value is None (meaning
        the recalibration is not required). In case this hyperparameter is set to a value other than None, but
        the recalibration is not required (recalib=None), a warning message will be raised.
    :param exact_train_prev: set to True (default) for using the true training prevalence as the initial observation;
        set to False for computing the training prevalence as an estimate of it, i.e., as the expected
        value of the posterior probabilities of the training instances.
    :param recalib: a string indicating the method of recalibration.
        Available choices include "nbvs" (No-Bias Vector Scaling), "bcts" (Bias-Corrected Temperature Scaling,
        default), "ts" (Temperature Scaling), and "vs" (Vector Scaling). Default is None (no recalibration).
    :param n_jobs: number of parallel workers. Only used for recalibrating the classifier if `val_split` is set to
        an integer `k` --the number of folds.
    """

    MAX_ITER = 1000
    EPSILON = 1e-4
    def __init__(self, classifier: BaseEstimator):
    def __init__(self, classifier: BaseEstimator, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None):
        self.classifier = classifier
        self.val_split = val_split
        self.exact_train_prev = exact_train_prev
        self.recalib = recalib
        self.n_jobs = n_jobs

    @classmethod
    def EMQ_BCTS(cls, classifier: BaseEstimator, n_jobs=None):
        """
        Constructs an instance of EMQ using the best configuration found in the `Alexandari et al. paper
        <http://proceedings.mlr.press/v119/alexandari20a.html>`_, i.e., one that relies on Bias-Corrected Temperature
        Scaling (BCTS) as a recalibration function, and that uses an estimate of the training prevalence instead of
        the true training prevalence.

        :param classifier: a sklearn's Estimator that generates a classifier
        :param n_jobs: number of parallel workers.
        :return: An instance of EMQ with BCTS
        """
        return EMQ(classifier, val_split=5, exact_train_prev=False, recalib='bcts', n_jobs=n_jobs)
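With the new constructor signature, the Alexandari et al. heuristics can be requested either explicitly or through the shortcut above; a small usage sketch (classifier choice is arbitrary):

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import EMQ

# plain SLD: no validation split and no recalibration needed
sld = EMQ(LogisticRegression())

# SLD with the heuristics, spelled out explicitly...
sld_heur = EMQ(LogisticRegression(), val_split=5, exact_train_prev=False, recalib='bcts')

# ...or via the shortcut
sld_heur = EMQ.EMQ_BCTS(LogisticRegression())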
    def _check_init_parameters(self):
        if self.val_split is not None:
            if self.exact_train_prev and self.recalib is None:
                raise RuntimeWarning(f'The parameter {self.val_split=} was specified for EMQ, while the parameters '
                                     f'{self.exact_train_prev=} and {self.recalib=}. This has no effect and causes an unnecessary '
                                     f'overload.')

    def classify(self, instances):
        """
        Provides the posterior probabilities for the given instances. If the classifier was required
        to be recalibrated, then these posteriors are recalibrated accordingly.

        :param instances: array-like of shape `(n_instances, n_dimensions,)`
        :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities
        """
        posteriors = self.classifier.predict_proba(instances)
        if hasattr(self, 'calibration_function') and self.calibration_function is not None:
            posteriors = self.calibration_function(posteriors)
        return posteriors
    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        self.train_prevalence = data.prevalence()
        if self.recalib is not None:
            P, y = classif_predictions.Xy
            if self.recalib == 'nbvs':
                calibrator = NoBiasVectorScaling()
            elif self.recalib == 'bcts':
                calibrator = TempScaling(bias_positions='all')
            elif self.recalib == 'ts':
                calibrator = TempScaling()
            elif self.recalib == 'vs':
                calibrator = VectorScaling()
            else:
                raise ValueError('invalid param argument for recalibration method; available ones are '
                                 '"nbvs", "bcts", "ts", and "vs".')

            self.calibration_function = calibrator(P, np.eye(data.n_classes)[y], posterior_supplied=True)

        if self.exact_train_prev:
            self.train_prevalence = data.prevalence()
        else:
            train_posteriors = classif_predictions.X
            if self.recalib is not None:
                train_posteriors = self.calibration_function(train_posteriors)
            self.train_prevalence = F.prevalence_from_probabilities(train_posteriors)

    def aggregate(self, classif_posteriors, epsilon=EPSILON):
        priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon)
@@ -542,93 +653,6 @@ class EMQ(AggregativeSoftQuantifier):
        return qs, ps
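The EM routine itself lies outside this hunk; for reference, a standalone sketch of the Saerens-Latinne-Decaestecker update that EMQ.EM performs (convergence test simplified, not the library code):

import numpy as np

def sld_em(tr_prev, posteriors, epsilon=1e-4, max_iter=1000):
    qs = np.copy(tr_prev)                        # current prevalence estimate
    s, converged = 0, False
    while not converged and s < max_iter:
        # E-step: rescale the posteriors by the ratio current-prior / training-prior
        ps = (qs / tr_prev) * posteriors
        ps /= ps.sum(axis=1, keepdims=True)
        # M-step: the new prevalence is the expected value of the rescaled posteriors
        qs_new = ps.mean(axis=0)
        converged = np.abs(qs_new - qs).mean() < epsilon
        qs, s = qs_new, s + 1
    return qs, ps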
class EMQrecalib(AggregativeSoftQuantifier):
    """
    `Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
    aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by
    `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_.

    These heuristics consist of using, as the training prevalence, an estimate of it obtained via k-fold cross
    validation (instead of the true training prevalence), and to recalibrate the posterior probabilities of
    the classifier.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: specifies the data used for generating classifier predictions. This specification
        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
        be extracted from the training set (default 0.4); or as an integer, indicating that the predictions
        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
        for `k`, default 5); or as a collection defining the specific set of data to use for validation.
        Alternatively, this set can be specified at fit time by indicating the exact set of data
        on which the predictions are to be generated.
    :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence;
        or set to False for computing the training prevalence as an estimate of it, i.e., as the expected
        value of the posterior probabilities of the training instances
    :param recalib: a string indicating the method of recalibration.
        Available choices include "nbvs" (No-Bias Vector Scaling), "bcts" (Bias-Corrected Temperature Scaling,
        default), "ts" (Temperature Scaling), and "vs" (Vector Scaling).
    :param n_jobs: number of parallel workers
    """

    MAX_ITER = 1000
    EPSILON = 1e-4

    def __init__(self, classifier: BaseEstimator, val_split=5, exact_train_prev=False, recalib='bcts', n_jobs=None):
        self.classifier = classifier
        self.val_split = val_split
        self.exact_train_prev = exact_train_prev
        self.recalib = recalib
        self.n_jobs = n_jobs

    def classify(self, instances):
        """
        Provides the posterior probabilities for the given instances. If the classifier is
        recalibrated, then these posteriors will be recalibrated accordingly.

        :param instances: array-like of shape `(n_instances, n_dimensions,)`
        :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities
        """
        posteriors = self.classifier.predict_proba(instances)
        if hasattr(self, 'calibration_function') and self.calibration_function is not None:
            posteriors = self.calibration_function(posteriors)
        return posteriors

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        if self.recalib is not None:
            P, y = classif_predictions.Xy
            if self.recalib == 'nbvs':
                calibrator = NoBiasVectorScaling()
            elif self.recalib == 'bcts':
                calibrator = TempScaling(bias_positions='all')
            elif self.recalib == 'ts':
                calibrator = TempScaling()
            elif self.recalib == 'vs':
                calibrator = VectorScaling()
            else:
                raise ValueError('invalid param argument for recalibration method; available ones are '
                                 '"nbvs", "bcts", "ts", and "vs".')

            self.calibration_function = calibrator(P, np.eye(data.n_classes)[y], posterior_supplied=True)

        if self.exact_train_prev:
            self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
        else:
            if self.recalib is not None:
                train_posteriors = self.classify(data.X)
            else:
                train_posteriors = classif_predictions.X

            self.train_prevalence = np.mean(train_posteriors, axis=0)

    def aggregate(self, classif_posteriors, epsilon=EPSILON):
        priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon)
        return priors

    def predict_proba(self, instances, epsilon=EPSILON):
        classif_posteriors = self.classify(instances)
        priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon)
        return posteriors
class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
    """
    `Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
@@ -722,14 +746,16 @@ class DyS(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
    :param divergence: a str indicating the name of divergence (currently supported ones are "HD" or "topsoe"), or a
        callable function that computes the divergence between two distributions (two equally sized arrays).
    :param tol: a float with the tolerance for the ternary search algorithm.
    :param n_jobs: number of parallel workers.
    """

    def __init__(self, classifier: BaseEstimator, val_split=5, n_bins=8, divergence: Union[str, Callable]= 'HD', tol=1e-05):
    def __init__(self, classifier: BaseEstimator, val_split=5, n_bins=8, divergence: Union[str, Callable]= 'HD', tol=1e-05, n_jobs=None):
        self.classifier = classifier
        self.val_split = val_split
        self.tol = tol
        self.divergence = divergence
        self.n_bins = n_bins
        self.n_jobs = n_jobs

    def _ternary_search(self, f, left, right, tol):
        """
@@ -1058,259 +1084,6 @@ def newSVMRAE(svmperf_base=None, C=1):
    return newELM(svmperf_base, loss='mrae', C=C)


class ThresholdOptimization(BinaryAggregativeQuantifier):
    """
    Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by
    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_.
    The goal is to bring improved stability to the denominator of the adjustment.
    The different variants are based on different heuristics for choosing a decision threshold
    that would allow for more true positives and many more false positives, on the grounds this
    would deliver larger denominators.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
        misclassification rates are to be estimated.
        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
        validation data, or as an integer, indicating that the misclassification rates should be estimated via
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None):
        self.classifier = classifier
        self.val_split = val_split
        self.n_jobs = qp._get_njobs(n_jobs)

    @abstractmethod
    def condition(self, tpr, fpr) -> float:
        """
        Implements the criterion according to which the threshold should be selected.
        This function should return the (float) score to be minimized.

        :param tpr: float, true positive rate
        :param fpr: float, false positive rate
        :return: float, a score for the given `tpr` and `fpr`
        """
        ...

    def discard(self, tpr, fpr) -> bool:
        """
        Indicates whether a combination of tpr and fpr should be discarded

        :param tpr: float, true positive rate
        :param fpr: float, false positive rate
        :return: true if the combination is to be discarded, false otherwise
        """
        return (tpr - fpr) == 0

    def _eval_candidate_thresholds(self, decision_scores, y):
        """
        Seeks for the best `tpr` and `fpr` according to the score obtained at different
        decision thresholds. The scoring function is implemented in function `_condition`.

        :param decision_scores: array-like with the classification scores
        :param y: predicted labels for the validation set (or for the training set via `k`-fold cross validation)
        :return: best `tpr` and `fpr` and `threshold` according to `_condition`
        """
        candidate_thresholds = np.unique(decision_scores)

        candidates = []
        scores = []
        for candidate_threshold in candidate_thresholds:
            y_ = self.classes_[1 * (decision_scores >= candidate_threshold)]
            TP, FP, FN, TN = self._compute_table(y, y_)
            tpr = self._compute_tpr(TP, FN)
            fpr = self._compute_fpr(FP, TN)
            if not self.discard(tpr, fpr):
                candidate_score = self.condition(tpr, fpr)
                candidates.append([tpr, fpr, candidate_threshold])
                scores.append(candidate_score)

        if len(candidates) == 0:
            # if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard
            # classify & count; this is akin to assign tpr=1, fpr=0, threshold=0
            tpr, fpr, threshold = 1, 0, 0
            candidates.append([tpr, fpr, threshold])
            scores.append(0)

        candidates = np.asarray(candidates)
        candidates = candidates[np.argsort(scores)]  # sort candidates by candidate_score

        return candidates

    def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds):
        # This function performs the adjusted count for given tpr, fpr, and threshold.
        # Note that, due to broadcasting, tprs, fprs, and thresholds could be arrays of length > 1
        prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0)
        prevs_estims = (prevs_estims - fprs) / (tprs - fprs)
        prevs_estims = F.as_binary_prevalence(prevs_estims, clip_if_necessary=True)
        return prevs_estims.squeeze()
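A quick numeric illustration of the threshold-based adjusted count performed above, with made-up rates and scores (the library clips through F.as_binary_prevalence; np.clip is used here for brevity):

import numpy as np

scores = np.array([0.1, 0.3, 0.4, 0.6, 0.8, 0.9])     # classifier scores for 6 test items
tpr, fpr, threshold = 0.80, 0.10, 0.5

observed_pos_rate = np.mean(scores >= threshold)       # 3/6 = 0.5
adjusted = (observed_pos_rate - fpr) / (tpr - fpr)     # (0.5 - 0.1) / 0.7 ≈ 0.571
prevalence = np.clip([1 - adjusted, adjusted], 0, 1)   # ≈ [0.429, 0.571]
print(prevalence)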
    def _compute_table(self, y, y_):
        TP = np.logical_and(y == y_, y == self.pos_label).sum()
        FP = np.logical_and(y != y_, y == self.neg_label).sum()
        FN = np.logical_and(y != y_, y == self.pos_label).sum()
        TN = np.logical_and(y == y_, y == self.neg_label).sum()
        return TP, FP, FN, TN

    def _compute_tpr(self, TP, FP):
        if TP + FP == 0:
            return 1
        return TP / (TP + FP)

    def _compute_fpr(self, FP, TN):
        if FP + TN == 0:
            return 0
        return FP / (FP + TN)

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        decision_scores, y = classif_predictions.Xy
        # the standard behavior is to keep the best threshold only
        self.tpr, self.fpr, self.threshold = self._eval_candidate_thresholds(decision_scores, y)[0]
        return self

    def aggregate(self, classif_predictions: np.ndarray):
        # the standard behavior is to compute the adjusted count using the best threshold found
        return self.aggregate_with_threshold(classif_predictions, self.tpr, self.fpr, self.threshold)


class T50(ThresholdOptimization):
    """
    Threshold Optimization variant for :class:`ACC` as proposed by
    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
    for the threshold that makes `tpr` closest to 0.5.
    The goal is to bring improved stability to the denominator of the adjustment.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
        misclassification rates are to be estimated.
        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
        validation data, or as an integer, indicating that the misclassification rates should be estimated via
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

    def __init__(self, classifier: BaseEstimator, val_split=5):
        super().__init__(classifier, val_split)

    def condition(self, tpr, fpr) -> float:
        return abs(tpr - 0.5)


class MAX(ThresholdOptimization):
    """
    Threshold Optimization variant for :class:`ACC` as proposed by
    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
    for the threshold that maximizes `tpr-fpr`.
    The goal is to bring improved stability to the denominator of the adjustment.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
        misclassification rates are to be estimated.
        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
        validation data, or as an integer, indicating that the misclassification rates should be estimated via
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

    def __init__(self, classifier: BaseEstimator, val_split=5):
        super().__init__(classifier, val_split)

    def condition(self, tpr, fpr) -> float:
        # MAX strives to maximize (tpr - fpr), which is equivalent to minimize (fpr - tpr)
        return (fpr - tpr)


class X(ThresholdOptimization):
    """
    Threshold Optimization variant for :class:`ACC` as proposed by
    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
    for the threshold that yields `tpr=1-fpr`.
    The goal is to bring improved stability to the denominator of the adjustment.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
        misclassification rates are to be estimated.
        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
        validation data, or as an integer, indicating that the misclassification rates should be estimated via
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

    def __init__(self, classifier: BaseEstimator, val_split=5):
        super().__init__(classifier, val_split)

    def condition(self, tpr, fpr) -> float:
        return abs(1 - (tpr + fpr))


class MS(ThresholdOptimization):
    """
    Median Sweep. Threshold Optimization variant for :class:`ACC` as proposed by
    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates
    class prevalence estimates for all decision thresholds and returns the median of them all.
    The goal is to bring improved stability to the denominator of the adjustment.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
        misclassification rates are to be estimated.
        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
        validation data, or as an integer, indicating that the misclassification rates should be estimated via
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """
    def __init__(self, classifier: BaseEstimator, val_split=5):
        super().__init__(classifier, val_split)

    def condition(self, tpr, fpr) -> float:
        return 1

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        decision_scores, y = classif_predictions.Xy
        # keeps all candidates
        tprs_fprs_thresholds = self._eval_candidate_thresholds(decision_scores, y)
        self.tprs = tprs_fprs_thresholds[:, 0]
        self.fprs = tprs_fprs_thresholds[:, 1]
        self.thresholds = tprs_fprs_thresholds[:, 2]
        return self

    def aggregate(self, classif_predictions: np.ndarray):
        prevalences = self.aggregate_with_threshold(classif_predictions, self.tprs, self.fprs, self.thresholds)
        if prevalences.ndim==2:
            prevalences = np.median(prevalences, axis=0)
        return prevalences

class MS2(MS):
    """
    Median Sweep 2. Threshold Optimization variant for :class:`ACC` as proposed by
    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates
    class prevalence estimates for all decision thresholds and returns the median of those cases in
    which `tpr-fpr>0.25`.
    The goal is to bring improved stability to the denominator of the adjustment.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
        misclassification rates are to be estimated.
        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
        validation data, or as an integer, indicating that the misclassification rates should be estimated via
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """
    def __init__(self, classifier: BaseEstimator, val_split=5):
        super().__init__(classifier, val_split)

    def discard(self, tpr, fpr) -> bool:
        return (tpr-fpr) <= 0.25


class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier):
    """
    Allows any binary quantifier to perform quantification on single-label datasets.
@@ -1476,6 +1249,26 @@ class AggregativeMedianEstimator(BinaryQuantifier):
            )
        return np.median(prev_preds, axis=0)


#---------------------------------------------------------------
# imports
#---------------------------------------------------------------

from . import _threshold_optim

T50 = _threshold_optim.T50
MAX = _threshold_optim.MAX
X = _threshold_optim.X
MS = _threshold_optim.MS
MS2 = _threshold_optim.MS2


from . import _kdey

KDEyML = _kdey.KDEyML
KDEyHD = _kdey.KDEyHD
KDEyCS = _kdey.KDEyCS


#---------------------------------------------------------------
# aliases
#---------------------------------------------------------------
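After this refactoring, the threshold-based methods and the KDEy family live in their own submodules but remain importable from quapy.method.aggregative through the aliases above; a minimal usage sketch with synthetic data and an arbitrary classifier (assuming the public fit/quantify API is unchanged):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from quapy.data.base import LabelledCollection
from quapy.method.aggregative import MS2   # re-exported alias

X, y = make_classification(n_samples=2000, random_state=0)
train = LabelledCollection(X[:1000], y[:1000])

quantifier = MS2(LogisticRegression())
quantifier.fit(train)                      # trains the classifier and the aggregation function
print(quantifier.quantify(X[1000:]))       # estimated class prevalence values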