adding the approximate solution to ACC and PACC as suggested by Mirko Bunse

Alejandro Moreo Fernandez 2024-01-25 16:43:00 +01:00
parent 7ac834bd2c
commit 74efa9751d
3 changed files with 47 additions and 10 deletions

View File

@@ -1,6 +1,13 @@
Change Log 0.1.8
----------------
- Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8, these quantifiers tried to solve the
  system of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact
  solution sometimes does not exist, in which case quapy < 0.1.8 resorted to CC for providing a plausible solution.
  ACC and PACC now resort to an approximate solution in such cases (minimizing the L2 norm of the difference Ax-B),
  as proposed by Mirko Bunse (see the sketch below for a contrast of the two solvers). A quick experiment reveals
  that this heuristic greatly improves the results of ACC and PACC on task T2A of LeQua.
- Fixed ThresholdOptimization methods (X, T50, MAX, MS and MS2). Thanks to Tobias Schumacher and colleagues for pointing
this out in Appendix A of "Schumacher, T., Strohmaier, M., & Lemmerich, F. (2021). A comparative evaluation of
quantification methods. arXiv:2103.03223v3 [cs.LG]"
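For illustration, here is a minimal sketch of the two solvers on a toy binary problem. This is not part of the commit: `scipy.optimize.minimize` stands in for quapy's `F.optim_minimize`, which performs the same simplex-constrained search.

```python
import numpy as np
from scipy.optimize import minimize

# A[i, j] = P(predicted=i | true=j); columns sum to 1
A = np.array([[0.9, 0.4],
              [0.1, 0.6]])
# B = prevalence of the predictions, as estimated by classify & count (CC)
B = np.array([0.7, 0.3])

# solver='exact': x = A^{-1}B (may fail, or leave the probability simplex)
x_exact = np.linalg.solve(A, B)          # -> [0.6, 0.4]

# solver='minimize': search the simplex for the x minimizing ||Ax - B||
def loss(x):
    return np.linalg.norm(A @ x - B)

n = len(B)
x_min = minimize(loss, x0=np.full(n, 1/n), bounds=[(0, 1)]*n,
                 constraints={'type': 'eq', 'fun': lambda x: x.sum() - 1}).x
```

Here both solvers recover x=[0.6, 0.4]; they diverge when A is singular or when the exact solution falls outside the probability simplex.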

View File

@@ -349,23 +349,25 @@ class ACC(AggregativeCrispQuantifier):
Alternatively, this set can be specified at fit time by indicating the exact set of data
on which the predictions are to be generated.
:param n_jobs: number of parallel workers
:param solver: indicates the method to be used for obtaining the final esimates. The default choice
is 'exact', which comes down to solving the system of linear equations `Ax=B` where `A` is a
:param solver: indicates the method to be used for obtaining the final estimates. The choice
'exact' comes down to solving the system of linear equations `Ax=B` where `A` is a
matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in
binary) and `B` is the vector of prevalence values estimated via CC, as $x=A^{-1}B$. This solution
might not exist for degenerate classifiers, in which case the method defaults to classify and count
(i.e., does not attempt any adjustment).
Another option is to search for the prevalence vector that minimizes the loss |Ax-B|. The latter is
achieved by indicating solver='minimize'.
achieved by indicating solver='minimize'. This solver generally works better, and is now the default.
"""
def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='exact'):
def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'):
self.classifier = classifier
self.val_split = val_split
self.n_jobs = qp._get_njobs(n_jobs)
assert solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
self.solver = solver
def _check_init_parameters(self):
assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
"""
Estimates the misclassification rates.
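For context, a hypothetical end-to-end usage of the new parameter (the dataset and classifier are placeholders; the fit/quantify calls follow quapy's standard aggregative API):

```python
import quapy as qp
from quapy.method.aggregative import ACC
from sklearn.linear_model import LogisticRegression

training, test = qp.datasets.fetch_reviews('imdb', tfidf=True).train_test

# solver='minimize' is the new default; solver='exact' restores the old behaviour
acc = ACC(LogisticRegression(), solver='minimize')
acc.fit(training)
estim_prevalence = acc.quantify(test.X)
```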
@@ -408,20 +410,29 @@ class ACC(AggregativeCrispQuantifier):
        'minimize' (minimizes the norm of the difference Ax-B, a solution that always exists).

        :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates
        """
        A = PteCondEstim
        B = prevs_estim
        if solver == 'exact':
            # attempts an exact solution of the linear system (may fail)
            try:
                adjusted_prevs = np.linalg.solve(A, B)
                adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
                adjusted_prevs /= adjusted_prevs.sum()
            except np.linalg.LinAlgError:
                adjusted_prevs = prevs_estim  # no way to adjust them!
            return adjusted_prevs
        elif solver == 'minimize':
            # poses the problem as an optimization one, and tries to minimize the norm of the differences
            def loss(prev):
                return np.linalg.norm(A@prev - B)
                return np.linalg.norm(A @ prev - B)
            return F.optim_minimize(loss, n_classes=A.shape[0])
        return adjusted_prevs
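A quick sanity check of the degenerate case (a sketch, not from the commit): a non-informative classifier produces a singular rate matrix, so the exact solution does not exist, while `solver='minimize'` still returns a point on the simplex, using the classmethod shown above:

```python
import numpy as np
from quapy.method.aggregative import ACC

# both columns of P(predicted=i | true=j) coincide: the matrix is singular
A = np.array([[0.5, 0.5],
              [0.5, 0.5]])
B = np.array([0.6, 0.4])  # CC estimate

# np.linalg.solve(A, B) raises np.linalg.LinAlgError here
prev = ACC.solve_adjustment(A, B, solver='minimize')  # valid prevalence vector
```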
class PCC(AggregativeSoftQuantifier):
@@ -462,10 +473,14 @@ class PACC(AggregativeSoftQuantifier):
    :param n_jobs: number of parallel workers
    """
    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None):
    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'):
        self.classifier = classifier
        self.val_split = val_split
        self.n_jobs = qp._get_njobs(n_jobs)
        self.solver = solver

    def _check_init_parameters(self):
        assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        """
@@ -479,7 +494,7 @@ class PACC(AggregativeSoftQuantifier):
    def aggregate(self, classif_posteriors):
        prevs_estim = self.pcc.aggregate(classif_posteriors)
        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver)

    @classmethod
    def getPteCondEstim(cls, classes, y, y_):
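Since PACC now simply forwards the parameter to ACC.solve_adjustment, both quantifiers expose it identically (a sketch; the classifier is a placeholder):

```python
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import ACC, PACC

acc = ACC(LogisticRegression(), solver='minimize')   # new default
pacc = PACC(LogisticRegression(), solver='exact')    # pre-0.1.8 behaviour
```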

View File

@@ -143,6 +143,21 @@ class GridSearchQ(BaseQuantifier):
        self._print_status(params, score, status, took)
        return model, params, score, status, took

    def _break_down_fit(self):
        """
        Decides whether to break down the fit phase into two (classifier-fit followed by aggregation-fit).
        In order to do so, some conditions should be met: a) the quantifier is of type aggregative,
        b) the set of hyperparameters can be split into two disjoint non-empty groups.

        :return: True if the conditions are met, False otherwise
        """
        if not isinstance(self.model, AggregativeQuantifier):
            return False
        cls_configs, q_configs = group_params(self.param_grid)
        if (len(cls_configs) == 1) or (len(q_configs) == 1):
            return False
        return True

    def _compute_scores_aggregative(self, training):
        # break down the set of hyperparameters into two: classifier-specific, quantifier-specific
        cls_configs, q_configs = group_params(self.param_grid)
@@ -214,7 +229,7 @@ class GridSearchQ(BaseQuantifier):
        self.error_collector = []

        self._sout(f'starting model selection with n_jobs={self.n_jobs}')
        if isinstance(self.model, AggregativeQuantifier):
        if self._break_down_fit():
            results = self._compute_scores_aggregative(training)
        else:
            results = self._compute_scores_nonaggregative(training)
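To see why breaking down the fit pays off, here is a simplified sketch of the grouping. It assumes, as in quapy's model_selection module, that classifier-specific hyperparameters carry the `classifier__` prefix; `expand` is a stand-in for the actual grid expansion in `group_params`:

```python
from itertools import product

param_grid = {
    'classifier__C': [0.1, 1, 10],    # classifier-specific
    'solver': ['exact', 'minimize'],  # quantifier-specific (aggregation only)
}

cls_grid = {k: v for k, v in param_grid.items() if k.startswith('classifier__')}
q_grid = {k: v for k, v in param_grid.items() if not k.startswith('classifier__')}

def expand(grid):
    # every combination of values; an empty grid expands to one empty config
    keys, vals = list(grid), list(grid.values())
    return [dict(zip(keys, combo)) for combo in product(*vals)]

cls_configs, q_configs = expand(cls_grid), expand(q_grid)
# 3 classifier fits, each reused across 2 aggregation fits (3 + 3*2 work units),
# instead of 6 end-to-end fits when the phases are not broken down
```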