Add projection onto the probability simplex

2024-03-15 17:06:20 +01:00 · 2024-03-15 17:06:20 +01:00 · 4dd66b1921
parent 020530e14f
commit 4dd66b1921
2 changed files with 54 additions and 4 deletions
--- a/quapy/functional.py
+++ b/quapy/functional.py
@ -1,6 +1,6 @@
 import itertools
 from collections import defaultdict
-from typing import Union, Callable
+from typing import Literal, Union, Callable
 import scipy
 import numpy as np
@ -374,4 +374,55 @@ def linear_search(loss, n_classes):
        if min_score is None or score < min_score:
            prev_selected, min_score = prev, score
-    return np.asarray([1 - prev_selected, prev_selected])
+    return np.asarray([1 - prev_selected, prev_selected])
 def _project_onto_probability_simplex(v: np.ndarray) -> np.ndarray:
    """Projects a point onto the probability simplex.
    The code is adapted from Mathieu Blondel's BSD-licensed
    `implementation <https://gist.github.com/mblondel/6f3b7aaad90606b98f71>`_
    which is accompanying the paper
    Mathieu Blondel, Akinori Fujino, and Naonori Ueda.
    Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex,
    ICPR 2014, `URL <http://www.mblondel.org/publications/mblondel-icpr2014.pdf>`_
    :param v: point in n-dimensional space, shape `(n,)`
    :return: projection of `v` onto (n-1)-dimensional probability simplex, shape `(n,)`
    """
    v = np.asarray(v)
    n = len(v)
    # Sort the values in the descending order
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u) - 1.0
    ind = np.arange(1, n + 1)
    cond = u - cssv / ind > 0
    rho = ind[cond][-1]
    theta = cssv[cond][-1] / float(rho)
    return np.maximum(v - theta, 0)
 def clip_prevalence(p: np.ndarray, method: Literal[None, "none", "clip", "project"]) -> np.ndarray:
    """
    Clips the proportions vector `p` so that it is a valid probability distribution.
    :param p: the proportions vector to be clipped, shape `(n_classes,)`
    :param method: the method to use for normalization.
        If `None` or `"none"`, no normalization is performed.
        If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
        If `"project"`, the values are projected onto the probability simplex.
    :return: the normalized prevalence vector, shape `(n_classes,)`
    """
    if method is None or method == "none":
        return p
    elif method == "clip":
        adjusted = np.clip(p, 0, 1)
        return adjusted / adjusted.sum()
    elif method == "project":
        return _project_onto_probability_simplex(p)
    else:
        raise ValueError(f"Method {method} not known.")
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@ -443,8 +443,7 @@ class ACC(AggregativeCrispQuantifier):
            try:
                adjusted_prevs = np.linalg.solve(A, B)
-                adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
+                adjusted_prevs = F.clip_prevalence(adjusted_prevs, method="clip")
                adjusted_prevs /= adjusted_prevs.sum()
            except np.linalg.LinAlgError:
                adjusted_prevs = prevs_estim  # no way to adjust them!