Add projection onto the probability simplex

This commit is contained in:
Paweł Czyż 2024-03-15 17:06:20 +01:00
parent 020530e14f
commit 4dd66b1921
2 changed files with 54 additions and 4 deletions

View File

@ -1,6 +1,6 @@
import itertools import itertools
from collections import defaultdict from collections import defaultdict
from typing import Union, Callable from typing import Literal, Union, Callable
import scipy import scipy
import numpy as np import numpy as np
@ -374,4 +374,55 @@ def linear_search(loss, n_classes):
if min_score is None or score < min_score: if min_score is None or score < min_score:
prev_selected, min_score = prev, score prev_selected, min_score = prev, score
return np.asarray([1 - prev_selected, prev_selected]) return np.asarray([1 - prev_selected, prev_selected])
def _project_onto_probability_simplex(v: np.ndarray) -> np.ndarray:
"""Projects a point onto the probability simplex.
The code is adapted from Mathieu Blondel's BSD-licensed
`implementation <https://gist.github.com/mblondel/6f3b7aaad90606b98f71>`_
which is accompanying the paper
Mathieu Blondel, Akinori Fujino, and Naonori Ueda.
Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex,
ICPR 2014, `URL <http://www.mblondel.org/publications/mblondel-icpr2014.pdf>`_
:param v: point in n-dimensional space, shape `(n,)`
:return: projection of `v` onto (n-1)-dimensional probability simplex, shape `(n,)`
"""
v = np.asarray(v)
n = len(v)
# Sort the values in the descending order
u = np.sort(v)[::-1]
cssv = np.cumsum(u) - 1.0
ind = np.arange(1, n + 1)
cond = u - cssv / ind > 0
rho = ind[cond][-1]
theta = cssv[cond][-1] / float(rho)
return np.maximum(v - theta, 0)
def clip_prevalence(p: np.ndarray, method: Literal[None, "none", "clip", "project"]) -> np.ndarray:
"""
Clips the proportions vector `p` so that it is a valid probability distribution.
:param p: the proportions vector to be clipped, shape `(n_classes,)`
:param method: the method to use for normalization.
If `None` or `"none"`, no normalization is performed.
If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
If `"project"`, the values are projected onto the probability simplex.
:return: the normalized prevalence vector, shape `(n_classes,)`
"""
if method is None or method == "none":
return p
elif method == "clip":
adjusted = np.clip(p, 0, 1)
return adjusted / adjusted.sum()
elif method == "project":
return _project_onto_probability_simplex(p)
else:
raise ValueError(f"Method {method} not known.")

View File

@ -443,8 +443,7 @@ class ACC(AggregativeCrispQuantifier):
try: try:
adjusted_prevs = np.linalg.solve(A, B) adjusted_prevs = np.linalg.solve(A, B)
adjusted_prevs = np.clip(adjusted_prevs, 0, 1) adjusted_prevs = F.clip_prevalence(adjusted_prevs, method="clip")
adjusted_prevs /= adjusted_prevs.sum()
except np.linalg.LinAlgError: except np.linalg.LinAlgError:
adjusted_prevs = prevs_estim # no way to adjust them! adjusted_prevs = prevs_estim # no way to adjust them!