added ILR to conf regions

This commit is contained in:
Alejandro Moreo Fernandez 2025-12-04 19:16:52 +01:00
parent d87625bd09
commit b180aae16c
3 changed files with 146 additions and 53 deletions

View File

@ -7,7 +7,7 @@ import pandas as pd
from glob import glob from glob import glob
from pathlib import Path from pathlib import Path
import quapy as qp import quapy as qp
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000) pd.set_option('display.width', 2000)
@ -45,6 +45,17 @@ def update_pickle(report, pickle_path, updated_dict:dict):
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
def update_pickle_with_region(report, file, conf_name, conf_region_class):
    """
    Adds the coverage and amplitude entries of a confidence region to a report,
    unless the report already has a coverage entry for that region.

    :param report: dict-like report; checked for the key ``coverage-<conf_name>``
    :param file: path of the pickle, forwarded to ``update_pickle``
    :param conf_name: short name of the region, used as the suffix of the new keys
    :param conf_region_class: confidence region class, forwarded to
        ``compute_coverage_amplitude``
    """
    coverage_key = f'coverage-{conf_name}'
    if coverage_key in report:
        # already computed for this report; nothing to do
        return

    coverage, amplitude = compute_coverage_amplitude(conf_region_class)
    new_entries = {coverage_key: coverage, f'amplitude-{conf_name}': amplitude}
    update_pickle(report, file, new_entries)
for setup in ['binary', 'multiclass']: for setup in ['binary', 'multiclass']:
path = f'./results/{setup}/*.pkl' path = f'./results/{setup}/*.pkl'
table = defaultdict(list) table = defaultdict(list)
@ -60,18 +71,9 @@ for setup in ['binary', 'multiclass']:
table['c-CI'].extend(results['coverage']) table['c-CI'].extend(results['coverage'])
table['a-CI'].extend(results['amplitude']) table['a-CI'].extend(results['amplitude'])
if 'coverage-CE' not in report: update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex) update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR) update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
update_fields = {
'coverage-CE': covCE,
'amplitude-CE': ampCE,
'coverage-CLR': covCLR,
'amplitude-CLR': ampCLR
}
update_pickle(report, file, update_fields)
table['c-CE'].extend(report['coverage-CE']) table['c-CE'].extend(report['coverage-CE'])
table['a-CE'].extend(report['amplitude-CE']) table['a-CE'].extend(report['amplitude-CE'])
@ -79,6 +81,9 @@ for setup in ['binary', 'multiclass']:
table['c-CLR'].extend(report['coverage-CLR']) table['c-CLR'].extend(report['coverage-CLR'])
table['a-CLR'].extend(report['amplitude-CLR']) table['a-CLR'].extend(report['amplitude-CLR'])
table['c-ILR'].extend(report['coverage-ILR'])
table['a-ILR'].extend(report['amplitude-ILR'])
df = pd.DataFrame(table) df = pd.DataFrame(table)
@ -99,7 +104,7 @@ for setup in ['binary', 'multiclass']:
if n > max_classes: if n > max_classes:
df = df[df["dataset"] != data_name] df = df[df["dataset"] != data_name]
for region in ['CI', 'CE', 'CLR']: for region in ['CI', 'CE', 'CLR', 'ILR']:
pv = pd.pivot_table( pv = pd.pivot_table(
df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
) )

View File

@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap from matplotlib.colors import ListedColormap
from scipy.stats import gaussian_kde from scipy.stats import gaussian_kde
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True, def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
@ -185,9 +185,11 @@ if __name__ == '__main__':
yield 'CI', ConfidenceIntervals(prevs) yield 'CI', ConfidenceIntervals(prevs)
yield 'CE', ConfidenceEllipseSimplex(prevs) yield 'CE', ConfidenceEllipseSimplex(prevs)
yield 'CLR', ConfidenceEllipseCLR(prevs) yield 'CLR', ConfidenceEllipseCLR(prevs)
yield 'ILR', ConfidenceEllipseILR(prevs)
resolution = 100
alpha_str = ','.join([f'{str(i)}' for i in alpha]) alpha_str = ','.join([f'{str(i)}' for i in alpha])
for crname, cr in regions(): for crname, cr in regions():
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000, plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png') save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')

View File

@ -218,6 +218,98 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
return float(np.mean(within_ellipse)) return float(np.mean(within_ellipse))
class CompositionalTransformation(ABC):
    """
    Abstract class of transformations from compositional data.
    Basically, callable functions with an "inverse" function.
    """

    # smoothing factor that subclasses pass to `qp.error.smooth` before taking logarithms
    EPSILON = 1e-6

    @abstractmethod
    def __call__(self, X): ...

    @abstractmethod
    def inverse(self, Z): ...


class CLRtransformation(CompositionalTransformation):
    """
    Centered log-ratio (CLR), from compositional analysis
    """

    def __call__(self, X):
        """
        Applies the CLR function to X thus mapping the instances, which are contained in
        :math:`\\mathcal{R}^{n}` but actually lie on a :math:`\\mathcal{R}^{n-1}` simplex, onto an
        unrestricted space in :math:`\\mathcal{R}^{n}`. The input is smoothed with `EPSILON`
        before the logarithm is taken.

        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
        """
        X = np.asarray(X)
        X = qp.error.smooth(X, self.EPSILON)
        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
        return np.log(X / G)

    def inverse(self, Z):
        """
        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.

        :param Z: np.ndarray of (n_instances, n_dimensions), points in the CLR space
        :return: np.ndarray of (n_instances, n_dimensions), each row normalized (softmax) to sum up to 1
        """
        return softmax(Z, axis=-1)


class ILRtransformation(CompositionalTransformation):
    """
    Isometric log-ratio (ILR), from compositional analysis
    """

    def __call__(self, X):
        """
        Applies the ILR function to X, projecting the logarithm of the (smoothed) instances onto
        the basis returned by :meth:`get_V`.

        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
        :return: np.ndarray of (n_instances, n_dimensions-1), the ILR-transformed points
        """
        X = np.asarray(X)
        X = qp.error.smooth(X, self.EPSILON)
        V = self.get_V(X.shape[-1])  # (k-1, k)
        return np.log(X) @ V.T

    def inverse(self, Z):
        """
        Inverse function. As with CLR, ilr.inverse(ilr(X)) does not exactly coincide with X
        due to smoothing.

        :param Z: np.ndarray of (n_instances, n_dimensions-1), points in the ILR space
        :return: np.ndarray of (n_instances, n_dimensions), each row summing up to 1
        """
        Z = np.asarray(Z)
        V = self.get_V(Z.shape[-1] + 1)  # (k-1, k)
        # softmax(Z @ V) == exp(Z @ V) / sum(exp(Z @ V)), but it does not overflow for
        # large coordinates (a bare exp would yield inf/inf = nan)
        return softmax(Z @ V, axis=-1)

    @staticmethod
    @lru_cache(maxsize=None)
    def get_V(k):
        """
        Constructs an orthonormal ILR basis from the Helmert matrix with its first row discarded.

        The result is cached per value of `k` and shared by all instances: caching on a
        static method avoids keying the cache on `self`, which would both defeat the cache
        (every instance would recompute the basis) and keep every instance alive.

        :param k: int, number of classes (dimensions of the simplex)
        :return: np.ndarray of shape (k-1, k); read-only, since the very same array is
            handed out on every call
        """
        V = np.zeros((k - 1, k))
        for i in range(1, k):
            row = V[i - 1]
            row[:i] = 1
            row[i] = -i
            row /= np.sqrt(i * (i + 1))
        # protect the cached array from accidental in-place modification
        V.setflags(write=False)
        return V
class ConfidenceEllipseSimplex(ConfidenceRegionABC): class ConfidenceEllipseSimplex(ConfidenceRegionABC):
""" """
Instantiates a Confidence Ellipse in the probability simplex. Instantiates a Confidence Ellipse in the probability simplex.
@ -272,20 +364,20 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_) return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
class ConfidenceEllipseCLR(ConfidenceRegionABC): class ConfidenceEllipseTransformed(ConfidenceRegionABC):
""" """
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space. Instantiates a Confidence Ellipse in a transformed space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes) :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95) :param confidence_level: float, the confidence level (default 0.95)
""" """
def __init__(self, samples, confidence_level=0.95): def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
samples = np.asarray(samples) samples = np.asarray(samples)
self.clr = CLRtransformation() self.transformation = transformation
Z = self.clr(samples) Z = self.transformation(samples)
self.mean_ = np.mean(samples, axis=0) self.mean_ = np.mean(samples, axis=0)
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level) self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
self._samples = samples self._samples = samples
@property @property
@ -312,8 +404,30 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,) :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1] :return: float in [0,1]
""" """
transformed_values = self.clr(true_value) transformed_values = self.transformation(true_value)
return self.conf_region_clr.coverage(transformed_values) return self.conf_region_z.coverage(transformed_values)
class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
    """
    Confidence Ellipse computed in the Centered-Log Ratio (CLR) space.

    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
    :param confidence_level: float, the confidence level (default 0.95)
    """

    def __init__(self, samples, confidence_level=0.95):
        clr = CLRtransformation()
        super().__init__(samples, transformation=clr, confidence_level=confidence_level)
class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
    """
    Confidence Ellipse computed in the Isometric-Log Ratio (ILR) space.

    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
    :param confidence_level: float, the confidence level (default 0.95)
    """

    def __init__(self, samples, confidence_level=0.95):
        ilr = ILRtransformation()
        super().__init__(samples, transformation=ilr, confidence_level=confidence_level)
class ConfidenceIntervals(ConfidenceRegionABC): class ConfidenceIntervals(ConfidenceRegionABC):
@ -369,34 +483,6 @@ class ConfidenceIntervals(ConfidenceRegionABC):
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']' return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
class CLRtransformation:
    """
    Centered log-ratio (CLR) transformation, from compositional data analysis
    """

    def __call__(self, X, epsilon=1e-6):
        """
        Maps the instances in X, which are contained in :math:`\\mathcal{R}^{n}` but actually lie
        on a :math:`\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`

        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
        :param epsilon: small float for prevalence smoothing
        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
        """
        smoothed = qp.error.smooth(np.asarray(X), epsilon)
        mean_log = np.mean(np.log(smoothed), axis=-1, keepdims=True)
        geometric_mean = np.exp(mean_log)
        return np.log(smoothed / geometric_mean)

    def inverse(self, X):
        """
        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.

        :param X: np.ndarray of (n_instances, n_dimensions), points in the CLR space
        :return: np.ndarray of (n_instances, n_dimensions), the softmax of each row of X
        """
        back_projected = softmax(X, axis=-1)
        return back_projected
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier): class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
""" """