added ILR to conf regions
This commit is contained in:
parent
d87625bd09
commit
b180aae16c
|
|
@ -7,7 +7,7 @@ import pandas as pd
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
|
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
|
||||||
|
|
||||||
pd.set_option('display.max_columns', None)
|
pd.set_option('display.max_columns', None)
|
||||||
pd.set_option('display.width', 2000)
|
pd.set_option('display.width', 2000)
|
||||||
|
|
@ -45,6 +45,17 @@ def update_pickle(report, pickle_path, updated_dict:dict):
|
||||||
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
|
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
|
||||||
|
|
||||||
|
|
||||||
|
def update_pickle_with_region(report, file, conf_name, conf_region_class):
|
||||||
|
if f'coverage-{conf_name}' not in report:
|
||||||
|
cov, amp = compute_coverage_amplitude(conf_region_class)
|
||||||
|
|
||||||
|
update_fields = {
|
||||||
|
f'coverage-{conf_name}': cov,
|
||||||
|
f'amplitude-{conf_name}': amp,
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pickle(report, file, update_fields)
|
||||||
|
|
||||||
for setup in ['binary', 'multiclass']:
|
for setup in ['binary', 'multiclass']:
|
||||||
path = f'./results/{setup}/*.pkl'
|
path = f'./results/{setup}/*.pkl'
|
||||||
table = defaultdict(list)
|
table = defaultdict(list)
|
||||||
|
|
@ -60,18 +71,9 @@ for setup in ['binary', 'multiclass']:
|
||||||
table['c-CI'].extend(results['coverage'])
|
table['c-CI'].extend(results['coverage'])
|
||||||
table['a-CI'].extend(results['amplitude'])
|
table['a-CI'].extend(results['amplitude'])
|
||||||
|
|
||||||
if 'coverage-CE' not in report:
|
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||||
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
|
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||||
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
|
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||||
|
|
||||||
update_fields = {
|
|
||||||
'coverage-CE': covCE,
|
|
||||||
'amplitude-CE': ampCE,
|
|
||||||
'coverage-CLR': covCLR,
|
|
||||||
'amplitude-CLR': ampCLR
|
|
||||||
}
|
|
||||||
|
|
||||||
update_pickle(report, file, update_fields)
|
|
||||||
|
|
||||||
table['c-CE'].extend(report['coverage-CE'])
|
table['c-CE'].extend(report['coverage-CE'])
|
||||||
table['a-CE'].extend(report['amplitude-CE'])
|
table['a-CE'].extend(report['amplitude-CE'])
|
||||||
|
|
@ -79,6 +81,9 @@ for setup in ['binary', 'multiclass']:
|
||||||
table['c-CLR'].extend(report['coverage-CLR'])
|
table['c-CLR'].extend(report['coverage-CLR'])
|
||||||
table['a-CLR'].extend(report['amplitude-CLR'])
|
table['a-CLR'].extend(report['amplitude-CLR'])
|
||||||
|
|
||||||
|
table['c-ILR'].extend(report['coverage-ILR'])
|
||||||
|
table['a-ILR'].extend(report['amplitude-ILR'])
|
||||||
|
|
||||||
|
|
||||||
df = pd.DataFrame(table)
|
df = pd.DataFrame(table)
|
||||||
|
|
||||||
|
|
@ -99,7 +104,7 @@ for setup in ['binary', 'multiclass']:
|
||||||
if n > max_classes:
|
if n > max_classes:
|
||||||
df = df[df["dataset"] != data_name]
|
df = df[df["dataset"] != data_name]
|
||||||
|
|
||||||
for region in ['CI', 'CE', 'CLR']:
|
for region in ['CI', 'CE', 'CLR', 'ILR']:
|
||||||
pv = pd.pivot_table(
|
pv = pd.pivot_table(
|
||||||
df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
|
df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
|
||||||
from matplotlib.colors import ListedColormap
|
from matplotlib.colors import ListedColormap
|
||||||
from scipy.stats import gaussian_kde
|
from scipy.stats import gaussian_kde
|
||||||
|
|
||||||
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
|
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
|
||||||
|
|
||||||
|
|
||||||
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
|
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
|
||||||
|
|
@ -185,9 +185,11 @@ if __name__ == '__main__':
|
||||||
yield 'CI', ConfidenceIntervals(prevs)
|
yield 'CI', ConfidenceIntervals(prevs)
|
||||||
yield 'CE', ConfidenceEllipseSimplex(prevs)
|
yield 'CE', ConfidenceEllipseSimplex(prevs)
|
||||||
yield 'CLR', ConfidenceEllipseCLR(prevs)
|
yield 'CLR', ConfidenceEllipseCLR(prevs)
|
||||||
|
yield 'ILR', ConfidenceEllipseILR(prevs)
|
||||||
|
|
||||||
|
resolution = 100
|
||||||
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||||
for crname, cr in regions():
|
for crname, cr in regions():
|
||||||
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
|
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
|
||||||
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
|
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -218,6 +218,98 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
||||||
return float(np.mean(within_ellipse))
|
return float(np.mean(within_ellipse))
|
||||||
|
|
||||||
|
|
||||||
|
class CompositionalTransformation(ABC):
|
||||||
|
"""
|
||||||
|
Abstract class of transformations from compositional data.
|
||||||
|
Basically, callable functions with an "inverse" function.
|
||||||
|
"""
|
||||||
|
@abstractmethod
|
||||||
|
def __call__(self, X): ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def inverse(self, Z): ...
|
||||||
|
|
||||||
|
EPSILON=1e-6
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CLRtransformation(CompositionalTransformation):
|
||||||
|
"""
|
||||||
|
Centered log-ratio (CLR), from compositional analysis
|
||||||
|
"""
|
||||||
|
def __call__(self, X):
|
||||||
|
"""
|
||||||
|
Applies the CLR function to X thus mapping the instances, which are contained in :math:`\\mathcal{R}^{n}` but
|
||||||
|
actually lie on a :math:`\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
|
||||||
|
|
||||||
|
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
||||||
|
:param epsilon: small float for prevalence smoothing
|
||||||
|
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
||||||
|
"""
|
||||||
|
X = np.asarray(X)
|
||||||
|
X = qp.error.smooth(X, self.EPSILON)
|
||||||
|
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
||||||
|
return np.log(X / G)
|
||||||
|
|
||||||
|
def inverse(self, Z):
|
||||||
|
"""
|
||||||
|
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
||||||
|
|
||||||
|
:param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
|
||||||
|
:return: np.ndarray of (n_instances, n_dimensions), the points mapped back onto the probability simplex
|
||||||
|
"""
|
||||||
|
return softmax(Z, axis=-1)
|
||||||
|
|
||||||
|
|
||||||
|
class ILRtransformation(CompositionalTransformation):
|
||||||
|
"""
|
||||||
|
Isometric log-ratio (ILR), from compositional analysis
|
||||||
|
"""
|
||||||
|
def __call__(self, X):
|
||||||
|
X = np.asarray(X)
|
||||||
|
X = qp.error.smooth(X, self.EPSILON)
|
||||||
|
k = X.shape[-1]
|
||||||
|
V = self.get_V(k) # (k-1, k)
|
||||||
|
logp = np.log(X)
|
||||||
|
return logp @ V.T
|
||||||
|
|
||||||
|
def inverse(self, Z):
|
||||||
|
Z = np.asarray(Z)
|
||||||
|
# get dimension
|
||||||
|
k_minus_1 = Z.shape[-1]
|
||||||
|
k = k_minus_1 + 1
|
||||||
|
V = self.get_V(k) # (k-1, k)
|
||||||
|
logp = Z @ V
|
||||||
|
p = np.exp(logp)
|
||||||
|
p = p / np.sum(p, axis=-1, keepdims=True)
|
||||||
|
return p
|
||||||
|
|
||||||
|
@lru_cache(maxsize=None)
|
||||||
|
def get_V(self, k):
|
||||||
|
def helmert_matrix(k):
|
||||||
|
"""
|
||||||
|
Returns the (k x k) Helmert matrix.
|
||||||
|
"""
|
||||||
|
H = np.zeros((k, k))
|
||||||
|
for i in range(1, k):
|
||||||
|
H[i, :i] = 1
|
||||||
|
H[i, i] = -(i)
|
||||||
|
H[i] = H[i] / np.sqrt(i * (i + 1))
|
||||||
|
# row 0 stays zeros; will be discarded
|
||||||
|
return H
|
||||||
|
|
||||||
|
def ilr_basis(k):
|
||||||
|
"""
|
||||||
|
Constructs an orthonormal ILR basis using the Helmert submatrix.
|
||||||
|
Output shape: (k-1, k)
|
||||||
|
"""
|
||||||
|
H = helmert_matrix(k)
|
||||||
|
V = H[1:, :] # remove first row of zeros
|
||||||
|
return V
|
||||||
|
|
||||||
|
return ilr_basis(k)
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||||
"""
|
"""
|
||||||
Instantiates a Confidence Ellipse in the probability simplex.
|
Instantiates a Confidence Ellipse in the probability simplex.
|
||||||
|
|
@ -272,20 +364,20 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||||
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
|
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceEllipseCLR(ConfidenceRegionABC):
|
class ConfidenceEllipseTransformed(ConfidenceRegionABC):
|
||||||
"""
|
"""
|
||||||
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
|
Instantiates a Confidence Ellipse in a transformed space.
|
||||||
|
|
||||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||||
:param confidence_level: float, the confidence level (default 0.95)
|
:param confidence_level: float, the confidence level (default 0.95)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, samples, confidence_level=0.95):
|
def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
|
||||||
samples = np.asarray(samples)
|
samples = np.asarray(samples)
|
||||||
self.clr = CLRtransformation()
|
self.transformation = transformation
|
||||||
Z = self.clr(samples)
|
Z = self.transformation(samples)
|
||||||
self.mean_ = np.mean(samples, axis=0)
|
self.mean_ = np.mean(samples, axis=0)
|
||||||
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
|
self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
|
||||||
self._samples = samples
|
self._samples = samples
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -312,8 +404,30 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
|
||||||
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
||||||
:return: float in [0,1]
|
:return: float in [0,1]
|
||||||
"""
|
"""
|
||||||
transformed_values = self.clr(true_value)
|
transformed_values = self.transformation(true_value)
|
||||||
return self.conf_region_clr.coverage(transformed_values)
|
return self.conf_region_z.coverage(transformed_values)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
|
||||||
|
"""
|
||||||
|
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
|
||||||
|
|
||||||
|
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||||
|
:param confidence_level: float, the confidence level (default 0.95)
|
||||||
|
"""
|
||||||
|
def __init__(self, samples, confidence_level=0.95):
|
||||||
|
super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
|
||||||
|
"""
|
||||||
|
Instantiates a Confidence Ellipse in the Isometric-Log Ratio (ILR) space.
|
||||||
|
|
||||||
|
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||||
|
:param confidence_level: float, the confidence level (default 0.95)
|
||||||
|
"""
|
||||||
|
def __init__(self, samples, confidence_level=0.95):
|
||||||
|
super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceIntervals(ConfidenceRegionABC):
|
class ConfidenceIntervals(ConfidenceRegionABC):
|
||||||
|
|
@ -369,34 +483,6 @@ class ConfidenceIntervals(ConfidenceRegionABC):
|
||||||
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
|
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
|
||||||
|
|
||||||
|
|
||||||
class CLRtransformation:
|
|
||||||
"""
|
|
||||||
Centered log-ratio, from component analysis
|
|
||||||
"""
|
|
||||||
def __call__(self, X, epsilon=1e-6):
|
|
||||||
"""
|
|
||||||
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
|
|
||||||
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
|
|
||||||
|
|
||||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
|
||||||
:param epsilon: small float for prevalence smoothing
|
|
||||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
|
||||||
"""
|
|
||||||
X = np.asarray(X)
|
|
||||||
X = qp.error.smooth(X, epsilon)
|
|
||||||
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
|
||||||
return np.log(X / G)
|
|
||||||
|
|
||||||
|
|
||||||
def inverse(self, X):
|
|
||||||
"""
|
|
||||||
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
|
||||||
|
|
||||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
|
||||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
|
||||||
"""
|
|
||||||
return softmax(X, axis=-1)
|
|
||||||
|
|
||||||
|
|
||||||
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
|
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue