forked from moreo/QuaPy
optimization threshold variants fixed
This commit is contained in:
parent
9b2470c992
commit
c0d92a2083
|
@ -15,7 +15,7 @@ import itertools
|
||||||
import argparse
|
import argparse
|
||||||
from glob import glob
|
from glob import glob
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from time import time
|
||||||
|
|
||||||
N_JOBS = -1
|
N_JOBS = -1
|
||||||
|
|
||||||
|
@ -38,10 +38,11 @@ svmperf_params = {'classifier__C': __C_range}
|
||||||
def quantification_models():
|
def quantification_models():
|
||||||
yield 'acc', ACC(newLR()), lr_params
|
yield 'acc', ACC(newLR()), lr_params
|
||||||
yield 'T50', T50(newLR()), lr_params
|
yield 'T50', T50(newLR()), lr_params
|
||||||
#yield 'X', X(newLR()), lr_params
|
yield 'X', X(newLR()), lr_params
|
||||||
#yield 'MAX', MAX(newLR()), lr_params
|
yield 'MAX', MAX(newLR()), lr_params
|
||||||
yield 'MS', MS(newLR()), lr_params
|
yield 'MS', MS(newLR()), lr_params
|
||||||
yield 'MS2', MS2(newLR()), lr_params
|
yield 'MS+', MS(newLR()), lr_params
|
||||||
|
# yield 'MS2', MS2(newLR()), lr_params
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -115,8 +116,10 @@ if __name__ == '__main__':
|
||||||
optim_losses = ['mae']
|
optim_losses = ['mae']
|
||||||
datasets = qp.datasets.UCI_DATASETS
|
datasets = qp.datasets.UCI_DATASETS
|
||||||
|
|
||||||
|
tstart = time()
|
||||||
models = quantification_models()
|
models = quantification_models()
|
||||||
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
|
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
|
||||||
|
tend = time()
|
||||||
|
|
||||||
# open all results and show
|
# open all results and show
|
||||||
df = pd.DataFrame(columns=('method', 'dataset', 'mae'))
|
df = pd.DataFrame(columns=('method', 'dataset', 'mae'))
|
||||||
|
@ -126,6 +129,6 @@ if __name__ == '__main__':
|
||||||
dataset = '-'.join(dataset)
|
dataset = '-'.join(dataset)
|
||||||
df.loc[i] = [method, dataset, mae]
|
df.loc[i] = [method, dataset, mae]
|
||||||
|
|
||||||
print(df.pivot_table(index='dataset', columns='method', values='mae'))
|
print(df.pivot_table(index='dataset', columns='method', values='mae', margins=True))
|
||||||
|
|
||||||
|
|
||||||
|
print(f'took {(tend-tstart)}s')
|
||||||
|
|
|
@ -66,7 +66,7 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False):
|
||||||
return prevalences
|
return prevalences
|
||||||
|
|
||||||
|
|
||||||
def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False):
|
def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary=False):
|
||||||
"""
|
"""
|
||||||
Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two
|
Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two
|
||||||
values representing a binary distribution.
|
values representing a binary distribution.
|
||||||
|
@ -80,7 +80,8 @@ def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False):
|
||||||
positive_prevalence = np.clip(positive_prevalence, 0, 1)
|
positive_prevalence = np.clip(positive_prevalence, 0, 1)
|
||||||
else:
|
else:
|
||||||
assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class'
|
assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class'
|
||||||
return np.asarray([1-positive_prevalence, positive_prevalence])
|
return np.asarray([1-positive_prevalence, positive_prevalence]).T
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def HellingerDistance(P, Q) -> float:
|
def HellingerDistance(P, Q) -> float:
|
||||||
|
|
|
@ -1102,7 +1102,7 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
|
||||||
:param fpr: float, false positive rate
|
:param fpr: float, false positive rate
|
||||||
:return: true if the combination is to be discarded, false otherwise
|
:return: true if the combination is to be discarded, false otherwise
|
||||||
"""
|
"""
|
||||||
return (tpr + fpr) == 0
|
return (tpr - fpr) == 0
|
||||||
|
|
||||||
|
|
||||||
def _eval_candidate_thresholds(self, decision_scores, y):
|
def _eval_candidate_thresholds(self, decision_scores, y):
|
||||||
|
@ -1119,9 +1119,9 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
|
||||||
candidates = []
|
candidates = []
|
||||||
scores = []
|
scores = []
|
||||||
for candidate_threshold in candidate_thresholds:
|
for candidate_threshold in candidate_thresholds:
|
||||||
y_ = self.classes_[1 * (decision_scores > candidate_threshold)]
|
y_ = self.classes_[1 * (decision_scores >= candidate_threshold)]
|
||||||
TP, FP, FN, TN = self._compute_table(y, y_)
|
TP, FP, FN, TN = self._compute_table(y, y_)
|
||||||
tpr = self._compute_tpr(TP, FP)
|
tpr = self._compute_tpr(TP, FN)
|
||||||
fpr = self._compute_fpr(FP, TN)
|
fpr = self._compute_fpr(FP, TN)
|
||||||
if not self.discard(tpr, fpr):
|
if not self.discard(tpr, fpr):
|
||||||
candidate_score = self.condition(tpr, fpr)
|
candidate_score = self.condition(tpr, fpr)
|
||||||
|
@ -1139,12 +1139,18 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
|
||||||
|
|
||||||
return candidates
|
return candidates
|
||||||
|
|
||||||
def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold):
|
# def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold):
|
||||||
prevs_estim = np.mean(classif_predictions > threshold)
|
# prevs_estim = np.mean(classif_predictions >= threshold)
|
||||||
if tpr - fpr != 0:
|
# if tpr - fpr != 0:
|
||||||
prevs_estim = (prevs_estim - fpr) / (tpr - fpr)
|
# prevs_estim = (prevs_estim - fpr) / (tpr - fpr)
|
||||||
prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True)
|
# prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True)
|
||||||
return prevs_estim
|
# return prevs_estim
|
||||||
|
|
||||||
|
def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds):
|
||||||
|
prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0)
|
||||||
|
prevs_estims = (prevs_estims - fprs) / (tprs - fprs)
|
||||||
|
prevs_estims = F.as_binary_prevalence(prevs_estims, clip_if_necessary=True)
|
||||||
|
return prevs_estims.squeeze()
|
||||||
|
|
||||||
def _compute_table(self, y, y_):
|
def _compute_table(self, y, y_):
|
||||||
TP = np.logical_and(y == y_, y == self.pos_label).sum()
|
TP = np.logical_and(y == y_, y == self.pos_label).sum()
|
||||||
|
|
Loading…
Reference in New Issue