forked from moreo/QuaPy
optimization threshold variants fixed
This commit is contained in:
parent
9b2470c992
commit
c0d92a2083
|
@ -15,7 +15,7 @@ import itertools
|
|||
import argparse
|
||||
from glob import glob
|
||||
import pandas as pd
|
||||
|
||||
from time import time
|
||||
|
||||
N_JOBS = -1
|
||||
|
||||
|
@ -38,10 +38,11 @@ svmperf_params = {'classifier__C': __C_range}
|
|||
def quantification_models():
|
||||
yield 'acc', ACC(newLR()), lr_params
|
||||
yield 'T50', T50(newLR()), lr_params
|
||||
#yield 'X', X(newLR()), lr_params
|
||||
#yield 'MAX', MAX(newLR()), lr_params
|
||||
yield 'X', X(newLR()), lr_params
|
||||
yield 'MAX', MAX(newLR()), lr_params
|
||||
yield 'MS', MS(newLR()), lr_params
|
||||
yield 'MS2', MS2(newLR()), lr_params
|
||||
yield 'MS+', MS(newLR()), lr_params
|
||||
# yield 'MS2', MS2(newLR()), lr_params
|
||||
|
||||
|
||||
|
||||
|
@ -115,8 +116,10 @@ if __name__ == '__main__':
|
|||
optim_losses = ['mae']
|
||||
datasets = qp.datasets.UCI_DATASETS
|
||||
|
||||
tstart = time()
|
||||
models = quantification_models()
|
||||
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
|
||||
tend = time()
|
||||
|
||||
# open all results and show
|
||||
df = pd.DataFrame(columns=('method', 'dataset', 'mae'))
|
||||
|
@ -126,6 +129,6 @@ if __name__ == '__main__':
|
|||
dataset = '-'.join(dataset)
|
||||
df.loc[i] = [method, dataset, mae]
|
||||
|
||||
print(df.pivot_table(index='dataset', columns='method', values='mae'))
|
||||
|
||||
print(df.pivot_table(index='dataset', columns='method', values='mae', margins=True))
|
||||
|
||||
print(f'took {(tend-tstart)}s')
|
||||
|
|
|
@ -66,7 +66,7 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False):
|
|||
return prevalences
|
||||
|
||||
|
||||
def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False):
|
||||
def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary=False):
|
||||
"""
|
||||
Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two
|
||||
values representing a binary distribution.
|
||||
|
@ -80,7 +80,8 @@ def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False):
|
|||
positive_prevalence = np.clip(positive_prevalence, 0, 1)
|
||||
else:
|
||||
assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class'
|
||||
return np.asarray([1-positive_prevalence, positive_prevalence])
|
||||
return np.asarray([1-positive_prevalence, positive_prevalence]).T
|
||||
|
||||
|
||||
|
||||
def HellingerDistance(P, Q) -> float:
|
||||
|
|
|
@ -1102,7 +1102,7 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
|
|||
:param fpr: float, false positive rate
|
||||
:return: true if the combination is to be discarded, false otherwise
|
||||
"""
|
||||
return (tpr + fpr) == 0
|
||||
return (tpr - fpr) == 0
|
||||
|
||||
|
||||
def _eval_candidate_thresholds(self, decision_scores, y):
|
||||
|
@ -1119,9 +1119,9 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
|
|||
candidates = []
|
||||
scores = []
|
||||
for candidate_threshold in candidate_thresholds:
|
||||
y_ = self.classes_[1 * (decision_scores > candidate_threshold)]
|
||||
y_ = self.classes_[1 * (decision_scores >= candidate_threshold)]
|
||||
TP, FP, FN, TN = self._compute_table(y, y_)
|
||||
tpr = self._compute_tpr(TP, FP)
|
||||
tpr = self._compute_tpr(TP, FN)
|
||||
fpr = self._compute_fpr(FP, TN)
|
||||
if not self.discard(tpr, fpr):
|
||||
candidate_score = self.condition(tpr, fpr)
|
||||
|
@ -1139,12 +1139,18 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
|
|||
|
||||
return candidates
|
||||
|
||||
def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold):
|
||||
prevs_estim = np.mean(classif_predictions > threshold)
|
||||
if tpr - fpr != 0:
|
||||
prevs_estim = (prevs_estim - fpr) / (tpr - fpr)
|
||||
prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True)
|
||||
return prevs_estim
|
||||
# def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold):
|
||||
# prevs_estim = np.mean(classif_predictions >= threshold)
|
||||
# if tpr - fpr != 0:
|
||||
# prevs_estim = (prevs_estim - fpr) / (tpr - fpr)
|
||||
# prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True)
|
||||
# return prevs_estim
|
||||
|
||||
def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds):
    """
    Apply the adjusted count for one or several (tpr, fpr, threshold) combinations.

    The predictions are compared against `thresholds` through broadcasting, so
    `tprs`, `fprs` and `thresholds` may be scalars or equally-sized arrays.

    :param classif_predictions: array of decision scores; it is indexed as
        `[:, None]`, so a 1-D np.ndarray is presumably expected (confirm with callers)
    :param tprs: true positive rate(s) associated with `thresholds`
    :param fprs: false positive rate(s) associated with `thresholds`
    :param thresholds: decision threshold(s); a score counts as positive when it is
        greater than or equal to the threshold
    :return: the output of `F.as_binary_prevalence` (called with
        `clip_if_necessary=True`) after `.squeeze()`
    """
    # one column per threshold: True where the score reaches that threshold
    is_positive = classif_predictions[:, None] >= thresholds
    # fraction of instances predicted positive, per threshold
    positive_rates = np.mean(is_positive, axis=0)
    # adjusted count: (observed rate - fpr) / (tpr - fpr)
    # NOTE(review): there is no guard for tprs == fprs here; presumably such
    # combinations are filtered out before this call -- confirm
    adjusted = (positive_rates - fprs) / (tprs - fprs)
    binary_prevs = F.as_binary_prevalence(adjusted, clip_if_necessary=True)
    return binary_prevs.squeeze()
|
||||
|
||||
def _compute_table(self, y, y_):
|
||||
TP = np.logical_and(y == y_, y == self.pos_label).sum()
|
||||
|
|
Loading…
Reference in New Issue