forked from moreo/QuaPy
cleaning branch
This commit is contained in:
parent b3ccf71edb
commit caa7fd2884
@@ -0,0 +1,315 @@
import numpy as np
import scipy.special
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.svm import LinearSVC

import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
from sklearn import clone
import quapy.functional as F


# datasets = qp.datasets.UCI_DATASETS
datasets = ['imdb']

# target = 'f1'
target = 'acc'

errors = []


def method_1(cls, q, train, val, sample, y=None, y_hat=None):
    """
    Converts a misclassification matrix computed in validation (i.e., in the training distribution P) into
    the equivalent misclassification matrix in test (i.e., in the test distribution Q)
    by relying on the PPS assumptions.

    :return: tuple (tn, fn, fp, tp) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # q = EMQ(LogisticRegression(class_weight='balanced'))
    # q.fit(val, fit_classifier=True)
    # q = EMQ(cls)
    # q.fit(train, fit_classifier=False)

    # q = KDEyML(cls)
    # q.fit(train, val_split=val, fit_classifier=False)
    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    p_hat = q.quantify(sample.instances)
    cont_table_hat = p_hat * M_hat
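    # note: getPteCondEstim returns M with M[i, j] = P(y_hat=i | y=j) (rows index the predicted
    # label, columns the true one; columns sum to 1), so broadcasting p_hat * M scales column j
    # by the estimated prevalence Q(y=j) and yields the joint table J[i, j] = Q(y_hat=i, y=j);
    # e.g., for a perfect classifier M is the identity and the joint table reduces to diag(p_hat)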
    # cont_table_hat = np.clip(cont_table_hat, 0, 1)
    # cont_table_hat = cont_table_hat / cont_table_hat.sum()

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('M-true:\n', M_true)
    print('M-hat:\n', M_hat)
    print('cont_table:\n', cont_table_hat)

    tp = cont_table_hat[1, 1]
    tn = cont_table_hat[0, 0]
    fn = cont_table_hat[0, 1]
    fp = cont_table_hat[1, 0]

    return tn, fn, fp, tp


def method_2(cls, train, val, sample, y=None, y_hat=None):
    """
    Assume P and Q are the training and test distributions.
    Solves the following system of linear equations:
    tp + fp = CC (the classify & count estimate, observed)
    fn + tp = Q(Y=1) (not observed, but estimated via quantification)
    tp + fp + fn + tn = 1 (trivial)

    There are 4 unknowns and 3 equations. The required fourth equation is established
    by assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that
    this implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
    In particular, we assume that the tpr in P (estimated in validation, hereafter tpr) and
    the tpr in Q (unknown, hereafter tpr_Q) are the same. This means:
    tpr = tpr_Q = tp / (tp + fn)
    which, after some manipulation, becomes:
    tp (tpr-1) + fn (tpr) = 0 <-- our last equation

    Note that the last equation relies on the estimate tpr. The more positives
    we have, the more reliable this estimate is likely to be. This suggests that, in cases
    in which the validation set contains more negatives than positives, it might be
    convenient to resort to the true negative rate (tnr) instead. This gives rise to
    the alternative fourth equation:
    tn (tnr-1) + fp (tnr) = 0

    :return: tuple (tn, fn, fp, tp) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    q = ACC(cls)
    q.fit(train, val_split=val, fit_classifier=False)
    p_hat = q.quantify(sample.instances)
    pos_prev = p_hat[1]
    # pos_prev = sample.prevalence()[1]

    cc = CC(cls)
    cc.fit(train, fit_classifier=False)
    cc_prev = cc.quantify(sample.instances)[1]

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    cont_table_true = sample.prevalence() * M_true

    if val.prevalence()[1] > 0.5:

        # in this case, the tpr might be a more reliable estimate than the tnr
        tpr_hat = M_hat[1, 1]

        # the unknowns are ordered (tn, fn, fp, tp); the rows encode the four equations above
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])

    else:

        # in this case, the tnr might be a more reliable estimate than the tpr
        tnr_hat = M_hat[0, 0]

        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat - 1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [cc_prev, pos_prev, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    cont_table_estim = np.asarray([
        [tn, fn],
        [fp, tp]
    ])

    # if (cont_table_estim < 0).any() or (cont_table_estim > 1).any():
    #     cont_table_estim = scipy.special.softmax(cont_table_estim)

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('true_cont_table:\n', cont_table_true)
    print('estim_cont_table:\n', cont_table_estim)
    # print('true_tpr', M_true[1,1])
    # print('estim_tpr', tpr_hat)

    return tn, fn, fp, tp
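

# A minimal sanity check of the linear system solved by method_2 (an illustrative sketch,
# not part of the experiments): build a ground-truth contingency table, derive the
# observables, and verify that the solver recovers the table when tpr is exact.
def _check_method_2_system():
    tn, fn, fp, tp = 0.5, 0.1, 0.1, 0.3   # a ground-truth (normalized) contingency table
    cc = tp + fp                          # the classify & count estimate
    prev = tp + fn                        # the positive prevalence Q(Y=1)
    tpr = tp / (tp + fn)                  # the true positive rate
    A = np.asarray([
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [1, 1, 1, 1],
        [0, tpr, 0, tpr - 1]
    ])
    b = np.asarray([cc, prev, 1, 0])
    assert np.allclose(np.linalg.solve(A, b), [tn, fn, fp, tp])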


def method_3(cls, train, val, sample, y=None, y_hat=None):
    """
    This is just method 2, but without involving any of quapy's quantifiers.

    :return: tuple (tn, fn, fp, tp) of floats in [0,1] summing up to 1
    """

    classes = val.classes_
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    y_hat_test = cls.predict(sample.instances)
    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
    tpr_hat = M_hat[1, 1]
    fpr_hat = M_hat[1, 0]
    tnr_hat = M_hat[0, 0]
    if tpr_hat != fpr_hat:
        pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
    else:
        # degenerate case (tpr equals fpr): the adjustment is undefined, so fall back to the CC estimate
        print('--> tpr_hat == fpr_hat ==', tpr_hat)
        pos_prev_test_hat = pos_prev_cc
    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)
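    # the adjustment above is the standard ACC correction: under PPS, the classify & count
    # estimate decomposes as cc = tpr * p + fpr * (1 - p), with p = Q(Y=1); solving for p
    # gives p = (cc - fpr) / (tpr - fpr)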
    pos_prev_val = val.prevalence()[1]

    if pos_prev_val > 0.5:
        # in this case, the tpr might be a more reliable estimate than the tnr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than the tpr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat - 1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [pos_prev_cc, pos_prev_test_hat, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    return tn, fn, fp, tp


def cls_eval_from_counters(tn, fn, fp, tp):
    # the counters are normalized (they sum up to 1), so accuracy is simply tp + tn
    if target == 'acc':
        acc_hat = (tp + tn)
    else:
        den = (2 * tp + fn + fp)
        if den > 0:
            acc_hat = 2 * tp / den
        else:
            acc_hat = 0
    return acc_hat
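

# example for cls_eval_from_counters (hypothetical numbers): with the normalized counters
# (tn, fn, fp, tp) = (0.45, 0.05, 0.05, 0.45), accuracy is tp + tn = 0.9 and
# F1 is 2*0.45 / (2*0.45 + 0.05 + 0.05) = 0.9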


def cls_eval_from_labels(y, y_hat):
    if target == 'acc':
        acc = (y_hat == y).mean()
    else:
        acc = f1_score(y, y_hat, zero_division=0)
    return acc


for dataset_name in datasets:

    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test

    xs = []
    ys_1 = []
    ys_trval = []
    ys_3 = []

    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)
    for train in train_prot():
        if np.prod(train.prevalence()) == 0:
            # skip experiments with no positives or no negatives in training
            continue

        cls = LogisticRegression(class_weight='balanced', C=100)
        # cls = CalibratedClassifierCV(LinearSVC())

        train, val = train.split_stratified(train_prop=0.5, random_state=0)

        print(f'dataset name = {dataset_name}')
        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')

        cls.fit(*train.Xy)

        # q = KDEyML(cls)
        q = ACC(LogisticRegression())
        q.fit(train, val_split=val, fit_classifier=True)
        # q = GridSearchQ(PACC(cls),
        #                 param_grid={'classifier__C': np.logspace(-2, 2, 5)},
        #                 protocol=APP(val, sample_size=1000),
        #                 verbose=True,
        #                 n_jobs=-1).fit(train)

        acc_trval = cls_eval_from_labels(val.labels, cls.predict(val.instances))

        for sample in APP(test, n_prevalences=21, repeats=1, sample_size=1000, return_type='labelled_collection')():
            print('=' * 80)
            y_hat = cls.predict(sample.instances)
            y = sample.labels
            acc_true = cls_eval_from_labels(y, y_hat)
            xs.append(acc_true)
            ys_trval.append(acc_trval)

            tn, fn, fp, tp = method_1(cls, q, train, val, sample, y, y_hat)
            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)
            ys_1.append(acc_hat)

            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)
            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)
            ys_3.append(acc_hat)

            error = abs(acc_true - acc_hat)
            errors.append(error)

            print(f'classifier accuracy={acc_true:.3f}')
            print(f'estimated accuracy={acc_hat:.3f}')
            print(f'estimation error={error:.4f}')


print('process end')
print('=' * 80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')

import matplotlib.pyplot as plt

# Create scatter plot
plt.plot([0, 1], [0, 1], color='black', linestyle='--')
plt.scatter(xs, ys_1, label='method 1')
plt.scatter(xs, ys_3, label='method 3')
plt.scatter(xs, ys_trval, label='tr-val')
plt.legend()

# Add labels and title
plt.xlabel('True Accuracy')
plt.ylabel('Estim Accuracy')

# Display the plot
plt.show()
@@ -0,0 +1,149 @@
from collections import defaultdict

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression
import os
import quapy as qp
from quapy.method.aggregative import PACC, EMQ, PCC, CC, ACC, HDy
from models import *
import matplotlib.pyplot as plt
from pathlib import Path


def clf():
    # return CalibratedClassifierCV(LinearSVC(class_weight=None))
    return LogisticRegression(class_weight=None)


def F1(contingency_table):
    # the table follows sklearn's confusion_matrix convention: rows index the true class,
    # columns the predicted class
    # tn = contingency_table[0, 0]
    tp = contingency_table[1, 1]
    fp = contingency_table[0, 1]
    fn = contingency_table[1, 0]
    den = (2 * tp + fp + fn)
    if den > 0:
        return 2 * tp / den
    else:
        # no positives, neither true nor predicted: F1 is conventionally taken to be 1
        return 1


def accuracy(contingency_table):
    tn = contingency_table[0, 0]
    tp = contingency_table[1, 1]
    fp = contingency_table[0, 1]
    fn = contingency_table[1, 0]
    return (tp + tn) / (tp + fp + fn + tn)


def plot_series(series, repeats, metric_name, train_prev=None, savepath=None):

    for key in series:
        print(series[key])

    fig, ax = plt.subplots()

    def bin(v):
        # arrange the flat list of values as a (n_prevalences, repeats) matrix and return
        # the mean and std across the repetitions of each prevalence value
        mat = np.asarray(v).reshape(-1, repeats)
        return mat.mean(axis=1), mat.std(axis=1)

    x = series['prev']
    x, _ = bin(x)

    for serie in series:
        if serie == 'prev':
            continue
        values = series[serie]
        print(serie, values)
        val_mean, val_std = bin(values)
        ax.errorbar(x, val_mean, label=serie, fmt='-', marker='o')
        ax.fill_between(x, val_mean - val_std, val_mean + val_std, alpha=0.25)

    if train_prev is not None:
        ax.axvline(x=train_prev, label='tr-prev', color='k', linestyle='--')
        # ax.scatter(train_prev, train_prev, c='c', label='tr-prev', linewidth=2, edgecolor='k', s=100, zorder=3)

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    ax.grid()
    ax.set_title(metric_name)
    ax.set(xlabel=r'$p_U(\oplus)$', ylabel='estimated ' + metric_name,
           title='Classifier accuracy in terms of ' + metric_name)

    if savepath is None:
        plt.show()
    else:
        os.makedirs(Path(savepath).parent, exist_ok=True)
        plt.savefig(savepath, bbox_inches='tight')


dataset = 'imdb'
data = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=5, pickle=True)

# qp.data.preprocessing.reduce_columns(data, min_df=5, inplace=True)
# print('num_features', data.training.instances.shape[1])

train = data.training
test = data.test

upper = UpperBound(clf(), y_test=None).fit(train)

mlcfe = MLCMEstimator(clf(), strategy='kfcv', k=5, n_jobs=-1).fit(train)

emq_quant = QuantificationCMPredictor(clf(), EMQ(LogisticRegression()), strategy='kfcv', k=5, n_jobs=-1).fit(train)
# cc_quant = QuantificationCMPredictor(clf(), CC(clf()), strategy='kfcv', k=5, n_jobs=-1).fit(train)
# pcc_quant = QuantificationCMPredictor(clf(), PCC(clf()), strategy='kfcv', k=5, n_jobs=-1).fit(train)
# acc_quant = QuantificationCMPredictor(clf(), ACC(clf()), strategy='kfcv', k=5, n_jobs=-1).fit(train)
pacc_quant = QuantificationCMPredictor(clf(), PACC(clf()), strategy='kfcv', k=5, n_jobs=-1).fit(train)
# hdy_quant = QuantificationCMPredictor(clf(), HDy(clf()), strategy='kfcv', k=5, n_jobs=-1).fit(train)

sld = EMQ(LogisticRegression()).fit(train)
pacc = PACC(clf()).fit(train)

contenders = [
    ('kFCV+MLPE', mlcfe),
    ('SLD', emq_quant),
    # ('CC', cc_quant),
    # ('PCC', pcc_quant),
    # ('ACC', acc_quant),
    ('PACC', pacc_quant),
    # ('HDy', hdy_quant)
]

metric = F1
# metric = accuracy

repeats = 10
with qp.util.temp_seed(42):
    samples_idx = [idx for idx in test.artificial_sampling_index_generator(sample_size=500, n_prevalences=21, repeats=repeats)]


series = defaultdict(list)
for idx in tqdm(samples_idx, desc='generating predictions'):
    sample = test.sampling_from_index(idx)

    upper.show_true_labels(sample.labels)
    upper_conf_matrix = upper.predict(sample.instances)
    metric_true = metric(upper_conf_matrix)
    series['Upper'].append(metric_true)

    for mname, method in contenders:
        conf_matrix = method.predict(sample.instances)
        estim_metric = metric(conf_matrix)
        series[mname].append(estim_metric)
        if hasattr(method, 'quantify'):
            series[mname + '-prev'].append(method.quantify(sample.instances))

    series['binsld-prev'].append(sld.quantify(sample.instances)[1])
    series['binpacc-prev'].append(pacc.quantify(sample.instances)[1])
    series['optimal-prev'].append(sample.prevalence()[1])
    series['prev'].append(sample.prevalence()[1])

metricname = metric.__name__
plot_series(series, repeats, metric_name=metricname, train_prev=train.prevalence()[1], savepath='./plots/' + dataset + '_LinearSVC_' + metricname + '.pdf')
@@ -0,0 +1,179 @@
import numpy as np
import quapy as qp
from sklearn import clone
from sklearn.metrics import confusion_matrix
import scipy
from scipy.sparse import issparse, csr_matrix
from quapy.data import LabelledCollection
from abc import ABC, abstractmethod
from sklearn.model_selection import cross_val_predict


class ConfusionMatrixPredictor(ABC):
    """
    Abstract class of predictors of the confusion matrix for the performance of a classifier.
    For the binary case, this amounts to predicting the 4-cell contingency table consisting of the
    true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN) that
    most evaluation metrics make use of.
    """
    @abstractmethod
    def fit(self, train: LabelledCollection):
        pass

    @abstractmethod
    def predict(self, test):
        pass


class MLCMEstimator(ConfusionMatrixPredictor):
    """
    The Maximum Likelihood Confusion Matrix Estimator is a method that relies on the IID assumption, and thus
    computes, via k-FCV (or any other technique), the counters of the confusion matrix in training, assuming that
    these are good estimates for the test case.
    """
    def __init__(self, classifier, strategy='kfcv', **kwargs):
        assert strategy in ['kfcv'], 'unknown strategy'
        if strategy == 'kfcv':
            assert 'k' in kwargs, 'strategy "kfcv" requires "k" to be passed as an argument'
        self.classifier = classifier
        self.strategy = strategy
        self.kwargs = kwargs

    def sout(self, msg):
        if 'verbose' in self.kwargs:
            print(msg)

    def fit(self, train: LabelledCollection):
        X, y = train.Xy
        if self.strategy == 'kfcv':
            k = self.kwargs['k']
            n_jobs = self.kwargs['n_jobs'] if 'n_jobs' in self.kwargs else 1
            predict = self.kwargs['predict'] if 'predict' in self.kwargs else 'predict'
            self.sout(f'{self.__class__.__name__}: '
                      f'running cross_val_predict with k={k} n_jobs={n_jobs} predict={predict}')
            predictions = cross_val_predict(self.classifier, X, y, cv=k, n_jobs=n_jobs, method=predict)
            self.conf_matrix = confusion_matrix(y, predictions, labels=train.classes_)
        return self

    def predict(self, test):
        """
        This method disregards the test set, under the assumption that it is IID w.r.t. the training set. This means
        that the confusion matrix for the test data should coincide with the one computed in training (using any
        cross validation strategy).

        :param test: test collection (ignored)
        :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
        """
        return self.conf_matrix
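

# Usage sketch for MLCMEstimator (hypothetical variable names; assumes a binary
# LabelledCollection `train` and a test matrix `test_instances`):
#   estim = MLCMEstimator(LogisticRegression(), strategy='kfcv', k=5).fit(train)
#   conf_matrix = estim.predict(test_instances)  # same matrix for any test set, by the IID assumption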


class UpperBound(ConfusionMatrixPredictor):
    def __init__(self, classifier, y_test):
        self.classifier = classifier
        self.y_test = y_test

    def fit(self, train: LabelledCollection):
        self.classifier.fit(*train.Xy)
        self.classes = train.classes_
        return self

    def show_true_labels(self, y_test):
        self.y_test = y_test

    def predict(self, test):
        predictions = self.classifier.predict(test)
        return confusion_matrix(self.y_test, predictions, labels=self.classes)


def get_counters(y_true, y_pred):
    counters = np.full(shape=y_true.shape, fill_value=-1)
    counters[np.logical_and(y_true == 1, y_pred == 1)] = 0
    counters[np.logical_and(y_true == 1, y_pred == 0)] = 1
    counters[np.logical_and(y_true == 0, y_pred == 1)] = 2
    counters[np.logical_and(y_true == 0, y_pred == 0)] = 3
    class_map = {
        0: 'tp',
        1: 'fn',
        2: 'fp',
        3: 'tn'
    }
    return counters, class_map
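

# e.g., get_counters(np.asarray([1, 1, 0, 0]), np.asarray([1, 0, 1, 0])) returns the
# counters [0, 1, 2, 3], i.e., one instance each of tp, fn, fp, and tn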


def safehstack(matrix, posteriors):
    if issparse(matrix):
        instances = csr_matrix(scipy.sparse.hstack([matrix, posteriors]))
    else:
        instances = np.hstack([matrix, posteriors])
    return instances


class QuantificationCMPredictor(ConfusionMatrixPredictor):
    """
    Predicts the confusion matrix by casting the problem as a quantification task: every training instance is
    relabelled as the outcome (tp, fn, fp, or tn) that its classification gives rise to, and a quantifier is then
    trained to estimate the prevalence of these four "classes" in the test set.
    """
    def __init__(self, classifier, quantifier, strategy='kfcv', **kwargs):
        assert strategy in ['kfcv'], 'unknown strategy'
        if strategy == 'kfcv':
            assert 'k' in kwargs, 'strategy "kfcv" requires "k" to be passed as an argument'
        self.classifier = clone(classifier)
        self.quantifier = quantifier
        self.strategy = strategy
        self.kwargs = kwargs

    def sout(self, msg):
        if 'verbose' in self.kwargs:
            print(msg)

    def fit(self, train: LabelledCollection):
        X, y = train.Xy
        if self.strategy == 'kfcv':
            k = self.kwargs['k']
            n_jobs = self.kwargs['n_jobs'] if 'n_jobs' in self.kwargs else 1
            self.sout(f'{self.__class__.__name__}: '
                      f'running cross_val_predict with k={k} n_jobs={n_jobs}')
            predictions = cross_val_predict(self.classifier, X, y, cv=k, n_jobs=n_jobs, method='predict')
            posteriors = cross_val_predict(self.classifier, X, y, cv=k, n_jobs=n_jobs, method='predict_proba')
            self.classifier.fit(X, y)
            instances = safehstack(train.instances, posteriors)
            counters, class_map = get_counters(train.labels, predictions)
            q_data = LabelledCollection(instances=instances, labels=counters, classes_=[0, 1, 2, 3])
            print('counters prevalence', q_data.counts())
            self.quantifier.fit(q_data)
        return self
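
    # Usage sketch (hypothetical; `train` is a binary LabelledCollection and `quantifier`
    # any quapy aggregative quantifier, e.g., PACC):
    #   cm_pred = QuantificationCMPredictor(classifier, quantifier, strategy='kfcv', k=5).fit(train)
    #   conf_matrix = cm_pred.predict(test_instances)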

    def predict(self, test):
        """
        :param test: test instances
        :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
        """
        posteriors = self.classifier.predict_proba(test)
        instances = safehstack(test, posteriors)
        counters = self.quantifier.quantify(instances)
        tp, fn, fp, tn = counters
        conf_matrix = np.asarray([[tn, fp], [fn, tp]])
        return conf_matrix

    def quantify(self, test):
        posteriors = self.classifier.predict_proba(test)
        instances = safehstack(test, posteriors)
        counters = self.quantifier.quantify(instances)
        tp, fn, fp, tn = counters
        den_tpr = (tp + fn)
        if den_tpr > 0:
            tpr = tp / den_tpr
        else:
            tpr = 1

        den_fpr = (fp + tn)
        if den_fpr > 0:
            fpr = fp / den_fpr
        else:
            fpr = 0

        pcc = posteriors.mean(axis=0)[1]  # the PCC prevalence estimate (mean posterior of the positive class)
        pacc = (pcc - fpr) / (tpr - fpr)  # the PACC adjustment (currently unused)
        pacc = np.clip(pacc, 0, 1)

        # the positive prevalence according to the estimated counters
        q = tp + fn
        return q