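"""Baseline methods for estimating classifier accuracy on (possibly shifted) test samples.

Each baseline is registered in the module-level ``_baselines`` dict via the
``@baseline`` decorator and shares the call signature
``(c_model, validation, protocol)``: a trained scikit-learn classifier, an
in-distribution validation ``LabelledCollection``, and a quapy protocol that
generates test samples. Every baseline returns an ``EvaluationReport`` with one
row per test sample, holding the accuracy estimate (``acc_score``, plus
``f1_score`` where available) and the absolute error of that estimate with
respect to the true test metric (``acc``, ``f1``).
"""
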
from functools import wraps
from statistics import mean

import numpy as np
import sklearn.metrics as metrics
from quapy.data import LabelledCollection
from quapy.protocol import AbstractStochasticSeededProtocol
from scipy.sparse import issparse
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate

import baselines.atc as atc
import baselines.doc as doc
import baselines.impweight as iw
import baselines.rca as rcalib

from .report import EvaluationReport

_baselines = {}


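# Registers a baseline in `_baselines` under its function name, normalizing every
# baseline to the common (c_model, validation, protocol) call signature.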
def baseline(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    _baselines[func.__name__] = wrapper

    return wrapper


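# kfcv: estimates accuracy and macro-F1 via k-fold cross-validation on the validation
# set and reports that single estimate for every test sample generated by the protocol.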
@baseline
def kfcv(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    scoring = ["accuracy", "f1_macro"]
    scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
    acc_score = mean(scores["test_accuracy"])
    f1_score = mean(scores["test_f1_macro"])

    report = EvaluationReport(name="kfcv")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc_score=acc_score,
            f1_score=f1_score,
            acc=meta_acc,
            f1=meta_f1,
        )

    return report


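# ref: reference entry that records the true accuracy and F1 measured on each test
# sample; it performs no estimation.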
@baseline
def ref(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
):
    c_model_predict = getattr(c_model, "predict")
    report = EvaluationReport(name="ref")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        report.append_row(
            test.prevalence(),
            acc_score=metrics.accuracy_score(test.y, test_preds),
            f1_score=metrics.f1_score(test.y, test_preds),
        )

    return report


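# atc_mc: Average Thresholded Confidence with the maximum-confidence score
# (Garg et al., 2022). A threshold is fitted on the validation scores so that the
# fraction of scores above it matches the validation accuracy; the accuracy of each
# test sample is then estimated from the fraction of its scores above that threshold.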
@baseline
def atc_mc(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-MC: Average Thresholded Confidence with the maximum-confidence score (Garg et al., 2022)."""
    c_model_predict = getattr(c_model, predict_method)

    # probabilities and labels of the in-distribution validation data
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    # score function: maximum confidence (ATC-MC)
    val_scores = atc.get_max_conf(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_mc")
    for test in protocol():
        # probabilities on the (possibly shifted) test sample
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_max_conf(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


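# atc_ne: same ATC procedure as atc_mc above, but with an entropy-based score
# function (ATC-NE) instead of the maximum confidence.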
@baseline
def atc_ne(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-NE: Average Thresholded Confidence with an entropy-based score (Garg et al., 2022)."""
    c_model_predict = getattr(c_model, predict_method)

    # probabilities and labels of the in-distribution validation data
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    # score function: entropy-based score (ATC-NE)
    val_scores = atc.get_entropy(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_ne")
    for test in protocol():
        # probabilities on the (possibly shifted) test sample
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_entropy(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


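# doc_feat: Difference-of-Confidences style baseline (cf. Guillory et al., 2021):
# the validation accuracy is adjusted by the confidence shift between validation and
# test scores as computed by doc.get_doc, then rescaled from percentage to [0, 1].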
@baseline
def doc_feat(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    c_model_predict = getattr(c_model, predict_method)

    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = np.max(val_probs, axis=-1)
    val_preds = np.argmax(val_probs, axis=-1)
    v1acc = np.mean(val_preds == val_labels) * 100

    report = EvaluationReport(name="doc_feat")
    for test in protocol():
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = np.max(test_probs, axis=-1)
        score = (v1acc + doc.get_doc(val_scores, test_scores)) / 100.0
        meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))
        report.append_row(test.prevalence(), acc=meta_acc, acc_score=score)

    return report


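# rca: Reverse Classification Accuracy (Elsahar & Gallé, 2019). A second classifier
# is fitted on the test sample pseudo-labelled by c_model, and the accuracy estimate
# is derived (via rcalib.get_score) from comparing the original and refitted
# predictions on the labelled validation set. Test samples for which refitting fails
# with a ValueError are recorded as NaN.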
@baseline
def rca(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA: Reverse Classification Accuracy (Elsahar & Gallé, 2019)."""
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


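# rca_star: RCA* variant. The validation set is split in half: one half provides
# pseudo-labels for a reference classifier, so the comparison on the other half is
# between a classifier refit on test pseudo-labels and one refit on validation
# pseudo-labels, rather than against c_model itself.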
@baseline
def rca_star(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA*: Reverse Classification Accuracy using a classifier refit on validation pseudo-labels as reference (Elsahar & Gallé, 2019)."""
    c_model_predict = getattr(c_model, predict_method)
    validation1, validation2 = validation.split_stratified(
        train_prop=0.5, random_state=0
    )
    val1_pred = c_model_predict(validation1.X)
    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


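# logreg: importance-weighting baseline. Instance weights are obtained from a
# logistic-regression-based estimator over validation and test instances (iw.logreg),
# and the accuracy estimate is the weight-corrected validation accuracy (iw.get_acc).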
@baseline
def logreg(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)

    report = EvaluationReport(name="logreg")
    for test in protocol():
        wx = iw.logreg(validation.X, validation.y, test.X)
        test_preds = c_model_predict(test.X)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report


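# kdex2: importance-weighting baseline based on kernel density estimation: weights
# are derived from densities fitted on validation and test features (iw.kdex2_weights,
# with sparse matrices densified first), and the accuracy estimate is again the
# weight-corrected validation accuracy.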
@baseline
def kdex2(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)
    log_likelihood_val = iw.kdex2_lltr(validation.X)
    Xval = validation.X.toarray() if issparse(validation.X) else validation.X

    report = EvaluationReport(name="kdex2")
    for test in protocol():
        Xte = test.X.toarray() if issparse(test.X) else test.X
        wx = iw.kdex2_weights(Xval, Xte, log_likelihood_val)
        test_preds = c_model_predict(Xte)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report
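

# Usage sketch (illustrative; `clf`, `val` and `prot` are not defined in this module):
# given a trained classifier `clf`, a validation LabelledCollection `val` and a quapy
# sample-generation protocol `prot` over the test data, any registered baseline can be
# invoked uniformly through the registry:
#
#     report = _baselines["atc_mc"](clf, val, prot)
#     report = _baselines["rca"](clf, val, prot)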