# Source: QuAcc/quacc/evaluation/baseline.py (300 lines, 9.6 KiB, Python)

from functools import wraps
from statistics import mean
import numpy as np
import sklearn.metrics as metrics
from quapy.data import LabelledCollection
from quapy.protocol import AbstractStochasticSeededProtocol
from scipy.sparse import issparse
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate
import baselines.atc as atc
import baselines.doc as doc
import baselines.impweight as iw
import baselines.rca as rcalib
from .report import EvaluationReport
_baselines = {}
def baseline(func):
@wraps(func)
def wrapper(c_model, validation, protocol):
return func(c_model, validation, protocol)
_baselines[func.__name__] = wrapper
return wrapper
@baseline
def kfcv(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Predict test performance as the mean k-fold CV score on validation data.

    The cross-validated accuracy / macro-F1 measured on ``validation`` is
    used as a constant estimate for every test sample drawn from ``protocol``;
    the report also records the absolute error of that estimate.

    :param c_model: fitted classifier under evaluation.
    :param validation: labelled in-distribution data used for cross-validation.
    :param protocol: sampling protocol yielding (possibly shifted) test samples.
    :param predict_method: name of the prediction method to call on ``c_model``.
    :return: :class:`EvaluationReport` with one row per protocol sample.
    """
    c_model_predict = getattr(c_model, predict_method)

    # Cross-validated scores act as the (sample-independent) predicted performance.
    scoring = ["accuracy", "f1_macro"]
    scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
    acc_score = mean(scores["test_accuracy"])
    f1_score = mean(scores["test_f1_macro"])

    report = EvaluationReport(name="kfcv")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        # Absolute estimation error w.r.t. the true test metrics.
        meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc_score=acc_score,
            f1_score=f1_score,
            acc=meta_acc,
            f1=meta_f1,
        )
    return report
@baseline
def ref(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
):
    """Reference (oracle) baseline: record the TRUE accuracy and F1 per sample.

    Unlike the other baselines this does not estimate anything — it simply
    evaluates ``c_model`` against the ground-truth labels of each protocol
    sample, providing the reference values the estimators are compared to.

    :param c_model: fitted classifier under evaluation.
    :param validation: unused here; kept for the uniform baseline signature.
    :param protocol: sampling protocol yielding test samples.
    :return: :class:`EvaluationReport` with true ``acc_score``/``f1_score`` rows.
    """
    c_model_predict = getattr(c_model, "predict")

    report = EvaluationReport(name="ref")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        report.append_row(
            test.prevalence(),
            acc_score=metrics.accuracy_score(test.y, test_preds),
            f1_score=metrics.f1_score(test.y, test_preds),
        )
    return report
@baseline
def atc_mc(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC with maximum-confidence score function (Garg et al.).

    Average Thresholded Confidence: a threshold on the max-confidence score
    is fit on the validation set (via ``atc.find_ATC_threshold``) so that the
    fraction of scores above it matches validation accuracy; test accuracy is
    then estimated as the fraction of test scores above that threshold.

    :param c_model: fitted probabilistic classifier.
    :param validation: labelled in-distribution data used to fit the threshold.
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: method returning class probabilities.
    :return: :class:`EvaluationReport` with estimated scores and their errors.
    """
    c_model_predict = getattr(c_model, predict_method)

    # In-distribution validation probabilities, scores and correctness mask.
    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = atc.get_max_conf(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_mc")
    for test in protocol():
        # Out-of-distribution test probabilities and scores.
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_max_conf(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )
    return report
@baseline
def atc_ne(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC with negative-entropy score function (Garg et al.).

    Identical to :func:`atc_mc` except the confidence score is the (negative)
    entropy of the predicted distribution (``atc.get_entropy``) rather than
    the maximum class probability.

    :param c_model: fitted probabilistic classifier.
    :param validation: labelled in-distribution data used to fit the threshold.
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: method returning class probabilities.
    :return: :class:`EvaluationReport` with estimated scores and their errors.
    """
    c_model_predict = getattr(c_model, predict_method)

    # In-distribution validation probabilities, scores and correctness mask.
    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = atc.get_entropy(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_ne")
    for test in protocol():
        # Out-of-distribution test probabilities and scores.
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_entropy(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )
    return report
@baseline
def doc_feat(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """DOC baseline: estimate accuracy from the difference of confidences.

    Validation accuracy (in percent) is corrected by ``doc.get_doc``, the
    shift between validation and test max-probability scores, and rescaled
    to [0, 1]. Only accuracy is estimated (no F1).

    :param c_model: fitted probabilistic classifier.
    :param validation: labelled in-distribution data.
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: method returning class probabilities.
    :return: :class:`EvaluationReport` with ``acc_score`` and its error.
    """
    c_model_predict = getattr(c_model, predict_method)

    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = np.max(val_probs, axis=-1)
    val_preds = np.argmax(val_probs, axis=-1)
    # Validation accuracy expressed in percent, as expected by the DOC formula.
    v1acc = np.mean(val_preds == val_labels) * 100

    report = EvaluationReport(name="doc_feat")
    for test in protocol():
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = np.max(test_probs, axis=-1)
        score = (v1acc + doc.get_doc(val_scores, test_scores)) / 100.0
        meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))
        report.append_row(test.prevalence(), acc=meta_acc, acc_score=score)
    return report
@baseline
def rca(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA baseline (Elsahar & Gallé, 2019): reverse classification accuracy.

    For each test sample, a clone of ``c_model`` is re-trained on the test
    inputs labelled with the model's own predictions; the agreement of that
    reverse model with the original predictions on the validation set yields
    the accuracy estimate.

    :param c_model: fitted classifier under evaluation.
    :param validation: labelled in-distribution data scored by both models.
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: name of the prediction method to call.
    :return: :class:`EvaluationReport`; rows are NaN when re-fitting fails.
    """
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            # Reverse model: re-fit a clone on test inputs + predicted labels.
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
        except ValueError:
            # clone_fit can fail (e.g. a sample with a single predicted class);
            # record NaN so the report keeps one row per protocol sample.
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )
    return report
@baseline
def rca_star(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA* baseline (Elsahar & Gallé, 2019): calibrated variant of :func:`rca`.

    The validation set is split in half; a reference reverse model is fit on
    the first half's predicted labels and scored on the second half, so the
    per-test reverse-accuracy is compared against held-out data rather than
    the data the original model predicted on.

    :param c_model: fitted classifier under evaluation.
    :param validation: labelled in-distribution data (split 50/50, seeded).
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: name of the prediction method to call.
    :return: :class:`EvaluationReport`; rows are NaN when re-fitting fails.
    """
    c_model_predict = getattr(c_model, predict_method)

    # Deterministic 50/50 stratified split of the validation data.
    validation1, validation2 = validation.split_stratified(
        train_prop=0.5, random_state=0
    )
    # Reference reverse model fit on half 1's predicted labels, scored on half 2.
    val1_pred = c_model_predict(validation1.X)
    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
            )
        except ValueError:
            # clone_fit can fail on degenerate samples; keep one row per sample.
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )
    return report
@baseline
def logreg(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Importance-weighting baseline using logistic-regression weights.

    For each test sample, per-instance weights ``wx`` are obtained by
    discriminating validation from test inputs (``iw.logreg``); the weighted
    validation accuracy (``iw.get_acc``) is the accuracy estimate.

    :param c_model: fitted classifier under evaluation.
    :param validation: labelled in-distribution data being re-weighted.
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: name of the prediction method to call.
    :return: :class:`EvaluationReport` with ``acc_score`` and its error.
    """
    c_model_predict = getattr(c_model, predict_method)
    val_preds = c_model_predict(validation.X)

    report = EvaluationReport(name="logreg")
    for test in protocol():
        # Weights depend on the current test sample, so they are re-fit per sample.
        wx = iw.logreg(validation.X, validation.y, test.X)
        test_preds = c_model_predict(test.X)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
    return report
@baseline
def kdex2(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Importance-weighting baseline using KDE density-ratio weights.

    Validation log-likelihoods are precomputed once (``iw.kdex2_lltr``);
    per-test weights come from ``iw.kdex2_weights`` and feed the weighted
    validation accuracy estimate (``iw.get_acc``).

    :param c_model: fitted classifier under evaluation.
    :param validation: labelled in-distribution data being re-weighted.
    :param protocol: sampling protocol yielding test samples.
    :param predict_method: name of the prediction method to call.
    :return: :class:`EvaluationReport` with ``acc_score`` and its error.
    """
    c_model_predict = getattr(c_model, predict_method)
    val_preds = c_model_predict(validation.X)
    log_likelihood_val = iw.kdex2_lltr(validation.X)
    # KDE weighting needs dense arrays; densify sparse feature matrices.
    Xval = validation.X.toarray() if issparse(validation.X) else validation.X

    report = EvaluationReport(name="kdex2")
    for test in protocol():
        Xte = test.X.toarray() if issparse(test.X) else test.X
        wx = iw.kdex2_weights(Xval, Xte, log_likelihood_val)
        test_preds = c_model_predict(Xte)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
    return report