QuAcc/quacc/evaluation/method.py

150 lines
4.2 KiB
Python
Raw Normal View History

import multiprocessing
import time
import pandas as pd
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import (
APP,
AbstractStochasticSeededProtocol,
OnLabelledCollectionProtocol,
)
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import quacc.error as error
import quacc.evaluation.baseline as baseline
from quacc.dataset import get_imdb, get_rcv1, get_spambase
from quacc.evaluation.report import EvaluationReport
from ..estimator import (
AccuracyEstimator,
BinaryQuantifierAccuracyEstimator,
MulticlassAccuracyEstimator,
)
qp.environ["SAMPLE_SIZE"] = 100
pd.set_option("display.float_format", "{:.4f}".format)
n_prevalences = 21
repreats = 100
def estimate(
estimator: AccuracyEstimator,
protocol: AbstractStochasticSeededProtocol,
):
# ensure that the protocol returns a LabelledCollection for each iteration
protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
base_prevs, true_prevs, estim_prevs = [], [], []
for sample in protocol():
e_sample = estimator.extend(sample)
estim_prev = estimator.estimate(e_sample.X, ext=True)
base_prevs.append(sample.prevalence())
true_prevs.append(e_sample.prevalence())
estim_prevs.append(estim_prev)
return base_prevs, true_prevs, estim_prevs
def evaluation_report(
estimator: AccuracyEstimator,
protocol: AbstractStochasticSeededProtocol,
method: str,
) -> EvaluationReport:
base_prevs, true_prevs, estim_prevs = estimate(estimator, protocol)
report = EvaluationReport(prefix=method)
for base_prev, true_prev, estim_prev in zip(base_prevs, true_prevs, estim_prevs):
acc_score = error.acc(estim_prev)
f1_score = error.f1(estim_prev)
report.append_row(
base_prev,
acc_score=1. - acc_score,
acc = abs(error.acc(true_prev) - acc_score),
f1_score=f1_score,
f1=abs(error.f1(true_prev) - f1_score)
)
return report
def evaluate(
c_model: BaseEstimator,
validation: LabelledCollection,
protocol: AbstractStochasticSeededProtocol,
method: str,
):
estimator : AccuracyEstimator = {
"bin": BinaryQuantifierAccuracyEstimator,
"mul": MulticlassAccuracyEstimator,
}[method](c_model)
estimator.fit(validation)
return evaluation_report(estimator, protocol, method)
def evaluate_binary(model, validation, protocol):
return evaluate(model, validation, protocol, "bin")
def evaluate_multiclass(model, validation, protocol):
return evaluate(model, validation, protocol, "mul")
def fit_and_estimate(_estimate, train, validation, test):
model = LogisticRegression()
model.fit(*train.Xy)
protocol = APP(test, n_prevalences=n_prevalences, repeats=repreats)
start = time.time()
result = _estimate(model, validation, protocol)
end = time.time()
return {
"name": _estimate.__name__,
"result": result,
"time": end - start,
}
def evaluate_comparison(dataset: str, **kwargs) -> EvaluationReport:
train, validation, test = {
"spambase": get_spambase,
"imdb": get_imdb,
"rcv1": get_rcv1,
}[dataset](**kwargs)
for k,v in kwargs.items():
print(k, ":", v)
prevs = {
"train": train.prevalence(),
"validation": validation.prevalence(),
}
start = time.time()
with multiprocessing.Pool(8) as pool:
estimators = [
evaluate_binary,
evaluate_multiclass,
baseline.kfcv,
baseline.atc_mc,
baseline.atc_ne,
baseline.doc_feat,
baseline.rca_score,
baseline.rca_star_score,
]
tasks = [(estim, train, validation, test) for estim in estimators]
results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
results = list(map(lambda r: r.get(), results))
er = EvaluationReport.combine_reports(*list(map(lambda r: r["result"], results)))
times = {r["name"]:r["time"] for r in results}
end = time.time()
times["tot"] = end - start
er.times = times
er.prevs = prevs
return er