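"""Compare quantification-based accuracy estimators against baseline methods.

For a chosen dataset (spambase, imdb, or rcv1), this module fits a
LogisticRegression classifier and evaluates the binary and multiclass
quantifier-based accuracy estimators alongside several baselines (kfcv,
atc_mc, atc_ne, doc_feat, rca_score, rca_star_score) under an
artificial-prevalence protocol (APP), combining the per-method results and
timings into a single EvaluationReport.
"""
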
import multiprocessing
import time

import pandas as pd
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import (
    APP,
    AbstractStochasticSeededProtocol,
    OnLabelledCollectionProtocol,
)
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression

import quacc.error as error
import quacc.evaluation.baseline as baseline
from quacc.dataset import get_imdb, get_rcv1, get_spambase
from quacc.evaluation.report import EvaluationReport

from ..estimator import (
    AccuracyEstimator,
    BinaryQuantifierAccuracyEstimator,
    MulticlassAccuracyEstimator,
)

qp.environ["SAMPLE_SIZE"] = 100

pd.set_option("display.float_format", "{:.4f}".format)

# APP grid: 21 evenly spaced prevalence values, each sampled 100 times
n_prevalences = 21
repeats = 100


def estimate(
    estimator: AccuracyEstimator,
    protocol: AbstractStochasticSeededProtocol,
):
    # ensure that the protocol returns a LabelledCollection for each iteration
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    base_prevs, true_prevs, estim_prevs = [], [], []
    for sample in protocol():
        # extend the sample through the estimator, then estimate the
        # prevalence of the extended sample
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        base_prevs.append(sample.prevalence())
        true_prevs.append(e_sample.prevalence())
        estim_prevs.append(estim_prev)

    return base_prevs, true_prevs, estim_prevs


def evaluation_report(
    estimator: AccuracyEstimator,
    protocol: AbstractStochasticSeededProtocol,
    method: str,
) -> EvaluationReport:
    base_prevs, true_prevs, estim_prevs = estimate(estimator, protocol)
    report = EvaluationReport(prefix=method)

    for base_prev, true_prev, estim_prev in zip(base_prevs, true_prevs, estim_prevs):
        acc_score = error.acc(estim_prev)
        f1_score = error.f1(estim_prev)
        report.append_row(
            base_prev,
            acc_score=1.0 - acc_score,
            # absolute error of the estimated score w.r.t. the true one
            acc=abs(error.acc(true_prev) - acc_score),
            f1_score=f1_score,
            f1=abs(error.f1(true_prev) - f1_score),
        )

    return report


def evaluate(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    method: str,
):
    # dispatch on the method name to build the matching accuracy estimator
    estimator: AccuracyEstimator = {
        "bin": BinaryQuantifierAccuracyEstimator,
        "mul": MulticlassAccuracyEstimator,
    }[method](c_model)
    estimator.fit(validation)
    return evaluation_report(estimator, protocol, method)


def evaluate_binary(model, validation, protocol):
    return evaluate(model, validation, protocol, "bin")


def evaluate_multiclass(model, validation, protocol):
    return evaluate(model, validation, protocol, "mul")


def fit_and_estimate(_estimate, train, validation, test):
    model = LogisticRegression()
    model.fit(*train.Xy)

    protocol = APP(test, n_prevalences=n_prevalences, repeats=repeats)
    start = time.time()
    result = _estimate(model, validation, protocol)
    end = time.time()

    return {
        "name": _estimate.__name__,
        "result": result,
        "time": end - start,
    }


def evaluate_comparison(dataset: str, **kwargs) -> EvaluationReport:
    train, validation, test = {
        "spambase": get_spambase,
        "imdb": get_imdb,
        "rcv1": get_rcv1,
    }[dataset](**kwargs)

    for k, v in kwargs.items():
        print(k, ":", v)

    prevs = {
        "train": train.prevalence(),
        "validation": validation.prevalence(),
    }

    start = time.time()
    with multiprocessing.Pool(8) as pool:
        estimators = [
            evaluate_binary,
            evaluate_multiclass,
            baseline.kfcv,
            baseline.atc_mc,
            baseline.atc_ne,
            baseline.doc_feat,
            baseline.rca_score,
            baseline.rca_star_score,
        ]
        # run every method on the same splits, in parallel
        tasks = [(estim, train, validation, test) for estim in estimators]
        results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
        results = [r.get() for r in results]
        er = EvaluationReport.combine_reports(*[r["result"] for r in results])
        times = {r["name"]: r["time"] for r in results}
        end = time.time()
        times["tot"] = end - start
        er.times = times
        er.prevs = prevs

    return er
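

# Minimal usage sketch (an illustration, not part of the original module):
# because of the relative import above, this file must be executed as a
# module inside its package, e.g. `python -m <package>.<this_module>`; the
# exact module path is an assumption, and any keyword arguments required by
# the dataset loaders are omitted here.
if __name__ == "__main__":
    er = evaluate_comparison("spambase")
    print(er.prevs)  # train/validation class prevalences
    print(er.times)  # per-method wall-clock seconds, plus "tot"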