producing tables in benchmarks

Alejandro Moreo Fernandez 2024-04-08 15:25:29 +02:00
parent a04723a976
commit 49a8cf3b0d
6 changed files with 369 additions and 1 deletion

.gitmodules (vendored, new file, +3 lines)

@@ -0,0 +1,3 @@
[submodule "result_table"]
path = result_table
url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git


quapy/benchmarking/_base.py (new file, +313 lines)

@@ -0,0 +1,313 @@
import itertools
import os
from copy import deepcopy
from os.path import join
from dataclasses import dataclass
from typing import List, Union, Callable
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import PACC
from quapy.protocol import APP, UPP, AbstractProtocol
from quapy.model_selection import GridSearchQ
from quapy.method.base import BaseQuantifier
from result_table.src.table import Table
def makedirs(dir):
print('creating ', dir)
os.makedirs(dir, exist_ok=True)
@dataclass
class MethodDescriptor:
id: str
name: str
instance: BaseQuantifier
hyperparams: dict
class Benchmark(ABC):
ID_SEPARATOR = '__' # used to separate components in a run-ID, cannot be used within the component IDs
def __init__(self, home_dir, n_jobs=3):
self.home_dir = home_dir
self.n_jobs = n_jobs
assert n_jobs!=-1, ('Setting n_jobs=-1 will probably blow your memory. '
'Specify a positive number.')
makedirs(home_dir)
makedirs(join(home_dir, 'results'))
makedirs(join(home_dir, 'params'))
makedirs(join(home_dir, 'tables'))
makedirs(join(home_dir, 'plots'))
def _run_id(self, method: MethodDescriptor, dataset: str):
sep = Benchmark.ID_SEPARATOR
assert sep not in method.id, \
(f'separator {sep} cannot be used in method ID ({method.id}), '
f'please change the method ID or redefine {Benchmark.ID_SEPARATOR=}')
assert sep not in dataset, \
(f'separator {sep} cannot be used in dataset name ({dataset}), '
f'please redefine {Benchmark.ID_SEPARATOR=}')
return sep.join([method.id, dataset])
def _result_path(self, method: MethodDescriptor, dataset: str):
id = self._run_id(method, dataset)
return join(self.home_dir, 'results', id + '.pkl')
def _params_path(self, method: MethodDescriptor, dataset: str):
id = self._run_id(method, dataset)
chosen = join(self.home_dir, 'params', id + 'chosen.pkl')
scores = join(self.home_dir, 'params', id + 'scores.pkl')
return chosen, scores
def _exist_run(self, method: MethodDescriptor, dataset: str):
return os.path.exists(self._result_path(method, dataset))
def _open_method_dataset_result(self, method: MethodDescriptor, dataset: str):
    if not self._exist_run(method, dataset):
        raise ValueError(f'cannot open result for method={method.id} and {dataset=}')
    # return the pickled evaluation report produced by run_method_dataset
    return pd.read_pickle(self._result_path(method, dataset))
def check_dataset(self, dataset:str):
assert dataset in self.list_datasets(), f'unknown dataset {dataset}'
@abstractmethod
def list_datasets(self)-> List[str]:
...
@abstractmethod
def run_method_dataset(self, method: MethodDescriptor, dataset:str, random_state=0)-> pd.DataFrame:
...
def gen_tables(self, results, metrics=None):
if metrics is None:
metrics = ['mae', 'mrae', 'mkld', 'mnkld']
tables = {}
for (method, dataset, result) in results:
col_metrics = result.columns.values[2:]
for metric in metrics:
if metric not in col_metrics:
print(f'error; requested {metric=} not found among the columns in the dataframe')
continue
if metric not in tables:
tables[metric] = Table(name=metric)
table = tables[metric]
table.add(dataset, method.name, result[metric].values)
Table.LatexPDF(join(self.home_dir, 'tables', 'results.pdf'), list(tables.values()))
def gen_plots(self):
pass
def show_report(self, method, dataset, report: pd.DataFrame):
id = method.id
MAE = report['mae'].mean()
mae_std = report['mae'].std()
MRAE = report['mrae'].mean()
mrae_std = report['mrae'].std()
print(f'{id}\t{dataset}:\t{MAE=:.4f}+-{mae_std:.4f}\t{MRAE=:.4f}+-{mrae_std:.4f}')
def run(self,
methods: Union[List[MethodDescriptor], MethodDescriptor],
datasets:Union[List[str],str]=None,
force=False):
if not isinstance(methods, list):
methods = [methods]
if datasets is None:
datasets = self.list_datasets()
elif not isinstance(datasets, list):
datasets = [datasets]
results = []
pending_job_args = []
for method, dataset in itertools.product(methods, datasets):
self.check_dataset(dataset)
if not force and self._exist_run(method, dataset):
result = pd.read_pickle(self._result_path(method, dataset))
results.append((method, dataset, result))
else:
pending_job_args.append((method, dataset))
if len(pending_job_args)>0:
remaining_results = qp.util.parallel_unpack(
func=self.run_method_dataset,
args=pending_job_args,
n_jobs=self.n_jobs,
seed=0,
asarray=False
)
results += [(method, dataset, result) for (method, dataset), result in zip(pending_job_args, remaining_results)]
# print results
for method, dataset, result in results:
self.show_report(method, dataset, result)
self.gen_tables(results)
self.gen_plots()
# def gen_plots(self, methods=None):
# if methods is None:
def __add__(self, other: 'Benchmark'):
return CombinedBenchmark(self, other, self.n_jobs)
class CombinedBenchmark(Benchmark):
def __init__(self, benchmark_a:Benchmark, benchmark_b:Benchmark, n_jobs=-1):
    # no call to Benchmark.__init__; reuse the first benchmark's home_dir so that
    # gen_tables and gen_plots have a destination to write to
    self.home_dir = benchmark_a.home_dir
    self.router = {
**{dataset: benchmark_a for dataset in benchmark_a.list_datasets()},
**{dataset: benchmark_b for dataset in benchmark_b.list_datasets()}
}
self.datasets = benchmark_a.list_datasets() + benchmark_b.list_datasets()
self.n_jobs = n_jobs
def list_datasets(self) -> List[str]:
return self.datasets
def run_method_dataset(self, method: MethodDescriptor, dataset:str, random_state=0) -> pd.DataFrame:
return self.router[dataset].run_method_dataset(method, dataset, random_state)
def _exist_run(self, method: MethodDescriptor, dataset: str):
return self.router[dataset]._exist_run(method, dataset)
class TypicalBenchmark(Benchmark):
# def __init__(self, home_dir, ):
@abstractmethod
def get_sample_size(self)-> int:
...
@abstractmethod
def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset:str)->\
(LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
...
@abstractmethod
def get_target_error_for_modsel(self)-> Union[str, Callable]:
...
def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
print(f'Running method={method.id} in {dataset=}')
sample_size = self.get_sample_size()
qp.environ['SAMPLE_SIZE'] = sample_size
q = deepcopy(method.instance)
optim_for = self.get_target_error_for_modsel()
with qp.util.temp_seed(random_state):
# data split
trModSel, valprotModSel, trEval, teprotEval = self.get_trModsel_valprotModsel_trEval_teprotEval(dataset)
# model selection
modsel = GridSearchQ(
model=q,
param_grid=method.hyperparams,
protocol=valprotModSel,
error=optim_for,
refit=False,
n_jobs=-1,
raise_errors=True,
verbose=True
).fit(trModSel)
# fit on the whole training data
optimized_model = modsel.best_model_
optimized_model.fit(trEval)
# evaluation
report = qp.evaluation.evaluation_report(
model=optimized_model,
protocol=teprotEval,
error_metrics=qp.error.QUANTIFICATION_ERROR_NAMES
)
# data persistence
chosen_path, scores_path = self._params_path(method, dataset)
with open(chosen_path, 'wb') as f:
    pickle.dump(modsel.best_params_, f, pickle.HIGHEST_PROTOCOL)
with open(scores_path, 'wb') as f:
    pickle.dump(modsel.param_scores_, f, pickle.HIGHEST_PROTOCOL)
result_path = self._result_path(method, dataset)
report.to_pickle(result_path)
return report
class UCIBinaryBenchmark(TypicalBenchmark):
def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
        (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
    data = qp.datasets.fetch_UCIBinaryDataset(dataset)
    trEval, teEval = data.train_test
    trModsel, vaModsel = trEval.split_stratified()
    valprotModsel = APP(vaModsel, n_prevalences=21, repeats=25)
    teprotEval = APP(teEval, n_prevalences=21, repeats=100)
    return trModsel, valprotModsel, trEval, teprotEval
def get_sample_size(self) -> int:
return 100
def get_target_error_for_modsel(self) -> Union[str, Callable]:
return 'mae'
def list_datasets(self)->List[str]:
ignore = ['acute.a', 'acute.b', 'balance.2']
return [d for d in qp.datasets.UCI_BINARY_DATASETS if d not in ignore]
class UCIMultiBenchmark(TypicalBenchmark):
def list_datasets(self) -> List[str]:
return qp.datasets.UCI_MULTICLASS_DATASETS
def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
        (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
    data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
    trEval, teEval = data.train_test
    trModsel, vaModsel = trEval.split_stratified()
    valprotModsel = UPP(vaModsel, repeats=250)
    teprotEval = UPP(teEval, repeats=1000)
    return trModsel, valprotModsel, trEval, teprotEval
def get_sample_size(self) -> int:
return 500
def get_target_error_for_modsel(self) -> Union[str, Callable]:
return 'mae'
if __name__ == '__main__':
from quapy.benchmarking.typical import *
# from quapy.method.aggregative import BayesianCC
# bayes = MethodDescriptor(
# id='Bayesian',
# name='Bayesian(LR)',
# instance=BayesianCC(LogisticRegression()),
# hyperparams=wrap_cls_params(lr_hyper)
# )
# bench_bin = UCIBinaryBenchmark('../../Benchmarks/UCIbinary')
bench_multi = UCIMultiBenchmark('../../Benchmarks/UCIMulti')
# bench = bench_bin + bench_multi
bench = bench_multi
bench.run(methods=[cc, pcc, acc, pacc, sld, sld_bcts])
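
The following sketch is an editorial illustration, not part of the committed code: it shows how an additional MethodDescriptor could be defined and passed to Benchmark.run, following the pattern of the __main__ block above. The names pacc_balanced and the chosen hyperparameter grid are hypothetical; results already pickled under <home_dir>/results are reused unless force=True.

# hypothetical usage sketch, not part of this commit
import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC
from quapy.benchmarking._base import MethodDescriptor, UCIMultiBenchmark

pacc_balanced = MethodDescriptor(
    id='PACC-bal',       # ids must not contain the '__' run-ID separator
    name='PACC(LR, balanced)',
    instance=PACC(LogisticRegression(class_weight='balanced')),
    hyperparams={'classifier__C': np.logspace(-3, 3, 7)},  # 'classifier__' prefix routes the grid to the classifier
)

bench = UCIMultiBenchmark('../../Benchmarks/UCIMulti')
bench.run(methods=pacc_balanced, force=False)  # datasets=None runs every dataset returned by list_datasets()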

quapy/benchmarking/typical.py (new file, +51 lines)

@@ -0,0 +1,51 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
from quapy.benchmarking._base import MethodDescriptor
lr_hyper = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}
wrap_cls_params = lambda params: {'classifier__' + key: val for key, val in params.items()}
cc = MethodDescriptor(
id='CC',
name='CC(LR)',
instance=CC(LogisticRegression()),
hyperparams=wrap_cls_params(lr_hyper)
)
pcc = MethodDescriptor(
id='PCC',
name='PCC(LR)',
instance=PCC(LogisticRegression()),
hyperparams=wrap_cls_params(lr_hyper)
)
acc = MethodDescriptor(
id='ACC',
name='ACC(LR)',
instance=ACC(LogisticRegression()),
hyperparams=wrap_cls_params(lr_hyper)
)
pacc = MethodDescriptor(
id='PACC',
name='PACC(LR)',
instance=PACC(LogisticRegression()),
hyperparams=wrap_cls_params(lr_hyper)
)
sld = MethodDescriptor(
id='SLD',
name='SLD',
instance=EMQ(LogisticRegression()),
hyperparams=wrap_cls_params(lr_hyper)
)
sld_bcts = MethodDescriptor(
id='SLD-BCTS',
name='SLD-BCTS',
instance=EMQ(LogisticRegression(), recalib='bcts', exact_train_prev=False),
hyperparams=wrap_cls_params(lr_hyper)
)
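
As an editorial illustration (not part of this commit), the same helpers extend naturally to other sklearn classifiers that expose posterior probabilities; the svm_hyper and pacc_svm names below are made up for this sketch.

# hypothetical sketch, not part of this commit
import numpy as np
from sklearn.svm import SVC
from quapy.method.aggregative import PACC
from quapy.benchmarking._base import MethodDescriptor

# same helper as above: prefixes every classifier hyperparameter with 'classifier__' for GridSearchQ
wrap_cls_params = lambda params: {'classifier__' + key: val for key, val in params.items()}

svm_hyper = {'C': np.logspace(-3, 3, 7), 'gamma': ['scale', 'auto']}
pacc_svm = MethodDescriptor(
    id='PACC-SVM',
    name='PACC(SVM)',
    instance=PACC(SVC(probability=True)),  # probability=True so that PACC can aggregate posterior probabilities
    hyperparams=wrap_cls_params(svm_hyper),
)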

quapy/method/aggregative.py (1 addition, 1 deletion)

@@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier):
raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
if self.method not in ACC.METHODS:
raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
- if self.clipping not in ACC.NORMALIZATIONS:
+ if self.norm not in ACC.NORMALIZATIONS:
raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}")
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):

result_table (submodule, new, +1 line)

@@ -0,0 +1 @@
Subproject commit 01f8fb936bddaaa33aad026b450be13089ec1d7c