forked from moreo/QuaPy
producing tables in benchmarks
parent a04723a976
commit 49a8cf3b0d
.gitmodules
@@ -0,0 +1,3 @@
[submodule "result_table"]
	path = result_table
	url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git
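(After cloning, the new submodule can be fetched with: git submodule update --init result_table)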
quapy/benchmarking/_base.py
@@ -0,0 +1,313 @@
import itertools
import os
from copy import deepcopy
from os.path import join
from dataclasses import dataclass
from typing import List, Union, Callable
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
import pickle

from sklearn.linear_model import LogisticRegression

import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import PACC
from quapy.protocol import APP, UPP, AbstractProtocol
from quapy.model_selection import GridSearchQ
from quapy.method.base import BaseQuantifier
from result_table.src.table import Table


def makedirs(dir):
    print('creating ', dir)
    os.makedirs(dir, exist_ok=True)


@dataclass
class MethodDescriptor:
    id: str
    name: str
    instance: BaseQuantifier
    hyperparams: dict
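

# Benchmark: abstract driver of a benchmarking campaign. It derives a unique run ID for
# every (method, dataset) pair, caches results and selected hyperparameters on disk under
# home_dir, and turns the cached results into summary tables (and, eventually, plots).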
class Benchmark(ABC):

    ID_SEPARATOR = '__'  # used to separate components in a run ID; cannot appear within the component IDs

    def __init__(self, home_dir, n_jobs=3):
        self.home_dir = home_dir
        self.n_jobs = n_jobs
        assert n_jobs != -1, ('Setting n_jobs=-1 will probably blow up your memory. '
                              'Specify a positive number.')
        makedirs(home_dir)
        makedirs(join(home_dir, 'results'))
        makedirs(join(home_dir, 'params'))
        makedirs(join(home_dir, 'tables'))
        makedirs(join(home_dir, 'plots'))

    def _run_id(self, method: MethodDescriptor, dataset: str):
        sep = Benchmark.ID_SEPARATOR
        assert sep not in method.id, \
            (f'separator {sep} cannot be used in method ID ({method.id}), '
             f'please change the method ID or redefine {Benchmark.ID_SEPARATOR=}')
        assert sep not in dataset, \
            (f'separator {sep} cannot be used in dataset name ({dataset}), '
             f'please redefine {Benchmark.ID_SEPARATOR=}')
        return sep.join([method.id, dataset])

    def _result_path(self, method: MethodDescriptor, dataset: str):
        id = self._run_id(method, dataset)
        return join(self.home_dir, 'results', id + '.pkl')

    def _params_path(self, method: MethodDescriptor, dataset: str):
        id = self._run_id(method, dataset)
        chosen = join(self.home_dir, 'params', id + 'chosen.pkl')
        scores = join(self.home_dir, 'params', id + 'scores.pkl')
        return chosen, scores

    def _exist_run(self, method: MethodDescriptor, dataset: str):
        return os.path.exists(self._result_path(method, dataset))

    def _open_method_dataset_result(self, method: MethodDescriptor, dataset: str):
        if not self._exist_run(method, dataset):
            raise ValueError(f'cannot open result for method={method.id} and {dataset=}')
        return pd.read_pickle(self._result_path(method, dataset))

    def check_dataset(self, dataset: str):
        assert dataset in self.list_datasets(), f'unknown dataset {dataset}'

    @abstractmethod
    def list_datasets(self) -> List[str]:
        ...

    @abstractmethod
    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        ...

    def gen_tables(self, results, metrics=None):
        if metrics is None:
            metrics = ['mae', 'mrae', 'mkld', 'mnkld']
        tables = {}
        for (method, dataset, result) in results:
            col_metrics = result.columns.values[2:]
            for metric in metrics:
                if metric not in col_metrics:
                    print(f'error; requested {metric=} not found among the columns in the dataframe')
                    continue
                if metric not in tables:
                    tables[metric] = Table(name=metric)
                table = tables[metric]
                table.add(dataset, method.name, result[metric].values)
        Table.LatexPDF(join(self.home_dir, 'tables', 'results.pdf'), list(tables.values()))

    def gen_plots(self):
        pass

    def show_report(self, method, dataset, report: pd.DataFrame):
        id = method.id
        MAE = report['mae'].mean()
        mae_std = report['mae'].std()
        MRAE = report['mrae'].mean()
        mrae_std = report['mrae'].std()
        print(f'{id}\t{dataset}:\t{MAE=:.4f}+-{mae_std:.4f}\t{MRAE=:.4f}+-{mrae_std:.4f}')

    def run(self,
            methods: Union[List[MethodDescriptor], MethodDescriptor],
            datasets: Union[List[str], str] = None,
            force=False):

        if not isinstance(methods, list):
            methods = [methods]

        if datasets is None:
            datasets = self.list_datasets()
        elif not isinstance(datasets, list):
            datasets = [datasets]

        # load cached results; schedule the (method, dataset) pairs that still need to be run
        results = []
        pending_job_args = []
        for method, dataset in itertools.product(methods, datasets):
            self.check_dataset(dataset)
            if not force and self._exist_run(method, dataset):
                result = pd.read_pickle(self._result_path(method, dataset))
                results.append((method, dataset, result))
            else:
                pending_job_args.append((method, dataset))

        if len(pending_job_args) > 0:
            remaining_results = qp.util.parallel_unpack(
                func=self.run_method_dataset,
                args=pending_job_args,
                n_jobs=self.n_jobs,
                seed=0,
                asarray=False
            )
            results += [(method, dataset, result)
                        for (method, dataset), result in zip(pending_job_args, remaining_results)]

        # print results
        for method, dataset, result in results:
            self.show_report(method, dataset, result)

        self.gen_tables(results)
        self.gen_plots()

    # def gen_plots(self, methods=None):
    #     if methods is None:

    def __add__(self, other: 'Benchmark'):
        return CombinedBenchmark(self, other, self.n_jobs)
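

# CombinedBenchmark: merges two benchmarks (as produced by Benchmark.__add__) and routes
# every dataset to the benchmark it belongs to, so both can be run as a single campaign.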
class CombinedBenchmark(Benchmark):

    def __init__(self, benchmark_a: Benchmark, benchmark_b: Benchmark, n_jobs=-1):
        self.router = {
            **{dataset: benchmark_a for dataset in benchmark_a.list_datasets()},
            **{dataset: benchmark_b for dataset in benchmark_b.list_datasets()}
        }
        self.datasets = benchmark_a.list_datasets() + benchmark_b.list_datasets()
        self.n_jobs = n_jobs

    def list_datasets(self) -> List[str]:
        return self.datasets

    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        return self.router[dataset].run_method_dataset(method, dataset, random_state)

    def _exist_run(self, method: MethodDescriptor, dataset: str):
        return self.router[dataset]._exist_run(method, dataset)
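

# TypicalBenchmark: implements the standard pipeline (model selection with GridSearchQ on a
# validation protocol, refitting on the full training set, evaluation on a test protocol, and
# persistence of results and chosen hyperparameters); subclasses only declare the datasets,
# the sample size, the protocols, and the target error for model selection.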
class TypicalBenchmark(Benchmark):

    # def __init__(self, home_dir, ):

    @abstractmethod
    def get_sample_size(self) -> int:
        ...

    @abstractmethod
    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        ...

    @abstractmethod
    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        ...

    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        print(f'Running method={method.id} in {dataset=}')

        sample_size = self.get_sample_size()
        qp.environ['SAMPLE_SIZE'] = sample_size

        q = deepcopy(method.instance)
        optim_for = self.get_target_error_for_modsel()

        with qp.util.temp_seed(random_state):
            # data split
            trModSel, valprotModSel, trEval, teprotEval = self.get_trModsel_valprotModsel_trEval_teprotEval(dataset)

            # model selection
            modsel = GridSearchQ(
                model=q,
                param_grid=method.hyperparams,
                protocol=valprotModSel,
                error=optim_for,
                refit=False,
                n_jobs=-1,
                raise_errors=True,
                verbose=True
            ).fit(trModSel)

            # fit on the whole training data
            optimized_model = modsel.best_model_
            optimized_model.fit(trEval)

            # evaluation
            report = qp.evaluation.evaluation_report(
                model=optimized_model,
                protocol=teprotEval,
                error_metrics=qp.error.QUANTIFICATION_ERROR_NAMES
            )

        # data persistence
        chosen_path, scores_path = self._params_path(method, dataset)
        pickle.dump(modsel.best_params_, open(chosen_path, 'wb'), pickle.HIGHEST_PROTOCOL)
        pickle.dump(modsel.param_scores_, open(scores_path, 'wb'), pickle.HIGHEST_PROTOCOL)

        result_path = self._result_path(method, dataset)
        report.to_pickle(result_path)

        return report
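

# Two ready-made benchmarks: UCI binary datasets (APP protocols, samples of size 100) and
# UCI multiclass datasets (UPP protocols, samples of size 500); both select models for 'mae'.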
class UCIBinaryBenchmark(TypicalBenchmark):

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        valprotModsel = APP(vaModsel, n_prevalences=21, repeats=25)
        teprotEval = APP(teEval, n_prevalences=21, repeats=100)
        return trModsel, valprotModsel, trEval, teprotEval

    def get_sample_size(self) -> int:
        return 100

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'

    def list_datasets(self) -> List[str]:
        ignore = ['acute.a', 'acute.b', 'balance.2']
        return [d for d in qp.datasets.UCI_BINARY_DATASETS if d not in ignore]


class UCIMultiBenchmark(TypicalBenchmark):

    def list_datasets(self) -> List[str]:
        return qp.datasets.UCI_MULTICLASS_DATASETS

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        valprotModsel = UPP(vaModsel, repeats=250)
        teprotEval = UPP(teEval, repeats=1000)
        return trModsel, valprotModsel, trEval, teprotEval

    def get_sample_size(self) -> int:
        return 500

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'


if __name__ == '__main__':

    from quapy.benchmarking.typical import *

    # from quapy.method.aggregative import BayesianCC
    # bayes = MethodDescriptor(
    #     id='Bayesian',
    #     name='Bayesian(LR)',
    #     instance=BayesianCC(LogisticRegression()),
    #     hyperparams=wrap_cls_params(lr_hyper)
    # )

    # bench_bin = UCIBinaryBenchmark('../../Benchmarks/UCIbinary')
    bench_multi = UCIMultiBenchmark('../../Benchmarks/UCIMulti')
    # bench = bench_bin + bench_multi
    bench = bench_multi

    bench.run(methods=[cc, pcc, acc, pacc, sld, sld_bcts])

quapy/benchmarking/typical.py
@@ -0,0 +1,51 @@
import numpy as np
from sklearn.linear_model import LogisticRegression

from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
from quapy.benchmarking._base import MethodDescriptor

# hyperparameter grid for the underlying LogisticRegression classifier
lr_hyper = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}

# prefixes classifier hyperparameters with 'classifier__' so that model selection routes them
# to the classifier embedded in the quantifier
wrap_cls_params = lambda params: {'classifier__' + key: val for key, val in params.items()}
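

# One MethodDescriptor per baseline quantifier; all wrap a LogisticRegression classifier
# and share the same classifier hyperparameter grid.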
cc = MethodDescriptor(
    id='CC',
    name='CC(LR)',
    instance=CC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

pcc = MethodDescriptor(
    id='PCC',
    name='PCC(LR)',
    instance=PCC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

acc = MethodDescriptor(
    id='ACC',
    name='ACC(LR)',
    instance=ACC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

pacc = MethodDescriptor(
    id='PACC',
    name='PACC(LR)',
    instance=PACC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

sld = MethodDescriptor(
    id='SLD',
    name='SLD',
    instance=EMQ(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

sld_bcts = MethodDescriptor(
    id='SLD-BCTS',
    name='SLD-BCTS',
    instance=EMQ(LogisticRegression(), recalib='bcts', exact_train_prev=False),
    hyperparams=wrap_cls_params(lr_hyper)
)
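
For illustration only (not part of this commit): a new benchmark needs nothing beyond the four abstract methods of TypicalBenchmark. The sketch below assumes the first file above is importable as quapy.benchmarking._base, and the output directory and the class name are arbitrary.

from typing import List, Union, Callable

import quapy as qp
from quapy.protocol import UPP
from quapy.benchmarking._base import TypicalBenchmark   # assumed module path for the file above


class SmallUCIMultiBenchmark(TypicalBenchmark):
    # toy benchmark restricted to the first three UCI multiclass datasets

    def list_datasets(self) -> List[str]:
        return qp.datasets.UCI_MULTICLASS_DATASETS[:3]

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str):
        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        return trModsel, UPP(vaModsel, repeats=100), trEval, UPP(teEval, repeats=500)

    def get_sample_size(self) -> int:
        return 250

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'


if __name__ == '__main__':
    from quapy.benchmarking.typical import pacc, sld
    # hypothetical output directory
    SmallUCIMultiBenchmark('../../Benchmarks/UCIMultiSmall').run(methods=[pacc, sld])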

quapy/method/aggregative.py
@@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier):
             raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
         if self.method not in ACC.METHODS:
             raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
-        if self.clipping not in ACC.NORMALIZATIONS:
+        if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}")

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):

result_table
@@ -0,0 +1 @@
Subproject commit 01f8fb936bddaaa33aad026b450be13089ec1d7c