forked from moreo/QuaPy
producing tables in benchmarks
parent a04723a976
commit 49a8cf3b0d
.gitmodules
@@ -0,0 +1,3 @@
[submodule "result_table"]
	path = result_table
	url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git
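(After cloning, the new submodule can be fetched with: git submodule update --init result_table)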
quapy/benchmarking/_base.py
@@ -0,0 +1,313 @@
import itertools
import os
from copy import deepcopy
from os.path import join
from dataclasses import dataclass
from typing import List, Union, Callable
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
import pickle

from sklearn.linear_model import LogisticRegression

import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import PACC
from quapy.protocol import APP, UPP, AbstractProtocol
from quapy.model_selection import GridSearchQ
from quapy.method.base import BaseQuantifier
from result_table.src.table import Table


def makedirs(dir):
    print('creating ', dir)
    os.makedirs(dir, exist_ok=True)


@dataclass
class MethodDescriptor:
    id: str
    name: str
    instance: BaseQuantifier
    hyperparams: dict
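

# Benchmark: abstract driver of a benchmarking campaign. It derives a unique run ID for
# every (method, dataset) pair, caches results and selected hyperparameters on disk under
# home_dir, and turns the cached results into summary tables (and, eventually, plots).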
class Benchmark(ABC):

    ID_SEPARATOR = '__'  # used to separate components in a run ID; cannot appear within the component IDs

    def __init__(self, home_dir, n_jobs=3):
        self.home_dir = home_dir
        self.n_jobs = n_jobs
        assert n_jobs != -1, ('Setting n_jobs=-1 will probably blow up your memory. '
                              'Specify a positive number.')
        makedirs(home_dir)
        makedirs(join(home_dir, 'results'))
        makedirs(join(home_dir, 'params'))
        makedirs(join(home_dir, 'tables'))
        makedirs(join(home_dir, 'plots'))

    def _run_id(self, method: MethodDescriptor, dataset: str):
        sep = Benchmark.ID_SEPARATOR
        assert sep not in method.id, \
            (f'separator {sep} cannot be used in method ID ({method.id}), '
             f'please change the method ID or redefine {Benchmark.ID_SEPARATOR=}')
        assert sep not in dataset, \
            (f'separator {sep} cannot be used in dataset name ({dataset}), '
             f'please redefine {Benchmark.ID_SEPARATOR=}')
        return sep.join([method.id, dataset])

    def _result_path(self, method: MethodDescriptor, dataset: str):
        id = self._run_id(method, dataset)
        return join(self.home_dir, 'results', id + '.pkl')

    def _params_path(self, method: MethodDescriptor, dataset: str):
        id = self._run_id(method, dataset)
        chosen = join(self.home_dir, 'params', id + 'chosen.pkl')
        scores = join(self.home_dir, 'params', id + 'scores.pkl')
        return chosen, scores

    def _exist_run(self, method: MethodDescriptor, dataset: str):
        return os.path.exists(self._result_path(method, dataset))

    def _open_method_dataset_result(self, method: MethodDescriptor, dataset: str):
        if not self._exist_run(method, dataset):
            raise ValueError(f'cannot open result for method={method.id} and {dataset=}')
        return pd.read_pickle(self._result_path(method, dataset))

    def check_dataset(self, dataset: str):
        assert dataset in self.list_datasets(), f'unknown dataset {dataset}'

    @abstractmethod
    def list_datasets(self) -> List[str]:
        ...

    @abstractmethod
    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        ...

    def gen_tables(self, results, metrics=None):
        if metrics is None:
            metrics = ['mae', 'mrae', 'mkld', 'mnkld']
        tables = {}
        for (method, dataset, result) in results:
            col_metrics = result.columns.values[2:]
            for metric in metrics:
                if metric not in col_metrics:
                    print(f'error; requested {metric=} not found among the columns in the dataframe')
                    continue
                if metric not in tables:
                    tables[metric] = Table(name=metric)
                table = tables[metric]
                table.add(dataset, method.name, result[metric].values)
        Table.LatexPDF(join(self.home_dir, 'tables', 'results.pdf'), list(tables.values()))

    def gen_plots(self):
        pass

    def show_report(self, method, dataset, report: pd.DataFrame):
        id = method.id
        MAE = report['mae'].mean()
        mae_std = report['mae'].std()
        MRAE = report['mrae'].mean()
        mrae_std = report['mrae'].std()
        print(f'{id}\t{dataset}:\t{MAE=:.4f}+-{mae_std:.4f}\t{MRAE=:.4f}+-{mrae_std:.4f}')

    def run(self,
            methods: Union[List[MethodDescriptor], MethodDescriptor],
            datasets: Union[List[str], str] = None,
            force=False):

        if not isinstance(methods, list):
            methods = [methods]

        if datasets is None:
            datasets = self.list_datasets()
        elif not isinstance(datasets, list):
            datasets = [datasets]

        # load cached results; schedule the (method, dataset) pairs that still need to be run
        results = []
        pending_job_args = []
        for method, dataset in itertools.product(methods, datasets):
            self.check_dataset(dataset)
            if not force and self._exist_run(method, dataset):
                result = pd.read_pickle(self._result_path(method, dataset))
                results.append((method, dataset, result))
            else:
                pending_job_args.append((method, dataset))

        if len(pending_job_args) > 0:
            remaining_results = qp.util.parallel_unpack(
                func=self.run_method_dataset,
                args=pending_job_args,
                n_jobs=self.n_jobs,
                seed=0,
                asarray=False
            )
            results += [(method, dataset, result)
                        for (method, dataset), result in zip(pending_job_args, remaining_results)]

        # print results
        for method, dataset, result in results:
            self.show_report(method, dataset, result)

        self.gen_tables(results)
        self.gen_plots()

    # def gen_plots(self, methods=None):
    #     if methods is None:

    def __add__(self, other: 'Benchmark'):
        return CombinedBenchmark(self, other, self.n_jobs)
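

# CombinedBenchmark: merges two benchmarks (as produced by Benchmark.__add__) and routes
# every dataset to the benchmark it belongs to, so both can be run as a single campaign.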
class CombinedBenchmark(Benchmark):

    def __init__(self, benchmark_a: Benchmark, benchmark_b: Benchmark, n_jobs=-1):
        self.router = {
            **{dataset: benchmark_a for dataset in benchmark_a.list_datasets()},
            **{dataset: benchmark_b for dataset in benchmark_b.list_datasets()}
        }
        self.datasets = benchmark_a.list_datasets() + benchmark_b.list_datasets()
        self.n_jobs = n_jobs

    def list_datasets(self) -> List[str]:
        return self.datasets

    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        return self.router[dataset].run_method_dataset(method, dataset, random_state)

    def _exist_run(self, method: MethodDescriptor, dataset: str):
        return self.router[dataset]._exist_run(method, dataset)
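

# TypicalBenchmark: implements the standard pipeline (model selection with GridSearchQ on a
# validation protocol, refitting on the full training set, evaluation on a test protocol, and
# persistence of results and chosen hyperparameters); subclasses only declare the datasets,
# the sample size, the protocols, and the target error for model selection.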
class TypicalBenchmark(Benchmark):

    # def __init__(self, home_dir, ):

    @abstractmethod
    def get_sample_size(self) -> int:
        ...

    @abstractmethod
    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        ...

    @abstractmethod
    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        ...

    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        print(f'Running method={method.id} in {dataset=}')

        sample_size = self.get_sample_size()
        qp.environ['SAMPLE_SIZE'] = sample_size

        q = deepcopy(method.instance)
        optim_for = self.get_target_error_for_modsel()

        with qp.util.temp_seed(random_state):
            # data split
            trModSel, valprotModSel, trEval, teprotEval = self.get_trModsel_valprotModsel_trEval_teprotEval(dataset)

            # model selection
            modsel = GridSearchQ(
                model=q,
                param_grid=method.hyperparams,
                protocol=valprotModSel,
                error=optim_for,
                refit=False,
                n_jobs=-1,
                raise_errors=True,
                verbose=True
            ).fit(trModSel)

            # fit on the whole training data
            optimized_model = modsel.best_model_
            optimized_model.fit(trEval)

            # evaluation
            report = qp.evaluation.evaluation_report(
                model=optimized_model,
                protocol=teprotEval,
                error_metrics=qp.error.QUANTIFICATION_ERROR_NAMES
            )

        # data persistence
        chosen_path, scores_path = self._params_path(method, dataset)
        pickle.dump(modsel.best_params_, open(chosen_path, 'wb'), pickle.HIGHEST_PROTOCOL)
        pickle.dump(modsel.param_scores_, open(scores_path, 'wb'), pickle.HIGHEST_PROTOCOL)

        result_path = self._result_path(method, dataset)
        report.to_pickle(result_path)

        return report
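

# Two ready-made benchmarks: UCI binary datasets (APP protocols, samples of size 100) and
# UCI multiclass datasets (UPP protocols, samples of size 500); both select models for 'mae'.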
class UCIBinaryBenchmark(TypicalBenchmark):

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        valprotModsel = APP(vaModsel, n_prevalences=21, repeats=25)
        teprotEval = APP(teEval, n_prevalences=21, repeats=100)
        return trModsel, valprotModsel, trEval, teprotEval

    def get_sample_size(self) -> int:
        return 100

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'

    def list_datasets(self) -> List[str]:
        ignore = ['acute.a', 'acute.b', 'balance.2']
        return [d for d in qp.datasets.UCI_BINARY_DATASETS if d not in ignore]


class UCIMultiBenchmark(TypicalBenchmark):

    def list_datasets(self) -> List[str]:
        return qp.datasets.UCI_MULTICLASS_DATASETS

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        valprotModsel = UPP(vaModsel, repeats=250)
        teprotEval = UPP(teEval, repeats=1000)
        return trModsel, valprotModsel, trEval, teprotEval

    def get_sample_size(self) -> int:
        return 500

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'


if __name__ == '__main__':

    from quapy.benchmarking.typical import *

    # from quapy.method.aggregative import BayesianCC
    # bayes = MethodDescriptor(
    #     id='Bayesian',
    #     name='Bayesian(LR)',
    #     instance=BayesianCC(LogisticRegression()),
    #     hyperparams=wrap_cls_params(lr_hyper)
    # )

    # bench_bin = UCIBinaryBenchmark('../../Benchmarks/UCIbinary')
    bench_multi = UCIMultiBenchmark('../../Benchmarks/UCIMulti')
    # bench = bench_bin + bench_multi
    bench = bench_multi

    bench.run(methods=[cc, pcc, acc, pacc, sld, sld_bcts])

quapy/benchmarking/typical.py
@@ -0,0 +1,51 @@
import numpy as np
from sklearn.linear_model import LogisticRegression

from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
from quapy.benchmarking._base import MethodDescriptor

# hyperparameter grid for the underlying LogisticRegression classifier
lr_hyper = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}

# prefixes classifier hyperparameters with 'classifier__' so that model selection routes them
# to the classifier embedded in the quantifier
wrap_cls_params = lambda params: {'classifier__' + key: val for key, val in params.items()}
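

# One MethodDescriptor per baseline quantifier; all wrap a LogisticRegression classifier
# and share the same classifier hyperparameter grid.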
cc = MethodDescriptor(
    id='CC',
    name='CC(LR)',
    instance=CC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

pcc = MethodDescriptor(
    id='PCC',
    name='PCC(LR)',
    instance=PCC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

acc = MethodDescriptor(
    id='ACC',
    name='ACC(LR)',
    instance=ACC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

pacc = MethodDescriptor(
    id='PACC',
    name='PACC(LR)',
    instance=PACC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

sld = MethodDescriptor(
    id='SLD',
    name='SLD',
    instance=EMQ(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

sld_bcts = MethodDescriptor(
    id='SLD-BCTS',
    name='SLD-BCTS',
    instance=EMQ(LogisticRegression(), recalib='bcts', exact_train_prev=False),
    hyperparams=wrap_cls_params(lr_hyper)
)
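
For illustration only (not part of this commit): a new benchmark needs nothing beyond the four abstract methods of TypicalBenchmark. The sketch below assumes the first file above is importable as quapy.benchmarking._base, and the output directory and the class name are arbitrary.

from typing import List, Union, Callable

import quapy as qp
from quapy.protocol import UPP
from quapy.benchmarking._base import TypicalBenchmark   # assumed module path for the file above


class SmallUCIMultiBenchmark(TypicalBenchmark):
    # toy benchmark restricted to the first three UCI multiclass datasets

    def list_datasets(self) -> List[str]:
        return qp.datasets.UCI_MULTICLASS_DATASETS[:3]

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str):
        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        return trModsel, UPP(vaModsel, repeats=100), trEval, UPP(teEval, repeats=500)

    def get_sample_size(self) -> int:
        return 250

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'


if __name__ == '__main__':
    from quapy.benchmarking.typical import pacc, sld
    # hypothetical output directory
    SmallUCIMultiBenchmark('../../Benchmarks/UCIMultiSmall').run(methods=[pacc, sld])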

quapy/method/aggregative.py
@@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier):
             raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
         if self.method not in ACC.METHODS:
             raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
-        if self.clipping not in ACC.NORMALIZATIONS:
+        if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}")

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):

result_table
@@ -0,0 +1 @@
Subproject commit 01f8fb936bddaaa33aad026b450be13089ec1d7c