# QuaPy/BayesianKDEy/single_experiment_debug.py

import os
import warnings
from os.path import join
from pathlib import Path

from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.full_experiments import experiment, experiment_path, KDEyCLR
from build.lib.quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
from quapy.method.base import BinaryQuantifier, BaseQuantifier
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML, ACC
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet
from collections import defaultdict
from time import time
from sklearn.base import clone, BaseEstimator


def method():
    """
    Returns the single method configuration exercised by this debug script, as a tuple
    (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
    - name: is a str representing the name of the method (here, 'BayKDE*CLR')
    - quantifier: is the base model (here, KDEyCLR(LR()))
    - hyperparams: is a dictionary with the grid of candidate values for the quantifier
        (here, {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]})
    - bayesian/bootstrap_constructor: is a function that receives a dictionary of hyperparameters and
        instantiates the Bayesian or bootstrap method with them passed as keyword arguments
        (presumably the optimized hyperparameters -- confirm against `experiment`)
    """
    kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}

    def with_confidence(hyper):
        # `hyper` is expanded into keyword arguments, so its keys must be valid BayesianKDEy parameters
        return BayesianKDEy(
            kernel='aitchison',
            mcmc_seed=0,
            explore_CLR=True,
            step_size=.15,
            # num_warmup = 5000,
            # num_samples = 10_000,
            # region='ellipse',
            **hyper,
        )

    # Alternative configurations kept for quick debugging; to try one, uncomment the grid and
    # replace the return statement below with the corresponding line:
    # kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
    # return 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True)
    # return 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper)
    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, with_confidence


if __name__ == '__main__':
    # Debug driver: runs one method on one dataset and prints a summary of the resulting report.

    # Per-setting configuration: available datasets, the function that fetches them, and the sample size.
    binary = {
        'datasets': qp.datasets.UCI_BINARY_DATASETS,
        'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
        'sample_size': 500
    }

    multiclass = {
        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
        'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
        'sample_size': 1000
    }

    result_dir = Path('./results')

    # Switch `setup` (and `data_name` accordingly) to debug a different setting/dataset.
    setup = multiclass
    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
    data_name = 'digits'
    print(f'dataset={data_name}')
    data = setup['fetch_fn'](data_name)

    # The hyperparameter subdirectory is chosen from the actual number of classes of the fetched data.
    is_binary = data.n_classes==2
    hyper_subdir  = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')

    # The base model is bound to `quantifier` (not `method`) so that the module-level
    # factory function `method` is not overwritten by its own return value.
    method_name, quantifier, hyper_params, withconf_constructor = method()
    hyper_path = experiment_path(hyper_subdir, data_name, quantifier.__class__.__name__)
    report = experiment(data, quantifier, method_name, hyper_params, withconf_constructor, hyper_path)

    # Summary: mean of the 'ae', 'coverage' and 'amplitude' entries of report["results"].
    print(f'dataset={data_name}, '
          f'method={method_name}: '
          f'mae={report["results"]["ae"].mean():.3f}, '
          f'coverage={report["results"]["coverage"].mean():.5f}, '
          f'amplitude={report["results"]["amplitude"].mean():.5f}, ')