96 lines
4.2 KiB
Python
96 lines
4.2 KiB
Python
import os
|
|
import warnings
|
|
from os.path import join
|
|
from pathlib import Path
|
|
|
|
from sklearn.calibration import CalibratedClassifierCV
|
|
from sklearn.linear_model import LogisticRegression as LR
|
|
from sklearn.model_selection import GridSearchCV, StratifiedKFold
|
|
from copy import deepcopy as cp
|
|
import quapy as qp
|
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
|
from BayesianKDEy.full_experiments import experiment, experiment_path, KDEyCLR
|
|
from build.lib.quapy.data import LabelledCollection
|
|
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
|
|
from quapy.method.base import BinaryQuantifier, BaseQuantifier
|
|
from quapy.model_selection import GridSearchQ
|
|
from quapy.data import Dataset
|
|
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
|
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
|
|
from quapy.functional import strprev
|
|
from quapy.method.aggregative import KDEyML, ACC
|
|
from quapy.protocol import UPP
|
|
import quapy.functional as F
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
from scipy.stats import dirichlet
|
|
from collections import defaultdict
|
|
from time import time
|
|
from sklearn.base import clone, BaseEstimator
|
|
|
|
|
|
def method():
    """
    Returns the method configuration evaluated by this script, as a tuple
    (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:

    - name: is a str representing the name of the method (here, 'BayKDE*CLR')
    - quantifier: is the base model (here, KDEyCLR(LR()))
    - hyperparams: is a dictionary with the grid of values for the quantifier
        (here, {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]})
    - bayesian/bootstrap_constructor: is a function that receives a dictionary of hyperparameters and
        instantiates the bayesian or bootstrap method, passing them as keyword arguments
    """
    # Bandwidth grid for the CLR variant of KDEy.
    kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}

    # NOTE: the original function also carried grids that were never read (an empty ACC grid,
    # 'nbins' in [3, 4, 5, 8, 16, 32], and 'bandwidth' in [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]),
    # plus two disabled variants named 'BootstrapKDEy' and 'BayesianKDEy' built on KDEyML(LR()).
    # They were removed as dead code; recover them from version control if those variants are needed.

    def with_confidence(hyper):
        # `hyper` maps hyperparameter names to values and is unpacked as keyword arguments.
        # Settings left disabled in the original: num_warmup=5000, num_samples=10_000, region='ellipse'.
        return BayesianKDEy(
            kernel='aitchison',
            mcmc_seed=0,
            explore_CLR=True,
            step_size=.15,
            **hyper,
        )

    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, with_confidence
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Runs the configuration returned by method() on a single UCI dataset and prints a summary
    # of the resulting report (mean absolute error, coverage and amplitude).

    # Each setup bundles the dataset names, the fetch function and the sample size to use.
    binary = {
        'datasets': qp.datasets.UCI_BINARY_DATASETS,
        'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
        'sample_size': 500
    }

    multiclass = {
        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
        'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
        'sample_size': 1000
    }

    result_dir = Path('./results')

    # Switch to `binary` (and pick a binary dataset name) to run the binary setup instead.
    setup = multiclass
    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
    data_name = 'digits'
    print(f'dataset={data_name}')
    data = setup['fetch_fn'](data_name)

    # The hyperparameter directory is chosen from the fetched data, not from `setup`.
    is_binary = data.n_classes == 2
    hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')

    # Bind the base model to `quantifier` rather than `method`: unpacking into `method` would
    # overwrite the module-level function method() with the quantifier instance.
    method_name, quantifier, hyper_params, withconf_constructor = method()
    hyper_path = experiment_path(hyper_subdir, data_name, quantifier.__class__.__name__)
    report = experiment(data, quantifier, method_name, hyper_params, withconf_constructor, hyper_path)

    results = report["results"]
    print(f'dataset={data_name}, '
          f'method={method_name}: '
          f'mae={results["ae"].mean():.3f}, '
          f'coverage={results["coverage"].mean():.5f}, '
          f'amplitude={results["amplitude"].mean():.5f}, ')
|
|
|
|
|
|
|
|
|