"""Model selection and test evaluation of BayesianKDEy on the UCI binary datasets.

For every dataset in ``qp.datasets.UCI_BINARY_DATASETS`` and every quantifier
yielded by :func:`methods`, hyper-parameters are tuned with ``GridSearchQ`` on
a held-out validation split and the selected model is then evaluated on the
test split.
"""
import warnings

import numpy as np
from scipy.stats import dirichlet
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm

import quapy as qp
import quapy.functional as F
from quapy.data import Dataset
from quapy.functional import strprev
# AggregativeQuantifier is imported through the ``quapy`` package, like every
# other quapy name in this file; the bare ``method.aggregative`` path only
# resolves when quapy's own directory happens to be on ``sys.path``.
from quapy.method.aggregative import AggregativeQuantifier, KDEyML
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP

from BayesianKDEy._bayeisan_kdey import BayesianKDEy
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot


def new_classifier():
    """Return a fresh ``LogisticRegression`` and its hyper-parameter grid.

    Returns:
        A ``(classifier, grid)`` pair. ``grid`` maps ``'classifier__C'`` to
        seven log-spaced values in [1e-3, 1e3] and
        ``'classifier__class_weight'`` to ``['balanced', None]``.
        NOTE(review): the ``classifier__`` prefix is presumably how the
        grid search addresses the wrapped classifier -- confirm against
        the quapy version in use.
    """
    lr_hyper = {
        'classifier__C': np.logspace(-3, 3, 7),
        'classifier__class_weight': ['balanced', None],
    }
    lr = LogisticRegression()
    return lr, lr_hyper


def methods():
    """Yield ``(name, quantifier, param_grid)`` triples to be evaluated.

    Only ``BayesianKDEy`` is currently enabled; its grid is the classifier
    grid from :func:`new_classifier` extended with a ``'bandwidth'`` entry.
    The other candidates are left commented out.
    """
    cls, cls_hyper = new_classifier()
    # yield 'BayesianACC', BayesianCC(cls, mcmc_seed=0), cls_hyper
    # yield 'BayesianHDy', PQ(cls, stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
    yield (
        'BayesianKDEy',
        BayesianKDEy(cls, mcmc_seed=0),
        {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]},
    )


def experiment(dataset: Dataset, method: AggregativeQuantifier, method_name: str, grid: dict):
    """Tune ``method`` on ``dataset`` and evaluate the selected model.

    The training portion of ``dataset`` is split (stratified,
    ``train_prop=0.6``) into a fitting part and a validation part. A
    ``GridSearchQ`` over ``grid`` is fitted on the former and scored on ``UPP``
    samples drawn from the latter; the best model is then evaluated with
    ``qp.evaluation.evaluation_report`` on ``UPP`` samples from the test
    portion. All seeds are fixed to 0.

    Args:
        dataset: Dataset providing ``train_test``.
        method: Quantifier handed to ``GridSearchQ`` as ``model``.
        method_name: Label used only in the progress message.
        grid: Hyper-parameter grid handed to ``GridSearchQ`` as ``param_grid``.

    Returns:
        Whatever ``qp.evaluation.evaluation_report`` returns for the selected
        model (the caller aggregates it with ``.mean(numeric_only=True)``).
    """
    with qp.util.temp_seed(0):
        # model selection
        train, test = dataset.train_test
        train, val = train.split_stratified(train_prop=0.6, random_state=0)
        mod_sel = GridSearchQ(
            model=method,
            param_grid=grid,
            protocol=UPP(val, repeats=250, random_state=0),
            refit=True,
            n_jobs=-1,
            verbose=True,
        ).fit(*train.Xy)
        optim_quantifier = mod_sel.best_model()
        optim_hyper = mod_sel.best_params_
        print(f'model_selection for {method_name} ended: chosen hyper-params {optim_hyper}')

        # test
        report = qp.evaluation.evaluation_report(
            optim_quantifier,
            protocol=UPP(test, repeats=500, random_state=0),
            verbose=True,
        )

    return report


def main():
    """Run :func:`experiment` for every UCI binary dataset and every method."""
    qp.environ["SAMPLE_SIZE"] = 500

    datasets = qp.datasets.UCI_BINARY_DATASETS
    for dataset in datasets:
        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
        # methods() is called per dataset so each run starts from new,
        # unfitted quantifier and classifier instances.
        for method_name, method, hyper_params in methods():
            report = experiment(data, method, method_name, hyper_params)
            print(f'{method_name=} got {report.mean(numeric_only=True)}')


if __name__ == '__main__':
    main()