From fdc0560cccb310de13dd46bfe6a0059d28490baa Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 17 Nov 2025 17:47:14 +0100 Subject: [PATCH] no optim classifier --- BayesianKDEy/full_experiments.py | 52 ++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py index a2f3c9c..6ec372d 100644 --- a/BayesianKDEy/full_experiments.py +++ b/BayesianKDEy/full_experiments.py @@ -3,7 +3,10 @@ import warnings from os.path import join from pathlib import Path -from sklearn.linear_model import LogisticRegression +from sklearn.calibration import CalibratedClassifierCV +from sklearn.linear_model import LogisticRegression as LR +from sklearn.model_selection import GridSearchCV, StratifiedKFold +from copy import deepcopy as cp import quapy as qp from BayesianKDEy._bayeisan_kdey import BayesianKDEy from method.aggregative import DistributionMatchingY as DMy @@ -24,25 +27,35 @@ from time import time from sklearn.base import clone -def new_classifier(): - # lr_hyper = { - # 'classifier__C': np.logspace(-3,3,7), - # 'classifier__class_weight': ['balanced', None] - # } - lr_hyper = {} - lr = LogisticRegression() - return lr, lr_hyper +# def new_classifier(training): +# print('optimizing hyperparameters of Logistic Regression') +# mod_sel = GridSearchCV( +# estimator=LogisticRegression(), +# param_grid={ +# 'C': np.logspace(-4, 4, 9), +# 'class_weight': ['balanced', None] +# }, +# cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=0), +# n_jobs=-1, +# refit=False, +# ) +# mod_sel.fit(*training.Xy) +# # optim = LogisticRegression(**mod_sel.best_params_) +# print(f'Done: hyperparameters chosen={mod_sel.best_params_}') +# # calib = CalibratedClassifierCV(optim, cv=10, n_jobs=-1, ensemble=False).fit(*training.Xy) +# # return calib +# return LogisticRegression(**mod_sel.best_params_) def methods(): - cls, cls_hyper = new_classifier() - hdy_hyper = {**cls_hyper, 'n_bins': 
[3,4,5,8,16,32]} - kdey_hyper = {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} - yield 'BootstrapACC', AggregativeBootstrap(ACC(clone(cls)), n_test_samples=1000), cls_hyper - yield 'BootstrapHDy', AggregativeBootstrap(DMy(clone(cls), divergence='HD'), n_test_samples=1000), hdy_hyper - yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(clone(cls)), n_test_samples=1000), kdey_hyper - # yield 'BayesianACC', BayesianCC(clone(cls), mcmc_seed=0), cls_hyper - # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), hdy_hyper - # yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), kdey_hyper + acc_hyper = {} + hdy_hyper = {'n_bins': [3,4,5,8,16,32]} + kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} + yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), acc_hyper + # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), hdy_hyper + # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), kdey_hyper + # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper + # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper + yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict): @@ -74,7 +87,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict): # test results = defaultdict(list) test_generator = UPP(test, repeats=500, random_state=0) - for i, (sample_X, true_prevalence) in enumerate(test_generator()): + for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'): t_init = time() point_estimate, region = optim_quantifier.predict_conf(sample_X) ttime = time()-t_init @@ -86,6 +99,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict): results['coverage'].append(region.coverage(true_prevalence)) 
         results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
         results['test-time'].append(ttime)
+        # TODO(review): collect posterior samples here — removed dangling 'optim_quantifier.' (SyntaxError)
 
     report = {
         'optim_hyper': best_params,