From fdc0560cccb310de13dd46bfe6a0059d28490baa Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 17 Nov 2025 17:47:14 +0100 Subject: [PATCH] no optim classifier --- BayesianKDEy/full_experiments.py | 52 ++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py index a2f3c9c..6ec372d 100644 --- a/BayesianKDEy/full_experiments.py +++ b/BayesianKDEy/full_experiments.py @@ -3,7 +3,10 @@ import warnings from os.path import join from pathlib import Path -from sklearn.linear_model import LogisticRegression +from sklearn.calibration import CalibratedClassifierCV +from sklearn.linear_model import LogisticRegression as LR +from sklearn.model_selection import GridSearchCV, StratifiedKFold +from copy import deepcopy as cp import quapy as qp from BayesianKDEy._bayeisan_kdey import BayesianKDEy from method.aggregative import DistributionMatchingY as DMy @@ -24,25 +27,35 @@ from time import time from sklearn.base import clone -def new_classifier(): - # lr_hyper = { - # 'classifier__C': np.logspace(-3,3,7), - # 'classifier__class_weight': ['balanced', None] - # } - lr_hyper = {} - lr = LogisticRegression() - return lr, lr_hyper +# def new_classifier(training): +# print('optimizing hyperparameters of Logistic Regression') +# mod_sel = GridSearchCV( +# estimator=LogisticRegression(), +# param_grid={ +# 'C': np.logspace(-4, 4, 9), +# 'class_weight': ['balanced', None] +# }, +# cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=0), +# n_jobs=-1, +# refit=False, +# ) +# mod_sel.fit(*training.Xy) +# # optim = LogisticRegression(**mod_sel.best_params_) +# print(f'Done: hyperparameters chosen={mod_sel.best_params_}') +# # calib = CalibratedClassifierCV(optim, cv=10, n_jobs=-1, ensemble=False).fit(*training.Xy) +# # return calib +# return LogisticRegression(**mod_sel.best_params_) def methods(): - cls, cls_hyper = new_classifier() - hdy_hyper = {**cls_hyper, 'n_bins': 
[3,4,5,8,16,32]} - kdey_hyper = {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} - yield 'BootstrapACC', AggregativeBootstrap(ACC(clone(cls)), n_test_samples=1000), cls_hyper - yield 'BootstrapHDy', AggregativeBootstrap(DMy(clone(cls), divergence='HD'), n_test_samples=1000), hdy_hyper - yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(clone(cls)), n_test_samples=1000), kdey_hyper - # yield 'BayesianACC', BayesianCC(clone(cls), mcmc_seed=0), cls_hyper - # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), hdy_hyper - # yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), kdey_hyper + acc_hyper = {} + hdy_hyper = {'n_bins': [3,4,5,8,16,32]} + kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} + yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), acc_hyper + # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), hdy_hyper + # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), kdey_hyper + # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper + # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper + yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict): @@ -74,7 +87,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict): # test results = defaultdict(list) test_generator = UPP(test, repeats=500, random_state=0) - for i, (sample_X, true_prevalence) in enumerate(test_generator()): + for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'): t_init = time() point_estimate, region = optim_quantifier.predict_conf(sample_X) ttime = time()-t_init @@ -86,6 +99,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict): results['coverage'].append(region.coverage(true_prevalence)) 
         results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
         results['test-time'].append(ttime)
+        # TODO(review): collect posterior samples here — removed dangling 'optim_quantifier.' (SyntaxError)
 
     report = {
         'optim_hyper': best_params,