adding exploration in CLR
parent 881e1033f1
commit 23608f2038
@@ -0,0 +1,3 @@
+- Add other methods that natively provide uncertainty quantification?
+- Explore the neighbourhood in the CLR space instead of in the simplex!
+-
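The second TODO item is what this commit implements: proposing Metropolis-Hastings moves in centered log-ratio (CLR) coordinates rather than directly on the probability simplex. A minimal sketch of the CLR map and its inverse, assuming the standard compositional-data definition (the diff's `CLRtransformation` is expected to behave like this; the names `clr`/`clr_inverse` here are illustrative, not the quapy API):

```python
import numpy as np

def clr(p, eps=1e-12):
    # centered log-ratio: log(p) centered by the mean of log(p)
    logp = np.log(p + eps)
    return logp - logp.mean()

def clr_inverse(z):
    # softmax inverts the CLR map back onto the unit simplex
    e = np.exp(z - z.max())
    return e / e.sum()

p = np.array([0.7, 0.2, 0.1])
assert np.allclose(clr_inverse(clr(p)), p)
```

Since the inverse is a softmax, any Gaussian perturbation added in CLR coordinates lands back inside the simplex, which is why the new code path below can assert `in_simplex(neighbour)` instead of re-projecting.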
@@ -1,7 +1,7 @@
 from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
-from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
@@ -40,6 +40,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
+                 explore_CLR=False,
+                 step_size=0.05,
                  verbose: bool = False):
 
         if num_warmup <= 0:
@@ -48,13 +50,15 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
 
         super().__init__(classifier, fit_classifier, val_split)
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
         self.kernel = self._check_kernel(kernel)
         self.num_warmup = num_warmup
         self.num_samples = num_samples
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
+        self.explore_CLR = explore_CLR
+        self.step_size = step_size
         self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
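For orientation, a hypothetical construction exercising the two new parameters (the values are the ones used by the debug script later in this commit; everything else follows the signature shown above):

```python
from sklearn.linear_model import LogisticRegression as LR

# propose MH moves in CLR space, with a larger initial random-walk scale
q = BayesianKDEy(LR(), kernel='aitchison', bandwidth=1.,
                 explore_CLR=True, step_size=0.15, mcmc_seed=0)
```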
@@ -105,10 +109,19 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         def log_prior(prev):
             return 0
 
-        def sample_neighbour(prev, step_size=0.05):
+        def sample_neighbour(prev, step_size):
             # random-walk Metropolis-Hastings
-            dir_noise = rng.normal(scale=step_size, size=len(prev))
-            neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
+            d = len(prev)
+            if not self.explore_CLR:
+                dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
+                neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
+            else:
+                clr = CLRtransformation()
+                clr_point = clr(prev)
+                dir_noise = rng.normal(scale=step_size, size=d)
+                clr_neighbour = clr_point + dir_noise
+                neighbour = clr.inverse(clr_neighbour)
+                assert in_simplex(neighbour), 'wrong CLR transformation'
             return neighbour
 
         n_classes = X_probs.shape[1]
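Note the `step_size/np.sqrt(d)` rescaling in the simplex branch: dividing the per-coordinate scale by the square root of the dimension keeps the expected Euclidean length of the proposal roughly constant as the number of classes grows. A quick illustrative check (not part of the commit):

```python
import numpy as np

rng = np.random.default_rng(0)
step_size = 0.05
for d in (3, 10, 100):
    noise = rng.normal(scale=step_size / np.sqrt(d), size=(100_000, d))
    # mean norm stays near step_size for every d, instead of growing like sqrt(d)
    print(d, np.linalg.norm(noise, axis=1).mean())
```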
@@ -116,9 +129,9 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
 
         # Metropolis-Hastings with adaptive rate
-        step_size = 0.05
+        step_size = self.step_size
         target_acceptance = 0.3
-        adapt_rate = 0.01
+        adapt_rate = 0.05
         acceptance_history = []
 
         samples = []
@@ -142,7 +155,13 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
                 recent_accept_rate = np.mean(acceptance_history[-100:])
                 step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
+                # step_size = float(np.clip(step_size, min_step, max_step))
+                print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
 
         # remove "warmup" initial iterations
         samples = np.asarray(samples[self.num_warmup:])
         return samples
+
+
+def in_simplex(x):
+    return np.all(x >= 0) and np.isclose(x.sum(), 1)
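The multiplicative update `step_size *= exp(adapt_rate * (acc - target))` grows the step when the recent acceptance rate is above target and shrinks it otherwise; raising `adapt_rate` from 0.01 to 0.05 simply makes this warmup correction more aggressive. A toy trace of the rule in isolation (illustrative only):

```python
import numpy as np

step, target, adapt_rate = 0.05, 0.3, 0.05
for acc in (0.05, 0.05, 0.60, 0.30):
    step *= np.exp(adapt_rate * (acc - target))
    print(f'{acc=:.2f} -> step={step:.5f}')
# acceptance below target shrinks the step; above target grows it
```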
@@ -9,8 +9,9 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
 from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
-from quapy.method.aggregative import DistributionMatchingY as DMy
-from quapy.method.base import BinaryQuantifier
+from quapy.data import LabelledCollection
+from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
+from quapy.method.base import BinaryQuantifier, BaseQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
 # from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
@@ -24,73 +25,95 @@ from tqdm import tqdm
 from scipy.stats import dirichlet
 from collections import defaultdict
 from time import time
-from sklearn.base import clone
+from sklearn.base import clone, BaseEstimator
 
 
-# def new_classifier(training):
-#     print('optimizing hyperparameters of Logistic Regression')
-#     mod_sel = GridSearchCV(
-#         estimator=LogisticRegression(),
-#         param_grid={
-#             'C': np.logspace(-4, 4, 9),
-#             'class_weight': ['balanced', None]
-#         },
-#         cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=0),
-#         n_jobs=-1,
-#         refit=False,
-#     )
-#     mod_sel.fit(*training.Xy)
-#     # optim = LogisticRegression(**mod_sel.best_params_)
-#     print(f'Done: hyperparameters chosen={mod_sel.best_params_}')
-#     # calib = CalibratedClassifierCV(optim, cv=10, n_jobs=-1, ensemble=False).fit(*training.Xy)
-#     # return calib
-#     return LogisticRegression(**mod_sel.best_params_)
+class KDEyCLR(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='aitchison'
+        )
 
 
-def methods():
+def methods__():
     acc_hyper = {}
     hdy_hyper = {'nbins': [3,4,5,8,16,32]}
-    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2], 'classifier__C':[1]}
     wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
     # yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), wrap_hyper(acc_hyper)
     # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), wrap_hyper(hdy_hyper)
-    # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
+    yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
     # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
-    yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
+    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
     # yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
 
 
-def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
+def methods():
+    """
+    Yields tuples (name, quantifier, hyperparams, withconf_constructor), where:
+    - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
+    - quantifier: is the base model (e.g., KDEyML())
+    - hyperparams: is a dictionary of hyperparameters for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
+    - withconf_constructor: is a function that instantiates the Bayesian or bootstrap method with the
+      quantifier's optimized hyperparameters
+    """
+    acc_hyper = {}
+    hdy_hyper = {'nbins': [3,4,5,8,16,32]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
+
+    yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0)
+    yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
+
+    yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0)
+
+    yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True)
+    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper)
+    yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper)
+
+
+def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
     with qp.util.temp_seed(0):
+        print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
         # model selection
-        train, test = dataset.train_test
-        train_prevalence = train.prevalence()
         if len(grid)>0:
             train, val = train.split_stratified(train_prop=0.6, random_state=0)
             mod_sel = GridSearchQ(
-                model=method,
+                model=point_quantifier,
                 param_grid=grid,
                 protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
-                refit=True,
+                refit=False,
                 n_jobs=-1,
                 verbose=True
             ).fit(*train.Xy)
-            optim_quantifier = mod_sel.best_model()
             best_params = mod_sel.best_params_
-            best_score = mod_sel.best_score_
-            tr_time = mod_sel.refit_time_
         else:
+            best_params = {}
+
+    return best_params
+
+
+def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, withconf_constructor, hyper_choice_path: Path):
+    with qp.util.temp_seed(0):
+
+        training, test = dataset.train_test
+
+        # model selection
+        best_hyperparams = qp.util.pickled_resource(
+            hyper_choice_path, model_selection, training, cp(point_quantifier), grid
+        )
+
         t_init = time()
-        method.fit(*train.Xy)
+        withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
         tr_time = time() - t_init
-        best_params, best_score = {}, -1
-        optim_quantifier = method
 
         # test
+        train_prevalence = training.prevalence()
         results = defaultdict(list)
-        test_generator = UPP(test, repeats=500, random_state=0)
+        test_generator = UPP(test, repeats=100, random_state=0)
         for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
             t_init = time()
-            point_estimate, region = optim_quantifier.predict_conf(sample_X)
+            point_estimate, region = withconf_quantifier.predict_conf(sample_X)
             ttime = time()-t_init
             results['true-prevs'].append(true_prevalence)
             results['point-estim'].append(point_estimate)
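The new protocol separates model selection on the plain point quantifier from the construction of the confidence-region wrapper. A condensed sketch of how the pieces fit together (assumes `train` and `test_X` already exist; see the `__main__` block below for the real driver):

```python
for name, quantifier, grid, make_withconf in methods():
    # choose hyperparameters on the plain point quantifier...
    best = model_selection(train, quantifier, grid)   # -> dict of best params
    # ...then build the Bayesian/bootstrap wrapper around those choices
    withconf = make_withconf(best).fit(*train.Xy)
    point_estimate, region = withconf.predict_conf(test_X)
```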
@@ -103,9 +126,8 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
             results['samples'].append(region.samples)
 
     report = {
-        'optim_hyper': best_params,
-        'optim_score': best_score,
-        'refit_time': tr_time,
+        'optim_hyper': best_hyperparams,
+        'train_time': tr_time,
         'train-prev': train_prevalence,
         'results': {k:np.asarray(v) for k,v in results.items()}
     }
@@ -134,26 +156,30 @@ if __name__ == '__main__':
 
     result_dir = Path('./results')
 
-    for setup in [binary, multiclass]:
+    for setup in [binary, multiclass]:  # [binary, multiclass]:
         qp.environ['SAMPLE_SIZE'] = setup['sample_size']
         for data_name in setup['datasets']:
             print(f'dataset={data_name}')
-            if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
-                print(f'skipping dataset: {data_name}')
-                continue
+            # if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
+            #     print(f'skipping dataset: {data_name}')
+            #     continue
             data = setup['fetch_fn'](data_name)
             is_binary = data.n_classes==2
             result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
-            for method_name, method, hyper_params in methods():
+            hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
+            for method_name, method, hyper_params, withconf_constructor in methods():
                 if isinstance(method, BinaryQuantifier) and not is_binary:
                     continue
                 result_path = experiment_path(result_subdir, data_name, method_name)
-                report = qp.util.pickled_resource(result_path, experiment, data, method, hyper_params)
+                hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
+                report = qp.util.pickled_resource(
+                    result_path, experiment, data, method, method_name, hyper_params, withconf_constructor, hyper_path
+                )
                 print(f'dataset={data_name}, '
                       f'method={method_name}: '
                       f'mae={report["results"]["ae"].mean():.3f}, '
-                      f'coverage={report["results"]["coverage"].mean():.3f}, '
-                      f'amplitude={report["results"]["amplitude"].mean():.3f}, ')
+                      f'coverage={report["results"]["coverage"].mean():.5f}, '
+                      f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
@@ -1,31 +1,112 @@
 import pickle
 from collections import defaultdict
 
+from joblib import Parallel, delayed
+from tqdm import tqdm
 import pandas as pd
 from glob import glob
 from pathlib import Path
+import quapy as qp
+from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+
+pd.set_option('display.max_columns', None)
+pd.set_option('display.width', 2000)
+pd.set_option('display.max_rows', None)
+pd.set_option("display.expand_frame_repr", False)
+pd.set_option("display.precision", 4)
+pd.set_option("display.float_format", "{:.4f}".format)
+
+
+def compute_coverage_amplitude(region_constructor):
+    all_samples = results['samples']
+    all_true_prevs = results['true-prevs']
+
+    def process_one(samples, true_prevs):
+        ellipse = region_constructor(samples)
+        return ellipse.coverage(true_prevs), ellipse.montecarlo_proportion()
+
+    out = Parallel(n_jobs=3)(
+        delayed(process_one)(samples, true_prevs)
+        for samples, true_prevs in tqdm(
+            zip(all_samples, all_true_prevs),
+            total=len(all_samples),
+            desc='constructing ellipses'
+        )
+    )
+
+    # unzip results
+    coverage, amplitude = zip(*out)
+    return list(coverage), list(amplitude)
+
+
+def update_pickle(report, pickle_path, updated_dict:dict):
+    for k,v in updated_dict.items():
+        report[k]=v
+    pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+
+
 for setup in ['binary', 'multiclass']:
     path = f'./results/{setup}/*.pkl'
     table = defaultdict(list)
-    for file in glob(path):
+    for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
         file = Path(file)
         dataset, method = file.name.replace('.pkl', '').split('__')
         report = pickle.load(open(file, 'rb'))
         results = report['results']
         n_samples = len(results['ae'])
-        table['method'].extend([method] * n_samples)
+        table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
         table['dataset'].extend([dataset] * n_samples)
         table['ae'].extend(results['ae'])
-        table['coverage'].extend(results['coverage'])
-        table['amplitude'].extend(results['amplitude'])
+        table['c-CI'].extend(results['coverage'])
+        table['a-CI'].extend(results['amplitude'])
+
+        if 'coverage-CE' not in report:
+            covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
+            covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
+
+            update_fields = {
+                'coverage-CE': covCE,
+                'amplitude-CE': ampCE,
+                'coverage-CLR': covCLR,
+                'amplitude-CLR': ampCLR
+            }
+
+            update_pickle(report, file, update_fields)
+
+        table['c-CE'].extend(report['coverage-CE'])
+        table['a-CE'].extend(report['amplitude-CE'])
+
+        table['c-CLR'].extend(report['coverage-CLR'])
+        table['a-CLR'].extend(report['amplitude-CLR'])
 
-    pd.set_option('display.max_columns', None)
-    pd.set_option('display.width', 1000)
-    pd.set_option('display.max_rows', None)
     df = pd.DataFrame(table)
-    pv = pd.pivot_table(df, index='dataset', columns='method', values=['ae', 'coverage', 'amplitude'])
+
+    n_classes = {}
+    tr_size = {}
+    for dataset in df['dataset'].unique():
+        fetch_fn = {
+            'binary': qp.datasets.fetch_UCIBinaryDataset,
+            'multiclass': qp.datasets.fetch_UCIMulticlassDataset
+        }[setup]
+        data = fetch_fn(dataset)
+        n_classes[dataset] = data.n_classes
+        tr_size[dataset] = len(data.training)
+
+    # remove datasets with more than max_classes classes
+    max_classes = 30
+    for data_name, n in n_classes.items():
+        if n > max_classes:
+            df = df[df["dataset"] != data_name]
+
+    for region in ['CI', 'CE', 'CLR']:
+        pv = pd.pivot_table(
+            df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
+        )
+        pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
+        pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
+        pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
         print(f'{setup=}')
         print(pv)
-        print()
+        print('-' * 80)
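One wrinkle worth noting: `compute_coverage_amplitude` reads `results` from the enclosing module scope (the loop variable assigned a few lines above its call site), so it only works when invoked from inside that loop. A sketch of the same helper with the dependency made explicit (an illustrative refactor, not part of the commit):

```python
from joblib import Parallel, delayed

def compute_coverage_amplitude(region_constructor, results, n_jobs=3):
    def process_one(samples, true_prevs):
        region = region_constructor(samples)
        return region.coverage(true_prevs), region.montecarlo_proportion()

    out = Parallel(n_jobs=n_jobs)(
        delayed(process_one)(s, p)
        for s, p in zip(results['samples'], results['true-prevs'])
    )
    coverage, amplitude = zip(*out)
    return list(coverage), list(amplitude)
```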
@@ -0,0 +1,95 @@
+import os
+import warnings
+from os.path import join
+from pathlib import Path
+
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.linear_model import LogisticRegression as LR
+from sklearn.model_selection import GridSearchCV, StratifiedKFold
+from copy import deepcopy as cp
+import quapy as qp
+from BayesianKDEy._bayeisan_kdey import BayesianKDEy
+from BayesianKDEy.full_experiments import experiment, experiment_path, KDEyCLR
+from quapy.data import LabelledCollection
+from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
+from quapy.method.base import BinaryQuantifier, BaseQuantifier
+from quapy.model_selection import GridSearchQ
+from quapy.data import Dataset
+# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
+from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
+from quapy.functional import strprev
+from quapy.method.aggregative import KDEyML, ACC
+from quapy.protocol import UPP
+import quapy.functional as F
+import numpy as np
+from tqdm import tqdm
+from scipy.stats import dirichlet
+from collections import defaultdict
+from time import time
+from sklearn.base import clone, BaseEstimator
+
+
+def method():
+    """
+    Returns a tuple (name, quantifier, hyperparams, withconf_constructor), where:
+    - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
+    - quantifier: is the base model (e.g., KDEyML())
+    - hyperparams: is a dictionary of hyperparameters for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
+    - withconf_constructor: is a function that instantiates the Bayesian or bootstrap method with the
+      quantifier's optimized hyperparameters
+    """
+    acc_hyper = {}
+    hdy_hyper = {'nbins': [3,4,5,8,16,32]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
+
+    wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
+
+    # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True)
+    # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper)
+    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
+                                                                                   explore_CLR=True,
+                                                                                   step_size=.15,
+                                                                                   # num_warmup = 5000,
+                                                                                   # num_samples = 10_000,
+                                                                                   # region='ellipse',
+                                                                                   **hyper)
+
+
+if __name__ == '__main__':
+
+    binary = {
+        'datasets': qp.datasets.UCI_BINARY_DATASETS,
+        'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
+        'sample_size': 500
+    }
+
+    multiclass = {
+        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
+        'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
+        'sample_size': 1000
+    }
+
+    result_dir = Path('./results')
+
+    setup = multiclass
+    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
+    data_name = 'digits'
+    print(f'dataset={data_name}')
+    data = setup['fetch_fn'](data_name)
+    is_binary = data.n_classes==2
+    hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
+    method_name, method, hyper_params, withconf_constructor = method()
+    hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
+    report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
+
+    print(f'dataset={data_name}, '
+          f'method={method_name}: '
+          f'mae={report["results"]["ae"].mean():.3f}, '
+          f'coverage={report["results"]["coverage"].mean():.5f}, '
+          f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
@@ -583,8 +583,8 @@ def solve_adjustment(
     """
     Function that tries to solve for :math:`p` the equation :math:`q = M p`, where :math:`q` is the vector of
     `unadjusted counts` (as estimated, e.g., via classify and count) with :math:`q_i` an estimate of
-    :math:`P(\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
-    estimate of :math:`P(\hat{Y}=y_i|Y=y_j)`.
+    :math:`P(\\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
+    estimate of :math:`P(\\hat{Y}=y_i|Y=y_j)`.
 
     :param class_conditional_rates: array of shape `(n_classes, n_classes,)` with entry `(i,j)` being the estimate
     of :math:`P(\hat{Y}=y_i|Y=y_j)`, that is, the probability that an instance that belongs to class :math:`y_j`
@@ -33,7 +33,7 @@ class KDEBase:
 
 
     @classmethod
-    def _check_bandwidth(cls, bandwidth):
+    def _check_bandwidth(cls, bandwidth, kernel):
         """
         Checks that the bandwidth parameter is correct
 
@@ -43,8 +43,9 @@ class KDEBase:
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert 0 < bandwidth < 1, \
-                "the bandwidth for KDEy should be in (0,1), since this method models the unit simplex"
+            assert kernel!='gaussian' or (0 < bandwidth < 1), \
+                ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
+                 "since this method models the unit simplex")
         return bandwidth
 
     @classmethod
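The relaxed assertion only constrains Gaussian kernels to the (0,1) range; other kernels, such as the Aitchison kernel used by `KDEyCLR`, may take bandwidths of 1 or larger. Assuming the signature above, the behaviour would be:

```python
KDEBase._check_bandwidth(0.1, kernel='gaussian')    # ok, returns 0.1
KDEBase._check_bandwidth(2.0, kernel='aitchison')   # ok now; rejected before this change
KDEBase._check_bandwidth(2.0, kernel='gaussian')    # still fails the (0,1) assertion
```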
@@ -166,7 +167,7 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1, kernel='gaussian',
                  random_state=None):
         super().__init__(classifier, fit_classifier, val_split)
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
         self.kernel = self._check_kernel(kernel)
         self.random_state=random_state
 
@@ -246,7 +247,7 @@ class KDEyHD(AggregativeSoftQuantifier, KDEBase):
 
         super().__init__(classifier, fit_classifier, val_split)
         self.divergence = divergence
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
         self.random_state=random_state
         self.montecarlo_trials = montecarlo_trials
 
@@ -333,7 +334,7 @@ class KDEyCS(AggregativeSoftQuantifier):
 
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1):
         super().__init__(classifier, fit_classifier, val_split)
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
 
     def gram_matrix_mix_sum(self, X, Y=None):
         # this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
@@ -1,4 +1,5 @@
 import numpy as np
+from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
 from sklearn.metrics import confusion_matrix
 
@@ -13,6 +14,7 @@ from abc import ABC, abstractmethod
 from scipy.special import softmax, factorial
 import copy
 from functools import lru_cache
+from tqdm import tqdm
 
 """
 This module provides implementation of different types of confidence regions, and the implementation of Bootstrap
@@ -399,7 +401,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
                  n_test_samples=500,
                  confidence_level=0.95,
                  region='intervals',
-                 random_state=None):
+                 random_state=None,
+                 verbose=False):
 
         assert isinstance(quantifier, AggregativeQuantifier), \
             f'base quantifier does not seem to be an instance of {AggregativeQuantifier.__name__}'
@@ -416,6 +419,7 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         self.confidence_level = confidence_level
         self.region = region
         self.random_state = random_state
+        self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
         data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
@@ -441,6 +445,24 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         prev_mean, self.confidence = self.aggregate_conf(classif_predictions)
         return prev_mean
 
+    def aggregate_conf_sequential__(self, classif_predictions: np.ndarray, confidence_level=None):
+        if confidence_level is None:
+            confidence_level = self.confidence_level
+
+        n_samples = classif_predictions.shape[0]
+        prevs = []
+        with qp.util.temp_seed(self.random_state):
+            for quantifier in self.quantifiers:
+                for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
+                    sample_i = resample(classif_predictions, n_samples=n_samples)
+                    prev_i = quantifier.aggregate(sample_i)
+                    prevs.append(prev_i)
+
+        conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
+        prev_estim = conf.point_estimate()
+
+        return prev_estim, conf
+
     def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
         if confidence_level is None:
             confidence_level = self.confidence_level
@@ -449,10 +471,15 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         prevs = []
         with qp.util.temp_seed(self.random_state):
             for quantifier in self.quantifiers:
-                for i in range(self.n_test_samples):
-                    sample_i = resample(classif_predictions, n_samples=n_samples)
-                    prev_i = quantifier.aggregate(sample_i)
-                    prevs.append(prev_i)
+                results = Parallel(n_jobs=-1)(
+                    delayed(bootstrap_once)(i, classif_predictions, quantifier, n_samples)
+                    for i in range(self.n_test_samples)
+                )
+                prevs.extend(results)
+                # for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
+                #     sample_i = resample(classif_predictions, n_samples=n_samples)
+                #     prev_i = quantifier.aggregate(sample_i)
+                #     prevs.append(prev_i)
 
         conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
         prev_estim = conf.point_estimate()
@@ -477,6 +504,13 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         return self.quantifier._classifier_method()
 
 
+def bootstrap_once(i, classif_predictions, quantifier, n_samples):
+    idx = np.random.randint(0, len(classif_predictions), n_samples)
+    sample = classif_predictions[idx]
+    prev = quantifier.aggregate(sample)
+    return prev
+
+
 class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
     """
     `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ method (by Albert Ziegler and Paweł Czyż),
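Two remarks on this parallelisation: `bootstrap_once` must live at module level so that joblib's worker processes can pickle it, and because it draws indices from NumPy's global generator inside each worker, the `temp_seed(self.random_state)` context in the parent process no longer makes the resamples reproducible. A sketch of a seeded variant (illustrative, not part of the commit):

```python
import numpy as np

def bootstrap_once_seeded(seed, classif_predictions, quantifier, n_samples):
    # a per-replicate Generator restores reproducibility across worker processes
    rng = np.random.default_rng(seed)
    idx = rng.integers(0, len(classif_predictions), n_samples)
    return quantifier.aggregate(classif_predictions[idx])

# e.g. spawn one independent seed per bootstrap replicate:
# seeds = np.random.SeedSequence(self.random_state).spawn(self.n_test_samples)
```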
@@ -410,7 +410,7 @@ def group_params(param_grid: dict):
     """
     classifier_params, quantifier_params = {}, {}
     for key, values in param_grid.items():
-        if key.startswith('classifier__') or key == 'val_split':
+        if 'classifier__' in key or key == 'val_split':
             classifier_params[key] = values
         else:
             quantifier_params[key] = values
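Switching from `startswith` to a substring test matters for wrapped grids like the one built by `wrap_hyper` above: a key such as `quantifier__classifier__C` does not start with `classifier__`, but it does contain it, so it is now routed to the classifier's parameter group. For instance (illustrative):

```python
grid = {'quantifier__classifier__C': [1, 10], 'quantifier__bandwidth': [0.1, 0.2]}
# before: both keys fell into quantifier_params
# after:  'quantifier__classifier__C' goes to classifier_params,
#         'quantifier__bandwidth' stays in quantifier_params
```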