"""Model-selection and evaluation script for Bayesian quantifiers.

For every dataset listed in ``qp.datasets.UCI_BINARY_DATASETS`` and every
method yielded by :func:`methods`, the script tunes hyper-parameters with
``GridSearchQ`` and prints the column-wise mean of the evaluation report
obtained on the test split.
"""
import warnings

import numpy as np
from scipy.stats import dirichlet
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm

import quapy as qp
import quapy.functional as F
from quapy.data import Dataset
from quapy.functional import strprev
# AggregativeQuantifier is taken from the installed ``quapy`` package (as
# KDEyML already is) so the script does not depend on QuaPy's internal
# directory being on ``sys.path``.
from quapy.method.aggregative import AggregativeQuantifier, KDEyML
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP

from BayesianKDEy._bayeisan_kdey import BayesianKDEy
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot


def new_classifier():
    """Create a fresh classifier together with its hyper-parameter grid.

    Returns:
        A ``(classifier, grid)`` pair: a default ``LogisticRegression`` and a
        dict mapping ``classifier__``-prefixed parameter names to the values
        to explore (``C`` over ``np.logspace(-3, 3, 7)`` and ``class_weight``
        in ``['balanced', None]``).
    """
    lr_hyper = {
        'classifier__C': np.logspace(-3, 3, 7),
        'classifier__class_weight': ['balanced', None],
    }
    lr = LogisticRegression()
    return lr, lr_hyper


def methods():
    """Yield the quantification methods to be evaluated.

    Yields:
        ``(name, quantifier, grid)`` triples, where ``grid`` extends the
        classifier grid from :func:`new_classifier` with the method-specific
        hyper-parameters.
    """
    cls, cls_hyper = new_classifier()
    # Alternative methods, currently disabled:
    # yield 'BayesianACC', BayesianCC(cls, mcmc_seed=0), cls_hyper
    # yield 'BayesianHDy', PQ(cls, stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
    yield (
        'BayesianKDEy',
        BayesianKDEy(cls, mcmc_seed=0),
        {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]},
    )


def experiment(dataset: Dataset, method: AggregativeQuantifier, method_name: str, grid: dict):
    """Tune ``method`` on ``dataset`` and evaluate the selected model.

    The training part of ``dataset.train_test`` is split (stratified,
    ``train_prop=0.6``) into a training and a validation portion. Grid search
    is run with a ``UPP`` protocol over the validation portion (250 repeats),
    and the resulting model is evaluated with a ``UPP`` protocol over the
    test part (500 repeats).

    Args:
        dataset: dataset exposing ``train_test``.
        method: quantifier to be tuned.
        method_name: label used in the progress message.
        grid: hyper-parameter grid handed to ``GridSearchQ``.

    Returns:
        The report produced by ``qp.evaluation.evaluation_report``.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # both phases are assumed to run inside the temp_seed context - confirm.
    with qp.util.temp_seed(0):
        # model selection
        train, test = dataset.train_test
        train, val = train.split_stratified(train_prop=0.6, random_state=0)
        mod_sel = GridSearchQ(
            model=method,
            param_grid=grid,
            protocol=UPP(val, repeats=250, random_state=0),
            refit=True,
            n_jobs=-1,
            verbose=True,
        ).fit(*train.Xy)
        optim_quantifier = mod_sel.best_model()
        optim_hyper = mod_sel.best_params_
        print(f'model_selection for {method_name} ended: chosen hyper-params {optim_hyper}')

        # test
        report = qp.evaluation.evaluation_report(
            optim_quantifier,
            protocol=UPP(test, repeats=500, random_state=0),
            verbose=True,
        )

    return report


if __name__ == '__main__':
    # Sample size QuaPy uses for the sampling protocols created in experiment().
    qp.environ["SAMPLE_SIZE"] = 500

    datasets = qp.datasets.UCI_BINARY_DATASETS

    for dataset in datasets:
        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
        for method_name, method, hyper_params in methods():
            report = experiment(data, method, method_name, hyper_params)
            print(f'{method_name=} got {report.mean(numeric_only=True)}')