drafting experiments

This commit is contained in:
Alejandro Moreo Fernandez 2025-11-16 12:42:26 +01:00
parent 4255098ba7
commit 2f83a520c7
1 changed files with 75 additions and 0 deletions

View File

@@ -0,0 +1,75 @@
import warnings
from sklearn.linear_model import LogisticRegression
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from method.aggregative import AggregativeQuantifier
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet
def new_classifier():
    """Create the base classifier together with its hyper-parameter grid.

    Returns:
        A ``(classifier, grid)`` pair: a ``LogisticRegression`` built with
        its default arguments, and a dict keyed by ``'classifier__C'``
        (``np.logspace(-3, 3, 7)``, i.e. 7 log-spaced values from 1e-3 to
        1e3) and ``'classifier__class_weight'`` (``'balanced'`` or ``None``).
    """
    param_grid = dict()
    param_grid['classifier__C'] = np.logspace(-3, 3, 7)
    param_grid['classifier__class_weight'] = ['balanced', None]
    return LogisticRegression(), param_grid
def methods():
    """Yield one ``(name, quantifier, param_grid)`` triple per method to evaluate.

    The classifier and its grid come from ``new_classifier()``; each method's
    grid extends the classifier grid with the method's own hyper-parameters.
    Only ``BayesianKDEy`` is currently enabled.
    """
    classifier, classifier_grid = new_classifier()

    # Disabled baselines, kept for reference:
    # yield 'BayesianACC', BayesianCC(classifier, mcmc_seed=0), classifier_grid
    # yield 'BayesianHDy', PQ(classifier, stan_seed=0), {**classifier_grid, 'n_bins': [3,4,5,8,16,32]}

    # Classifier grid plus the bandwidth values explored for BayesianKDEy.
    kdey_grid = dict(classifier_grid)
    kdey_grid['bandwidth'] = [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]
    yield 'BayesianKDEy', BayesianKDEy(classifier, mcmc_seed=0), kdey_grid
def experiment(dataset: Dataset, method: AggregativeQuantifier, method_name: str, grid: dict):
    """Run model selection for ``method`` on ``dataset`` and evaluate the winner.

    The training part of ``dataset`` is split (stratified, 60% for fitting,
    the remainder for validation). ``GridSearchQ`` explores ``grid`` using a
    ``UPP`` protocol with 250 repeats over the validation split, and the
    selected model is then evaluated with a ``UPP`` protocol with 500 repeats
    over the test part. Everything runs inside ``qp.util.temp_seed(0)``.

    Args:
        dataset: dataset exposing ``train_test``.
        method: the quantifier handed to ``GridSearchQ`` as ``model``.
        method_name: label used only in the progress message.
        grid: hyper-parameter grid handed to ``GridSearchQ`` as ``param_grid``.

    Returns:
        The report produced by ``qp.evaluation.evaluation_report``.
    """
    with qp.util.temp_seed(0):
        # model selection
        training, test = dataset.train_test
        training, validation = training.split_stratified(train_prop=0.6, random_state=0)
        validation_protocol = qp.protocol.UPP(validation, repeats=250, random_state=0)
        searcher = GridSearchQ(
            model=method,
            param_grid=grid,
            protocol=validation_protocol,
            refit=True,
            n_jobs=-1,
            verbose=True,
        ).fit(*training.Xy)
        best_quantifier = searcher.best_model()
        best_hyper = searcher.best_params_
        print(f'model_selection for {method_name} ended: chosen hyper-params {best_hyper}')

        # test
        test_protocol = UPP(test, repeats=500, random_state=0)
        report = qp.evaluation.evaluation_report(
            best_quantifier,
            protocol=test_protocol,
            verbose=True,
        )
        return report
if __name__ == '__main__':
    # Sample size stored in QuaPy's environment for the whole run.
    qp.environ["SAMPLE_SIZE"] = 500

    # Every UCI binary dataset is fetched once and shared by all methods;
    # methods() is re-invoked per dataset, so each dataset gets new method
    # instances.
    for dataset_name in qp.datasets.UCI_BINARY_DATASETS:
        data = qp.datasets.fetch_UCIBinaryDataset(dataset_name)
        for method_name, method, hyper_params in methods():
            report = experiment(data, method, method_name, hyper_params)
            print(f'{method_name=} got {report.mean(numeric_only=True)}')