76 lines
2.5 KiB
Python
76 lines
2.5 KiB
Python
import warnings
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
import quapy as qp
|
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
|
from method.aggregative import AggregativeQuantifier
|
|
from quapy.model_selection import GridSearchQ
|
|
from quapy.data import Dataset
|
|
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
|
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ
|
|
from quapy.functional import strprev
|
|
from quapy.method.aggregative import KDEyML
|
|
from quapy.protocol import UPP
|
|
import quapy.functional as F
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
from scipy.stats import dirichlet
|
|
|
|
|
|
def new_classifier():
    """Create the base classifier and the grid explored for it.

    Returns:
        A ``(classifier, grid)`` tuple. ``classifier`` is a
        ``LogisticRegression`` built with default arguments; ``grid`` maps
        ``classifier__``-prefixed parameter names to their candidate values.
    """
    classifier = LogisticRegression()

    # Seven log-spaced values for C, from 1e-3 up to 1e3.
    c_candidates = np.logspace(-3, 3, 7)
    weight_candidates = ['balanced', None]

    search_space = {
        'classifier__C': c_candidates,
        'classifier__class_weight': weight_candidates,
    }
    return classifier, search_space
|
|
|
|
def methods():
    """Generate the quantification methods to be evaluated.

    Yields:
        ``(name, quantifier, grid)`` triples, where ``grid`` is the
        hyper-parameter grid to explore for that quantifier: the classifier
        grid from ``new_classifier`` extended with method-specific entries.
    """
    base_classifier, classifier_grid = new_classifier()

    # Alternative methods, currently disabled:
    # yield 'BayesianACC', BayesianCC(cls, mcmc_seed=0), cls_hyper
    # yield 'BayesianHDy', PQ(cls, stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}

    quantifier = BayesianKDEy(base_classifier, mcmc_seed=0)
    bandwidths = [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]
    kdey_grid = {**classifier_grid, 'bandwidth': bandwidths}
    yield 'BayesianKDEy', quantifier, kdey_grid
|
|
|
|
|
|
def experiment(dataset: Dataset, method: AggregativeQuantifier, method_name: str, grid: dict):
    """Tune ``method`` on ``dataset`` and evaluate the selected model.

    The training part of ``dataset`` is split (stratified, 60% kept for
    fitting) into a fitting set and a validation set. ``GridSearchQ`` explores
    ``grid`` using a ``UPP`` protocol over the validation set (250 repeats),
    and the chosen model is then evaluated with a ``UPP`` protocol over the
    test set (500 repeats).

    Args:
        dataset: Dataset exposing a ``train_test`` pair.
        method: Quantifier to be tuned.
        method_name: Label used only in the progress message.
        grid: Hyper-parameter grid handed to ``GridSearchQ``.

    Returns:
        The object returned by ``qp.evaluation.evaluation_report`` for the
        selected quantifier.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; both phases are kept inside the seeded context here.
    with qp.util.temp_seed(0):
        # --- model selection ---
        full_train, test = dataset.train_test
        fit_split, val = full_train.split_stratified(train_prop=0.6, random_state=0)

        val_protocol = UPP(val, repeats=250, random_state=0)
        searcher = GridSearchQ(
            model=method,
            param_grid=grid,
            protocol=val_protocol,
            refit=True,
            n_jobs=-1,
            verbose=True,
        )
        mod_sel = searcher.fit(*fit_split.Xy)

        optim_quantifier = mod_sel.best_model()
        optim_hyper = mod_sel.best_params_
        print(f'model_selection for {method_name} ended: chosen hyper-params {optim_hyper}')

        # --- evaluation on the held-out test set ---
        test_protocol = UPP(test, repeats=500, random_state=0)
        report = qp.evaluation.evaluation_report(
            optim_quantifier,
            protocol=test_protocol,
            verbose=True,
        )

    return report
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Global sample size; presumably picked up by the UPP protocols, which
    # are created without an explicit sample size -- confirm against quapy.
    qp.environ["SAMPLE_SIZE"] = 500

    # Run every method on every UCI binary dataset and print the mean of the
    # numeric columns of each evaluation report.
    for dataset_name in qp.datasets.UCI_BINARY_DATASETS:
        dataset = qp.datasets.fetch_UCIBinaryDataset(dataset_name)

        # methods() is called once per dataset, so each run gets fresh
        # quantifier instances.
        for method_name, method, hyper_params in methods():
            report = experiment(dataset, method, method_name, hyper_params)
            print(f'{method_name=} got {report.mean(numeric_only=True)}')
|
|
|
|
|
|
|
|
|