no optim classifier

parent fd62e73d2d
commit fdc0560ccc
@@ -3,7 +3,10 @@ import warnings
 from os.path import join
 from pathlib import Path
 
-from sklearn.linear_model import LogisticRegression
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.linear_model import LogisticRegression as LR
+from sklearn.model_selection import GridSearchCV, StratifiedKFold
+from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
 from method.aggregative import DistributionMatchingY as DMy
@@ -24,25 +27,35 @@ from time import time
 from sklearn.base import clone
 
 
-def new_classifier():
-    # lr_hyper = {
-    #     'classifier__C': np.logspace(-3,3,7),
-    #     'classifier__class_weight': ['balanced', None]
-    # }
-    lr_hyper = {}
-    lr = LogisticRegression()
-    return lr, lr_hyper
+# def new_classifier(training):
+#     print('optimizing hyperparameters of Logistic Regression')
+#     mod_sel = GridSearchCV(
+#         estimator=LogisticRegression(),
+#         param_grid={
+#             'C': np.logspace(-4, 4, 9),
+#             'class_weight': ['balanced', None]
+#         },
+#         cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=0),
+#         n_jobs=-1,
+#         refit=False,
+#     )
+#     mod_sel.fit(*training.Xy)
+#     # optim = LogisticRegression(**mod_sel.best_params_)
+#     print(f'Done: hyperparameters chosen={mod_sel.best_params_}')
+#     # calib = CalibratedClassifierCV(optim, cv=10, n_jobs=-1, ensemble=False).fit(*training.Xy)
+#     # return calib
+#     return LogisticRegression(**mod_sel.best_params_)
 
 def methods():
-    cls, cls_hyper = new_classifier()
-    hdy_hyper = {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
-    kdey_hyper = {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
-    yield 'BootstrapACC', AggregativeBootstrap(ACC(clone(cls)), n_test_samples=1000), cls_hyper
-    yield 'BootstrapHDy', AggregativeBootstrap(DMy(clone(cls), divergence='HD'), n_test_samples=1000), hdy_hyper
-    yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(clone(cls)), n_test_samples=1000), kdey_hyper
-    # yield 'BayesianACC', BayesianCC(clone(cls), mcmc_seed=0), cls_hyper
-    # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), hdy_hyper
-    # yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), kdey_hyper
+    acc_hyper = {}
+    hdy_hyper = {'n_bins': [3,4,5,8,16,32]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), acc_hyper
+    # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), hdy_hyper
+    # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), kdey_hyper
+    # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
+    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
+    yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
 
 
 def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
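The hunk above drops the shared classifier/hyperparameter pairing: each method now takes a plain LR() and carries only its quantifier-level grid. For reference, here is a short sketch of how the (name, quantifier, grid) triples yielded by methods() could be consumed for model selection. It assumes quapy's GridSearchQ wrapper and a UPP validation protocol, assumes it runs in the same module as the code above, and the dataset and split below are illustrative, not taken from the commit:

import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP

qp.environ['SAMPLE_SIZE'] = 500  # UPP needs a sample size; value illustrative

training = qp.datasets.fetch_UCIBinaryDataset('yeast').training  # illustrative dataset
train, val = training.split_stratified(0.6, random_state=0)

for method_name, quantifier, grid in methods():
    if grid:
        # score each configuration on artificial validation samples drawn by UPP,
        # using mean absolute error of the predicted prevalences
        quantifier = GridSearchQ(quantifier, grid,
                                 protocol=UPP(val, repeats=100, random_state=0),
                                 error=qp.error.mae)
    quantifier.fit(train)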
@@ -74,7 +87,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
     # test
     results = defaultdict(list)
     test_generator = UPP(test, repeats=500, random_state=0)
-    for i, (sample_X, true_prevalence) in enumerate(test_generator()):
+    for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
         t_init = time()
         point_estimate, region = optim_quantifier.predict_conf(sample_X)
         ttime = time()-t_init
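For context on the loop being instrumented here: predict_conf returns a point prevalence estimate together with a confidence region over the simplex. A self-contained sketch of a single such prediction follows; the AggregativeBootstrap import path and the dataset are assumptions (the diff does not show its own imports for these names):

import quapy as qp
from sklearn.linear_model import LogisticRegression as LR
from quapy.method.aggregative import ACC
from quapy.method.confidence import AggregativeBootstrap  # import path assumed

data = qp.datasets.fetch_UCIBinaryDataset('yeast')  # illustrative dataset
quantifier = AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0)
quantifier.fit(data.training)

sample = data.test.sampling(500, 0.3)  # test sample with known prevalence [0.3, 0.7]
point_estimate, region = quantifier.predict_conf(sample.X)
print(point_estimate)                        # point estimate of the class prevalences
print(region.coverage(sample.prevalence()))  # whether the region covers the true prevalence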
@@ -86,6 +99,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
         results['coverage'].append(region.coverage(true_prevalence))
         results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
         results['test-time'].append(ttime)
+        results['samples'].append(optim_quantifier.)
 
     report = {
         'optim_hyper': best_params,
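Note that the added results['samples'].append(optim_quantifier.) line is syntactically incomplete in the commit as recorded (the attribute name is missing), and the intended attribute cannot be recovered from the diff. Separately, a minimal sketch of how the per-sample metrics collected in this loop are typically aggregated; the summarise helper and the choice of plain means are assumptions, not code from the commit:

import numpy as np

def summarise(results: dict) -> dict:
    # hypothetical aggregation of the per-sample metrics collected above
    return {
        # fraction of test samples whose true prevalence fell inside the region;
        # for a well-calibrated 95% region this should approach 0.95
        'coverage': float(np.mean(results['coverage'])),
        # average fraction of the simplex occupied by the region (smaller = tighter)
        'amplitude': float(np.mean(results['amplitude'])),
        # average seconds per confidence-region prediction
        'test-time': float(np.mean(results['test-time'])),
    }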