forked from moreo/QuaPy
testing new models
This commit is contained in:
parent
27124d0d00
commit
06d36a132d
|
@ -1,3 +1,4 @@
|
|||
from sklearn import clone
|
||||
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
|
||||
import numpy as np
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
@ -27,12 +28,14 @@ Possible extensions:
|
|||
- better investigate the "iterative" nature of the method.
|
||||
- better investigate the implications with other learners. E.g., using EMQ as a prompt, or using EMQ in the second
|
||||
stage (test).
|
||||
- test with SVM
|
||||
"""
|
||||
|
||||
class ClassWeightPCC(BaseQuantifier):
|
||||
|
||||
def __init__(self, **pcc_param_grid):
|
||||
self.learner = PACC(LogisticRegression())
|
||||
def __init__(self, estimator=LogisticRegression, **pcc_param_grid):
|
||||
self.estimator = estimator
|
||||
self.learner = PACC(self.estimator())
|
||||
if 'class_weight' in pcc_param_grid:
|
||||
raise ValueError('parameter "class_weight" cannot be included in "pcc_param_grid"')
|
||||
self.pcc_param_grid = dict(pcc_param_grid)
|
||||
|
@ -43,9 +46,6 @@ class ClassWeightPCC(BaseQuantifier):
|
|||
self.learner.fit(self.train)
|
||||
return self
|
||||
|
||||
def deploy(self, deployed=True):
|
||||
self.deployed = deployed
|
||||
|
||||
def quantify(self, instances):
|
||||
guessed_prevalence = self.learner.quantify(instances)
|
||||
class_weight = self._get_class_weight(guessed_prevalence)
|
||||
|
@ -55,11 +55,18 @@ class ClassWeightPCC(BaseQuantifier):
|
|||
for classification (and not for quantification)"""
|
||||
# pcc = PCC(GridSearchCV(LogisticRegression(class_weight=class_weight), param_grid=self.pcc_param_grid, n_jobs=-1))
|
||||
pcc = PCC(LogisticRegressionCV(Cs=self.pcc_param_grid['C'], class_weight=class_weight, n_jobs=-1, cv=3))
|
||||
raise ValueError('this cannot work...')
|
||||
else:
|
||||
"""If the param grid has not been specified, we take the best parameters found for the base quantifier"""
|
||||
base_parameters = dict(self.learner.get_params())
|
||||
base_parameters['class_weight'] = class_weight # override the class_weight parameter
|
||||
pcc = PCC(LogisticRegression(**base_parameters))
|
||||
for p,v in self.learner.get_params().items():
|
||||
# this search is in order to allow for quantifiers that work with a CalibratedClassifierCV to work
|
||||
if 'class_weight' in p:
|
||||
base_parameters[p] = class_weight
|
||||
break
|
||||
base_estimator = clone(self.learner.learner)
|
||||
base_estimator.set_params(**base_parameters)
|
||||
pcc = PCC(base_estimator)
|
||||
return pcc.fit(self.train).quantify(instances)
|
||||
|
||||
def _get_class_weight(self, prevalence):
|
||||
|
@ -74,6 +81,8 @@ class ClassWeightPCC(BaseQuantifier):
|
|||
return {0:weights[0], 1:weights[1]}
|
||||
|
||||
def set_params(self, **parameters):
|
||||
# parameters = {p:v for p,v in parameters.items()}
|
||||
# print(parameters)
|
||||
self.learner.set_params(**parameters)
|
||||
|
||||
def get_params(self, deep=True):
|
||||
|
|
|
@ -39,6 +39,12 @@ def is_already_computed(result_dir, dataset_name, model_name, run, optim_loss):
|
|||
|
||||
|
||||
nice = {
|
||||
'pacc.opt': 'PACC(LR)',
|
||||
'pacc.opt.svm': 'PACC(SVM)',
|
||||
'pcc.opt': 'PCC(LR)',
|
||||
'pcc.opt.svm': 'PCC(SVM)',
|
||||
'wpacc.opt': 'R-PCC(LR)',
|
||||
'wpacc.opt.svm': 'R-PCC(SVM)',
|
||||
'mae':'AE',
|
||||
'ae':'AE',
|
||||
'svmkld': 'SVM(KLD)',
|
||||
|
|
|
@ -8,6 +8,7 @@ from class_weight_model import ClassWeightPCC
|
|||
# from method.experimental import ExpMax, VarExpMax
|
||||
from common import *
|
||||
from method.meta import QuaNet
|
||||
from quantification_stumps_model import QuantificationStumpRegressor
|
||||
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
|
||||
from quapy.method.meta import EHDy
|
||||
import numpy as np
|
||||
|
@ -35,12 +36,14 @@ svmperf_params = {'C': __C_range}
|
|||
def quantification_models():
|
||||
# yield 'cc', CC(newLR()), lr_params
|
||||
# yield 'acc', ACC(newLR()), lr_params
|
||||
yield 'pcc', PCC(newLR()), None
|
||||
yield 'pacc', PACC(newLR()), None
|
||||
yield 'wpacc', ClassWeightPCC(), None
|
||||
yield 'pcc.opt', PCC(newLR()), lr_params
|
||||
yield 'pacc.opt', PACC(newLR()), lr_params
|
||||
yield 'wpacc.opt', ClassWeightPCC(), lr_params
|
||||
# yield 'pcc', PCC(newLR()), None
|
||||
# yield 'pacc', PACC(newLR()), None
|
||||
# yield 'wpacc', ClassWeightPCC(), None
|
||||
# yield 'pcc.opt', PCC(newLR()), lr_params
|
||||
# yield 'pacc.opt', PACC(newLR()), lr_params
|
||||
# yield 'wpacc.opt', ClassWeightPCC(), lr_params
|
||||
yield 'ds', QuantificationStumpRegressor(SAMPLE_SIZE), None
|
||||
# yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range}
|
||||
# yield 'MAX', MAX(newLR()), lr_params
|
||||
# yield 'MS', MS(newLR()), lr_params
|
||||
# yield 'MS2', MS2(newLR()), lr_params
|
||||
|
@ -92,7 +95,8 @@ def quantification_ensembles():
|
|||
|
||||
def run(experiment):
|
||||
optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
|
||||
|
||||
if dataset_name == 'imdb':
|
||||
return
|
||||
data = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=5)
|
||||
run=0
|
||||
|
||||
|
@ -127,7 +131,7 @@ def run(experiment):
|
|||
test=data.test,
|
||||
sample_size=SAMPLE_SIZE,
|
||||
n_prevpoints=21,
|
||||
n_repetitions=100,
|
||||
n_repetitions=1,
|
||||
n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1,
|
||||
verbose=True
|
||||
)
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
from sklearn.svm import LinearSVC
|
||||
|
||||
from class_weight_model import ClassWeightPCC
|
||||
# from classification.methods import LowRankLogisticRegression
|
||||
# from method.experimental import ExpMax, VarExpMax
|
||||
from common import *
|
||||
from method.meta import QuaNet
|
||||
from quantification_stumps_model import QuantificationStumpRegressor
|
||||
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
|
||||
from quapy.method.meta import EHDy
|
||||
import numpy as np
|
||||
|
@ -36,6 +39,10 @@ def quantification_models():
|
|||
yield 'pcc.opt', PCC(newLR()), lr_params
|
||||
yield 'pacc.opt', PACC(newLR()), lr_params
|
||||
yield 'wpacc.opt', ClassWeightPCC(), lr_params
|
||||
yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range}
|
||||
# yield 'pcc.opt.svm', PCC(LinearSVC()), lr_params
|
||||
# yield 'pacc.opt.svm', PACC(LinearSVC()), lr_params
|
||||
# yield 'wpacc.opt.svm', ClassWeightPCC(LinearSVC), lr_params
|
||||
# yield 'wpacc.opt2', ClassWeightPCC(C=__C_range), lr_params # this cannot work in its current version (see notes in the class_weight_model.py file)
|
||||
# yield 'MAX', MAX(newLR()), lr_params
|
||||
# yield 'MS', MS(newLR()), lr_params
|
||||
|
@ -60,29 +67,29 @@ def quantification_models():
|
|||
# yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params
|
||||
|
||||
|
||||
def quantification_ensembles():
|
||||
param_mod_sel = {
|
||||
'sample_size': SAMPLE_SIZE,
|
||||
'n_prevpoints': 21,
|
||||
'n_repetitions': 5,
|
||||
'refit': True,
|
||||
'verbose': False
|
||||
}
|
||||
common = {
|
||||
'size': 30,
|
||||
'red_size': 15,
|
||||
'max_sample_size': None, # same as training set
|
||||
'n_jobs': ENSEMBLE_N_JOBS,
|
||||
'param_grid': lr_params,
|
||||
'param_mod_sel': param_mod_sel,
|
||||
'val_split': 0.4,
|
||||
'min_pos': 5
|
||||
}
|
||||
|
||||
# hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
|
||||
# will be skipped (by setting hyperparameters to None)
|
||||
hyper_none = None
|
||||
yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none
|
||||
# def quantification_ensembles():
|
||||
# param_mod_sel = {
|
||||
# 'sample_size': SAMPLE_SIZE,
|
||||
# 'n_prevpoints': 21,
|
||||
# 'n_repetitions': 5,
|
||||
# 'refit': True,
|
||||
# 'verbose': False
|
||||
# }
|
||||
# common = {
|
||||
# 'size': 30,
|
||||
# 'red_size': 15,
|
||||
# 'max_sample_size': None, # same as training set
|
||||
# 'n_jobs': ENSEMBLE_N_JOBS,
|
||||
# 'param_grid': lr_params,
|
||||
# 'param_mod_sel': param_mod_sel,
|
||||
# 'val_split': 0.4,
|
||||
# 'min_pos': 5
|
||||
# }
|
||||
#
|
||||
# hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
|
||||
# will be skipped (by setting hyperparameters to None)
|
||||
# hyper_none = None
|
||||
# yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none
|
||||
|
||||
|
||||
def run(experiment):
|
||||
|
@ -116,9 +123,6 @@ def run(experiment):
|
|||
model.fit(data.training)
|
||||
best_params = {}
|
||||
|
||||
if hasattr(model, "deploy"):
|
||||
model.deploy()
|
||||
|
||||
# model evaluation
|
||||
true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction(
|
||||
model,
|
||||
|
@ -130,9 +134,6 @@ def run(experiment):
|
|||
)
|
||||
test_true_prevalence = data.test.prevalence()
|
||||
|
||||
if hasattr(model, "deploy"):
|
||||
model.deploy(False)
|
||||
|
||||
evaluate_experiment(true_prevalences, estim_prevalences)
|
||||
save_results(args.results, dataset_name, model_name, run, optim_loss,
|
||||
true_prevalences, estim_prevalences,
|
||||
|
@ -141,7 +142,7 @@ def run(experiment):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
|
||||
parser = argparse.ArgumentParser(description='Run experiments for UCI ML Quantification')
|
||||
parser.add_argument('results', metavar='RESULT_PATH', type=str,
|
||||
help='path to the directory where to store the results')
|
||||
parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
|
||||
|
|
|
@ -4,7 +4,8 @@ import pathlib
|
|||
import pickle
|
||||
from glob import glob
|
||||
import sys
|
||||
from uci_common import *
|
||||
from uci_experiments import *
|
||||
from uci_tables import METHODS
|
||||
from os.path import join
|
||||
|
||||
|
||||
|
@ -42,7 +43,7 @@ def plot_error_by_drift(methods, error_name, logscale=False, path=None):
|
|||
tr_prevs,
|
||||
n_bins=20,
|
||||
error_name=error_name,
|
||||
show_std=False,
|
||||
show_std=True,
|
||||
logscale=logscale,
|
||||
title=f'Quantification error as a function of distribution shift',
|
||||
savepath=path
|
||||
|
@ -54,7 +55,7 @@ def diagonal_plot(methods, error_name, path=None):
|
|||
if path is not None:
|
||||
path = join(path, f'diag_{error_name}')
|
||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, show_std=False, savepath=f'{path}_pos.{plotext}')
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, show_std=True, savepath=f'{path}_pos.{plotext}')
|
||||
|
||||
|
||||
def binary_bias_global(methods, error_name, path=None):
|
||||
|
|
|
@ -18,16 +18,18 @@ makedirs(tables_path, exist_ok=True)
|
|||
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
|
||||
|
||||
|
||||
|
||||
|
||||
METHODS = [#'cc', 'acc',
|
||||
'pcc',
|
||||
'pacc',
|
||||
'wpacc',
|
||||
# 'pcc',
|
||||
# 'pacc',
|
||||
# 'wpacc',
|
||||
'pcc.opt',
|
||||
'pacc.opt',
|
||||
'wpacc.opt',
|
||||
'wpacc.opt2',
|
||||
'ds.opt',
|
||||
# 'pcc.opt.svm',
|
||||
# 'pacc.opt.svm',
|
||||
# 'wpacc.opt.svm',
|
||||
# 'wpacc.opt2',
|
||||
# 'MAX', 'MS', 'MS2',
|
||||
'sldc',
|
||||
# 'svmmae',
|
||||
|
|
Loading…
Reference in New Issue