forked from moreo/QuaPy

fixing quanet

This commit is contained in:
parent 75a95adfa6
commit f0e93692cc
@@ -1,8 +1,8 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
-from classification.methods import PCALR
-from method.meta import QuaNet
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
+from quapy.classification.methods import PCALR
+from quapy.method.meta import QuaNet
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
 from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
 from quapy.method.meta import EPACC, EEMQ
 import quapy.functional as F

@@ -19,12 +19,16 @@ import shutil

 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE


+__C_range = np.logspace(-4, 5, 10)
+
+lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+svmperf_params = {'C': __C_range}
+
+
 def newLR():
     return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)

-__C_range = np.logspace(-4, 5, 10)
-lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-svmperf_params = {'C': __C_range}

 def quantification_models():
     # methods tested in Gao & Sebastiani 2016

@@ -33,9 +37,9 @@ def quantification_models():
     yield 'pcc', PCC(newLR()), lr_params
     yield 'pacc', PACC(newLR()), lr_params
     yield 'sld', EMQ(newLR()), lr_params
-    # yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
-    # yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
-    # yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
+    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
+    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
+    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params

     # methods added
     # yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params

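Note: this hunk re-enables the three SVMperf-based quantifiers tested in Gao & Sebastiani 2016. These wrap Joachims' svm_perf with quantification-oriented loss functions and typically require a compiled, patched binary whose location comes from --svmperfpath (default './svm_perf_quantification', see the argparse hunk below). A minimal standalone sketch, assuming such a binary is in place:

from quapy.method.aggregative import SVMQ, OneVsAll

svmperf_home = './svm_perf_quantification'  # the script's --svmperfpath default
model = OneVsAll(SVMQ(svmperf_home))        # one binary SVMQ per class
# model.fit(train); model.quantify(test.instances) as with any aggregative method
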
@@ -53,11 +57,10 @@ def quantification_cuda_models():
 def quantification_ensembles():
     param_mod_sel = {
         'sample_size': settings.SAMPLE_SIZE,
-        'n_prevpoints': 21,
-        'n_repetitions': 5,
+        'n_repetitions': 1000,
         'verbose': False
     }
-    common={
+    common = {
         'max_sample_size': 1000,
         'n_jobs': settings.ENSEMBLE_N_JOBS,
         'param_grid': lr_params,

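Note: the model-selection protocol inside the ensembles changes here. Under the natural prevalence protocol (NPP) there is no grid of artificial prevalence points to sweep, so 'n_prevpoints' disappears and the repetition count grows from 5 to 1000 to keep the estimate stable. A sketch of the difference using LabelledCollection's sampling primitives (method names assumed from QuaPy's API of this era):

# 'data' is any qp.data.LabelledCollection
sample_app = data.sampling(500, 0.3, 0.7)   # APP: prevalence imposed per sample
sample_npp = data.uniform_sampling(500)     # NPP: natural prevalence preserved
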
@@ -137,8 +140,8 @@ def run(experiment):
         model,
         param_grid=hyperparams,
         sample_size=settings.SAMPLE_SIZE,
-        n_prevpoints=21,
-        n_repetitions=5,
+        protocol='npp',
+        n_repetitions=1000,
         error=optim_loss,
         refit=False,
         timeout=60*60,

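Note: the same switch is applied to model selection in run(): the artificial grid (n_prevpoints=21 x n_repetitions=5) becomes protocol='npp' with 1000 validation samples. The receiver of these keyword arguments is cut off by the diff context; a hedged reconstruction, assuming it is QuaPy's GridSearchQ:

model_selection = qp.model_selection.GridSearchQ(
    model,
    param_grid=hyperparams,
    sample_size=settings.SAMPLE_SIZE,
    protocol='npp',      # validation samples drawn at their natural prevalence
    n_repetitions=1000,
    error=optim_loss,
    refit=False,
    timeout=60*60,
)
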
@@ -159,12 +162,11 @@ def run(experiment):
         # fits the model only the first time
         model.fit(benchmark_eval.training)

-        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
+        true_prevalences, estim_prevalences = qp.evaluation.natural_prevalence_prediction(
             model,
             test=benchmark_eval.test,
             sample_size=settings.SAMPLE_SIZE,
-            n_prevpoints=21,
-            n_repetitions=25,
+            n_repetitions=5000,
             n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
         )
         test_estim_prevalence = model.quantify(benchmark_eval.test.instances)

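Note: evaluation moves from artificial_sampling_prediction to natural_prevalence_prediction accordingly, trading the 21-point prevalence grid for 5000 uniformly drawn test samples. Both return paired arrays of true and estimated prevalence vectors, one row per sample, which QuaPy's error functions consume directly; a short sketch:

# scoring the arrays returned above (qp.error.mae/mrae take true-vs-estimated
# prevalence matrices; mrae uses the SAMPLE_SIZE environment for smoothing)
mae = qp.error.mae(true_prevalences, estim_prevalences)
mrae = qp.error.mrae(true_prevalences, estim_prevalences)
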
@@ -182,7 +184,7 @@ def run(experiment):


 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification using NPP')
     parser.add_argument('results', metavar='RESULT_PATH', type=str,
                         help='path to the directory where to store the results')
     parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',

@@ -197,17 +199,14 @@ if __name__ == '__main__':
     optim_losses = ['mae', 'mrae']
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN

-    models = quantification_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
+    # models = quantification_models()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)

     models = quantification_cuda_models()
     qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)

-    models = quantification_ensembles()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
-    # Parallel(n_jobs=1)(
-    #     delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
-    # )
+    # models = quantification_ensembles()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)

     #shutil.rmtree(args.checkpointdir, ignore_errors=True)

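Note: after this change only the CUDA models (the QuaNet family) are dispatched; the plain and ensemble batteries are commented out, which matches the commit's focus on QuaNet. Each generator yields (name, model, hyperparams) triples, so run() receives one tuple per combination; an illustrative sketch:

import itertools

# what qp.util.parallel fans out to run(): one experiment per combination
for optim_loss, dataset, (name, model, hyperparams) in itertools.product(
        ['mae', 'mrae'],
        qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN,
        quantification_cuda_models()):
    pass  # run((optim_loss, dataset, (name, model, hyperparams)))
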
@@ -12,8 +12,8 @@ from os.path import join
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 plotext='png'

-resultdir = './results'
-plotdir = './plots'
+resultdir = './results_npp'
+plotdir = './plots_npp'
 os.makedirs(plotdir, exist_ok=True)

 def gather_results(methods, error_name):

@@ -6,10 +6,10 @@ import pickle
 import argparse
 from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
 import settings
-from experiments import result_path
+from experiments_NPP import result_path
 from tabular import Table

-tables_path = './tables'
+tables_path = './tables_npp'
 MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results

 makedirs(tables_path, exist_ok=True)

@@ -85,7 +85,7 @@ if __name__ == '__main__':
     }
     """

-    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_results_{eval_name}.npp.tex', tabular)

     # Tables ranks for AE and RAE (two tables)
     # ----------------------------------------------------

@@ -140,6 +140,6 @@ if __name__ == '__main__':
     }
     """

-    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_rank_{eval_name}.npp.tex', tabular)

     print("[Done]")

@@ -87,8 +87,9 @@ class QuaNetTrainer(BaseQuantifier):
         train_posteriors = self.learner.predict_proba(train_data.instances)

         # turn instances' original representations into embeddings
-        valid_data.instances = self.learner.transform(valid_data.instances)
-        train_data.instances = self.learner.transform(train_data.instances)
+        valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_)
+        train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_)

         self.quantifiers = {
             'cc': CC(self.learner).fit(None, fit_learner=False),

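Note: this is the heart of the QuaNet fix. The old code assigned the embeddings back into valid_data.instances and train_data.instances, destroying the raw covariates held by those collections; the new code leaves them untouched and stores the embedded views in fresh LabelledCollection objects. A minimal sketch of the corrected pattern (names as in this hunk; 'classes' stands for self._classes_):

from quapy.data import LabelledCollection

def embed(learner, data, classes):
    # build a *new* collection for the embeddings instead of overwriting
    # data.instances, so the original representation survives fit()
    return LabelledCollection(learner.transform(data.instances), data.labels, classes)
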
@@ -110,9 +111,9 @@ class QuaNetTrainer(BaseQuantifier):
         nQ = len(self.quantifiers)
         nC = data.n_classes
         self.quanet = QuaNetModule(
-            doc_embedding_size=train_data.instances.shape[1],
+            doc_embedding_size=train_data_embed.instances.shape[1],
             n_classes=data.n_classes,
-            stats_size=nQ*nC, #+ 2*nC*nC,
+            stats_size=nQ*nC,
             order_by=0 if data.binary else None,
             **self.quanet_params
         ).to(self.device)

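Note: with the collections no longer mutated, the embedding dimensionality must now be read from train_data_embed, and the dead '+ 2*nC*nC' remnant in stats_size is dropped. The size arithmetic, with illustrative numbers:

# e.g. 5 auxiliary quantifiers over a 3-class task
nQ, nC = 5, 3              # len(self.quantifiers), data.n_classes
stats_size = nQ * nC       # 15-dimensional statistics vector fed to QuaNet
doc_embedding_size = 100   # train_data_embed.instances.shape[1] at runtime
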
@@ -124,8 +125,8 @@ class QuaNetTrainer(BaseQuantifier):
         checkpoint = self.checkpoint

         for epoch_i in range(1, self.n_epochs):
-            self.epoch(train_data, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
-            self.epoch(valid_data, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
+            self.epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
+            self.epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)

             early_stop(self.status['va-loss'], epoch_i)
             if early_stop.IMPROVED: