
fixing quanet

This commit is contained in:
Alejandro Moreo Fernandez 2021-07-02 10:19:00 +02:00
parent 75a95adfa6
commit f0e93692cc
4 changed files with 37 additions and 37 deletions

View File

@@ -1,8 +1,8 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
-from classification.methods import PCALR
-from method.meta import QuaNet
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
+from quapy.classification.methods import PCALR
+from quapy.method.meta import QuaNet
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
 from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
 from quapy.method.meta import EPACC, EEMQ
 import quapy.functional as F
@@ -19,12 +19,16 @@ import shutil
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE

+__C_range = np.logspace(-4, 5, 10)
+lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+svmperf_params = {'C': __C_range}
+
 def newLR():
     return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)

-__C_range = np.logspace(-4, 5, 10)
-lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-svmperf_params = {'C': __C_range}

 def quantification_models():
     # methods tested in Gao & Sebastiani 2016
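For reference, the relocated grid expands to ten logarithmically spaced values of C between 1e-4 and 1e5; a quick standalone check (plain numpy, as the script already uses):

    import numpy as np

    __C_range = np.logspace(-4, 5, 10)
    print(__C_range)
    # [1.e-04 1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02 1.e+03 1.e+04 1.e+05]
    # crossed with class_weight in {None, 'balanced'}, lr_params spans 20 candidate configurations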
@@ -33,9 +37,9 @@ def quantification_models():
     yield 'pcc', PCC(newLR()), lr_params
     yield 'pacc', PACC(newLR()), lr_params
     yield 'sld', EMQ(newLR()), lr_params
-    # yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
-    # yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
-    # yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
+    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
+    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
+    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params

     # methods added
     # yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
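The three SVMperf-based losses (Q, KLD, NKLD) are re-enabled above. A minimal sketch of running one of them on its own, assuming svm_perf is compiled at ./svm_perf_quantification (the script's default) and taking the 'hcr' Twitter dataset purely as an example:

    import quapy as qp
    from quapy.method.aggregative import OneVsAll, SVMQ

    dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
    model = OneVsAll(SVMQ('./svm_perf_quantification'))  # one binary SVM(Q) per class
    model.fit(dataset.training)
    estim_prevalence = model.quantify(dataset.test.instances)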
@@ -53,11 +57,10 @@ def quantification_cuda_models():
 def quantification_ensembles():
     param_mod_sel = {
         'sample_size': settings.SAMPLE_SIZE,
-        'n_prevpoints': 21,
-        'n_repetitions': 5,
+        'n_repetitions': 1000,
         'verbose': False
     }
-    common={
+    common = {
         'max_sample_size': 1000,
         'n_jobs': settings.ENSEMBLE_N_JOBS,
         'param_grid': lr_params,
@@ -137,8 +140,8 @@ def run(experiment):
         model,
         param_grid=hyperparams,
         sample_size=settings.SAMPLE_SIZE,
-        n_prevpoints=21,
-        n_repetitions=5,
+        protocol='npp',
+        n_repetitions=1000,
         error=optim_loss,
         refit=False,
         timeout=60*60,
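Switching from the artificial prevalence protocol (APP, which sweeps sample prevalences over an n_prevpoints grid) to the natural prevalence protocol (NPP, which draws samples at the data's own prevalence) makes n_prevpoints meaningless, hence its removal here and in the ensembles' param_mod_sel above; the repetition count is raised from 5 to 1000 to compensate. The hunk shows only keyword arguments; the enclosing call is presumably qp.model_selection.GridSearchQ, and under that assumption (with 'devel' as a hypothetical name for the development split) the full statement would read:

    model_selection = qp.model_selection.GridSearchQ(
        model,
        param_grid=hyperparams,
        sample_size=settings.SAMPLE_SIZE,
        protocol='npp',      # samples keep the data's natural class distribution
        n_repetitions=1000,  # with no prevalence grid, many more random samples are drawn
        error=optim_loss,
        refit=False,
        timeout=60*60,
    )
    model_selection.fit(devel.training, devel.test)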
@@ -159,12 +162,11 @@ def run(experiment):
     # fits the model only the first time
     model.fit(benchmark_eval.training)

-    true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
+    true_prevalences, estim_prevalences = qp.evaluation.natural_prevalence_prediction(
         model,
         test=benchmark_eval.test,
         sample_size=settings.SAMPLE_SIZE,
-        n_prevpoints=21,
-        n_repetitions=25,
+        n_repetitions=5000,
         n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
     )
     test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
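natural_prevalence_prediction returns two aligned arrays holding, per drawn sample, the true and the estimated class prevalences. A sketch of how the scores would then be aggregated, assuming the standard QuaPy error API:

    mae = qp.error.mae(true_prevalences, estim_prevalences)    # mean absolute error over the 5000 samples
    mrae = qp.error.mrae(true_prevalences, estim_prevalences)  # relative variant; eps derived from SAMPLE_SIZE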
@@ -182,7 +184,7 @@ def run(experiment):
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification using NPP')
     parser.add_argument('results', metavar='RESULT_PATH', type=str,
                         help='path to the directory where to store the results')
     parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
@@ -197,17 +199,14 @@ if __name__ == '__main__':
     optim_losses = ['mae', 'mrae']
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN

-    models = quantification_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
+    # models = quantification_models()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)

     models = quantification_cuda_models()
     qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)

-    models = quantification_ensembles()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
-
-    # Parallel(n_jobs=1)(
-    #     delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
-    # )
+    # models = quantification_ensembles()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)

     #shutil.rmtree(args.checkpointdir, ignore_errors=True)

View File

@@ -12,8 +12,8 @@ from os.path import join
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE

 plotext='png'
-resultdir = './results'
-plotdir = './plots'
+resultdir = './results_npp'
+plotdir = './plots_npp'
 os.makedirs(plotdir, exist_ok=True)

 def gather_results(methods, error_name):

View File

@@ -6,10 +6,10 @@ import pickle
 import argparse
 from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
 import settings
-from experiments import result_path
+from experiments_NPP import result_path
 from tabular import Table

-tables_path = './tables'
+tables_path = './tables_npp'
 MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results

 makedirs(tables_path, exist_ok=True)
@@ -85,7 +85,7 @@ if __name__ == '__main__':
     }
     """
-    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_results_{eval_name}.npp.tex', tabular)

     # Tables ranks for AE and RAE (two tables)
     # ----------------------------------------------------
@@ -140,6 +140,6 @@ if __name__ == '__main__':
     }
     """
-    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_rank_{eval_name}.npp.tex', tabular)

     print("[Done]")

View File

@@ -87,8 +87,9 @@ class QuaNetTrainer(BaseQuantifier):
         train_posteriors = self.learner.predict_proba(train_data.instances)

         # turn instances' original representations into embeddings
-        valid_data.instances = self.learner.transform(valid_data.instances)
-        train_data.instances = self.learner.transform(train_data.instances)
+        valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_)
+        train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_)

         self.quantifiers = {
             'cc': CC(self.learner).fit(None, fit_learner=False),
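This hunk is the core of the fix: the old code overwrote valid_data.instances and train_data.instances in place, so the caller's collections were silently replaced by their embedded versions and any later use of the original representations operated on already-transformed data. The new code leaves the inputs intact by wrapping the embeddings in fresh LabelledCollection objects. A toy illustration (the shapes are invented for the example):

    import numpy as np
    from quapy.data import LabelledCollection

    data = LabelledCollection(np.random.rand(100, 5000), np.random.randint(0, 3, size=100))
    embeddings = np.random.rand(100, 128)        # stand-in for self.learner.transform(...)
    data_embed = LabelledCollection(embeddings, data.labels, data.classes_)
    assert data.instances.shape == (100, 5000)   # the original collection is untouched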
@@ -110,9 +111,9 @@ class QuaNetTrainer(BaseQuantifier):
         nQ = len(self.quantifiers)
         nC = data.n_classes
         self.quanet = QuaNetModule(
-            doc_embedding_size=train_data.instances.shape[1],
+            doc_embedding_size=train_data_embed.instances.shape[1],
             n_classes=data.n_classes,
-            stats_size=nQ*nC, #+ 2*nC*nC,
+            stats_size=nQ*nC,
             order_by=0 if data.binary else None,
             **self.quanet_params
         ).to(self.device)
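The stats_size edit only drops a stale '#+ 2*nC*nC' remnant; the value is the number of quantifier-produced statistics QuaNet receives as input. As a worked instance (hedged, since this diff shows only the 'cc' entry of the quantifiers dict): if the dict holds the usual five aggregative methods (cc, acc, pcc, pacc, sld/emq) and the data has 3 classes, then stats_size = nQ*nC = 5*3 = 15.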
@@ -124,8 +125,8 @@ class QuaNetTrainer(BaseQuantifier):
         checkpoint = self.checkpoint

         for epoch_i in range(1, self.n_epochs):
-            self.epoch(train_data, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
-            self.epoch(valid_data, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
+            self.epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
+            self.epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)

             early_stop(self.status['va-loss'], epoch_i)
             if early_stop.IMPROVED: