forked from moreo/QuaPy

fixing quanet

parent: 75a95adfa6
commit: f0e93692cc
@@ -1,8 +1,8 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
-from classification.methods import PCALR
-from method.meta import QuaNet
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
+from quapy.classification.methods import PCALR
+from quapy.method.meta import QuaNet
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
 from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
 from quapy.method.meta import EPACC, EEMQ
 import quapy.functional as F
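Editor's note on this hunk: the bare forms only resolve when the quapy source tree itself is on sys.path; qualifying them with the package name makes the script runnable from any working directory. A minimal check (a sketch, assuming quapy is installed or on PYTHONPATH):

# sketch: verify the fully qualified imports resolve (assumes quapy is importable)
from quapy.classification.methods import PCALR
from quapy.method.meta import QuaNet
print(PCALR, QuaNet)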
@@ -19,12 +19,16 @@ import shutil
 
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 
 
+__C_range = np.logspace(-4, 5, 10)
+
+lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+svmperf_params = {'C': __C_range}
+
+
 def newLR():
     return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
 
-__C_range = np.logspace(-4, 5, 10)
-lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-svmperf_params = {'C': __C_range}
 
 
 def quantification_models():
     # methods tested in Gao & Sebastiani 2016
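The module-level hyperparameter grids merely move above newLR(); nothing about them changes. For reference (a sketch, plain NumPy arithmetic), the shared C grid spans ten powers of ten:

# the shared C grid: 10 log-spaced values from 1e-4 to 1e+5 (one per power of ten)
import numpy as np
__C_range = np.logspace(-4, 5, 10)
print(__C_range[0], __C_range[-1], len(__C_range))  # 0.0001 100000.0 10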
@@ -33,9 +37,9 @@ def quantification_models():
     yield 'pcc', PCC(newLR()), lr_params
     yield 'pacc', PACC(newLR()), lr_params
     yield 'sld', EMQ(newLR()), lr_params
-    # yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
-    # yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
-    # yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
+    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
+    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
+    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
 
     # methods added
     # yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
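The three SVMperf-based baselines (Q, KLD, NKLD losses) are re-enabled rather than newly added; each is wrapped in OneVsAll since the underlying structured-output SVMs are binary quantifiers. How the yielded triples are consumed downstream (a sketch mirroring the __main__ block at the end of this file; it assumes the module-level args from argparse is in scope, as it is there):

# each triple is (short name, quantifier, hyperparameter grid) -- sketch only
for name, model, hyperparams in quantification_models():
    print(name, type(model).__name__, sorted(hyperparams))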
@@ -53,11 +57,10 @@ def quantification_cuda_models():
 def quantification_ensembles():
     param_mod_sel = {
         'sample_size': settings.SAMPLE_SIZE,
-        'n_prevpoints': 21,
-        'n_repetitions': 5,
+        'n_repetitions': 1000,
         'verbose': False
     }
-    common={
+    common = {
         'max_sample_size': 1000,
         'n_jobs': settings.ENSEMBLE_N_JOBS,
         'param_grid': lr_params,
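Editor's note: the ensemble's internal model selection mirrors the change made in run() below: the 21-point artificial prevalence grid repeated 5 times gives way to 1000 randomly drawn validation samples, consistent with the switch to the natural prevalence protocol (the grid parameter n_prevpoints disappears and only the repetition count remains).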
@@ -137,8 +140,8 @@ def run(experiment):
         model,
         param_grid=hyperparams,
         sample_size=settings.SAMPLE_SIZE,
-        n_prevpoints=21,
-        n_repetitions=5,
+        protocol='npp',
+        n_repetitions=1000,
         error=optim_loss,
         refit=False,
         timeout=60*60,
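Editor's note: this is the substantive change to model selection. Under the artificial prevalence protocol (APP), the grid search evaluated every configuration on validation samples drawn at each point of a fixed prevalence grid (n_prevpoints=21), n_repetitions times per grid point; under the natural prevalence protocol (protocol='npp'), samples are drawn at random from the validation pool, so only the repetition count matters. The budget arithmetic for the 3-class tweet datasets (a sketch, plain arithmetic rather than QuaPy API):

# APP sample budget vs. NPP sample budget (pure arithmetic, assuming 3 classes)
from math import comb

n_prevpoints, n_classes = 21, 3
# prevalence vectors on a 21-point grid that sum to 1: C(20+2, 2) = 231
grid_size = comb(n_prevpoints - 1 + n_classes - 1, n_classes - 1)
print(grid_size)       # 231 valid prevalence vectors
print(grid_size * 5)   # 1155 validation samples under the old APP settings
print(1000)            # a flat 1000 random samples under the new NPP settings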
@@ -159,12 +162,11 @@ def run(experiment):
     # fits the model only the first time
     model.fit(benchmark_eval.training)
 
-    true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
+    true_prevalences, estim_prevalences = qp.evaluation.natural_prevalence_prediction(
         model,
         test=benchmark_eval.test,
         sample_size=settings.SAMPLE_SIZE,
-        n_prevpoints=21,
-        n_repetitions=25,
+        n_repetitions=5000,
         n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
     )
     test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
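The test-side evaluation changes in tandem: qp.evaluation.artificial_sampling_prediction becomes natural_prevalence_prediction (both names as this revision of QuaPy spells them), n_prevpoints=21 disappears, and the repetition count rises from 25 to 5000. By the arithmetic sketched above, the old APP run drew 231 * 25 = 5775 test samples per 3-class dataset, so 5000 random NPP samples is a comparable budget.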
@@ -182,7 +184,7 @@ def run(experiment):
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification using NPP')
     parser.add_argument('results', metavar='RESULT_PATH', type=str,
                         help='path to the directory where to store the results')
     parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
@@ -197,17 +199,14 @@ if __name__ == '__main__':
     optim_losses = ['mae', 'mrae']
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
 
-    models = quantification_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
+    # models = quantification_models()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
 
     models = quantification_cuda_models()
     qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)
 
-    models = quantification_ensembles()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
-    # Parallel(n_jobs=1)(
-    #     delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
-    # )
+    # models = quantification_ensembles()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
 
     #shutil.rmtree(args.checkpointdir, ignore_errors=True)
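Editor's note: after these edits only the CUDA batch (quantification_cuda_models, presumably the QuaNet configurations, given the commit message) remains active; the plain and ensemble batches are commented out rather than deleted, and an older commented Parallel/delayed variant is dropped entirely.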
@@ -12,8 +12,8 @@ from os.path import join
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 plotext='png'
 
-resultdir = './results'
-plotdir = './plots'
+resultdir = './results_npp'
+plotdir = './plots_npp'
 os.makedirs(plotdir, exist_ok=True)
 
 def gather_results(methods, error_name):
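The plotting script's output directories gain an _npp suffix so that NPP results and plots do not overwrite those of earlier APP runs.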
@@ -6,10 +6,10 @@ import pickle
 import argparse
 from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
 import settings
-from experiments import result_path
+from experiments_NPP import result_path
 from tabular import Table
 
-tables_path = './tables'
+tables_path = './tables_npp'
 MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results
 
 makedirs(tables_path, exist_ok=True)
@@ -85,7 +85,7 @@ if __name__ == '__main__':
     }
     """
 
-    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_results_{eval_name}.npp.tex', tabular)
 
     # Tables ranks for AE and RAE (two tables)
     # ----------------------------------------------------
@@ -140,6 +140,6 @@ if __name__ == '__main__':
     }
     """
 
-    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_rank_{eval_name}.npp.tex', tabular)
 
     print("[Done]")
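Both LaTeX outputs of the tables script now build their paths from the tables_path variable ('./tables_npp') instead of the hard-coded './tables', and the suffix changes from .new.tex to .npp.tex, matching the _npp convention of the results and plots directories above.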
@@ -87,8 +87,9 @@ class QuaNetTrainer(BaseQuantifier):
         train_posteriors = self.learner.predict_proba(train_data.instances)
 
         # turn instances' original representations into embeddings
-        valid_data.instances = self.learner.transform(valid_data.instances)
-        train_data.instances = self.learner.transform(train_data.instances)
+        valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_)
+        train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_)
 
         self.quantifiers = {
             'cc': CC(self.learner).fit(None, fit_learner=False),
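Editor's note: this is the QuaNet fix named in the commit message. The old code overwrote valid_data.instances and train_data.instances in place, mutating the caller's LabelledCollection objects (and re-embedding already-embedded data if fit were invoked twice); the new code leaves the inputs untouched and carries the embeddings in fresh LabelledCollection objects. The hazard in miniature (a plain-Python sketch, not QuaPy code):

# in-place mutation leaks out to the caller: the bug pattern the fix removes
class Collection:
    def __init__(self, instances):
        self.instances = instances

def embed(x):                              # stand-in for learner.transform
    return [v * 2 for v in x]

def fit_buggy(d):
    d.instances = embed(d.instances)       # caller's object is silently modified

def fit_fixed(d):
    return Collection(embed(d.instances))  # caller's object stays intact

data = Collection([1, 2, 3])
fit_buggy(data)
print(data.instances)                      # [2, 4, 6]: the side effect leaks out

data = Collection([1, 2, 3])
embedded = fit_fixed(data)
print(data.instances, embedded.instances)  # [1, 2, 3] [2, 4, 6]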
@@ -110,9 +111,9 @@ class QuaNetTrainer(BaseQuantifier):
         nQ = len(self.quantifiers)
         nC = data.n_classes
         self.quanet = QuaNetModule(
-            doc_embedding_size=train_data.instances.shape[1],
+            doc_embedding_size=train_data_embed.instances.shape[1],
             n_classes=data.n_classes,
-            stats_size=nQ*nC, #+ 2*nC*nC,
+            stats_size=nQ*nC,
             order_by=0 if data.binary else None,
             **self.quanet_params
         ).to(self.device)
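Two follow-ups to the same fix: doc_embedding_size is now read from train_data_embed, whose instances really are embeddings once the original collections are no longer overwritten, and stats_size drops the dead commented term (#+ 2*nC*nC) so the declared size matches the nQ*nC quantifier statistics actually computed.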
@@ -124,8 +125,8 @@ class QuaNetTrainer(BaseQuantifier):
         checkpoint = self.checkpoint
 
         for epoch_i in range(1, self.n_epochs):
-            self.epoch(train_data, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
-            self.epoch(valid_data, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
+            self.epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
+            self.epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
 
             early_stop(self.status['va-loss'], epoch_i)
             if early_stop.IMPROVED: