# QuaPy/TweetSentQuant/experiments.py
from sklearn.linear_model import LogisticRegression
import quapy as qp
from classification.methods import PCALR
from method.meta import QuaNet
from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
import quapy.functional as F
import numpy as np
import os
import pickle
import itertools
from joblib import Parallel, delayed
import settings
import argparse
import torch
import shutil
parser = argparse.ArgumentParser(description='Run experiments for Twitter Sentiment Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results')
parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
                    help='path to the directory with svmperf')
parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
                    help='path to the directory where to dump QuaNet checkpoints')
args = parser.parse_args()
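# Example invocation (the paths are illustrative; both optional arguments fall back to the
# defaults declared above):
#   python experiments.py ./results --svmperfpath ./svm_perf_quantification --checkpointdir ./checkpoint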
SAMPLE_SIZE = 100
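# Every quantifier is evaluated on artificial samples of SAMPLE_SIZE instances (see the
# sample_size argument of GridSearchQ and artificial_sampling_prediction below).
# quantification_models() yields (short_name, quantifier, hyperparameter_grid) triples;
# the grid is explored during model selection in run().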
def quantification_models():
    def newLR():
        return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)

    __C_range = np.logspace(-4, 5, 10)
    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
    svmperf_params = {'C': __C_range}

    # methods tested in Gao & Sebastiani 2016
    yield 'cc', CC(newLR()), lr_params
    yield 'acc', ACC(newLR()), lr_params
    yield 'pcc', PCC(newLR()), lr_params
    yield 'pacc', PACC(newLR()), lr_params
    yield 'sld', EMQ(newLR()), lr_params
    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params

    # methods added
    yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
    yield 'svmmrae', OneVsAll(SVMRAE(args.svmperfpath)), svmperf_params
    yield 'hdy', OneVsAll(HDy(newLR())), lr_params

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'Running QuaNet in {device}')
    yield 'quanet', QuaNet(PCALR(**newLR().get_params()), SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params

    # to add:
    # quapy
    # ensembles
    #
    # 'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),
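# The two helpers below report MAE and MRAE: evaluate_experiment() over the grid of artificially
# sampled prevalences, and evaluate_method_point_test() for the single point estimate on the
# natural prevalence of the test set.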
def evaluate_experiment(true_prevalences, estim_prevalences):
    print('\nEvaluation Metrics:\n' + '=' * 22)
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prevalences, estim_prevalences)
        print(f'\t{eval_measure.__name__}={err:.4f}')
    print()


def evaluate_method_point_test(true_prev, estim_prev):
    print('\nPoint-Test evaluation:\n' + '=' * 22)
    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prev, estim_prev)
        print(f'\t{eval_measure.__name__}={err:.4f}')


def result_path(dataset_name, model_name, optim_loss):
    return os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl')
def is_already_computed(dataset_name, model_name, optim_loss):
    # the 'semeval' training collection is shared by the semeval13, semeval14, and semeval15 test
    # sets (see run()); the experiment counts as done only if results exist for all three
    if dataset_name == 'semeval':
        check_datasets = ['semeval13', 'semeval14', 'semeval15']
    else:
        check_datasets = [dataset_name]
    return all(os.path.exists(result_path(name, model_name, optim_loss)) for name in check_datasets)
def save_results(dataset_name, model_name, optim_loss, *results):
    rpath = result_path(dataset_name, model_name, optim_loss)
    qp.util.create_parent_dir(rpath)
    with open(rpath, 'wb') as foo:
        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
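# run() executes one (optimization-loss, dataset, method) experiment: hyperparameters are tuned
# with GridSearchQ on the development set using an artificial-prevalence protocol (21 prevalence
# points x 5 repetitions), and the selected model is then evaluated on the test set(s) with the
# same protocol (21 points x 25 repetitions), plus a single point estimate on the natural test
# prevalence. All outcomes are pickled via save_results().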
def run(experiment):
    qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE

    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment

    if is_already_computed(dataset_name, model_name, optim_loss=optim_loss):
        print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
        return
    elif (optim_loss == 'mae' and model_name == 'svmmrae') or (optim_loss == 'mrae' and model_name == 'svmmae'):
        print(f'skipping model={model_name} for optim_loss={optim_loss}')
        return
    else:
        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')

    benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
    benchmark_devel.stats()

    # model selection (hyperparameter optimization for a quantification-oriented loss)
    model_selection = qp.model_selection.GridSearchQ(
        model,
        param_grid=hyperparams,
        sample_size=SAMPLE_SIZE,
        n_prevpoints=21,
        n_repetitions=5,
        error=optim_loss,
        refit=False,
        timeout=60*60,
        verbose=True
    )
    model_selection.fit(benchmark_devel.training, benchmark_devel.test)
    model = model_selection.best_model()

    # model evaluation
    test_names = [dataset_name] if dataset_name != 'semeval' else ['semeval13', 'semeval14', 'semeval15']
    for test_no, test_name in enumerate(test_names):
        benchmark_eval = qp.datasets.fetch_twitter(test_name, for_model_selection=False, min_df=5, pickle=True)
        if test_no == 0:
            # fits the model only the first time
            model.fit(benchmark_eval.training)

        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
            model,
            test=benchmark_eval.test,
            sample_size=SAMPLE_SIZE,
            n_prevpoints=21,
            n_repetitions=25
        )
        test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
        test_true_prevalence = benchmark_eval.test.prevalence()

        evaluate_experiment(true_prevalences, estim_prevalences)
        evaluate_method_point_test(test_true_prevalence, test_estim_prevalence)
        save_results(test_name, model_name, optim_loss,
                     true_prevalences, estim_prevalences,
                     benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence,
                     model_selection.best_params_)
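# Main entry point: experiments are the Cartesian product of optimization losses (mae, mrae),
# the Twitter training datasets, and the quantification methods; they are dispatched in parallel
# with joblib (N_JOBS is taken from the local settings module). QuaNet checkpoints are removed
# once all experiments have finished.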
if __name__ == '__main__':
    print(f'Result folder: {args.results}')
    np.random.seed(0)
    optim_losses = ['mae', 'mrae']
    datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
    models = quantification_models()
    results = Parallel(n_jobs=settings.N_JOBS)(
        delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
    )

    shutil.rmtree(args.checkpointdir, ignore_errors=True)