From 482e4453a8c2b9fb08c1046b523f52aeeb868cd7 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Tue, 19 Jan 2021 18:26:40 +0100 Subject: [PATCH] refactor of ensembles, launching EPACC with Ptr policy --- NewMethods/new_experiments.py | 12 +- NewMethods/new_gen_tables.py | 224 +++++++++++++--------------------- NewMethods/settings.py | 1 + TweetSentQuant/experiments.py | 83 +++++++------ TweetSentQuant/gen_tables.py | 176 ++++++++++++++------------ TweetSentQuant/settings.py | 6 +- quapy/evaluation.py | 8 +- quapy/method/aggregative.py | 6 +- quapy/method/meta.py | 157 ++++++++++++------------ quapy/method/neural.py | 1 + quapy/model_selection.py | 19 +-- test.py | 26 ++-- 12 files changed, 355 insertions(+), 364 deletions(-) diff --git a/NewMethods/new_experiments.py b/NewMethods/new_experiments.py index 26d3a77..d60b158 100644 --- a/NewMethods/new_experiments.py +++ b/NewMethods/new_experiments.py @@ -14,7 +14,7 @@ import torch parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results') -parser.add_argument('svmperfpath', metavar='SVMPERF_PATH', type=str, help='path to the directory with svmperf') +#parser.add_argument('svmperfpath', metavar='SVMPERF_PATH', type=str, help='path to the directory with svmperf') args = parser.parse_args() @@ -25,11 +25,11 @@ def quantification_models(): lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']} svmperf_params = {'C': __C_range} #yield 'paccsld', PACCSLD(newLR()), lr_params - #yield 'hdysld', OneVsAll(HDySLD(newLR())), lr_params # <-- promising! + yield 'hdysld', OneVsAll(HDySLD(newLR())), lr_params # <-- promising! - device = 'cuda' if torch.cuda.is_available() else 'cpu' - print(f'Running QuaNet in {device}') - yield 'quanet', QuaNet(PCALR(**newLR().get_params()), SAMPLE_SIZE, device=device), lr_params + #device = 'cuda' if torch.cuda.is_available() else 'cpu' + #print(f'Running QuaNet in {device}') + #yield 'quanet', QuaNet(PCALR(**newLR().get_params()), SAMPLE_SIZE, device=device), lr_params if __name__ == '__main__': @@ -38,7 +38,7 @@ if __name__ == '__main__': np.random.seed(0) optim_losses = ['mae'] - datasets = ['hcr'] # qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN + datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN models = quantification_models() results = Parallel(n_jobs=settings.N_JOBS)( diff --git a/NewMethods/new_gen_tables.py b/NewMethods/new_gen_tables.py index 6b9adff..7fa8983 100644 --- a/NewMethods/new_gen_tables.py +++ b/NewMethods/new_gen_tables.py @@ -4,7 +4,9 @@ from os import makedirs import sys, os import pickle from experiments import result_path +from gen_tables import save_table, experiment_errors from tabular import Table +import argparse tables_path = './tables' MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results @@ -29,6 +31,7 @@ nice = { 'svmmrae': 'SVM(RAE)', 'quanet': 'QuaNet', 'hdy': 'HDy', + 'hdysld': 'HDy-SLD', 'dys': 'DyS', 'svmperf':'', 'sanders': 'Sanders', @@ -44,153 +47,102 @@ def nicerm(key): return '\mathrm{'+nice[key]+'}' -def load_Gao_Sebastiani_previous_results(): - def rename(method): - old2new = { - 'kld': 'svmkld', - 'nkld': 'svmnkld', - 'qbeta2': 'svmq', - 'em': 'sld' - } - return old2new.get(method, method) +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate tables for Tweeter Sentiment Quantification') + parser.add_argument('results', 
metavar='RESULT_PATH', type=str, + help='path to the directory containing the results of the methods tested in Gao & Sebastiani') + parser.add_argument('newresults', metavar='RESULT_PATH', type=str, + help='path to the directory containing the results for the experimental methods') + args = parser.parse_args() - gao_seb_results = {} - with open('./Gao_Sebastiani_results.txt', 'rt') as fin: - lines = fin.readlines() - for line in lines[1:]: - line = line.strip() - parts = line.lower().split() - if len(parts) == 4: - dataset, method, ae, rae = parts - else: - method, ae, rae = parts - learner, method = method.split('-') - method = rename(method) - gao_seb_results[f'{dataset}-{method}-ae'] = float(ae) - gao_seb_results[f'{dataset}-{method}-rae'] = float(rae) - return gao_seb_results + datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST + evaluation_measures = [qp.error.ae, qp.error.rae] + gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld'] + new_methods = ['hdy'] # methods added to the Gao & Sebastiani methods + experimental_methods = ['hdysld'] # experimental + for i, eval_func in enumerate(evaluation_measures): -def get_ranks_from_Gao_Sebastiani(): - gao_seb_results = load_Gao_Sebastiani_previous_results() - datasets = set([key.split('-')[0] for key in gao_seb_results.keys()]) - methods = np.sort(np.unique([key.split('-')[1] for key in gao_seb_results.keys()])) - ranks = {} - for metric in ['ae', 'rae']: + # Tables evaluation scores for AE and RAE (two tables) + # ---------------------------------------------------- + + eval_name = eval_func.__name__ + + added_methods = ['svmm' + eval_name] + new_methods + methods = gao_seb_methods + added_methods + experimental_methods + nold_methods = len(gao_seb_methods) + nnew_methods = len(added_methods) + nexp_methods = len(experimental_methods) + + # fill data table + table = Table(rows=datasets, cols=methods) for dataset in datasets: - scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods] - order = np.argsort(scores) - sorted_methods = methods[order] - for i, method in enumerate(sorted_methods): - ranks[f'{dataset}-{method}-{metric}'] = i+1 + for method in methods: + if method in experimental_methods: + path = args.newresults + else: + path = args.results + table.add(dataset, method, experiment_errors(path, dataset, method, eval_name)) + + # write the latex table + tabular = """ + \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*nold_methods) + '|' + ('Y|'*nnew_methods) + '|' + ('Y|'*nexp_methods) + """} \hline + & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & + \multicolumn{"""+str(nnew_methods)+"""}{c|}{} & + \multicolumn{"""+str(nexp_methods)+"""}{c|}{}\\\\ \hline + """ + rowreplace={dataset: nice.get(dataset, dataset.upper()) for dataset in datasets} + colreplace={method:'\side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' for method in methods} + + tabular += table.latexTabular(rowreplace=rowreplace, colreplace=colreplace) + tabular += "\n\end{tabularx}" + + save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular) + + # Tables ranks for AE and RAE (two tables) + # ---------------------------------------------------- + # fill the data table + ranktable = Table(rows=datasets, cols=methods, missing='--') + for dataset in datasets: + for method in methods: + ranktable.add(dataset, method, values=table.get(dataset, method, 'rank')) + + # write the latex table + tabular = """ + \\begin{tabularx}{\\textwidth}{|c||""" + 
('Y|'*nold_methods) + '|' + ('Y|'*nnew_methods) + '|' + ('Y|'*nexp_methods) + """} \hline + & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & + \multicolumn{"""+str(nnew_methods)+"""}{c|}{} & + \multicolumn{"""+str(nexp_methods)+"""}{c|}{}\\\\ \hline + """ for method in methods: - rankave = np.mean([ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets]) - ranks[f'Average-{method}-{metric}'] = rankave - return ranks, gao_seb_results + tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' + tabular += '\\\\\hline\n' + for dataset in datasets: + tabular += nice.get(dataset, dataset.upper()) + ' ' + for method in methods: + newrank = ranktable.get(dataset, method) + if newrank != '--': + newrank = f'{int(newrank)}' + color = ranktable.get_color(dataset, method) + if color == '--': + color = '' + tabular += ' & ' + f'{newrank}' + color + tabular += '\\\\\hline\n' + tabular += '\hline\n' -def save_table(path, table): - print(f'saving results in {path}') - with open(path, 'wt') as foo: - foo.write(table) - - -datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST -evaluation_measures = [qp.error.ae, qp.error.rae] -gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld'] -new_methods = [] - - -def experiment_errors(dataset, method, loss): - path = result_path(dataset, method, 'm'+loss if not loss.startswith('m') else loss) - if os.path.exists(path): - true_prevs, estim_prevs, _, _, _, _ = pickle.load(open(path, 'rb')) - err_fn = getattr(qp.error, loss) - errors = err_fn(true_prevs, estim_prevs) - return errors - return None - - -gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani() - -for i, eval_func in enumerate(evaluation_measures): - - # Tables evaluation scores for AE and RAE (two tables) - # ---------------------------------------------------- - - eval_name = eval_func.__name__ - added_methods = ['svmm' + eval_name] + new_methods - methods = gao_seb_methods + added_methods - nold_methods = len(gao_seb_methods) - nnew_methods = len(added_methods) - - # fill data table - table = Table(rows=datasets, cols=methods) - for dataset in datasets: + tabular += 'Average ' for method in methods: - table.add(dataset, method, experiment_errors(dataset, method, eval_name)) - - # write the latex table - tabular = """ - \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*nold_methods)+ '|' + ('Y|'*nnew_methods) + """} \hline - & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & - \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline - """ - rowreplace={dataset: nice.get(dataset, dataset.upper()) for dataset in datasets} - colreplace={method:'\side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' for method in methods} - - tabular += table.latexTabular(rowreplace=rowreplace, colreplace=colreplace) - tabular += "\n\end{tabularx}" - - save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular) - - # Tables ranks for AE and RAE (two tables) - # ---------------------------------------------------- - methods = gao_seb_methods - - # fill the data table - ranktable = Table(rows=datasets, cols=methods, missing='--') - for dataset in datasets: - for method in methods: - ranktable.add(dataset, method, values=table.get(dataset, method, 'rank')) - - # write the latex table - tabular = """ - \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|' * len(gao_seb_methods)) + """} \hline - & \multicolumn{""" + str(nold_methods) + """}{c|}{Methods tested 
in~\cite{Gao:2016uq}} \\\\ \hline - """ - for method in methods: - tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' - tabular += '\\\\\hline\n' - - for dataset in datasets: - tabular += nice.get(dataset, dataset.upper()) + ' ' - for method in methods: - newrank = ranktable.get(dataset, method) - oldrank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}'] + newrank = ranktable.get_average(method) if newrank != '--': - newrank = f'{int(newrank)}' - color = ranktable.get_color(dataset, method) + newrank = f'{newrank:.1f}' + color = ranktable.get_average(method, 'color') if color == '--': color = '' - tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color + tabular += ' & ' + f'{newrank}' + color tabular += '\\\\\hline\n' - tabular += '\hline\n' + tabular += "\end{tabularx}" - tabular += 'Average ' - for method in methods: - newrank = ranktable.get_average(method) - oldrank = gao_seb_ranks[f'Average-{method}-{eval_name}'] - if newrank != '--': - newrank = f'{newrank:.1f}' - oldrank = f'{oldrank:.1f}' - color = ranktable.get_average(method, 'color') - if color == '--': - color = '' - tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color - tabular += '\\\\\hline\n' - tabular += "\end{tabularx}" + save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular) - save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular) - -print("[Done]") + print("[Done]") diff --git a/NewMethods/settings.py b/NewMethods/settings.py index 1b006c6..2ade31a 100644 --- a/NewMethods/settings.py +++ b/NewMethods/settings.py @@ -1,3 +1,4 @@ import multiprocessing N_JOBS = -2 #multiprocessing.cpu_count() +SAMPLE_SIZE = 100 \ No newline at end of file diff --git a/TweetSentQuant/experiments.py b/TweetSentQuant/experiments.py index fb78dbe..a96e17f 100644 --- a/TweetSentQuant/experiments.py +++ b/TweetSentQuant/experiments.py @@ -2,7 +2,9 @@ from sklearn.linear_model import LogisticRegression import quapy as qp from classification.methods import PCALR from method.meta import QuaNet +from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy +from quapy.method.meta import EPACC, EEMQ import quapy.functional as F import numpy as np import os @@ -14,16 +16,6 @@ import argparse import torch import shutil -parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') -parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results') -parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str,default='./svm_perf_quantification', - help='path to the directory with svmperf') -parser.add_argument('--checkpointdir', metavar='PATH', type=str,default='./checkpoint', - help='path to the directory where to dump QuaNet checkpoints') -args = parser.parse_args() - -SAMPLE_SIZE = 100 - def quantification_models(): def newLR(): @@ -49,13 +41,15 @@ def quantification_models(): device = 'cuda' if torch.cuda.is_available() else 'cpu' print(f'Running QuaNet in {device}') - yield 'quanet', QuaNet(PCALR(**newLR().get_params()), SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params + #yield 'quanet', QuaNet(PCALR(**newLR().get_params()), settings.SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params - # to add: - # quapy - # ensembles - # - # 'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(), + 
param_mod_sel={'sample_size':settings.SAMPLE_SIZE, 'n_prevpoints':21, 'n_repetitions':5} + yield 'epaccmaeptr', EPACC(newLR(), param_grid=lr_params, optim='mae', policy='ptr', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None + # yield 'epaccmraeptr', EPACC(newLR(), param_grid=lr_params, optim='mrae', policy='ptr', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None + # yield 'epaccmae', EPACC(newLR(), param_grid=lr_params, optim='mae', policy='mae', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None + # yield 'epaccmrae', EPACC(newLR(), param_grid=lr_params, optim='mrae', policy='mrae', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None + + #yield 'mlpe', MaximumLikelihoodPrevalenceEstimation(), {} def evaluate_experiment(true_prevalences, estim_prevalences): @@ -74,8 +68,8 @@ def evaluate_method_point_test(true_prev, estim_prev): print(f'\t{eval_measure.__name__}={err:.4f}') -def result_path(dataset_name, model_name, optim_loss): - return os.path.join(args.results, f'{dataset_name}-{model_name}-{optim_loss}.pkl') +def result_path(path, dataset_name, model_name, optim_loss): + return os.path.join(path, f'{dataset_name}-{model_name}-{optim_loss}.pkl') def is_already_computed(dataset_name, model_name, optim_loss): @@ -83,11 +77,11 @@ def is_already_computed(dataset_name, model_name, optim_loss): check_datasets = ['semeval13', 'semeval14', 'semeval15'] else: check_datasets = [dataset_name] - return all(os.path.exists(result_path(name, model_name, optim_loss)) for name in check_datasets) + return all(os.path.exists(result_path(args.results, name, model_name, optim_loss)) for name in check_datasets) def save_results(dataset_name, model_name, optim_loss, *results): - rpath = result_path(dataset_name, model_name, optim_loss) + rpath = result_path(args.results, dataset_name, model_name, optim_loss) qp.util.create_parent_dir(rpath) with open(rpath, 'wb') as foo: pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL) @@ -95,14 +89,14 @@ def save_results(dataset_name, model_name, optim_loss, *results): def run(experiment): - qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE + qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE optim_loss, dataset_name, (model_name, model, hyperparams) = experiment if is_already_computed(dataset_name, model_name, optim_loss=optim_loss): print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.') return - elif (optim_loss=='mae' and model_name=='svmmrae') or (optim_loss=='mrae' and model_name=='svmmae'): + elif (optim_loss=='mae' and 'mrae' in model_name) or (optim_loss=='mrae' and 'mae' in model_name): print(f'skipping model={model_name} for optim_loss={optim_loss}') return else: @@ -112,19 +106,24 @@ def run(experiment): benchmark_devel.stats() # model selection (hyperparameter optimization for a quantification-oriented loss) - model_selection = qp.model_selection.GridSearchQ( - model, - param_grid=hyperparams, - sample_size=SAMPLE_SIZE, - n_prevpoints=21, - n_repetitions=5, - error=optim_loss, - refit=False, - timeout=60*60, - verbose=True - ) - model_selection.fit(benchmark_devel.training, benchmark_devel.test) - model = model_selection.best_model() + if hyperparams is None: + model.fit(benchmark_devel.training, benchmark_devel.test) + best_params = {} + else: + model_selection = qp.model_selection.GridSearchQ( + model, + param_grid=hyperparams, + sample_size=settings.SAMPLE_SIZE, + n_prevpoints=21, + n_repetitions=5, + error=optim_loss, + refit=False, + timeout=60*60, 
+ verbose=True + ) + model_selection.fit(benchmark_devel.training, benchmark_devel.test) + model = model_selection.best_model() + best_params=model_selection.best_params_ # model evaluation test_names = [dataset_name] if dataset_name != 'semeval' else ['semeval13', 'semeval14', 'semeval15'] @@ -137,7 +136,7 @@ def run(experiment): true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction( model, test=benchmark_eval.test, - sample_size=SAMPLE_SIZE, + sample_size=settings.SAMPLE_SIZE, n_prevpoints=21, n_repetitions=25 ) @@ -149,15 +148,23 @@ def run(experiment): save_results(test_name, model_name, optim_loss, true_prevalences, estim_prevalences, benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence, - model_selection.best_params_) + best_params) if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') + parser.add_argument('results', metavar='RESULT_PATH', type=str, + help='path to the directory where to store the results') + parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification', + help='path to the directory with svmperf') + parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint', + help='path to the directory where to dump QuaNet checkpoints') + args = parser.parse_args() print(f'Result folder: {args.results}') np.random.seed(0) - optim_losses = ['mae', 'mrae'] + optim_losses = ['mae']#['mae', 'mrae'] datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN models = quantification_models() diff --git a/TweetSentQuant/gen_tables.py b/TweetSentQuant/gen_tables.py index 141c58e..3637d64 100644 --- a/TweetSentQuant/gen_tables.py +++ b/TweetSentQuant/gen_tables.py @@ -3,6 +3,9 @@ import numpy as np from os import makedirs import sys, os import pickle +import argparse + +import settings from experiments import result_path from tabular import Table @@ -11,8 +14,7 @@ MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (re makedirs(tables_path, exist_ok=True) -sample_size = 100 -qp.environ['SAMPLE_SIZE'] = sample_size +qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE nice = { @@ -95,14 +97,8 @@ def save_table(path, table): foo.write(table) -datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST -evaluation_measures = [qp.error.ae, qp.error.rae] -gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld'] -new_methods = ['hdy'] - - -def experiment_errors(dataset, method, loss): - path = result_path(dataset, method, 'm'+loss if not loss.startswith('m') else loss) +def experiment_errors(path, dataset, method, loss): + path = result_path(path, dataset, method, 'm'+loss if not loss.startswith('m') else loss) if os.path.exists(path): true_prevs, estim_prevs, _, _, _, _ = pickle.load(open(path, 'rb')) err_fn = getattr(qp.error, loss) @@ -111,86 +107,110 @@ def experiment_errors(dataset, method, loss): return None -gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani() +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate tables for Tweeter Sentiment Quantification') + parser.add_argument('results', metavar='RESULT_PATH', type=str, + help='path to the directory where to store the results') + args = parser.parse_args() -for i, eval_func in enumerate(evaluation_measures): + datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST + evaluation_measures = [qp.error.ae, qp.error.rae] + gao_seb_methods = ['cc', 'acc', 
'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld'] + new_methods = ['hdy'] - # Tables evaluation scores for AE and RAE (two tables) - # ---------------------------------------------------- + gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani() - eval_name = eval_func.__name__ - added_methods = ['svmm' + eval_name] + new_methods - methods = gao_seb_methods + added_methods - nold_methods = len(gao_seb_methods) - nnew_methods = len(added_methods) + for i, eval_func in enumerate(evaluation_measures): - # fill data table - table = Table(rows=datasets, cols=methods) - for dataset in datasets: + # Tables evaluation scores for AE and RAE (two tables) + # ---------------------------------------------------- + + eval_name = eval_func.__name__ + added_methods = ['svmm' + eval_name] + new_methods + methods = gao_seb_methods + added_methods + nold_methods = len(gao_seb_methods) + nnew_methods = len(added_methods) + + # fill data table + table = Table(rows=datasets, cols=methods) + for dataset in datasets: + for method in methods: + table.add(dataset, method, experiment_errors(args.results, dataset, method, eval_name)) + + # write the latex table + # tabular = """ + # \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*nold_methods)+ '|' + ('Y|'*nnew_methods) + """} \hline + # & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & + # \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline + # """ + tabular = """ + \\resizebox{\\textwidth}{!}{% + \\begin{tabular}{|c||""" + ('c|' * nold_methods) + '|' + ('c|' * nnew_methods) + """} \hline + & \multicolumn{""" + str(nold_methods) + """}{c||}{Methods tested in~\cite{Gao:2016uq}} & + \multicolumn{""" + str(nnew_methods) + """}{c|}{} \\\\ \hline + """ + rowreplace={dataset: nice.get(dataset, dataset.upper()) for dataset in datasets} + colreplace={method:'\side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' for method in methods} + + tabular += table.latexTabular(rowreplace=rowreplace, colreplace=colreplace) + tabular += """ + \end{tabular}% + } + """ + + save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular) + + # Tables ranks for AE and RAE (two tables) + # ---------------------------------------------------- + methods = gao_seb_methods + + # fill the data table + ranktable = Table(rows=datasets, cols=methods, missing='--') + for dataset in datasets: + for method in methods: + ranktable.add(dataset, method, values=table.get(dataset, method, 'rank')) + + # write the latex table + tabular = """ + \\resizebox{\\textwidth}{!}{% + \\begin{tabular}{|c||""" + ('c|' * len(gao_seb_methods)) + """} \hline + & \multicolumn{""" + str(nold_methods) + """}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline + """ for method in methods: - table.add(dataset, method, experiment_errors(dataset, method, eval_name)) + tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' + tabular += "\\\\\hline\n" - # write the latex table - tabular = """ - \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*nold_methods)+ '|' + ('Y|'*nnew_methods) + """} \hline - & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & - \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline - """ - rowreplace={dataset: nice.get(dataset, dataset.upper()) for dataset in datasets} - colreplace={method:'\side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' for method in methods} + for dataset in datasets: + tabular += nice.get(dataset, 
dataset.upper()) + ' ' + for method in methods: + newrank = ranktable.get(dataset, method) + oldrank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}'] + if newrank != '--': + newrank = f'{int(newrank)}' + color = ranktable.get_color(dataset, method) + if color == '--': + color = '' + tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color + tabular += '\\\\\hline\n' + tabular += '\hline\n' - tabular += table.latexTabular(rowreplace=rowreplace, colreplace=colreplace) - tabular += "\n\end{tabularx}" - - save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular) - - # Tables ranks for AE and RAE (two tables) - # ---------------------------------------------------- - methods = gao_seb_methods - - # fill the data table - ranktable = Table(rows=datasets, cols=methods, missing='--') - for dataset in datasets: + tabular += 'Average ' for method in methods: - ranktable.add(dataset, method, values=table.get(dataset, method, 'rank')) - - # write the latex table - tabular = """ - \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|' * len(gao_seb_methods)) + """} \hline - & \multicolumn{""" + str(nold_methods) + """}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline - """ - for method in methods: - tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' - tabular += '\\\\\hline\n' - - for dataset in datasets: - tabular += nice.get(dataset, dataset.upper()) + ' ' - for method in methods: - newrank = ranktable.get(dataset, method) - oldrank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}'] + newrank = ranktable.get_average(method) + oldrank = gao_seb_ranks[f'Average-{method}-{eval_name}'] if newrank != '--': - newrank = f'{int(newrank)}' - color = ranktable.get_color(dataset, method) + newrank = f'{newrank:.1f}' + oldrank = f'{oldrank:.1f}' + color = ranktable.get_average(method, 'color') if color == '--': color = '' tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color tabular += '\\\\\hline\n' - tabular += '\hline\n' + tabular += """ + \end{tabular}% + } + """ - tabular += 'Average ' - for method in methods: - newrank = ranktable.get_average(method) - oldrank = gao_seb_ranks[f'Average-{method}-{eval_name}'] - if newrank != '--': - newrank = f'{newrank:.1f}' - oldrank = f'{oldrank:.1f}' - color = ranktable.get_average(method, 'color') - if color == '--': - color = '' - tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color - tabular += '\\\\\hline\n' - tabular += "\end{tabularx}" + save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular) - save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular) - -print("[Done]") + print("[Done]") diff --git a/TweetSentQuant/settings.py b/TweetSentQuant/settings.py index 1b006c6..6993c37 100644 --- a/TweetSentQuant/settings.py +++ b/TweetSentQuant/settings.py @@ -1,3 +1,7 @@ import multiprocessing -N_JOBS = -2 #multiprocessing.cpu_count() +N_JOBS = 1 #multiprocessing.cpu_count() +ENSEMBLE_N_JOBS = -2 +SAMPLE_SIZE = 100 + +assert N_JOBS==1 or ENSEMBLE_N_JOBS==1, 'general N_JOBS and ENSEMBLE_N_JOBS should not be both greater than 1' \ No newline at end of file diff --git a/quapy/evaluation.py b/quapy/evaluation.py index 293b709..ca5beed 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -41,17 +41,17 @@ def artificial_sampling_prediction( indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions)) if model.aggregative: #isinstance(model, qp.method.aggregative.AggregativeQuantifier): - print('\tinstance of aggregative-quantifier') + # print('\tinstance of 
aggregative-quantifier') quantification_func = model.aggregate if model.probabilistic: # isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier): - print('\t\tinstance of probabilitstic-aggregative-quantifier') + # print('\t\tinstance of probabilitstic-aggregative-quantifier') preclassified_instances = model.posterior_probabilities(test.instances) else: - print('\t\tinstance of hard-aggregative-quantifier') + # print('\t\tinstance of hard-aggregative-quantifier') preclassified_instances = model.classify(test.instances) test = LabelledCollection(preclassified_instances, test.labels) else: - print('\t\tinstance of base-quantifier') + # print('\t\tinstance of base-quantifier') quantification_func = model.quantify def _predict_prevalences(index): diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index eb1661e..e6886cb 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -123,11 +123,11 @@ def training_helper(learner, if not (0 < val_split < 1): raise ValueError(f'train/val split {val_split} out of range, must be in (0,1)') train, unused = data.split_stratified(train_prop=1-val_split) - elif isinstance(val_split, LabelledCollection): + elif val_split.__class__.__name__ == LabelledCollection.__name__: #isinstance(val_split, LabelledCollection): train = data unused = val_split else: - raise ValueError('param "val_split" not understood; use either a float indicating the split ' + raise ValueError(f'param "val_split" ({type(val_split)}) not understood; use either a float indicating the split ' 'proportion, or a LabelledCollection indicating the validation split') else: train, unused = data, None @@ -495,7 +495,7 @@ class OneVsAll(AggregativeQuantifier): self.binary_quantifier = binary_quantifier self.n_jobs = n_jobs - def fit(self, data: LabelledCollection, fit_learner=True): + def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection]=None): assert not data.binary, \ f'{self.__class__.__name__} expect non-binary data' assert isinstance(self.binary_quantifier, BaseQuantifier), \ diff --git a/quapy/method/meta.py b/quapy/method/meta.py index e6a3de1..ab57d62 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -1,4 +1,6 @@ from copy import deepcopy +from typing import Union +from tqdm import tqdm import numpy as np from joblib import Parallel, delayed @@ -6,12 +8,13 @@ from sklearn.linear_model import LogisticRegression from sklearn.model_selection import GridSearchCV, cross_val_predict import quapy as qp -from quapy import functional as F from quapy.data import LabelledCollection +from quapy import functional as F from quapy.evaluation import evaluate from quapy.model_selection import GridSearchQ from . import neural from .base import BaseQuantifier +from quapy.method.aggregative import CC, ACC, PCC, PACC, HDy, EMQ QuaNet = neural.QuaNetTrainer @@ -31,7 +34,7 @@ class Ensemble(BaseQuantifier): Information Fusion, 45, 1-15. 
""" - def __init__(self, quantifier: BaseQuantifier, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): + def __init__(self, quantifier: BaseQuantifier, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, verbose=False): assert policy in Ensemble.VALID_POLICIES, f'unknown policy={policy}; valid are {Ensemble.VALID_POLICIES}' self.base_quantifier = quantifier self.size = size @@ -40,8 +43,14 @@ class Ensemble(BaseQuantifier): self.policy = policy self.n_jobs = n_jobs self.post_proba_fn = None + self.verbose = verbose - def fit(self, data: LabelledCollection): + def sout(self, msg): + if self.verbose: + print('[Ensemble]' + msg) + + def fit(self, data: qp.data.LabelledCollection, val_split:Union[qp.data.LabelledCollection, float]=None): + self.sout('Fit') if self.policy=='ds' and not data.binary: raise ValueError(f'ds policy is only defined for binary quantification, but this dataset is not binary') @@ -57,14 +66,15 @@ class Ensemble(BaseQuantifier): is_static_policy = (self.policy in qp.error.QUANTIFICATION_ERROR_NAMES) self.ensemble = Parallel(n_jobs=self.n_jobs)( delayed(_delayed_new_instance)( - self.base_quantifier, data, prev, posteriors, keep_samples=is_static_policy - ) for prev in prevs + self.base_quantifier, data, val_split, prev, posteriors, keep_samples=is_static_policy, verbose=self.verbose + ) for prev in tqdm(prevs, desc='fitting ensamble') ) # static selection policy (the name of a quantification-oriented error function to minimize) if self.policy in qp.error.QUANTIFICATION_ERROR_NAMES: self.accuracy_policy(error_name=self.policy) + self.sout('Fit [Done]') return self def quantify(self, instances): @@ -82,8 +92,9 @@ class Ensemble(BaseQuantifier): def set_params(self, **parameters): raise NotImplementedError(f'{self.__class__.__name__} should not be used within GridSearchQ; ' - f'instead, use GridSearchQ within Ensemble, or GridSearchCV whithin the ' - f'base quantifier if it is an aggregative one.') + f'instead, use Ensemble(GridSearchQ(q),...), with q a Quantifier (recommended), ' + f'or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a learner ' + f'l optimized for classification (not recommended).') def get_params(self, deep=True): raise NotImplementedError() @@ -158,11 +169,13 @@ class Ensemble(BaseQuantifier): @property def aggregative(self): - raise NotImplementedError('aggregative functionality not yet supported for Ensemble') + return False + #raise NotImplementedError('aggregative functionality not yet supported for Ensemble') @property def probabilistic(self): - raise NotImplementedError('probabilistic functionality not yet supported for Ensemble') + return False + #raise NotImplementedError('probabilistic functionality not yet supported for Ensemble') #return self.base_quantifier.probabilistic @@ -177,20 +190,32 @@ def select_k(elements, order, k): return [elements[idx] for idx in order[:k]] -def _delayed_new_instance(base_quantifier, data:LabelledCollection, prev, posteriors, keep_samples): +def _delayed_new_instance(base_quantifier, + data: LabelledCollection, + val_split: Union[LabelledCollection, float], + prev, + posteriors, + keep_samples, + verbose): + if verbose: + print(f'\tfit-start for prev {F.strprev(prev)}') model = deepcopy(base_quantifier) sample_index = data.sampling_index(len(data), *prev) sample = data.sampling_from_index(sample_index) - model.fit(sample) + if val_split is None: + model.fit(sample) + else: + if isinstance(val_split, float): + assert 0= {min_val} is unlikely (it failed after {max_trials} trials)') -def 
_instantiate_ensemble(learner, base_quantifier_class, param_grid, optim, sample_size, eval_budget, **kwargs): +def _instantiate_ensemble(learner, base_quantifier_class, param_grid, optim, param_model_sel, **kwargs): if optim is None: base_quantifier = base_quantifier_class(learner) elif optim in qp.error.CLASSIFICATION_ERROR: @@ -228,8 +253,7 @@ def _instantiate_ensemble(learner, base_quantifier_class, param_grid, optim, sam elif optim in qp.error.QUANTIFICATION_ERROR: base_quantifier = GridSearchQ(base_quantifier_class(learner), param_grid=param_grid, - sample_size=sample_size, - eval_budget=eval_budget, + **param_model_sel, error=optim) else: raise ValueError(f'value optim={optim} not understood') @@ -237,74 +261,49 @@ def _instantiate_ensemble(learner, base_quantifier_class, param_grid, optim, sam return Ensemble(base_quantifier, **kwargs) -class EnsembleFactory(BaseQuantifier): +def _check_error(error): + if error is None: + return None + if error in qp.error.QUANTIFICATION_ERROR or error in qp.error.CLASSIFICATION_ERROR: + return error + elif isinstance(error, str): + assert error in qp.error.ERROR_NAMES, \ + f'unknown error name; valid ones are {qp.error.ERROR_NAMES}' + return getattr(qp.error, error) + else: + raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' + f'the name of an error function in {qp.error.ERROR_NAMES}') - def __init__(self, learner, base_quantifier_class, param_grid=None, optim=None, sample_size=None, eval_budget=None, + +def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, + param_model_sel:dict=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): - if param_grid is None and optim is not None: - raise ValueError(f'param_grid is None but optim was requested.') - error = self._check_error(optim) - self.model = _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, sample_size, - eval_budget, size=size, min_pos=min_pos, red_size=red_size, - policy=policy, n_jobs=n_jobs) - - def fit(self, data): - return self.model.fit(data) - - def quantify(self, instances): - return self.model.quantify(instances) - - def set_params(self, **parameters): - return self.model.set_params(**parameters) - - def get_params(self, deep=True): - return self.model.get_params(deep) - - def _check_error(self, error): - if error is None: - return None - if error in qp.error.QUANTIFICATION_ERROR or error in qp.error.CLASSIFICATION_ERROR: - return error - elif isinstance(error, str): - assert error in qp.error.ERROR_NAMES, \ - f'unknown error name; valid ones are {qp.error.ERROR_NAMES}' - return getattr(qp.error, error) - else: - raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' - f'the name of an error function in {qp.error.ERROR_NAMES}') + if optim is not None: + if param_grid is None: + raise ValueError(f'param_grid is None but optim was requested.') + if param_model_sel is None: + raise ValueError(f'param_model_sel is None but optim was requested.') + error = _check_error(optim) + return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, + size=size, min_pos=min_pos, red_size=red_size, + policy=policy, n_jobs=n_jobs) -class ECC(EnsembleFactory): - def __init__(self, learner, param_grid=None, optim=None, sample_size=None, eval_budget=None, - size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): - super().__init__( - learner, qp.method.aggregative.CC, param_grid, optim, sample_size, eval_budget, 
size, min_pos, - red_size, policy, n_jobs - ) +def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): + return ensembleFactory(learner, CC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs) -class EACC(EnsembleFactory): - def __init__(self, learner, param_grid=None, optim=None, sample_size=None, eval_budget=None, - size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): - super().__init__( - learner, qp.method.aggregative.ACC, param_grid, optim, sample_size, eval_budget, size, min_pos, - red_size, policy, n_jobs - ) +def EACC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): + return ensembleFactory(learner, ACC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs) -class EHDy(EnsembleFactory): - def __init__(self, learner, param_grid=None, optim=None, sample_size=None, eval_budget=None, - size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): - super().__init__( - learner, qp.method.aggregative.HDy, param_grid, optim, sample_size, eval_budget, size, min_pos, - red_size, policy, n_jobs - ) +def EPACC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): + return ensembleFactory(learner, PACC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs) -class EEMQ(EnsembleFactory): - def __init__(self, learner, param_grid=None, optim=None, sample_size=None, eval_budget=None, - size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): - super().__init__( - learner, qp.method.aggregative.EMQ, param_grid, optim, sample_size, eval_budget, size, min_pos, - red_size, policy, n_jobs - ) \ No newline at end of file +def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): + return ensembleFactory(learner, HDy, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs) + + +def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1): + return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs) \ No newline at end of file diff --git a/quapy/method/neural.py b/quapy/method/neural.py index ea11148..70b5042 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -278,6 +278,7 @@ class QuaNetModule(torch.nn.Module): # the shape should be (1, number-of-instances, embedding-size + 1) embeded_posteriors = embeded_posteriors.unsqueeze(0) + self.lstm.flatten_parameters() _, (rnn_hidden,_) = self.lstm(embeded_posteriors, self.init_hidden()) rnn_hidden = rnn_hidden.view(self.nlayers, self.ndirections, -1, self.hidden_size) quant_embedding = rnn_hidden[0].view(-1) diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 06fb293..ecc3677 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -118,17 +118,18 @@ class GridSearchQ(BaseQuantifier): raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') - def fit(self, training: LabelledCollection, validation: Union[LabelledCollection, float]=0.4): + def fit(self, training: LabelledCollection, val_split: Union[LabelledCollection, float]=0.4): """ :param training: the training set on which to optimize the hyperparameters - :param validation: either a 
LabelledCollection on which to test the performance of the different settings, or + :param val_split: either a LabelledCollection on which to test the performance of the different settings, or a float in [0,1] indicating the proportion of labelled data to extract from the training set """ - training, validation = self.__check_training_validation(training, validation) + training, val_split = self.__check_training_validation(training, val_split) + assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer' self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes) - print(f'training size={len(training)}') - print(f'validation size={len(validation)}') + # print(f'training size={len(training)}') + # print(f'validation size={len(val_split)}') params_keys = list(self.param_grid.keys()) params_values = list(self.param_grid.values()) @@ -146,7 +147,7 @@ class GridSearchQ(BaseQuantifier): self.best_score_ = None some_timeouts = False for values in itertools.product(*params_values): - params = {k: values[i] for i, k in enumerate(params_keys)} + params = dict({k: values[i] for i, k in enumerate(params_keys)}) if self.timeout > 0: signal.alarm(self.timeout) @@ -156,8 +157,8 @@ class GridSearchQ(BaseQuantifier): model.set_params(**params) model.fit(training) true_prevalences, estim_prevalences = artificial_sampling_prediction( - model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed, - verbose=True + model, val_split, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed, + verbose=False ) score = self.error(true_prevalences, estim_prevalences) @@ -184,7 +185,7 @@ class GridSearchQ(BaseQuantifier): if self.refit: self.sout(f'refitting on the whole development set') - self.best_model_.fit(training + validation) + self.best_model_.fit(training + val_split) return self diff --git a/test.py b/test.py index 1d0dbb7..74bb454 100644 --- a/test.py +++ b/test.py @@ -17,7 +17,7 @@ from quapy.model_selection import GridSearchQ qp.environ['SAMPLE_SIZE'] = 500 #param_grid = {'C': np.logspace(-3,3,7), 'class_weight': ['balanced', None]} param_grid = {'C': np.logspace(0,3,4), 'class_weight': ['balanced']} -max_evaluations = 5000 +max_evaluations = 500 sample_size = qp.environ['SAMPLE_SIZE'] binary = False @@ -29,7 +29,7 @@ if binary: else: dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True) - #dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3) + dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3) print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}') @@ -52,8 +52,14 @@ print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.tes #learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1) learner = LogisticRegression(max_iter=1000) -model = qp.method.aggregative.ClassifyAndCount(learner) -#model = qp.method.meta.ECC(learner, size=20, red_size=10, param_grid=None, optim=None, policy='ds') +# model = qp.method.aggregative.ClassifyAndCount(learner) + + +model = qp.method.meta.EPACC(learner, size=10, red_size=5, + param_grid={'C':[1,10,100]}, + optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5}, + policy='ptr', n_jobs=1) + #model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae', # sample_size=sample_size, 
eval_budget=max_evaluations//10, n_jobs=-1) #model = qp.method.aggregative.ClassifyAndCount(learner) @@ -69,10 +75,10 @@ if qp.isbinary(model) and not qp.isbinary(dataset): print(f'fitting model {model.__class__.__name__}') #train, val = dataset.training.split_stratified(0.6) #model.fit(train, val_split=val) -model.fit(dataset.training) -#for i,e in enumerate(model.ensemble): - #print(i, e.learner.best_estimator_) -# print(i, e.best_model_.learner) +model.fit(dataset.training, val_split=dataset.test) + + + # estimating class prevalences @@ -106,7 +112,7 @@ for error in qp.error.QUANTIFICATION_ERROR: score = error(true_prev, estim_prev) print(f'{error.__name__}={score:.5f}') - +sys.exit(0) # Model selection and Evaluation according to the artificial sampling protocol # ---------------------------------------------------------------------------- @@ -119,7 +125,7 @@ model_selection = GridSearchQ(model, verbose=True, timeout=4) -model = model_selection.fit(dataset.training, validation=0.3) +model = model_selection.fit(dataset.training, val_split=0.3) #model = model_selection.fit(train, validation=val) print(f'Model selection: best_params = {model_selection.best_params_}') print(f'param scores:')
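
Usage sketch for the refactored ensembles: the EnsembleFactory class is replaced by plain factory functions (ECC, EACC, EPACC, EHDy, EEMQ) built on ensembleFactory, and hyperparameter optimization now happens inside each ensemble member through a GridSearchQ configured with param_mod_sel (Ensemble itself does not implement set_params/get_params, so it cannot be placed inside GridSearchQ). The snippet below mirrors the EPACC-with-ptr-policy call added to test.py; the 'hcr' dataset, the reduced grid {'C': [1, 10, 100]} and the sample size of 100 come from the files in this patch, while the final quantify call simply follows QuaPy's standard quantifier API.

import quapy as qp
from sklearn.linear_model import LogisticRegression

qp.environ['SAMPLE_SIZE'] = 100   # same value as settings.SAMPLE_SIZE in this patch

dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)

# each member is a PACC quantifier whose hyperparameters are tuned with a
# quantification-oriented grid search (GridSearchQ, configured via param_mod_sel)
# before fitting; the ensemble is then reduced from size=10 to red_size=5 members
# according to the ptr selection policy
model = qp.method.meta.EPACC(LogisticRegression(max_iter=1000),
                             param_grid={'C': [1, 10, 100]},
                             optim='mae',
                             param_mod_sel={'sample_size': 100, 'n_prevpoints': 21, 'n_repetitions': 5},
                             size=10, red_size=5, policy='ptr', n_jobs=1)

# the val_split is forwarded to each member's GridSearchQ (test.py simply reuses the
# test set here; a stratified split of the training set works as well)
model.fit(dataset.training, val_split=dataset.test)

estim_prev = model.quantify(dataset.test.instances)
print(f'true={dataset.test.prevalence()} estimated={estim_prev}')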
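
A second sketch, for the renamed model-selection entry point: GridSearchQ.fit now receives the held-out data through val_split (previously validation), either as a LabelledCollection or as a float proportion of the training set, consistently with the val_split arguments added to Ensemble.fit and OneVsAll.fit. The grid and evaluation protocol below are the ones appearing in test.py and TweetSentQuant/experiments.py; PACC is used here only as an example of a quantifier that GridSearchQ can wrap.

import numpy as np
import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

qp.environ['SAMPLE_SIZE'] = 100

dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)

# quantification-oriented model selection: every hyperparameter combination is evaluated
# by sampling the validation split at 21 prevalence points, 5 repetitions each, and
# scoring the estimated prevalences with mean absolute error
model_selection = qp.model_selection.GridSearchQ(
    PACC(LogisticRegression(max_iter=1000)),
    param_grid={'C': np.logspace(0, 3, 4), 'class_weight': ['balanced']},
    sample_size=100,
    n_prevpoints=21,
    n_repetitions=5,
    error='mae',
    refit=False,
    timeout=60*60,
    verbose=True)

# val_split (formerly 'validation') may be a LabelledCollection or a float proportion
# of the training set to hold out
model = model_selection.fit(dataset.training, val_split=0.3)
print(f'best hyperparameters: {model_selection.best_params_}')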