From 3aaf57f2f3981379c114eed691e6b027e1c0cdbc Mon Sep 17 00:00:00 2001
From: Alex Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 28 Jan 2021 18:22:43 +0100
Subject: [PATCH] =?UTF-8?q?all=20uci=20datasets=20from=20P=C3=A9rez-G?=
 =?UTF-8?q?=C3=A1llego=20added,=20quantification=20report=20added?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 TweetSentQuant/evaluate_results.py |   9 +-
 TweetSentQuant/experiments.py      |  10 +--
 quapy/data/base.py                 |  13 ++-
 quapy/data/datasets.py             | 133 +++++++++++++++++++++--------
 quapy/data/reader.py               |   3 +-
 quapy/evaluation.py                |  63 ++++++++++++--
 quapy/method/meta.py               |  25 ++----
 test.py                            |  39 ++++++++-
 8 files changed, 222 insertions(+), 73 deletions(-)

diff --git a/TweetSentQuant/evaluate_results.py b/TweetSentQuant/evaluate_results.py
index a8aba9d..2b8a4d0 100644
--- a/TweetSentQuant/evaluate_results.py
+++ b/TweetSentQuant/evaluate_results.py
@@ -1,3 +1,4 @@
+import numpy as np
 import quapy as qp
 import settings
 import os
@@ -11,8 +12,10 @@ qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 resultdir = './results'
 methods = ['*']
 
+
 def evaluate_results(methods, datasets, error_name):
     results_str = []
+    all_results = []  # one error value per experiment file; averaged after the listing (avoids shadowing builtin all)
     error = qp.error.from_name(error_name)
     for method, dataset in itertools.product(methods, datasets):
         for experiment in glob(f'{resultdir}/{dataset}-{method}-{error_name}.pkl'):
@@ -21,8 +24,12 @@ def evaluate_results(methods, datasets, error_name):
             result = error(true_prevalences, estim_prevalences)
             string = f'{pathlib.Path(experiment).name}: {result:.3f}'
             results_str.append(string)
+            all_results.append(result)
     results_str = sorted(results_str)
     for r in results_str:
         print(r)
+    print()
+    print(f'Ave: {np.mean(all_results):.3f}')
 
-evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')
\ No newline at end of file
+
+evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')
diff --git a/TweetSentQuant/experiments.py b/TweetSentQuant/experiments.py
index 7e3f0e9..3f3c2d7 100644
--- a/TweetSentQuant/experiments.py
+++ b/TweetSentQuant/experiments.py
@@ -58,7 +58,7 @@ def quantification_ensembles():
         'verbose': False
     }
     common={
-        'max_sample_size': 500,
+        'max_sample_size': 1000,
         'n_jobs': settings.ENSEMBLE_N_JOBS,
         'param_grid': lr_params,
         'param_mod_sel': param_mod_sel,
@@ -69,13 +69,13 @@ def quantification_ensembles():
     # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
     # will be skipped (by setting hyperparameters to None)
     hyper_none = None
-    yield 'epaccmaeptr', EPACC(newLR(), optim='mae', policy='ptr', **common), hyper_none
-    yield 'epaccmaemae', EPACC(newLR(), optim='mae', policy='mae', **common), hyper_none
+    #yield 'epaccmaeptr', EPACC(newLR(), optim='mae', policy='ptr', **common), hyper_none
+    yield 'epaccmaemae1k', EPACC(newLR(), optim='mae', policy='mae', **common), hyper_none
     # yield 'esldmaeptr', EEMQ(newLR(), optim='mae', policy='ptr', **common), hyper_none
     # yield 'esldmaemae', EEMQ(newLR(), optim='mae', policy='mae', **common), hyper_none
 
-    yield 'epaccmraeptr', EPACC(newLR(), optim='mrae', policy='ptr', **common), hyper_none
-    yield 'epaccmraemrae', EPACC(newLR(), optim='mrae', policy='mrae', **common), hyper_none
+    #yield 'epaccmraeptr', EPACC(newLR(), optim='mrae', policy='ptr', **common), hyper_none
+    #yield 'epaccmraemrae', EPACC(newLR(), optim='mrae', policy='mrae', **common), hyper_none
     #yield 'esldmraeptr', EEMQ(newLR(), optim='mrae', policy='ptr', **common), hyper_none
     #yield 'esldmraemrae', EEMQ(newLR(), optim='mrae', policy='mrae', **common), hyper_none
 
diff --git a/quapy/data/base.py b/quapy/data/base.py
index 0fed0d7..ffa1e33 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -1,7 +1,7 @@
 import numpy as np
 from scipy.sparse import issparse
 from scipy.sparse import vstack
-from sklearn.model_selection import train_test_split
+from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
 from quapy.functional import artificial_prevalence_sampling, strprev
 
 
@@ -151,6 +151,12 @@ class LabelledCollection:
                   f'#classes={stats_["classes"]}, prevs={stats_["prevs"]}')
         return stats_
 
+    def kFCV(self, nfolds=5, nrepeats=1, random_state=0):
+        """Yield a (train, test) pair for every fold of a repeated stratified k-fold split of this collection."""
+        splitter = RepeatedStratifiedKFold(n_splits=nfolds, n_repeats=nrepeats, random_state=random_state)
+        for tr_idx, te_idx in splitter.split(*self.Xy):
+            # both parts are drawn from this collection through sampling_from_index (train first, then test)
+            yield self.sampling_from_index(tr_idx), self.sampling_from_index(te_idx)
 
 class Dataset:
 
@@ -190,6 +196,11 @@ class Dataset:
               f'type={tr_stats["type"]}, #features={tr_stats["features"]}, #classes={tr_stats["classes"]}, '
               f'tr-prevs={tr_stats["prevs"]}, te-prevs={te_stats["prevs"]}')
 
+    @classmethod
+    def kFCV(cls, data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0):  # yields one cls instance per fold
+        for i, (train, test) in enumerate(data.kFCV(nfolds=nfolds, nrepeats=nrepeats, random_state=random_state)):
+            yield cls(train, test, name=f'fold {(i%nfolds)+1}/{nfolds} (round={(i//nfolds)+1})')
+
 
 def isbinary(data):
     if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 00c4d7d..15a3921 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -1,7 +1,12 @@
+def warn(*args, **kwargs):
+    pass
+import warnings
+warnings.warn = warn
 import os
 import zipfile
 from os.path import join
 from urllib.error import HTTPError
+from sklearn.model_selection import StratifiedKFold
 
 import pandas as pd
 
@@ -17,6 +22,29 @@ TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',
 TWITTER_SENTIMENT_DATASETS_TRAIN = ['gasp', 'hcr', 'omd', 'sanders',
                                  'semeval', 'semeval16',
                                  'sst', 'wa', 'wb']
+UCI_DATASETS = ['acute.a', 'acute.b',
+                'balance.1', 'balance.2', 'balance.3',
+                'breast-cancer',
+                'cmc.1', 'cmc.2', 'cmc.3',
+                'ctg.1', 'ctg.2', 'ctg.3',
+                #'diabetes', # <-- I haven't found this one...
+                'german',
+                'haberman',
+                'ionosphere',
+                'iris.1', 'iris.2', 'iris.3',
+                'mammographic',
+                'pageblocks.5',
+                #'phoneme', # <-- I haven't found this one...
+                'semeion',
+                'sonar',
+                'spambase',
+                'spectf',
+                'tictactoe',
+                'transfusion',
+                'wdbc',
+                'wine.1', 'wine.2', 'wine.3',
+                'wine-q-red', 'wine-q-white',
+                'yeast']
 
 
 def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False):
@@ -134,27 +162,12 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
     return data
 
 
-UCI_DATASETS = ['acute.a', 'acute.b',
-                'balance.1', 'balance.2', 'balance.3',
-                'breast-cancer',
-                'cmc.1', 'cmc.2', 'cmc.3',
-                'ctg.1', 'ctg.2', 'ctg.3',
-                #'diabetes', # <-- I haven't found this one...
-                'german',
-                'haberman',
-                'ionosphere',
-                'iris.1', 'iris.2', 'iris.3',
-                'mammographic',
-                'pageblocks.5',
-                #'phoneme', # <-- I haven't found this one...
-                'semeion',
-                'sonar',
-                'spambase',
-                'spectf',
-                'tictactoe',
-                'transfusion'] # ongoing...
+def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False):
+    data = fetch_UCILabelledCollection(dataset_name, data_home, verbose)
+    return Dataset(*data.split_stratified(1 - test_split, random_state=0))
 
-def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3):
+
+def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False):
 
     assert dataset_name in UCI_DATASETS, \
         f'Name {dataset_name} does not match any known dataset from the UCI Machine Learning datasets repository. ' \
@@ -188,7 +201,14 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
         'spambase': 'Spambase Data Set',
         'spectf': 'SPECTF Heart Data',
         'tictactoe': 'Tic-Tac-Toe Endgame Database',
-        'transfusion': 'Blood Transfusion Service Center Data Set '
+        'transfusion': 'Blood Transfusion Service Center Data Set',
+        'wdbc': 'Wisconsin Diagnostic Breast Cancer',
+        'wine.1': 'Wine Recognition Data (1)',
+        'wine.2': 'Wine Recognition Data (2)',
+        'wine.3': 'Wine Recognition Data (3)',
+        'wine-q-red': 'Wine Quality Red (6-10)',
+        'wine-q-white': 'Wine Quality White (6-10)',
+        'yeast': 'Yeast',
     }
 
     # the identifier is an alias for the dataset group, it's part of the url data-folder, and is the name we use
@@ -219,7 +239,14 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
         'spambase': 'spambase',
         'spectf': 'spect',
         'tictactoe': 'tic-tac-toe',
-        'transfusion': 'blood-transfusion'
+        'transfusion': 'blood-transfusion',
+        'wdbc': 'breast-cancer-wisconsin',
+        'wine-q-red': 'wine-quality',
+        'wine-q-white': 'wine-quality',
+        'wine.1': 'wine',
+        'wine.2': 'wine',
+        'wine.3': 'wine',
+        'yeast': 'yeast',
     }
 
     # the filename is the name of the file within the data_folder indexed by the identifier
@@ -231,7 +258,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
         'page-blocks': 'page-blocks.data.Z',
         'undocumented/connectionist-bench/sonar': 'sonar.all-data',
         'spect': ['SPECTF.train', 'SPECTF.test'],
-        'blood-transfusion': 'transfusion.data'
+        'blood-transfusion': 'transfusion.data',
+        'wine-quality': ['winequality-red.csv', 'winequality-white.csv'],
+        'breast-cancer-wisconsin': 'breast-cancer-wisconsin.data' if dataset_name=='breast-cancer' else 'wdbc.data'
     }
 
     # the filename containing the dataset description (if any)
@@ -242,7 +271,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
         'mammographic-masses': 'mammographic_masses.names',
         'undocumented/connectionist-bench/sonar': 'sonar.names',
         'spect': 'SPECTF.names',
-        'blood-transfusion': 'transfusion.names'
+        'blood-transfusion': 'transfusion.names',
+        'wine-quality': 'winequality.names',
+        'breast-cancer-wisconsin': 'breast-cancer-wisconsin.names' if dataset_name == 'breast-cancer' else 'wdbc.names'
     }
 
     identifier = identifier_map[dataset_name]
@@ -269,16 +300,15 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
     print(f'Loading {dataset_name} ({fullname})')
     if identifier == 'acute':
         df = pd.read_csv(data_path, header=None, encoding='utf-16', sep='\t')
+
+        df[0] = df[0].apply(lambda x: float(x.replace(',', '.'))).astype(float, copy=False)
+        [df_replace(df, col) for col in range(1, 6)]
+        X = df.loc[:, 0:5].values
         if dataset_name == 'acute.a':
             y = binarize(df[6], pos_class='yes')
         elif dataset_name == 'acute.b':
             y = binarize(df[7], pos_class='yes')
 
-        mintemp, maxtemp = 35, 42
-        df[0] = df[0].apply(lambda x:(float(x.replace(',','.'))-mintemp)/(maxtemp-mintemp)).astype(float, copy=False)
-        [df_replace(df, col) for col in range(1, 6)]
-        X = df.loc[:, 0:5].values
-
     if identifier == 'balance-scale':
         df = pd.read_csv(data_path, header=None, sep=',')
         if dataset_name == 'balance.1':
@@ -289,14 +319,20 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
             y = binarize(df[0], pos_class='R')
         X = df.loc[:, 1:].astype(float).values
 
-    if identifier == 'breast-cancer-wisconsin':
+    if identifier == 'breast-cancer-wisconsin' and dataset_name=='breast-cancer':
         df = pd.read_csv(data_path, header=None, sep=',')
         Xy = df.loc[:, 1:10]
         Xy[Xy=='?']=np.nan
         Xy = Xy.dropna(axis=0)
         X = Xy.loc[:, 1:9]
         X = X.astype(float).values
-        y = binarize(Xy[10], pos_class=4)
+        y = binarize(Xy[10], pos_class=2)
+
+    if identifier == 'breast-cancer-wisconsin' and dataset_name=='wdbc':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.loc[:, 2:32].astype(float).values
+        y = df[1].values
+        y = binarize(y, pos_class='M')
 
     if identifier == 'cmc':
         df = pd.read_csv(data_path, header=None, sep=',')
@@ -356,8 +392,8 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
 
     if identifier == 'mammographic-masses':
         df = pd.read_csv(data_path, header=None, sep=',')
-        Xy[df == '?'] = np.nan
-        Xy = Xy.dropna(axis=0)
+        df[df == '?'] = np.nan
+        Xy = df.dropna(axis=0)
         X = Xy.iloc[:, 0:5]
         X = X.astype(float).values
         y = binarize(Xy.iloc[:,5], pos_class=1)
@@ -395,9 +431,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
 
     if identifier == 'spect':
         dfs = []
-        for file in  filename:
+        for file in filename:
             data_path = join(data_dir, file)
-            download_file_if_not_exists(f'{URL}/{filename}', data_path)
+            download_file_if_not_exists(f'{URL}/{file}', data_path)
             dfs.append(pd.read_csv(data_path, header=None, sep=','))
         df = pd.concat(dfs)
         X = df.iloc[:, 1:45].astype(float).values
@@ -416,9 +452,34 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
         y = df.iloc[:, 4].values
         y = binarize(y, pos_class=1)
 
+    if identifier == 'wine':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.iloc[:, 1:14].astype(float).values
+        y = df[0].values
+        if dataset_name == 'wine.1':
+            y = binarize(y, pos_class=1)
+        elif dataset_name == 'wine.2':
+            y = binarize(y, pos_class=2)
+        elif dataset_name == 'wine.3':
+            y = binarize(y, pos_class=3)
+
+    if identifier == 'wine-quality':
+        filename = filename[0] if dataset_name=='wine-q-red' else filename[1]
+        data_path = join(data_dir, filename)
+        download_file_if_not_exists(f'{URL}/{filename}', data_path)
+        df = pd.read_csv(data_path, sep=';')
+        X = df.iloc[:, 0:11].astype(float).values
+        y = df.iloc[:, 11].values > 5
+
+    if identifier == 'yeast':
+        df = pd.read_csv(data_path, header=None, sep=r'\s+')  # whitespace-separated; delim_whitespace is deprecated
+        X = df.iloc[:, 1:9].astype(float).values
+        y = df.iloc[:, 9].values
+        y = binarize(y, pos_class='NUC')
+
     data = LabelledCollection(X, y)
     data.stats()
-    return Dataset(*data.split_stratified(1-test_split, random_state=0))
+    return data
 
 
 def df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float):
diff --git a/quapy/data/reader.py b/quapy/data/reader.py
index f7e45f4..743b99e 100644
--- a/quapy/data/reader.py
+++ b/quapy/data/reader.py
@@ -93,4 +93,5 @@ def binarize(y, pos_class):
     y = np.asarray(y)
     ybin = np.zeros(y.shape, dtype=np.int)
     ybin[y == pos_class] = 1
-    return ybin
\ No newline at end of file
+    return ybin
+
diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index 02e1c1c..5f5205c 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -9,7 +9,7 @@ from quapy.data import LabelledCollection
 from quapy.method.base import BaseQuantifier
 from quapy.util import temp_seed
 import quapy.functional as F
-
+import pandas as pd
 
 def artificial_sampling_prediction(
         model: BaseQuantifier,
@@ -62,9 +62,6 @@ def artificial_sampling_prediction(
 
     pbar = tqdm(indexes, desc='[artificial sampling protocol] predicting') if verbose else indexes
     results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
-    # results = Parallel(n_jobs=n_jobs)(
-    #     delayed(_predict_prevalences)(index) for index in pbar
-    # )
 
     true_prevalences, estim_prevalences = zip(*results)
     true_prevalences = np.asarray(true_prevalences)
@@ -73,13 +70,65 @@ def artificial_sampling_prediction(
     return true_prevalences, estim_prevalences
 
 
+def artificial_sampling_report(
+        model: BaseQuantifier,
+        test: LabelledCollection,
+        sample_size,
+        n_prevpoints=210,
+        n_repetitions=1,
+        n_jobs=1,
+        random_seed=42,
+        error_metrics:Iterable[Union[str,Callable]]='mae',
+        verbose=True):
+    """Return a DataFrame with one row per (true, estimated) prevalence pair produced by
+    artificial_sampling_prediction, plus one column per error metric evaluated on that pair.
+    `error_metrics` is a metric name, or an iterable mixing names and callables."""
+
+    # materialized once: it is traversed twice below, which would exhaust a one-shot iterable
+    error_metrics = [error_metrics] if isinstance(error_metrics, str) else list(error_metrics)
+    error_names = [e if isinstance(e, str) else e.__name__ for e in error_metrics]
+    error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics]
+    assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions'
+
+    true_prevs, estim_prevs = artificial_sampling_prediction(
+        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
+    )
+    rows = []  # DataFrame.append was removed in pandas 2.0; build the frame once from all rows
+    for true_prev, estim_prev in zip(true_prevs, estim_prevs):
+        row = {'true-prev': true_prev, 'estim-prev': estim_prev}
+        row.update((name, func(true_prev, estim_prev)) for name, func in zip(error_names, error_funcs))
+        rows.append(row)
+
+    return pd.DataFrame(rows, columns=['true-prev', 'estim-prev']+error_names)
+
+
+def artificial_sampling_eval(
+        model: BaseQuantifier,
+        test: LabelledCollection,
+        sample_size,
+        n_prevpoints=210,
+        n_repetitions=1,
+        n_jobs=1,
+        random_seed=42,
+        error_metric:Union[str,Callable]='mae',
+        verbose=True):
+    """Apply `error_metric` (a name or a callable) to the outputs of artificial_sampling_prediction on `test`."""
+    if isinstance(error_metric, str):
+        error_metric = qp.error.from_name(error_metric)
+
+    assert hasattr(error_metric, '__call__'), 'invalid error function'
+
+    true_prevs, estim_prevs = artificial_sampling_prediction(
+        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
+    )
+    # the metric is called once, on all true and all estimated prevalences together
+    return error_metric(true_prevs, estim_prevs)
+
+
 def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):
     if isinstance(err, str):
         err = qp.error.from_name(err)
     scores = qp.util.parallel(_delayed_eval, ((model, Ti, err) for Ti in test_samples), n_jobs=n_jobs)
-    # scores = Parallel(n_jobs=n_jobs)(
-    #     delayed(_delayed_eval)(model, Ti, err) for Ti in test_samples
-    # )
     return np.mean(scores)
 
 
diff --git a/quapy/method/meta.py b/quapy/method/meta.py
index 8849394..5088e39 100644
--- a/quapy/method/meta.py
+++ b/quapy/method/meta.py
@@ -38,7 +38,7 @@ class Ensemble(BaseQuantifier):
                  quantifier: BaseQuantifier,
                  size=50,
                  red_size=25,
-                 min_pos=1,
+                 min_pos=5,
                  policy='ave',
                  max_sample_size=None,
                  val_split=None,
@@ -88,15 +88,8 @@ class Ensemble(BaseQuantifier):
         )
         self.ensemble = qp.util.parallel(
             _delayed_new_instance,
-            tqdm(args, desc='fitting ensamble', total=self.size),
+            tqdm(args, desc='fitting ensamble', total=self.size) if self.verbose else args,
             n_jobs=self.n_jobs)
-        # self.ensemble = Parallel(n_jobs=self.n_jobs)(
-        #     delayed(_delayed_new_instance)(
-        #         self.base_quantifier, data, val_split, prev, posteriors, keep_samples=is_static_policy,
-        #         verbose=self.verbose, sample_size=sample_size
-        #     ) for prev in tqdm(prevs, desc='fitting ensamble')
-        # )
-
 
         # static selection policy (the name of a quantification-oriented error function to minimize)
         if self.policy in qp.error.QUANTIFICATION_ERROR_NAMES:
@@ -109,9 +102,6 @@ class Ensemble(BaseQuantifier):
         predictions = np.asarray(
             qp.util.parallel(_delayed_quantify, ((Qi, instances) for Qi in self.ensemble), n_jobs=self.n_jobs)
         )
-        # predictions = np.asarray(Parallel(n_jobs=self.n_jobs)(
-        #     delayed(_delayed_quantify)(Qi, instances) for Qi in self.ensemble
-        # ))
 
         if self.policy == 'ptr':
             predictions = self.ptr_policy(predictions)
@@ -143,7 +133,7 @@ class Ensemble(BaseQuantifier):
             scores.append(evaluate(model[0], tests[:i] + tests[i+1:], error, self.n_jobs))
         order = np.argsort(scores)
 
-        self.ensemble = select_k(self.ensemble, order, k=self.red_size)
+        self.ensemble = _select_k(self.ensemble, order, k=self.red_size)
 
     def ptr_policy(self, predictions):
         """
@@ -154,7 +144,7 @@ class Ensemble(BaseQuantifier):
         tr_prevs = [m[1] for m in self.ensemble]
         ptr_differences = [qp.error.mse(ptr_i, test_prev_estim) for ptr_i in tr_prevs]
         order = np.argsort(ptr_differences)
-        return select_k(predictions, order, k=self.red_size)
+        return _select_k(predictions, order, k=self.red_size)
 
     def ds_policy_get_posteriors(self, data: LabelledCollection):
         """
@@ -192,7 +182,7 @@ class Ensemble(BaseQuantifier):
         tr_distributions = [m[2] for m in self.ensemble]
         dist = [F.HellingerDistance(tr_dist_i, test_distribution) for tr_dist_i in tr_distributions]
         order = np.argsort(dist)
-        return select_k(predictions, order, k=self.red_size)
+        return _select_k(predictions, order, k=self.red_size)
 
     @property
     def binary(self):
@@ -201,13 +191,10 @@ class Ensemble(BaseQuantifier):
     @property
     def aggregative(self):
         return False
-        #raise NotImplementedError('aggregative functionality not yet supported for Ensemble')
 
     @property
     def probabilistic(self):
         return False
-        #raise NotImplementedError('probabilistic functionality not yet supported for Ensemble')
-        #return self.base_quantifier.probabilistic
 
 
 def get_probability_distribution(posterior_probabilities, bins=8):
@@ -217,7 +204,7 @@ def get_probability_distribution(posterior_probabilities, bins=8):
     return distribution
 
 
-def select_k(elements, order, k):
+def _select_k(elements, order, k):
     return [elements[idx] for idx in order[:k]]
 
 
diff --git a/test.py b/test.py
index b301c54..b7b75f3 100644
--- a/test.py
+++ b/test.py
@@ -8,15 +8,48 @@ import numpy as np
 
 from NewMethods.methods import AveragePoolQuantification
 from classification.methods import PCALR
-from classification.neural import NeuralClassifierTrainer, CNNnet
+from data import Dataset
 from method.meta import EPACC
 from quapy.model_selection import GridSearchQ
+from tqdm import tqdm
+import pandas as pd
+
+sample_size=100
+qp.environ['SAMPLE_SIZE'] = sample_size
+
+np.random.seed(0)
+
+nfolds=5
+nrepeats=1
+
+df = pd.DataFrame(columns=['dataset', 'method', 'mse'])
+for datasetname in qp.datasets.UCI_DATASETS[2:]:
+    collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False)
+    scores = []
+    pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats)
+    for data in pbar:
+        pbar.set_description(f'{data.name}')
+        # learner = GridSearchCV(LogisticRegression(class_weight='balanced'), param_grid={'C': np.logspace(-3,3,7)}, n_jobs=-1)
+        learner = LogisticRegression(class_weight='balanced')
+        # model = qp.method.aggregative.CC(learner)
+        model = qp.method.meta.EHDy(learner, size=30, red_size=15, verbose=False)
+        model.fit(data.training)
+        err = qp.evaluation.artificial_sampling_eval(model, data.test, sample_size, n_prevpoints=101, n_jobs=-1,
+                                                     error_metric='mse', verbose=False)
+        scores.append(err)
+
+    score = np.mean(scores)
+    df.loc[len(df)] = [  # DataFrame.append was removed in pandas 2.0; values follow the column order above
+        datasetname,
+        model.__class__.__name__,
+        score
+    ]
+    print(df)
 
-dataset = qp.datasets.fetch_UCIDataset('transfusion', verbose=True)
 sys.exit(0)
 
 
-qp.environ['SAMPLE_SIZE'] = 500
+
 #param_grid = {'C': np.logspace(-3,3,7), 'class_weight': ['balanced', None]}
 param_grid = {'C': np.logspace(0,3,4), 'class_weight': ['balanced']}
 max_evaluations = 500