diff --git a/TODO.txt b/TODO.txt
index cee90fa..7837c70 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,11 +1,8 @@
 Documentation with sphinx
-Add evaluation - artificial sampling
 Add quantification_report (akin to classification_report from sklearn)
 Add optimization - artificial sampling
-Add prediction - artificial sampling
-Add readers for typical datasets used in Quantification
 Add NAE, NRAE
 Add "measures for evaluating ordinal"?
 Document methods with paper references
-The parallel training in svmperf seems not to work
+The parallel training in svmperf seems not to work (not sure...)
 
diff --git a/quapy/data/__init__.py b/quapy/data/__init__.py
index e44efa4..9c119ab 100644
--- a/quapy/data/__init__.py
+++ b/quapy/data/__init__.py
@@ -1,5 +1,6 @@
 from .base import *
 from .reader import *
 from . import preprocessing
+from . import datasets
 
 
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
new file mode 100644
index 0000000..2c25de9
--- /dev/null
+++ b/quapy/data/datasets.py
@@ -0,0 +1,83 @@
+import zipfile
+from utils.util import download_file_if_not_exists, download_file, get_quapy_home
+import os
+from os.path import join
+from data.base import Dataset, LabelledCollection
+from data.reader import from_text, from_sparse
+from data.preprocessing import text2tfidf, reduce_columns
+
+
+REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
+TWITTER_SENTIMENT_DATASETS = ['gasp', 'hcr', 'omd', 'sanders', 'semeval13', 'semeval14', 'semeval15', 'semeval16',
+                              'sst', 'wa', 'wb']
+
+
+def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None):
+    """
+    Loads one of the sentiment reviews datasets (textual format), downloading its files if not already present.
+    :param dataset_name: the name of the dataset; must be one of REVIEWS_SENTIMENT_DATASETS
+    :param tfidf: if True, text2tfidf is applied in place to the loaded data
+    :param min_df: if not None, reduce_columns is applied in place with this min_df
+    :param data_home: directory hosting the data; defaults to get_quapy_home()
+    :return: the Dataset loaded with the from_text reader
+    """
+    assert dataset_name in REVIEWS_SENTIMENT_DATASETS, \
+        f'Name {dataset_name} does not match any known dataset for sentiment reviews. ' \
+        f'Valid ones are {REVIEWS_SENTIMENT_DATASETS}'
+    reviews_home = join(get_quapy_home() if data_home is None else data_home, 'reviews')
+    os.makedirs(reviews_home, exist_ok=True)
+    url_pattern = f'https://zenodo.org/record/4117827/files/{dataset_name}_%s.txt'
+    train_path, test_path = (join(reviews_home, dataset_name, f'{split}.txt') for split in ('train', 'test'))
+    download_file_if_not_exists(url_pattern % 'train', train_path)
+    download_file_if_not_exists(url_pattern % 'test', test_path)
+    data = Dataset.load(train_path, test_path, from_text)
+    if tfidf:
+        text2tfidf(data, inplace=True)
+    if min_df is not None:
+        reduce_columns(data, min_df=min_df, inplace=True)
+    return data
+
+
+def fetch_twitter(dataset_name, model_selection=False, min_df=None, data_home=None):
+    """
+    Loads one of the twitter sentiment datasets (sparse format), downloading and unzipping the archive on first use.
+    :param dataset_name: the name of the dataset; must be one of TWITTER_SENTIMENT_DATASETS
+    :param model_selection: if True, loads the 'train' / 'dev' files; otherwise the 'train+dev' / test files
+    :param min_df: if not None, reduce_columns is applied in place with this min_df
+    :param data_home: directory hosting the data; defaults to get_quapy_home()
+    :return: the Dataset loaded with the from_sparse reader
+    """
+    assert dataset_name in TWITTER_SENTIMENT_DATASETS, \
+        f'Name {dataset_name} does not match any known dataset for sentiment twitter. ' \
+        f'Valid ones are {TWITTER_SENTIMENT_DATASETS}'
+    if data_home is None:
+        data_home = get_quapy_home()
+    URL = 'https://zenodo.org/record/4255764/files/tweet_sentiment_quantification_snam.zip'
+    unzipped_path = join(data_home, 'tweet_sentiment_quantification_snam')
+    if not os.path.exists(unzipped_path):
+        os.makedirs(data_home, exist_ok=True)  # the download would fail if data_home does not exist yet
+        downloaded_path = join(data_home, 'tweet_sentiment_quantification_snam.zip')
+        download_file(URL, downloaded_path)
+        with zipfile.ZipFile(downloaded_path) as file:
+            file.extractall(data_home)
+        os.remove(downloaded_path)
+    if dataset_name in {'semeval13', 'semeval14', 'semeval15'}:
+        trainset_name = 'semeval'
+        testset_name  = 'semeval' if model_selection else dataset_name
+        print(f"the training and development sets for datasets 'semeval13', 'semeval14', 'semeval15' are common "
+              f"(called 'semeval'); returning trainin-set='{trainset_name}' and test-set={testset_name}")
+    else:
+        trainset_name = testset_name = dataset_name
+    if model_selection:
+        train_split, test_split = 'train', 'dev'
+    else:  # the held-out split of semeval16 is named 'dev-test' instead of 'test'
+        train_split, test_split = 'train+dev', ('dev-test' if dataset_name == 'semeval16' else 'test')
+    train = join(unzipped_path, 'train', f'{trainset_name}.{train_split}.feature.txt')
+    test = join(unzipped_path, 'test', f'{testset_name}.{test_split}.feature.txt')
+    data = Dataset.load(train, test, from_sparse)
+    if min_df is not None:
+        reduce_columns(data, min_df=min_df, inplace=True)
+    return data
+
+
+
diff --git a/quapy/data/reader.py b/quapy/data/reader.py
index e160d15..84550c6 100644
--- a/quapy/data/reader.py
+++ b/quapy/data/reader.py
@@ -54,3 +54,4 @@ def from_sparse(path):
     X = X.tocsr()
     y = np.asarray(all_labels) + 1
     return X, y
+
diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index 92c77c2..106eb11 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -1,4 +1,5 @@
 from data import LabelledCollection
+from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier
 from method.base import BaseQuantifier
 from utils.util import temp_seed
 import numpy as np
@@ -10,8 +11,8 @@ def artificial_sampling_prediction(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
-        prevalence_points=21,
-        point_repetitions=1,
+        n_prevpoints=210,
+        n_repetitions=1,
         n_jobs=-1,
         random_seed=42):
     """
@@ -19,27 +20,40 @@ def artificial_sampling_prediction(
     :param model: the model in charge of generating the class prevalence estimations
     :param test: the test set on which to perform arificial sampling
     :param sample_size: the size of the samples
-    :param prevalence_points: the number of different prevalences to sample
-    :param point_repetitions: the number of repetitions for each prevalence
+    :param n_prevpoints: the number of different prevalences to sample
+    :param n_repetitions: the number of repetitions for each prevalence
     :param n_jobs: number of jobs to be run in parallel
     :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
     any other random process.
-    :return: two ndarrays of [m,n] with m the number of samples (prevalence_points*point_repetitions) and n the
+    :return: two ndarrays of [m,n] with m the number of samples (n_prevpoints*n_repetitions) and n the
      number of classes. The first one contains the true prevalences for the samples generated while the second one
      containing the the prevalences estimations
     """
 
     with temp_seed(random_seed):
-        indexes = list(test.artificial_sampling_index_generator(sample_size, prevalence_points, point_repetitions))
+        indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
+
+    if isinstance(model, AggregativeQuantifier):
+        quantification_func = model.aggregate
+        if isinstance(model, AggregativeProbabilisticQuantifier):
+            print('\tpreclassifying with soft')
+            preclassified_instances = model.posterior_probabilities(test.instances)
+        else:
+            print('\tpreclassifying with hard')
+            preclassified_instances = model.classify(test.instances)
+        test = LabelledCollection(preclassified_instances, test.labels)
+    else:
+        quantification_func = model.quantify
+        print('not an aggregative')
 
     def _predict_prevalences(index):
         sample = test.sampling_from_index(index)
         true_prevalence = sample.prevalence()
-        estim_prevalence = model.quantify(sample.instances)
+        estim_prevalence = quantification_func(sample.instances)
         return true_prevalence, estim_prevalence
 
     results = Parallel(n_jobs=n_jobs)(
-        delayed(_predict_prevalences)(index) for index in tqdm(indexes)
+        delayed(_predict_prevalences)(index) for index in tqdm(indexes, desc='[artificial sampling protocol] predicting')
     )
 
     true_prevalences, estim_prevalences = zip(*results)
diff --git a/quapy/functional.py b/quapy/functional.py
index d235b6b..c351990 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -36,6 +36,8 @@ def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
 
 
 def prevalence_from_labels(labels, n_classes):
+    if labels.ndim != 1:
+        raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
     unique, counts = np.unique(labels, return_counts=True)
     by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
     prevalences = np.asarray([by_class[ci] for ci in range(n_classes)], dtype=np.float)
@@ -44,6 +46,8 @@ def prevalence_from_labels(labels, n_classes):
 
 
 def prevalence_from_probabilities(posteriors, binarize: bool = False):
+    if posteriors.ndim != 2:
+        raise ValueError(f'param posteriors does not seem to be a ndarray of posteior probabilities')
     if binarize:
         predictions = np.argmax(posteriors, axis=-1)
         return prevalence_from_labels(predictions, n_classes=posteriors.shape[1])
@@ -78,15 +82,15 @@ def normalize_prevalence(prevalences):
 
 
 
-def num_prevalence_combinations(nclasses:int, nprevpoints:int, nrepeats:int):
+def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1):
     """
-    Computes the number of prevalence combinations in the nclasses-dimensional simplex if nprevpoints equally distant
-    prevalences are generated and nrepeats repetitions are requested
-    :param nclasses: number of classes
-    :param nprevpoints: number of prevalence points.
-    :param nrepeats: number of repetitions for each prevalence combination
-    :return: The number of possible combinations. For example, if nclasses=2, nprevpoints=5, nrepeats=1, then the number
-    of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
+    Computes the number of prevalence combinations in the n_classes-dimensional simplex if n_prevpoints equally distant
+    prevalences are generated and n_repeats repetitions are requested
+    :param n_classes: number of classes
+    :param n_prevpoints: number of prevalence points.
+    :param n_repeats: number of repetitions for each prevalence combination
+    :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the
+    number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
     """
     __cache={}
     def __f(nc,np):
@@ -98,25 +102,25 @@ def num_prevalence_combinations(nclasses:int, nprevpoints:int, nrepeats:int):
             x = sum([__f(nc-1, np-i) for i in range(np)])
             __cache[(nc,np)] = x
             return x
-    return __f(nclasses, nprevpoints) * nrepeats
+    return __f(n_classes, n_prevpoints) * n_repeats
 
 
-def get_nprevpoints_approximation(nclasses, nrepeats, combinations_budget):
+def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repeats:int=1):
     """
-    Searches for the largest number of (equidistant) prevalence points to define for each of the nclasses classe so that
-    the number of valid prevalences generated as combinations of prevalence points (points in a nclasses-dimensional
+    Searches for the largest number of (equidistant) prevalence points to define for each of the n_classes classes so that
+    the number of valid prevalences generated as combinations of prevalence points (points in a n_classes-dimensional
     simplex) do not exceed combinations_budget.
-    :param nclasses: number of classes
-    :param nrepeats: number of repetitions for each prevalence combination
+    :param n_classes: number of classes
+    :param n_repeats: number of repetitions for each prevalence combination
     :param combinations_budget: maximum number of combinatios allowed
     :return: the largest number of prevalence points that generate less than combinations_budget valid prevalences
     """
-    assert nclasses>0 and nrepeats>0 and combinations_budget>0, 'parameters must be positive integers'
-    nprevpoints = 1
+    assert n_classes > 0 and n_repeats > 0 and combinations_budget > 0, 'parameters must be positive integers'
+    n_prevpoints = 1
     while True:
-        combinations = num_prevalence_combinations(nclasses, nprevpoints, nrepeats)
+        combinations = num_prevalence_combinations(n_prevpoints, n_classes, n_repeats)
         if combinations > combinations_budget:
-            return nprevpoints-1
+            return n_prevpoints-1
         else:
-            nprevpoints+=1
+            n_prevpoints += 1
 
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
index 88acd16..b5bbd72 100644
--- a/quapy/method/__init__.py
+++ b/quapy/method/__init__.py
@@ -8,7 +8,7 @@ AGGREGATIVE_METHODS = {
     agg.AdjustedClassifyAndCount,
     agg.ProbabilisticClassifyAndCount,
     agg.ProbabilisticAdjustedClassifyAndCount,
-    agg.ExplicitLossMinimisation,
+    agg.ExplicitLossMinimisationBinary,
     agg.ExpectationMaximizationQuantifier,
     agg.HellingerDistanceY
 }
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index c2857f7..f2d2756 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -34,6 +34,13 @@ class AggregativeQuantifier(BaseQuantifier):
     def classify(self, instances):
         return self.learner.predict(instances)
 
+    def quantify(self, instances, *args):
+        classif_predictions = self.classify(instances)
+        return self.aggregate(classif_predictions, *args)
+
+    @abstractmethod
+    def aggregate(self, classif_predictions:np.ndarray, *args): ...
+
     def get_params(self, deep=True):
         return self.learner.get_params()
 
@@ -53,13 +60,17 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
     """
     Abstract class for quantification methods that base their estimations on the aggregation of posterior probabilities
     as returned by a probabilistic classifier. Aggregative Probabilistic Quantifiers thus extend Aggregative
-    Quantifiersimplement by implementing a _soft_classify_ method returning values in [0,1] -- the posterior
+    Quantifiers by implementing a _posterior_probabilities_ method returning values in [0,1] -- the posterior
     probabilities.
     """
 
-    def soft_classify(self, data):
+    def posterior_probabilities(self, data):
         return self.learner.predict_proba(data)
 
+    def quantify(self, instances, *args):
+        classif_posteriors = self.posterior_probabilities(instances)
+        return self.aggregate(classif_posteriors, *args)
+
     def set_params(self, **parameters):
         if isinstance(self.learner, CalibratedClassifierCV):
             parameters={'base_estimator__'+k:v for k,v in parameters.items()}
@@ -128,9 +139,8 @@ class ClassifyAndCount(AggregativeQuantifier):
         self.learner, _ = training_helper(self.learner, data, fit_learner)
         return self
 
-    def quantify(self, instances, *args):
-        classification = self.classify(instances)  # classify
-        return F.prevalence_from_labels(classification, self.n_classes)  # & count
+    def aggregate(self, classif_predictions, *args):
+        return F.prevalence_from_labels(classif_predictions, self.n_classes)
 
 
 class AdjustedClassifyAndCount(AggregativeQuantifier):
@@ -141,17 +151,24 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
     def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
         self.learner, validation = training_helper(self.learner, data, fit_learner, train_val_split=train_val_split)
         self.cc = ClassifyAndCount(self.learner)
-        y_ = self.cc.classify(validation.instances)
+        y_ = self.classify(validation.instances)
         y  = validation.labels
         # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
         # document that belongs to yj ends up being classified as belonging to yi
         self.Pte_cond_estim_ = confusion_matrix(y,y_).T / validation.counts()
         return self
 
-    def quantify(self, instances, *args):
-        prevs_estim = self.cc.quantify(instances)
-        # solve for the linear system Ax = B with A=Pte_cond_estim and B = prevs_estim
-        A = self.Pte_cond_estim_
+    def classify(self, data):
+        return self.cc.classify(data)
+
+    def aggregate(self, classif_predictions, *args):
+        prevs_estim = self.cc.aggregate(classif_predictions)
+        return AdjustedClassifyAndCount.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
+
+    @classmethod
+    def solve_adjustment(cls, PteCondEstim, prevs_estim):
+        # solve for the linear system Ax = B with A=PteCondEstim and B = prevs_estim
+        A = PteCondEstim
         B = prevs_estim
         try:
             adjusted_prevs = np.linalg.solve(A, B)
@@ -161,9 +178,6 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
             adjusted_prevs = prevs_estim  # no way to adjust them!
         return adjusted_prevs
 
-    def classify(self, data):
-        return self.cc.classify(data)
-
 
 class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
     def __init__(self, learner):
@@ -173,13 +187,11 @@ class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
         self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
         return self
 
-    def quantify(self, instances, *args):
-        posteriors = self.soft_classify(instances)  # classify
-        prevalences = F.prevalence_from_probabilities(posteriors, binarize=False)  # & count
-        return prevalences
+    def aggregate(self, classif_posteriors, *args):
+        return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
 
 
-class ProbabilisticAdjustedClassifyAndCount(AggregativeQuantifier):
+class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
 
     def __init__(self, learner):
         self.learner = learner
@@ -189,28 +201,23 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeQuantifier):
             self.learner, data, fit_learner, ensure_probabilistic=True, train_val_split=train_val_split
         )
         self.pcc = ProbabilisticClassifyAndCount(self.learner)
-        y_ = self.pcc.classify(validation.instances)
+        y_ = self.classify(validation.instances)
         y = validation.labels
         # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
         # document that belongs to yj ends up being classified as belonging to yi
         self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
         return self
 
-    def quantify(self, instances, *args):
-        prevs_estim = self.pcc.quantify(instances)
-        A = self.Pte_cond_estim_
-        B = prevs_estim
-        try:
-            adjusted_prevs = np.linalg.solve(A, B)
-            adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
-            adjusted_prevs /= adjusted_prevs.sum()
-        except np.linalg.LinAlgError:
-            adjusted_prevs = prevs_estim  # no way to adjust them!
-        return adjusted_prevs
+    def aggregate(self, classif_posteriors, *args):
+        prevs_estim = self.pcc.aggregate(classif_posteriors)
+        return AdjustedClassifyAndCount.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
 
     def classify(self, data):
         return self.pcc.classify(data)
 
+    def soft_classify(self, data):
+        return self.pcc.posterior_probabilities(data)
+
 
 class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
 
@@ -226,10 +233,8 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
         self.train_prevalence = F.prevalence_from_labels(data.labels, self.n_classes)
         return self
 
-    def quantify(self, X, epsilon=EPSILON):
-        tr_prev=self.train_prevalence
-        posteriors = self.soft_classify(X)
-        return self.EM(tr_prev, posteriors, self.verbose, epsilon)
+    def aggregate(self, classif_posteriors, epsilon=EPSILON):
+        return self.EM(self.train_prevalence, classif_posteriors, self.verbose, epsilon)
 
     @classmethod
     def EM(cls, tr_prev, posterior_probabilities, verbose=False, epsilon=EPSILON):
@@ -277,17 +282,17 @@ class HellingerDistanceY(AggregativeProbabilisticQuantifier):
                             f'Use the class OneVsAll to enable {self.__class__.__name__} work on single-label data.'
         self.learner, validation = training_helper(
             self.learner, data, fit_learner, ensure_probabilistic=True, train_val_split=train_val_split)
-        Px = self.soft_classify(validation.instances)
+        Px = self.posterior_probabilities(validation.instances)
         self.Pxy1 = Px[validation.labels == 1]
         self.Pxy0 = Px[validation.labels == 0]
         return self
 
-    def quantify(self, instances, *args):
+    def aggregate(self, classif_posteriors, *args):
         # "In this work, the number of bins b used in HDx and HDy was chosen from 10 to 110 in steps of 10,
         # and the final estimated a priori probability was taken as the median of these 11 estimates."
         # (González-Castro, et al., 2013).
 
-        Px = self.soft_classify(instances)
+        Px = classif_posteriors
 
         prev_estimations = []
         for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]
@@ -318,71 +323,87 @@ class OneVsAll(AggregativeQuantifier):
     quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1.
     """
 
-    def __init__(self, binary_method, n_jobs=-1):
-        self.binary_method = binary_method
+    def __init__(self, binary_quantifier, n_jobs=-1):
+        self.binary_quantifier = binary_quantifier
         self.n_jobs = n_jobs
 
     def fit(self, data: LabelledCollection, **kwargs):
-        assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
-        assert isinstance(self.binary_method, BaseQuantifier), f'{self.binary_method} does not seem to be a Quantifier'
-        self.class_method = {c: deepcopy(self.binary_method) for c in data.classes_}
-        Parallel(n_jobs=self.n_jobs, backend='threading')(
-            delayed(self._delayed_binary_fit)(c, self.class_method, data, **kwargs) for c in data.classes_
-        )
+        assert not data.binary, \
+            f'{self.__class__.__name__} expect non-binary data'
+        assert isinstance(self.binary_quantifier, BaseQuantifier), \
+            f'{self.binary_quantifier} does not seem to be a Quantifier'
+        self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
+        self.__parallel(self._delayed_binary_fit, data, **kwargs)
         return self
 
+    def classify(self, instances):
+        classif_predictions_bin = self.__parallel(self._delayed_binary_classification, instances)
+        return classif_predictions_bin.T
+
+    def aggregate(self, classif_predictions_bin, *args):
+        """Aggregates binary predictions (documents x classes) into normalized prevalences; all-0 or all-1 is valid."""
+        assert set(np.unique(classif_predictions_bin)).issubset({0, 1}), \
+            'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \
+            'predictions for each document (row) and class (columns)'
+        return F.normalize_prevalence(self.__parallel(self._delayed_binary_aggregate, classif_predictions_bin))
+
     def quantify(self, X, *args):
-        prevalences = np.asarray(
+        prevalences = self.__parallel(self._delayed_binary_quantify, X)
+        return F.normalize_prevalence(prevalences)
+
+    def __parallel(self, func, *args, **kwargs):
+        return np.asarray(
             Parallel(n_jobs=self.n_jobs, backend='threading')(
-                delayed(self._delayed_binary_predict)(c, self.class_method, X) for c in self.classes
+                delayed(func)(c, *args, **kwargs) for c in self.classes
             )
         )
-<<<<<<< HEAD
-=======
-        print('one vs all: ', prevalences)
->>>>>>> 2361186a01c53e744f4291e2e2299700216ff139
-        return F.normalize_prevalence(prevalences)
 
     @property
     def classes(self):
-        return sorted(self.class_method.keys())
+        return sorted(self.dict_binary_quantifiers.keys())
 
     def set_params(self, **parameters):
-        self.binary_method.set_params(**parameters)
+        self.binary_quantifier.set_params(**parameters)
 
     def get_params(self, deep=True):
-        return self.binary_method.get_params()
+        return self.binary_quantifier.get_params()
 
-    def _delayed_binary_predict(self, c, learners, X):
-        return learners[c].classify(X).mean()  # the mean is the estimation for the positive class prevalence
+    def _delayed_binary_classification(self, c, X):
+        return self.dict_binary_quantifiers[c].classify(X)
 
-    def _delayed_binary_fit(self, c, learners, data, **kwargs):
+    def _delayed_binary_quantify(self, c, X):
+        return self.dict_binary_quantifiers[c].quantify(X)[1]  # the estimation for the positive class prevalence
+
+    def _delayed_binary_aggregate(self, c, classif_predictions):
+        return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:,c])[1]  # the estimation for the positive class prevalence
+
+    def _delayed_binary_fit(self, c, data, **kwargs):
         bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
-        learners[c].fit(bindata, **kwargs)
+        self.dict_binary_quantifiers[c].fit(bindata, **kwargs)
 
 
-class ExplicitLossMinimisation(AggregativeQuantifier):
-    """
-    A variant of Explicit Loss Minimisation based on SVMperf that works also on single-label data. It uses one binary
-    quantifier for each class and then l1-normalizes the class predictions so that they sum up to one.
-    This variant was used in Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
-    Social Network Analysis and Mining6(19), 1–22 (2016)
-    """
-
-    def __init__(self, svmperf_base, loss, **kwargs):
-        self.svmperf_base = svmperf_base
-        self.loss = loss
-        self.kwargs = kwargs
-
-    def fit(self, data: LabelledCollection, fit_learner=True, *args):
-        assert fit_learner, 'the method requires that fit_learner=True'
-        self.learner = ExplicitLossMinimisationBinary(self.svmperf_base, self.loss, **self.kwargs)
-        if not data.binary:
-            self.learner = OneVsAll(self.learner, n_jobs=-1)
-        return self.learner.fit(data, *args)
-
-    def quantify(self, instances, *args):
-        return self.learner.quantify(instances, *args)
+# class ExplicitLossMinimisation(AggregativeQuantifier):
+#     """
+#     A variant of Explicit Loss Minimisation based on SVMperf that works also on single-label data. It uses one binary
+#     quantifier for each class and then l1-normalizes the class predictions so that they sum up to one.
+#     This variant was used in Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
+#     Social Network Analysis and Mining6(19), 1–22 (2016)
+#     """
+#
+#     def __init__(self, svmperf_base, loss, **kwargs):
+#         self.svmperf_base = svmperf_base
+#         self.loss = loss
+#         self.kwargs = kwargs
+#
+#     def fit(self, data: LabelledCollection, fit_learner=True, *args):
+#         assert fit_learner, 'the method requires that fit_learner=True'
+#         self.learner = ExplicitLossMinimisationBinary(self.svmperf_base, self.loss, **self.kwargs)
+#         if not data.binary:
+#             self.learner = OneVsAll(self.learner, n_jobs=-1)
+#         return self.learner.fit(data, *args)
+#
+#     def aggregate(self, instances, *args):
+#         return self.learner.aggregate(instances, *args)
 
 
 class ExplicitLossMinimisationBinary(AggregativeQuantifier):
@@ -398,38 +419,35 @@ class ExplicitLossMinimisationBinary(AggregativeQuantifier):
         self.learner = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs).fit(data.instances, data.labels)
         return self
 
-    def quantify(self, X, y=None):
-        predictions = self.learner.predict(X)
-        prev = F.prevalence_from_labels(predictions, self.learner.n_classes_)
-        print('binary: ', prev)
-        return prev
+    def aggregate(self, classif_predictions:np.ndarray, *args):
+        return F.prevalence_from_labels(classif_predictions, self.learner.n_classes_)
 
     def classify(self, X, y=None):
         return self.learner.predict(X)
 
 
 
-class SVMQ(ExplicitLossMinimisation):
+class SVMQ(ExplicitLossMinimisationBinary):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
 
 
-class SVMKLD(ExplicitLossMinimisation):
+class SVMKLD(ExplicitLossMinimisationBinary):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
 
 
-class SVMNKLD(ExplicitLossMinimisation):
+class SVMNKLD(ExplicitLossMinimisationBinary):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
 
 
-class SVMAE(ExplicitLossMinimisation):
+class SVMAE(ExplicitLossMinimisationBinary):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
 
 
-class SVMRAE(ExplicitLossMinimisation):
+class SVMRAE(ExplicitLossMinimisationBinary):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
 
@@ -438,7 +456,7 @@ CC = ClassifyAndCount
 ACC = AdjustedClassifyAndCount
 PCC = ProbabilisticClassifyAndCount
 PACC = ProbabilisticAdjustedClassifyAndCount
-ELM = ExplicitLossMinimisation
+ELM = ExplicitLossMinimisationBinary
 EMQ = ExpectationMaximizationQuantifier
 HDy = HellingerDistanceY
 
diff --git a/quapy/method/base.py b/quapy/method/base.py
index e65b45e..9561a27 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -18,3 +18,48 @@ class BaseQuantifier(metaclass=ABCMeta):
     def get_params(self, deep=True): ...
 
 
+# class OneVsAll(AggregativeQuantifier):
+#     """
+#     Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
+#     quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1.
+#     """
+#
+#     def __init__(self, binary_method, n_jobs=-1):
+#         self.binary_method = binary_method
+#         self.n_jobs = n_jobs
+#
+#     def fit(self, data: LabelledCollection, **kwargs):
+#         assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
+#         assert isinstance(self.binary_method, BaseQuantifier), f'{self.binary_method} does not seem to be a Quantifier'
+#         self.class_method = {c: deepcopy(self.binary_method) for c in data.classes_}
+#         Parallel(n_jobs=self.n_jobs, backend='threading')(
+#             delayed(self._delayed_binary_fit)(c, self.class_method, data, **kwargs) for c in data.classes_
+#         )
+#         return self
+#
+#     def quantify(self, X, *args):
+#         prevalences = np.asarray(
+#             Parallel(n_jobs=self.n_jobs, backend='threading')(
+#                 delayed(self._delayed_binary_predict)(c, self.class_method, X) for c in self.classes
+#             )
+#         )
+#         return F.normalize_prevalence(prevalences)
+#
+#     @property
+#     def classes(self):
+#         return sorted(self.class_method.keys())
+#
+#     def set_params(self, **parameters):
+#         self.binary_method.set_params(**parameters)
+#
+#     def get_params(self, deep=True):
+#         return self.binary_method.get_params()
+#
+#     def _delayed_binary_predict(self, c, learners, X):
+#         return learners[c].quantify(X)[:,1]  # the mean is the estimation for the positive class prevalence
+#
+#     def _delayed_binary_fit(self, c, learners, data, **kwargs):
+#         bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
+#         learners[c].fit(bindata, **kwargs)
+
+
diff --git a/quapy/utils/util.py b/quapy/utils/util.py
index 583cb1a..921ab1b 100644
--- a/quapy/utils/util.py
+++ b/quapy/utils/util.py
@@ -3,6 +3,10 @@ import multiprocessing
 from joblib import Parallel, delayed
 import contextlib
 import numpy as np
+import urllib
+import os
+from pathlib import Path
+
 
 
 
@@ -33,3 +37,64 @@ def temp_seed(seed):
     finally:
         np.random.set_state(state)
 
+
+def download_file(url, archive_filename):
+    """
+    Downloads the resource at `url` into the local file `archive_filename`, reporting the progress on stdout.
+
+    :param url: location of the resource to retrieve
+    :param archive_filename: path of the local file the resource is written to
+    """
+    # a plain `import urllib` does not guarantee that the `request` submodule is loaded
+    import urllib.request
+
+    def progress(blocknum, bs, size):
+        downloaded = blocknum * bs
+        if size > 0:
+            # the last block is usually only partially filled, so cap the count at the total
+            downloaded = min(downloaded, size)
+            total_sz_mb = '%.2f MB' % (size / 1e6)
+        else:
+            # urlretrieve passes size=-1 when the total size is not known
+            total_sz_mb = 'unknown size'
+        current_sz_mb = '%.2f MB' % (downloaded / 1e6)
+        print('\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb), end='')
+
+    print("Downloading %s" % url)
+    urllib.request.urlretrieve(url, filename=archive_filename, reporthook=progress)
+    print("")
+
+
+def download_file_if_not_exists(url, archive_path):
+    """
+    Downloads `url` into `archive_path` unless something already exists at that path; missing parent
+    directories are created before downloading.
+
+    :param url: location of the resource to retrieve
+    :param archive_path: path of the local file the resource is written to
+    """
+    if os.path.exists(archive_path):
+        return
+    parent_dir = os.path.dirname(archive_path)
+    if parent_dir:
+        # a bare file name has an empty dirname, and os.makedirs('') raises FileNotFoundError
+        create_if_not_exist(parent_dir)
+    download_file(url, archive_path)
+
+
+def create_if_not_exist(path):
+    """
+    Creates the directory `path` together with any missing intermediate directory; no error is raised if the
+    directory already exists.
+
+    :param path: directory to create
+    """
+    os.makedirs(path, exist_ok=True)
+
+
+def get_quapy_home():
+    """
+    Returns the path of the folder `quapy_data` located in the home directory of the current user.
+
+    :return: a str with the path (the folder itself is not created by this function)
+    """
+    return os.path.join(str(Path.home()), 'quapy_data')
\ No newline at end of file
diff --git a/test.py b/test.py
index b6cb243..85d8bb6 100644
--- a/test.py
+++ b/test.py
@@ -2,37 +2,45 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.svm import LinearSVC
 import quapy as qp
 import quapy.functional as F
+import sys
 
+#qp.datasets.fetch_reviews('hp')
+#qp.datasets.fetch_twitter('sst')
+
+#sys.exit()
 
 SAMPLE_SIZE=500
 binary = False
+svmperf_home = './svm_perf_quantification'
 
 if binary:
-    # load a textual binary dataset and create a tfidf bag of words
-    train_path = './datasets/reviews/kindle/train.txt'
-    test_path = './datasets/reviews/kindle/test.txt'
-    dataset = qp.Dataset.load(train_path, test_path, qp.reader.from_text)
-    qp.preprocessing.text2tfidf(dataset, inplace=True)
-    qp.preprocessing.reduce_columns(dataset, min_df=10, inplace=True)
+    dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
 
 else:
-    # load a sparse matrix ternary dataset
-    train_path = './datasets/twitter/train/sst.train+dev.feature.txt'
-    test_path = './datasets/twitter/test/sst.test.feature.txt'
-    dataset = qp.Dataset.load(train_path, test_path, qp.reader.from_sparse)
+    dataset = qp.datasets.fetch_twitter('semeval13', model_selection=False, min_df=10)
+    dataset.training = dataset.training.sampling(SAMPLE_SIZE, 0.2, 0.5, 0.3)
+
+print('dataset loaded')
 
 # training a quantifier
 learner = LogisticRegression()
-model = qp.method.aggregative.ClassifyAndCount(learner)
-# model = qp.method.aggregative.AdjustedClassifyAndCount(learner)
+# model = qp.method.aggregative.ClassifyAndCount(learner)
 # model = qp.method.aggregative.AdjustedClassifyAndCount(learner)
 # model = qp.method.aggregative.ProbabilisticClassifyAndCount(learner)
 # model = qp.method.aggregative.ProbabilisticAdjustedClassifyAndCount(learner)
 # model = qp.method.aggregative.ExpectationMaximizationQuantifier(learner)
+# model = qp.method.aggregative.ExplicitLossMinimisationBinary(svmperf_home, loss='q', C=100)
+model = qp.method.aggregative.SVMQ(svmperf_home, C=1)
 
+if not binary:
+    model = qp.method.aggregative.OneVsAll(model)
+
+print('fitting model')
 model.fit(dataset.training)
 
+
 # estimating class prevalences
+print('quantifying')
 prevalences_estim = model.quantify(dataset.test.instances)
 prevalences_true  = dataset.test.prevalence()
 
@@ -46,9 +54,17 @@ print(f'true prevalence {F.strprev(prevalences_true)}')
 print(f'estim prevalence {F.strprev(prevalences_estim)}')
 print(f'mae={error:.3f}')
 
-true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(model, dataset.test, SAMPLE_SIZE)
 
-qp.error.SAMPLE_SIZE=SAMPLE_SIZE
+max_evaluations = 5000
+n_prevpoints = F.get_nprevpoints_approximation(combinations_budget=max_evaluations, n_classes=dataset.n_classes)
+n_evaluations = F.num_prevalence_combinations(n_prevpoints, dataset.n_classes)
+print(f'the prevalence interval [0,1] will be split in {n_prevpoints} prevalence points for each class, so that\n'
+      f'the requested maximum number of sample evaluations ({max_evaluations}) is not exceeded.\n'
+      f'For the {dataset.n_classes} classes this dataset has, this will yield a total of {n_evaluations} evaluations.')
+
+true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(model, dataset.test, SAMPLE_SIZE, n_prevpoints)
+
+qp.error.SAMPLE_SIZE = SAMPLE_SIZE
 print(f'Evaluation according to the artificial sampling protocol ({len(true_prev)} evals)')
 for error in qp.error.QUANTIFICATION_ERROR:
     score = error(true_prev, estim_prev)