diff --git a/README.md b/README.md
index 668576f..aed5f1b 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 # QuaPy
-A Python framework for Quantification
\ No newline at end of file
+A Quantification framework written in Python.
\ No newline at end of file
diff --git a/TODO.txt b/TODO.txt
index d25ed25..02882af 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,3 +1,8 @@
 Documentation with sphinx
-The parallel training in svmperf seems not to work
-Add "prepare svmperf for quantification" script
\ No newline at end of file
+Add evaluation - artificial sampling
+Add quantification_report (akin to classification_report from sklearn)
+Add optimization - artificial sampling
+Add prediction - artificial sampling
+Add readers for typical datasets used in Quantification
+Add NAE, NRAE
+Add "measures for evaluating ordinal"?
diff --git a/quapy/__init__.py b/quapy/__init__.py
index 59e21fe..19dc14e 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -1,6 +1,5 @@
-from .dataset import *
+from .data import *
 from . import functional
 from . import method
 from . import error
-
-
+from . import evaluation
diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py
index eb788c4..ceab225 100644
--- a/quapy/classification/svmperf.py
+++ b/quapy/classification/svmperf.py
@@ -20,12 +20,9 @@ class SVMperf(BaseEstimator, ClassifierMixin):
         self.verbose = verbose
         self.loss = loss

-    def set_c(self, C):
-        self.param_C = '-c ' + str(C)
-
     def set_params(self, **parameters):
         assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
-        self.set_c(parameters['C'])
+        self.C = parameters['C']

     def fit(self, X, y):
         assert self.loss in SVMperf.valid_losses, \
@@ -33,8 +30,8 @@ class SVMperf(BaseEstimator, ClassifierMixin):

         self.svmperf_learn = join(self.svmperf_base, 'svm_perf_learn')
         self.svmperf_classify = join(self.svmperf_base, 'svm_perf_classify')
-        self.loss_cmd = '-l ' + str(self.valid_losses[self.loss])
-        self.set_c(self.C)
+        self.loss_cmd = '-w 3 -l ' + str(self.valid_losses[self.loss])
+        self.c_cmd = '-c ' + str(self.C)

         self.classes_ = sorted(np.unique(y))
         self.n_classes_ = len(self.classes_)
@@ -49,7 +46,7 @@ class SVMperf(BaseEstimator, ClassifierMixin):

         dump_svmlight_file(X, y, traindat, zero_based=False)

-        cmd = ' '.join([self.svmperf_learn, self.param_C, self.loss_cmd, traindat, self.model])
+        cmd = ' '.join([self.svmperf_learn, self.c_cmd, self.loss_cmd, traindat, self.model])
         if self.verbose:
             print('[Running]', cmd)
         p = subprocess.run(cmd.split(), stdout=PIPE, stderr=STDOUT)
@@ -60,7 +57,7 @@ class SVMperf(BaseEstimator, ClassifierMixin):

         return self

-    def predict(self, X, y=None):
+    def predict(self, X):
         confidence_scores = self.decision_function(X)
         predictions = (confidence_scores > 0) * 1
         return predictions
diff --git a/quapy/dataset/__init__.py b/quapy/data/__init__.py
similarity index 100%
rename from quapy/dataset/__init__.py
rename to quapy/data/__init__.py
diff --git a/quapy/dataset/base.py b/quapy/data/base.py
similarity index 91%
rename from quapy/dataset/base.py
rename to quapy/data/base.py
index 29a188f..ce7b6d9 100644
--- a/quapy/dataset/base.py
+++ b/quapy/data/base.py
@@ -22,12 +22,6 @@ class LabelledCollection:
     def load(cls, path:str, loader_func:callable):
         return LabelledCollection(*loader_func(path))

-    @classmethod
-    def load_dataset(cls, train_path, test_path):
-        training = cls.load(train_path)
-        test = cls.load(test_path)
-        return Dataset(training, test)
-
     def __len__(self):
         return self.instances.shape[0]

@@ -43,13 +37,13 @@ class LabelledCollection:

     @property
     def binary(self):
-        return self.n_classes==2
+        return self.n_classes == 2

     def sampling_index(self, size, *prevs, shuffle=True):
         if len(prevs) == self.n_classes-1:
             prevs = prevs + (1-sum(prevs),)
         assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
-        assert sum(prevs) == 1, f'prevalences ({prevs}) out of range (sum={sum(prevs)})'
+        assert sum(prevs) == 1, f'prevalences ({prevs}) do not sum up to 1 (sum={sum(prevs)})'

         taken = 0
         indexes_sample = []
@@ -93,6 +87,11 @@ class LabelledCollection:
         for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats):
             yield self.sampling(sample_size, *prevs)

+    def artificial_sampling_index_generator(self, sample_size, n_prevalences=101, repeats=1):
+        dimensions = self.n_classes
+        for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats):
+            yield self.sampling_index(sample_size, *prevs)
+
     def __add__(self, other):
         if issparse(self.instances) and issparse(other.documents):
             docs = vstack([self.instances, other.documents])
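The new index generator pairs with LabelledCollection.sampling_from_index (relied upon by quapy/evaluation.py below): generating indexes once and materializing samples on demand avoids copying the data for every prevalence point. A minimal sketch, assuming `test` is a LabelledCollection:

# draw 11 equidistant prevalences, one sample of 500 instances each
for index in test.artificial_sampling_index_generator(500, n_prevalences=11, repeats=1):
    sample = test.sampling_from_index(index)
    print(sample.prevalence())  # the true prevalence of the drawn sample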
diff --git a/quapy/dataset/preprocessing.py b/quapy/data/preprocessing.py
similarity index 97%
rename from quapy/dataset/preprocessing.py
rename to quapy/data/preprocessing.py
index a6259b2..b08bcab 100644
--- a/quapy/dataset/preprocessing.py
+++ b/quapy/data/preprocessing.py
@@ -1,9 +1,10 @@
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from dataset.base import Dataset
+from data.base import Dataset
 from scipy.sparse import spmatrix
 from utils.util import parallelize
 from .base import LabelledCollection
+from tqdm import tqdm


 def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):
@@ -78,8 +79,8 @@ def index(dataset: Dataset, min_df=5, inplace=False, **kwargs):
    :return: a new Dataset (if inplace=False) or a reference to the current Dataset (inplace=True) consisting of
    lists of integer values representing indices.
    """
-    __check_type(dataset.training.instances, list, str)
-    __check_type(dataset.test.instances, list, str)
+    __check_type(dataset.training.instances, np.ndarray, str)
+    __check_type(dataset.test.instances, np.ndarray, str)

     indexer = IndexTransformer(min_df=min_df, **kwargs)
     training_index = indexer.fit_transform(dataset.training.instances)
@@ -105,7 +106,6 @@ def __check_type(container, container_type=None, element_type=None):
         f'unexpected type of element (expected {container_type}, found {type(container)})'


-
 class IndexTransformer:

     def __init__(self, **kwargs):
@@ -140,7 +140,7 @@ class IndexTransformer:
         return self.fit(X).transform(X, n_jobs=n_jobs)

     def vocabulary_size(self):
-        return len(self.vocabulary_) + 1  # the reserved unk token
+        return len(self.vocabulary_)

     def add_word(self, word):
         if word in self.vocabulary_:
diff --git a/quapy/dataset/reader.py b/quapy/data/reader.py
similarity index 100%
rename from quapy/dataset/reader.py
rename to quapy/data/reader.py
diff --git a/quapy/error.py b/quapy/error.py
index ff9a6e0..f52540f 100644
--- a/quapy/error.py
+++ b/quapy/error.py
@@ -1,5 +1,8 @@
 from sklearn.metrics import f1_score
-from settings import SAMPLE_SIZE
+import numpy as np
+
+
+SAMPLE_SIZE = None


 def f1e(y_true, y_pred):
@@ -7,8 +10,7 @@ def f1e(y_true, y_pred):


 def acce(y_true, y_pred):
-    acc = (y_true == y_pred).mean()
-    return 1. - acc
+    return 1. - (y_true == y_pred).mean()


 def mae(prevs, prevs_hat):
@@ -20,11 +22,40 @@ def ae(p, p_hat):
     return abs(p_hat-p).mean(axis=-1)


-def mrae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
+def mse(prevs, prevs_hat):
+    return se(prevs, prevs_hat).mean()
+
+
+def se(p, p_hat):
+    return ((p_hat-p)**2).mean(axis=-1)
+
+
+def mkld(prevs, prevs_hat):
+    return kld(prevs, prevs_hat).mean()
+
+
+def kld(p, p_hat, eps=None):
+    eps = __check_eps(eps)
+    sp = p+eps
+    sp_hat = p_hat + eps
+    return (sp*np.log(sp/sp_hat)).sum(axis=-1)
+
+
+def mnkld(prevs, prevs_hat):
+    return nkld(prevs, prevs_hat).mean()
+
+
+def nkld(p, p_hat, eps=None):
+    ekld = np.exp(kld(p, p_hat, eps))
+    return 2. * ekld / (1 + ekld) - 1.
+
+
+def mrae(p, p_hat, eps=None):
     return rae(p, p_hat, eps).mean()


-def rae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
+def rae(p, p_hat, eps=None):
+    eps = __check_eps(eps)
     p = smooth(p, eps)
     p_hat = smooth(p_hat, eps)
     return (abs(p-p_hat)/p).mean(axis=-1)
@@ -35,8 +66,17 @@ def smooth(p, eps):
     return (p+eps)/(eps*n_classes + 1)


+def __check_eps(eps):
+    if eps is None:
+        if SAMPLE_SIZE is None:
+            raise ValueError('eps was not defined, and qp.error.SAMPLE_SIZE was not set')
+        else:
+            eps = 1. / (2. * SAMPLE_SIZE)
+    return eps
+
+
 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mrae}
+QUANTIFICATION_ERROR = {mae, mrae, mse, mkld, mnkld}

 f1_error = f1e
 acc_error = acce
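Since eps now defaults to None, the smoothed errors (rae, kld and their aggregated variants) require either an explicit eps or a previously set qp.error.SAMPLE_SIZE, as test.py at the end of this patch does. A minimal sketch of the new API:

import numpy as np
import quapy as qp

qp.error.SAMPLE_SIZE = 100           # eps defaults to 1/(2*SAMPLE_SIZE)
p_true = np.asarray([[0.2, 0.8]])    # one sample, two classes
p_hat = np.asarray([[0.3, 0.7]])

print(qp.error.mae(p_true, p_hat))   # 0.1
print(qp.error.mrae(p_true, p_hat))  # smoothed with the eps above
print(qp.error.mkld(p_true, p_hat))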
diff --git a/quapy/evaluation.py b/quapy/evaluation.py
new file mode 100644
index 0000000..92c77c2
--- /dev/null
+++ b/quapy/evaluation.py
@@ -0,0 +1,53 @@
+from data import LabelledCollection
+from method.base import BaseQuantifier
+from utils.util import temp_seed
+import numpy as np
+from joblib import Parallel, delayed
+from tqdm import tqdm
+
+
+def artificial_sampling_prediction(
+        model: BaseQuantifier,
+        test: LabelledCollection,
+        sample_size,
+        prevalence_points=21,
+        point_repetitions=1,
+        n_jobs=-1,
+        random_seed=42):
+    """
+    Performs the predictions for all samples generated according to the artificial sampling protocol.
+    :param model: the model in charge of generating the class prevalence estimations
+    :param test: the test set on which to perform artificial sampling
+    :param sample_size: the size of the samples
+    :param prevalence_points: the number of different prevalences to sample
+    :param point_repetitions: the number of repetitions for each prevalence
+    :param n_jobs: number of jobs to be run in parallel
+    :param random_seed: allows replicating the samplings. The seed is local to the method and does not affect
+    any other random process.
+    :return: two ndarrays of shape [m,n], with m the number of samples (prevalence_points*point_repetitions) and
+    n the number of classes; the first one contains the true prevalences of the generated samples, and the second
+    one the estimated prevalences
+    """
+
+    with temp_seed(random_seed):
+        indexes = list(test.artificial_sampling_index_generator(sample_size, prevalence_points, point_repetitions))
+
+    def _predict_prevalences(index):
+        sample = test.sampling_from_index(index)
+        true_prevalence = sample.prevalence()
+        estim_prevalence = model.quantify(sample.instances)
+        return true_prevalence, estim_prevalence
+
+    results = Parallel(n_jobs=n_jobs)(
+        delayed(_predict_prevalences)(index) for index in tqdm(indexes)
+    )
+
+    true_prevalences, estim_prevalences = zip(*results)
+    true_prevalences = np.asarray(true_prevalences)
+    estim_prevalences = np.asarray(estim_prevalences)
+
+    return true_prevalences, estim_prevalences
+
+
+
The first one contains the true prevalences for the samples generated while the second one + containing the the prevalences estimations + """ + + with temp_seed(random_seed): + indexes = list(test.artificial_sampling_index_generator(sample_size, prevalence_points, point_repetitions)) + + def _predict_prevalences(index): + sample = test.sampling_from_index(index) + true_prevalence = sample.prevalence() + estim_prevalence = model.quantify(sample.instances) + return true_prevalence, estim_prevalence + + results = Parallel(n_jobs=n_jobs)( + delayed(_predict_prevalences)(index) for index in tqdm(indexes) + ) + + true_prevalences, estim_prevalences = zip(*results) + true_prevalences = np.asarray(true_prevalences) + estim_prevalences = np.asarray(estim_prevalences) + + return true_prevalences, estim_prevalences + + + + diff --git a/quapy/functional.py b/quapy/functional.py index f44a85b..48952b2 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -15,6 +15,26 @@ def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, retur return prevs +def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01): + """ + Produces a uniformly separated values of prevalence. By default, produces an array 21 prevalences, with step 0.05 + and with the limits smoothed, i.e.: + [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99] + :param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21) + :param repeat: number of times each prevalence is to be repeated (defaults to 1) + :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 + :return: an array of uniformly separated prevalence values + """ + p = np.linspace(0., 1., num=n_prevalences, endpoint=True) + p[0] += smooth_limits_epsilon + p[-1] -= smooth_limits_epsilon + if p[0] > p[1]: + raise ValueError(f'the smoothing in the limits is greater than the prevalence step') + if repeat > 1: + p = np.repeat(p, repeat) + return p + + def prevalence_from_labels(labels, n_classes): unique, counts = np.unique(labels, return_counts=True) by_class = defaultdict(lambda:0, dict(zip(unique, counts))) @@ -47,3 +67,54 @@ def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True): return adjusted +def normalize_prevalence(prevalences): + assert prevalences.ndim==1, 'unexpected shape' + accum = prevalences.sum() + if accum > 0: + return prevalences / accum + else: + # if all classifiers are trivial rejectors + return np.ones_like(prevalences) / prevalences.size + + +def num_prevalence_combinations(nclasses:int, nprevpoints:int, nrepeats:int): + """ + Computes the number of prevalence combinations in the nclasses-dimensional simplex if nprevpoints equally distant + prevalences are generated and nrepeats repetitions are requested + :param nclasses: number of classes + :param nprevpoints: number of prevalence points. + :param nrepeats: number of repetitions for each prevalence combination + :return: The number of possible combinations. 
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
index a8e98d0..88acd16 100644
--- a/quapy/method/__init__.py
+++ b/quapy/method/__init__.py
@@ -1,5 +1,6 @@
+from . import base
 from . import aggregative as agg
-from . import non_aggregative as nagg
+from . import non_aggregative


 AGGREGATIVE_METHODS = {
@@ -9,22 +10,14 @@ AGGREGATIVE_METHODS = {
     agg.ProbabilisticAdjustedClassifyAndCount,
     agg.ExplicitLossMinimisation,
     agg.ExpectationMaximizationQuantifier,
+    agg.HellingerDistanceY
 }


 NON_AGGREGATIVE_METHODS = {
-    nagg.MaximumLikelihoodPrevalenceEstimation
+    non_aggregative.MaximumLikelihoodPrevalenceEstimation
 }


 QUANTIFICATION_METHODS = AGGREGATIVE_METHODS | NON_AGGREGATIVE_METHODS


-# common alisases
-CC = agg.ClassifyAndCount
-ACC = agg.AdjustedClassifyAndCount
-PCC = agg.ProbabilisticClassifyAndCount
-PACC = agg.ProbabilisticAdjustedClassifyAndCount
-ELM = agg.ExplicitLossMinimisation
-EMQ = agg.ExpectationMaximizationQuantifier
-MLPE = nagg.MaximumLikelihoodPrevalenceEstimation
-
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index ee16baf..1423fe6 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -1,12 +1,14 @@
 import numpy as np
-from .base import *
-from ..error import mae
+from copy import deepcopy
 import functional as F
-from ..classification.svmperf import SVMperf
-from ..dataset import LabelledCollection
+import error
+from method.base import BaseQuantifier
+from quapy.classification.svmperf import SVMperf
+from quapy.data import LabelledCollection
 from sklearn.metrics import confusion_matrix
 from sklearn.calibration import CalibratedClassifierCV
 from joblib import Parallel, delayed
+from abc import abstractmethod


 # Abstract classes
@@ -21,8 +23,16 @@ class AggregativeQuantifier(BaseQuantifier):
     @abstractmethod
     def fit(self, data: LabelledCollection, fit_learner=True, *args): ...

-    def classify(self, documents):
-        return self.learner.predict(documents)
+    @property
+    def learner(self):
+        return self.learner_
+
+    @learner.setter
+    def learner(self, value):
+        self.learner_ = value
+
+    def classify(self, instances):
+        return self.learner.predict(instances)

     def get_params(self, deep=True):
         return self.learner.get_params()
@@ -67,12 +77,12 @@ def training_helper(learner,
     Training procedure common to all Aggregative Quantifiers.
    :param learner: the learner to be fit
    :param data: the data on which to fit the learner. If requested, the data will be split before fitting the learner.
-    :param fit_learner: whether or not to fit the learner
+    :param fit_learner: whether or not to fit the learner (if False, then bypasses any action)
    :param ensure_probabilistic: if True, guarantees that the resulting classifier implements predict_proba (if the
    learner is not probabilistic, then a CalibratedCV instance of it is trained)
-    :param train_val_split: if specified, indicates the proportion of training documents on which to fit the learner
+    :param train_val_split: if specified, indicates the proportion of training instances on which to fit the learner
    :return: the learner trained on the training set, and the unused data (a _LabelledCollection_ if train_val_split>0
-    or None otherwise)
+    or None otherwise) to be used as a validation set for any subsequent parameter fitting
     """
     if fit_learner:
         if ensure_probabilistic:
@@ -118,8 +128,8 @@ class ClassifyAndCount(AggregativeQuantifier):
         self.learner, _ = training_helper(self.learner, data, fit_learner)
         return self

-    def quantify(self, documents, *args):
-        classification = self.classify(documents)                       # classify
+    def quantify(self, instances, *args):
+        classification = self.classify(instances)                       # classify
         return F.prevalence_from_labels(classification, self.n_classes) # & count


@@ -138,8 +148,8 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
         self.Pte_cond_estim_ = confusion_matrix(y,y_).T / validation.counts()
         return self

-    def quantify(self, documents, *args):
-        prevs_estim = self.cc.quantify(documents)
+    def quantify(self, instances, *args):
+        prevs_estim = self.cc.quantify(instances)
         # solve for the linear system Ax = B with A=Pte_cond_estim and B = prevs_estim
         A = self.Pte_cond_estim_
         B = prevs_estim
@@ -163,8 +173,8 @@ class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
         self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
         return self

-    def quantify(self, documents, *args):
-        posteriors = self.soft_classify(documents)                                 # classify
+    def quantify(self, instances, *args):
+        posteriors = self.soft_classify(instances)                                 # classify
         prevalences = F.prevalence_from_probabilities(posteriors, binarize=False)  # & count
         return prevalences

@@ -186,8 +196,8 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeQuantifier):
         self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
         return self

-    def quantify(self, documents, *args):
-        prevs_estim = self.pcc.quantify(documents)
+    def quantify(self, instances, *args):
+        prevs_estim = self.pcc.quantify(instances)
         A = self.Pte_cond_estim_
         B = prevs_estim
         try:
@@ -237,7 +247,7 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
             # M-step: qs_pos is Ps+1(y=+1)
             qs = ps.mean(axis=0)

-            if qs_prev_ is not None and mae(qs, qs_prev_) < epsilon and s>10:
+            if qs_prev_ is not None and error.mae(qs, qs_prev_) < epsilon and s>10:
                 converged = True

             qs_prev_ = qs
@@ -252,79 +262,147 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):

         return qs


-# todo: from here
-def train_task(c, learners, data):
-    learners[c].fit(data.documents, data.labels == c)
+class HellingerDistanceY(AggregativeProbabilisticQuantifier):
+    """
+    Implementation of the method based on the Hellinger Distance y (HDy) proposed by
+    González-Castro, V., Alaiz-Rodríguez, R., and Alegre, E. (2013). Class distribution
+    estimation based on the Hellinger distance. Information Sciences, 218:146–164.
+    """
+
+    def __init__(self, learner):
+        self.learner = learner
+
+    def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
+        assert data.binary, f'{self.__class__.__name__} works only on problems of binary classification. ' \
+                            f'Use the class OneVsAll to enable {self.__class__.__name__} to work on single-label data.'
+        self.learner, validation = training_helper(
+            self.learner, data, fit_learner, ensure_probabilistic=True, train_val_split=train_val_split)
+        Px = self.soft_classify(validation.instances)
+        self.Pxy1 = Px[validation.labels == 1]
+        self.Pxy0 = Px[validation.labels == 0]
+        return self
+
+    def quantify(self, instances, *args):
+        # "In this work, the number of bins b used in HDx and HDy was chosen from 10 to 110 in steps of 10,
+        # and the final estimated a priori probability was taken as the median of these 11 estimates."
+        # (González-Castro, et al., 2013).
+
+        Px = self.soft_classify(instances)
+
+        prev_estimations = []
+        for bins in np.linspace(10, 110, 11, dtype=int):  # [10, 20, 30, ..., 100, 110]
+            Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
+            Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
+
+            Px_test, _ = np.histogram(Px, bins=bins, range=(0, 1), density=True)
+
+            prev_selected, min_dist = None, None
+            for prev in F.prevalence_linspace(n_prevalences=100, repeat=1, smooth_limits_epsilon=0.0):
+                Px_train = prev*Pxy1_density + (1 - prev)*Pxy0_density
+                hdy = HellingerDistanceY.HellingerDistance(Px_train, Px_test)
+                if prev_selected is None or hdy < min_dist:
+                    prev_selected, min_dist = prev, hdy
+            prev_estimations.append(prev_selected)
+
+        pos_class_prev = np.median(prev_estimations)
+        return np.asarray([1-pos_class_prev, pos_class_prev])
+
+    @classmethod
+    def HellingerDistance(cls, P, Q):
+        return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))


-def binary_quant_task(c, learners, X):
-    predictions_ci = learners[c].predict(X)
-    return predictions_ci.mean()  # since the predictions array is binary
+class OneVsAll(AggregativeQuantifier):
+    """
+    Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
+    quantifier for each class, and then l1-normalizes the outputs so that the class prevalences sum up to 1.
+    """
+
+    def __init__(self, binary_method, n_jobs=-1):
+        self.binary_method = binary_method
+        self.n_jobs = n_jobs
+
+    def fit(self, data: LabelledCollection, **kwargs):
+        assert not data.binary, f'{self.__class__.__name__} expects non-binary data'
+        assert isinstance(self.binary_method, BaseQuantifier), f'{self.binary_method} does not seem to be a Quantifier'
+        self.class_method = {c: deepcopy(self.binary_method) for c in data.classes_}
+        Parallel(n_jobs=self.n_jobs, backend='threading')(
+            delayed(self._delayed_binary_fit)(c, self.class_method, data, **kwargs) for c in data.classes_
+        )
+        return self
+
+    def quantify(self, X, *args):
+        prevalences = np.asarray(
+            Parallel(n_jobs=self.n_jobs, backend='threading')(
+                delayed(self._delayed_binary_predict)(c, self.class_method, X) for c in self.classes
+            )
+        )
+        return F.normalize_prevalence(prevalences)
+
+    @property
+    def classes(self):
+        return sorted(self.class_method.keys())
+
+    def set_params(self, **parameters):
+        self.binary_method.set_params(**parameters)
+
+    def get_params(self, deep=True):
+        return self.binary_method.get_params()
+
+    def _delayed_binary_predict(self, c, learners, X):
+        return learners[c].classify(X).mean()  # the mean is the estimation for the positive class prevalence
+
+    def _delayed_binary_fit(self, c, learners, data, **kwargs):
+        bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
+        learners[c].fit(bindata, **kwargs)


-class OneVsAllELM(AggregativeQuantifier):
+class ExplicitLossMinimisation(AggregativeQuantifier):
+    """
+    A variant of Explicit Loss Minimisation based on SVMperf that also works on single-label data. It uses one binary
+    quantifier for each class and then l1-normalizes the class predictions so that they sum up to one.
+    This variant was used in Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment
+    analysis. Social Network Analysis and Mining 6(19), 1–22 (2016)
+    """

-    def __init__(self, svmperf_base, loss, n_jobs=-1, **kwargs):
+    def __init__(self, svmperf_base, loss, **kwargs):
         self.svmperf_base = svmperf_base
         self.loss = loss
-        self.n_jobs = n_jobs
         self.kwargs = kwargs

     def fit(self, data: LabelledCollection, fit_learner=True, *args):
         assert fit_learner, 'the method requires that fit_learner=True'
+        self.learner = ExplicitLossMinimisationBinary(self.svmperf_base, self.loss, **self.kwargs)
+        if not data.binary:
+            self.learner = OneVsAll(self.learner, n_jobs=-1)
+        return self.learner.fit(data, *args)

-        self.learners = {c: SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) for c in data.classes_}
-        Parallel(n_jobs=self.n_jobs, backend='threading')(
-            delayed(train_task)(c, self.learners, data) for c in self.learners.keys()
-        )
-        return self
-
-    def quantify(self, X, y=None):
-        prevalences = np.asarray(
-            Parallel(n_jobs=self.n_jobs, backend='threading')(
-                delayed(binary_quant_task)(c, self.learners, X) for c in self.learners.keys()
-            )
-        )
-        prevalences /= prevalences.sum()
-        return prevalences
-
-    @property
-    def classes(self):
-        return sorted(self.learners.keys())
-
-    def preclassify_collection(self, data: LabelledCollection):
-        classifications = []
-        for class_ in data.classes_:
-            classifications.append(self.learners[class_].predict(data.instances))
-        classifications = np.vstack(classifications).T
-        precomputed = LabelledCollection(classifications, data.labels)
-        return precomputed
-
-    def set_params(self, **parameters):
-        self.kwargs=parameters
-
-    def get_params(self, deep=True):
-        return self.kwargs
+    def quantify(self, instances, *args):
+        return self.learner.quantify(instances, *args)


-class ExplicitLossMinimisation(AggregativeQuantifier):
+class ExplicitLossMinimisationBinary(AggregativeQuantifier):

     def __init__(self, svmperf_base, loss, **kwargs):
-        self.learner = SVMperf(svmperf_base, loss=loss, **kwargs)
+        self.svmperf_base = svmperf_base
+        self.loss = loss
+        self.kwargs = kwargs

     def fit(self, data: LabelledCollection, fit_learner=True, *args):
+        assert data.binary, f'{self.__class__.__name__} works only on problems of binary classification'
         assert fit_learner, 'the method requires that fit_learner=True'
-        self.learner.fit(data.instances, data.labels)
+        self.learner = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs).fit(data.instances, data.labels)
         return self

     def quantify(self, X, y=None):
         predictions = self.learner.predict(X)
         return F.prevalence_from_labels(predictions, self.learner.n_classes_)

     def classify(self, X, y=None):
         return self.learner.predict(X)

+
 class SVMQ(ExplicitLossMinimisation):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
@@ -349,3 +429,12 @@ class SVMRAE(ExplicitLossMinimisation):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)

+
+CC = ClassifyAndCount
+ACC = AdjustedClassifyAndCount
+PCC = ProbabilisticClassifyAndCount
+PACC = ProbabilisticAdjustedClassifyAndCount
+ELM = ExplicitLossMinimisation
+EMQ = ExpectationMaximizationQuantifier
+HDy = HellingerDistanceY
+
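A usage sketch for the new HDy quantifier (assuming a binary dataset loaded as in test.py at the end of this patch):

from sklearn.linear_model import LogisticRegression
import quapy as qp

model = qp.method.aggregative.HellingerDistanceY(LogisticRegression())
model.fit(dataset.training)  # holds out 40% of training as validation (train_val_split=0.6)
prevalence = model.quantify(dataset.test.instances)  # ndarray [prev(y=0), prev(y=1)]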
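The contract is deliberately small; a toy subclass sketch (the class name is ours, and this is essentially what MaximumLikelihoodPrevalenceEstimation amounts to):

class TrainingPrevalenceQuantifier(BaseQuantifier):
    # always predicts the prevalence observed in the training set

    def fit(self, data, *args):
        self.prev_ = data.prevalence()
        return self

    def quantify(self, instances, *args):
        return self.prev_

    def set_params(self, **parameters):
        pass

    def get_params(self, deep=True):
        return {}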
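temp_seed restores the global NumPy RNG state on exit, which is what makes the random_seed of artificial_sampling_prediction local to that call; a minimal sketch (import path as used in quapy/evaluation.py):

import numpy as np
from utils.util import temp_seed

with temp_seed(42):
    a = np.random.rand(3)  # reproducible across runs
b = np.random.rand(3)      # unaffected: the outer RNG state was restored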
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..679e069
--- /dev/null
+++ b/test.py
@@ -0,0 +1,52 @@
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import LinearSVC
+import quapy as qp
+import quapy.functional as F
+
+SAMPLE_SIZE = 500
+binary = False
+
+if binary:
+    # load a textual binary dataset and create a tfidf bag of words
+    train_path = './datasets/reviews/kindle/train.txt'
+    test_path = './datasets/reviews/kindle/test.txt'
+    dataset = qp.Dataset.load(train_path, test_path, qp.reader.from_text)
+    qp.preprocessing.text2tfidf(dataset, inplace=True)
+    qp.preprocessing.reduce_columns(dataset, min_df=10, inplace=True)
+
+else:
+    # load a sparse matrix ternary dataset
+    train_path = './datasets/twitter/train/sst.train+dev.feature.txt'
+    test_path = './datasets/twitter/test/sst.test.feature.txt'
+    dataset = qp.Dataset.load(train_path, test_path, qp.reader.from_sparse)
+
+# training a quantifier
+learner = LogisticRegression()
+model = qp.method.aggregative.ClassifyAndCount(learner)
+# model = qp.method.aggregative.AdjustedClassifyAndCount(learner)
+# model = qp.method.aggregative.ProbabilisticClassifyAndCount(learner)
+# model = qp.method.aggregative.ProbabilisticAdjustedClassifyAndCount(learner)
+# model = qp.method.aggregative.ExpectationMaximizationQuantifier(learner)
+model.fit(dataset.training)
+
+# estimating class prevalences
+prevalences_estim = model.quantify(dataset.test.instances)
+prevalences_true = dataset.test.prevalence()
+
+# evaluation (one single prediction)
+error = qp.error.mae(prevalences_true, prevalences_estim)
+
+print(f'method {model.__class__.__name__}')
+
+print(f'Evaluation in test (1 eval)')
+print(f'true prevalence {F.strprev(prevalences_true)}')
+print(f'estim prevalence {F.strprev(prevalences_estim)}')
+print(f'mae={error:.3f}')
+
+true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(model, dataset.test, SAMPLE_SIZE)
+
+qp.error.SAMPLE_SIZE = SAMPLE_SIZE
+print(f'Evaluation according to the artificial sampling protocol ({len(true_prev)} evals)')
+for error in qp.error.QUANTIFICATION_ERROR:
+    score = error(true_prev, estim_prev)
+    print(f'{error.__name__}={score:.5f}')