From d86c402916fd895d38ffa3ab654ec8399874a553 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Thu, 29 Apr 2021 16:07:39 +0200
Subject: [PATCH 01/10] Added first tests

---
 quapy/tests/__init__.py      |  0
 quapy/tests/test_base.py     |  5 +++++
 quapy/tests/test_datasets.py | 18 ++++++++++++++++++
 quapy/tests/test_methods.py  | 29 +++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+)
 create mode 100644 quapy/tests/__init__.py
 create mode 100644 quapy/tests/test_base.py
 create mode 100644 quapy/tests/test_datasets.py
 create mode 100644 quapy/tests/test_methods.py
diff --git a/quapy/tests/__init__.py b/quapy/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/quapy/tests/test_base.py b/quapy/tests/test_base.py
new file mode 100644
index 0000000..4fd9faa
--- /dev/null
+++ b/quapy/tests/test_base.py
@@ -0,0 +1,5 @@
+import pytest
+
+def test_import():
+    import quapy as qp
+    assert qp.__version__ is not None
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
new file mode 100644
index 0000000..3f8f673
--- /dev/null
+++ b/quapy/tests/test_datasets.py
@@ -0,0 +1,18 @@
+import pytest
+
+from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DATASETS_TEST, \
+    TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, fetch_reviews, fetch_twitter, fetch_UCIDataset
+
+
+@pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS)
+def test_fetch_reviews(dataset_name):
+    fetch_reviews(dataset_name)
+
+
+@pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN)
+def test_fetch_twitter(dataset_name):
+    fetch_twitter(dataset_name)
+
+@pytest.mark.parametrize('dataset_name',  UCI_DATASETS)
+def test_fetch_UCIDataset(dataset_name):
+    fetch_UCIDataset(dataset_name)
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
new file mode 100644
index 0000000..660c45e
--- /dev/null
+++ b/quapy/tests/test_methods.py
@@ -0,0 +1,29 @@
+import numpy
+import pytest
+from sklearn.linear_model import LogisticRegression
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.svm import LinearSVC
+
+import quapy as qp
+
+datasets = [qp.datasets.fetch_twitter('semeval16')]
+
+aggregative_methods = [qp.method.aggregative.CC, qp.method.aggregative.ACC, qp.method.aggregative.ELM]
+
+learners = [LogisticRegression, MultinomialNB, LinearSVC]
+
+
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('aggregative_method', aggregative_methods)
+@pytest.mark.parametrize('learner', learners)
+def test_aggregative_methods(dataset, aggregative_method, learner):
+    model = aggregative_method(learner())
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64

From 44cec7a0462cbcd7c603f3c9c4416e7f614b9800 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Fri, 30 Apr 2021 17:00:46 +0200
Subject: [PATCH 02/10] Added encoding option with default to utf-8.

---
 quapy/data/reader.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/quapy/data/reader.py b/quapy/data/reader.py
index 743b99e..5b4d115 100644
--- a/quapy/data/reader.py
+++ b/quapy/data/reader.py
@@ -3,7 +3,7 @@ from scipy.sparse import dok_matrix
 from tqdm import tqdm
 
 
-def from_text(path):
+def from_text(path, encoding='utf-8'):
     """
     Reas a labelled colletion of documents.
     File fomart <0 or 1>\t<document>\n
@@ -11,7 +11,7 @@ def from_text(path):
     :return: a list of sentences, and a list of labels
     """
     all_sentences, all_labels = [], []
-    for line in tqdm(open(path, 'rt').readlines(), f'loading {path}'):
+    for line in tqdm(open(path, 'rt', encoding=encoding).readlines(), f'loading {path}'):
         line = line.strip()
         if line:
             label, sentence = line.split('\t')
@@ -25,8 +25,8 @@ def from_text(path):
 
 def from_sparse(path):
     """
-    Reas a labelled colletion of real-valued instances expressed in sparse format
-    File fomart <-1 or 0 or 1>[\s col(int):val(float)]\n
+    Reads a labelled collection of real-valued instances expressed in sparse format
+    File format <-1 or 0 or 1>[\s col(int):val(float)]\n
     :param path: path to the labelled collection
     :return: a csr_matrix containing the instances (rows), and a ndarray containing the labels
     """
@@ -56,16 +56,16 @@ def from_sparse(path):
     return X, y
 
 
-def from_csv(path):
+def from_csv(path, encoding='utf-8'):
     """
-    Reas a csv file in which columns are separated by ','.
-    File fomart <label>,<feat1>,<feat2>,...,<featn>\n
+    Reads a csv file in which columns are separated by ','.
+    File format <label>,<feat1>,<feat2>,...,<featn>\n
     :param path: path to the csv file
     :return: a ndarray for the labels and a ndarray (float) for the covariates
     """
 
     X, y = [], []
-    for instance in tqdm(open(path, 'rt').readlines(), desc=f'reading {path}'):
+    for instance in tqdm(open(path, 'rt', encoding=encoding).readlines(), desc=f'reading {path}'):
         yi, *xi = instance.strip().split(',')
         X.append(list(map(float,xi)))
         y.append(yi)

From 8f284e540aa90e398769b43b579cbb8229b6f1b2 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Fri, 30 Apr 2021 17:22:58 +0200
Subject: [PATCH 03/10] Tests: handle known fetch errors; cover all AGGREGATIVE_METHODS

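---
Review note: in test_fetch_twitter and test_fetch_UCIDataset the except
branches only handle one expected error. Any other ValueError or
FileNotFoundError falls through to the print() call with "dataset" unbound,
so the real failure is reported as an UnboundLocalError. Re-raising in the
non-matching case would keep the original error visible. The check
"find(...) > 0" also misses a message that starts with the searched text
(index 0); ">= 0" or the "in" operator would cover it.
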
 quapy/tests/test_datasets.py | 22 ++++++++++++++++++----
 quapy/tests/test_methods.py  | 11 +++++++----
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
index 3f8f673..1358f71 100644
--- a/quapy/tests/test_datasets.py
+++ b/quapy/tests/test_datasets.py
@@ -6,13 +6,27 @@ from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DA
 
 @pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS)
 def test_fetch_reviews(dataset_name):
-    fetch_reviews(dataset_name)
+    dataset = fetch_reviews(dataset_name)
+    print(dataset.n_classes, len(dataset.training), len(dataset.test))
 
 
 @pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN)
 def test_fetch_twitter(dataset_name):
-    fetch_twitter(dataset_name)
+    try:
+        dataset = fetch_twitter(dataset_name)
+    except ValueError as ve:
+        if dataset_name == 'semeval' and ve.args[0].startswith(
+                'dataset "semeval" can only be used for model selection.'):
+            dataset = fetch_twitter(dataset_name, for_model_selection=True)
+    print(dataset.n_classes, len(dataset.training), len(dataset.test))
 
-@pytest.mark.parametrize('dataset_name',  UCI_DATASETS)
+
+@pytest.mark.parametrize('dataset_name', UCI_DATASETS)
 def test_fetch_UCIDataset(dataset_name):
-    fetch_UCIDataset(dataset_name)
+    try:
+        dataset = fetch_UCIDataset(dataset_name)
+    except FileNotFoundError as fnfe:
+        if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
+                'If this is the first time you attempt to load this dataset') > 0:
+            return
+    print(dataset.n_classes, len(dataset.training), len(dataset.test))
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index 660c45e..b59c900 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -5,20 +5,23 @@ from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import LinearSVC
 
 import quapy as qp
+from quapy.method import AGGREGATIVE_METHODS
 
-datasets = [qp.datasets.fetch_twitter('semeval16')]
-
-aggregative_methods = [qp.method.aggregative.CC, qp.method.aggregative.ACC, qp.method.aggregative.ELM]
+datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
+            pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
 
 learners = [LogisticRegression, MultinomialNB, LinearSVC]
 
 
 @pytest.mark.parametrize('dataset', datasets)
-@pytest.mark.parametrize('aggregative_method', aggregative_methods)
+@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS)
 @pytest.mark.parametrize('learner', learners)
 def test_aggregative_methods(dataset, aggregative_method, learner):
     model = aggregative_method(learner())
 
+    if model.binary and not dataset.binary:
+        return
+
     model.fit(dataset.training)
 
     estim_prevalences = model.quantify(dataset.test.instances)

From 70a3d4bd0f2c9c18b226cf6e9fa6d48be7890086 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Tue, 4 May 2021 12:14:14 +0200
Subject: [PATCH 04/10] Tests for non aggregative and meta methods.

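---
Review note: test_elm_methods swallows any AssertionError whose message does
not mention the missing SVMperf path and then uses "model" while it is
unbound; re-raising in that case would preserve the real failure. The
"skipping" branches return normally, so pytest reports them as passed rather
than skipped (pytest.skip would report them accurately). test_quanet_method
passes device='cuda' unconditionally, so it presumably needs a CUDA-capable
machine to run - confirm and guard accordingly.
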
 quapy/classification/neural.py |  4 +-
 quapy/method/__init__.py       | 14 ++++-
 quapy/tests/test_methods.py    | 97 +++++++++++++++++++++++++++++++++-
 3 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/quapy/classification/neural.py b/quapy/classification/neural.py
index 68a924e..afeb649 100644
--- a/quapy/classification/neural.py
+++ b/quapy/classification/neural.py
@@ -11,8 +11,8 @@ from torch.nn.utils.rnn import pad_sequence
 from tqdm import tqdm
 
 import quapy as qp
-from data import LabelledCollection
-from util import EarlyStop
+from quapy.data import LabelledCollection
+from quapy.util import EarlyStop
 
 
 class NeuralClassifierTrainer:
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
index 6ef83f1..b69d38f 100644
--- a/quapy/method/__init__.py
+++ b/quapy/method/__init__.py
@@ -3,21 +3,31 @@ from . import base
 from . import meta
 from . import non_aggregative
 
+EXPLICIT_LOSS_MINIMIZATION_METHODS = {
+    aggregative.ELM,
+    aggregative.SVMQ,
+    aggregative.SVMAE,
+    aggregative.SVMKLD,
+    aggregative.SVMRAE,
+    aggregative.SVMNKLD
+}
+
 AGGREGATIVE_METHODS = {
     aggregative.CC,
     aggregative.ACC,
     aggregative.PCC,
     aggregative.PACC,
-    aggregative.ELM,
     aggregative.EMQ,
     aggregative.HDy
-}
+} | EXPLICIT_LOSS_MINIMIZATION_METHODS
+
 
 NON_AGGREGATIVE_METHODS = {
     non_aggregative.MaximumLikelihoodPrevalenceEstimation
 }
 
 META_METHODS = {
+    meta.Ensemble,
     meta.QuaNet
 }
 
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index b59c900..d32916d 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -5,7 +5,8 @@ from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import LinearSVC
 
 import quapy as qp
-from quapy.method import AGGREGATIVE_METHODS
+from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS
+from quapy.method.meta import Ensemble
 
 datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
             pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
@@ -14,12 +15,104 @@ learners = [LogisticRegression, MultinomialNB, LinearSVC]
 
 
 @pytest.mark.parametrize('dataset', datasets)
-@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS)
+@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
 @pytest.mark.parametrize('learner', learners)
 def test_aggregative_methods(dataset, aggregative_method, learner):
     model = aggregative_method(learner())
 
     if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('elm_method', EXPLICIT_LOSS_MINIMIZATION_METHODS)
+def test_elm_methods(dataset, elm_method):
+    try:
+        model = elm_method()
+    except AssertionError as ae:
+        if ae.args[0].find('does not seem to point to a valid path') > 0:
+            print('Missing SVMperf binary program, skipping test')
+            return
+
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS)
+def test_non_aggregative_methods(dataset, non_aggregative_method):
+    model = non_aggregative_method()
+
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+@pytest.mark.parametrize('base_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
+@pytest.mark.parametrize('learner', learners)
+@pytest.mark.parametrize('dataset', datasets)
+@pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES)
+def test_ensemble_method(base_method, learner, dataset, policy):
+    qp.environ['SAMPLE_SIZE'] = len(dataset.training)
+    model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1)
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        return
+
+    model.fit(dataset.training)
+
+    estim_prevalences = model.quantify(dataset.test.instances)
+
+    true_prevalences = dataset.test.prevalence()
+    error = qp.error.mae(true_prevalences, estim_prevalences)
+
+    assert type(error) == numpy.float64
+
+
+def test_quanet_method():
+    dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
+    qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
+
+    from quapy.classification.neural import CNNnet
+    cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
+
+    from quapy.classification.neural import NeuralClassifierTrainer
+    learner = NeuralClassifierTrainer(cnn, device='cuda')
+
+    from quapy.method.meta import QuaNet
+    model = QuaNet(learner, sample_size=len(dataset.training), device='cuda')
+
+    if model.binary and not dataset.binary:
+        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
         return
 
     model.fit(dataset.training)

From bfbfe081168d985b32b7b442d541d6e4975d7608 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Tue, 4 May 2021 17:09:13 +0200
Subject: [PATCH 05/10] Added classes_ property to all quantifiers.

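---
Review note: the quapy/method/base.py hunk stacks @abstractmethod on top of
@property. abc.abstractmethod has to be the innermost decorator (@property
first, then @abstractmethod). In the order used here it is applied to a
property object and raises AttributeError while the class body executes, so
quapy.method.base cannot be imported at this commit. Patch 06 changes one
line pair in the same file and may already correct this - confirm before
squashing, reordering or bisecting across this commit.
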
 TODO.txt                        |  2 --
 quapy/method/aggregative.py     |  4 ++--
 quapy/method/base.py            |  4 ++++
 quapy/method/meta.py            |  4 ++++
 quapy/method/neural.py          |  6 ++++++
 quapy/method/non_aggregative.py |  8 ++++++--
 quapy/model_selection.py        | 11 +++++++----
 7 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/TODO.txt b/TODO.txt
index 6ff9e9c..3d22651 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -17,14 +17,12 @@ Current issues:
 In binary quantification (hp, kindle, imdb) we used F1 in the minority class (which in kindle and hp happens to be the
 negative class). This is not covered in this new implementation, in which the binary case is not treated as such, but as
 an instance of single-label with 2 labels. Check
-Add classnames to LabelledCollection? This should improve visualization of reports
 Add automatic reindex of class labels in LabelledCollection (currently, class indexes should be ordered and with no gaps)
 OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers
 Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed
 
 Improvements:
 ==========================================
-Clarify whether QuaNet is an aggregative method or not.
 Explore the hyperparameter "number of bins" in HDy
 Rename EMQ to SLD ?
 Parallelize the kFCV in ACC and PACC?
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 34a10eb..332fea0 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -53,10 +53,10 @@ class AggregativeQuantifier(BaseQuantifier):
 
     @property
     def n_classes(self):
-        return len(self.classes)
+        return len(self.classes_)
 
     @property
-    def classes(self):
+    def classes_(self):
         return self.learner.classes_
 
     @property
diff --git a/quapy/method/base.py b/quapy/method/base.py
index de53ad9..59a6bbf 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -19,6 +19,10 @@ class BaseQuantifier(metaclass=ABCMeta):
     @abstractmethod
     def get_params(self, deep=True): ...
 
+    @abstractmethod
+    @property
+    def classes_(self): ...
+
     # these methods allows meta-learners to reimplement the decision based on their constituents, and not
     # based on class structure
     @property
diff --git a/quapy/method/meta.py b/quapy/method/meta.py
index d6c29d8..e74c969 100644
--- a/quapy/method/meta.py
+++ b/quapy/method/meta.py
@@ -186,6 +186,10 @@ class Ensemble(BaseQuantifier):
         order = np.argsort(dist)
         return _select_k(predictions, order, k=self.red_size)
 
+    @property
+    def classes_(self):
+        return self.base_quantifier.classes_
+
     @property
     def binary(self):
         return self.base_quantifier.binary
diff --git a/quapy/method/neural.py b/quapy/method/neural.py
index fc133f7..29a4127 100644
--- a/quapy/method/neural.py
+++ b/quapy/method/neural.py
@@ -58,6 +58,7 @@ class QuaNetTrainer(BaseQuantifier):
         self.device = torch.device(device)
 
         self.__check_params_colision(self.quanet_params, self.learner.get_params())
+        self._classes_ = None
 
     def fit(self, data: LabelledCollection, fit_learner=True):
         """
@@ -67,6 +68,7 @@ class QuaNetTrainer(BaseQuantifier):
         :param fit_learner: if true, trains the classifier on a split containing 40% of the data
         :return: self
         """
+        self._classes_ = data.classes_
         classifier_data, unused_data = data.split_stratified(0.4)
         train_data, valid_data = unused_data.split_stratified(0.66)  # 0.66 split of 60% makes 40% and 20%
 
@@ -256,6 +258,10 @@ class QuaNetTrainer(BaseQuantifier):
         import shutil
         shutil.rmtree(self.checkpointdir, ignore_errors=True)
 
+    @property
+    def classes_(self):
+        return self._classes_
+
 
 def mae_loss(output, target):
     return torch.mean(torch.abs(output - target))
diff --git a/quapy/method/non_aggregative.py b/quapy/method/non_aggregative.py
index 4defdeb..94b7c50 100644
--- a/quapy/method/non_aggregative.py
+++ b/quapy/method/non_aggregative.py
@@ -2,18 +2,22 @@ from quapy.data import LabelledCollection
 from .base import BaseQuantifier
 
 
-
 class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
 
     def __init__(self, **kwargs):
-        pass
+        self._classes_ = None
 
     def fit(self, data: LabelledCollection, *args):
+        self._classes_ = data.classes_
         self.estimated_prevalence = data.prevalence()
 
     def quantify(self, documents, *args):
         return self.estimated_prevalence
 
+    @property
+    def classes_(self):
+        return self._classes_
+
     def get_params(self):
         pass
 
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index feeb14d..f78a576 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -4,7 +4,6 @@ from copy import deepcopy
 from typing import Union, Callable
 
 import quapy as qp
-import quapy.functional as F
 from quapy.data.base import LabelledCollection
 from quapy.evaluation import artificial_sampling_prediction
 from quapy.method.aggregative import BaseQuantifier
@@ -80,7 +79,7 @@ class GridSearchQ(BaseQuantifier):
             return training, validation
         elif isinstance(validation, float):
             assert 0. < validation < 1., 'validation proportion should be in (0,1)'
-            training, validation = training.split_stratified(train_prop=1-validation)
+            training, validation = training.split_stratified(train_prop=1 - validation)
             return training, validation
         else:
             raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
@@ -97,7 +96,7 @@ class GridSearchQ(BaseQuantifier):
             raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
                              f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
 
-    def fit(self, training: LabelledCollection, val_split: Union[LabelledCollection, float]=None):
+    def fit(self, training: LabelledCollection, val_split: Union[LabelledCollection, float] = None):
         """
         :param training: the training set on which to optimize the hyperparameters
         :param val_split: either a LabelledCollection on which to test the performance of the different settings, or
@@ -118,6 +117,7 @@ class GridSearchQ(BaseQuantifier):
             def handler(signum, frame):
                 self.sout('timeout reached')
                 raise TimeoutError()
+
             signal.signal(signal.SIGALRM, handler)
 
         self.sout(f'starting optimization with n_jobs={n_jobs}')
@@ -175,6 +175,10 @@ class GridSearchQ(BaseQuantifier):
     def quantify(self, instances):
         return self.best_model_.quantify(instances)
 
+    @property
+    def classes_(self):
+        return self.best_model_.classes_
+
     def set_params(self, **parameters):
         self.param_grid = parameters
 
@@ -185,4 +189,3 @@ class GridSearchQ(BaseQuantifier):
         if hasattr(self, 'best_model_'):
             return self.best_model_
         raise ValueError('best_model called before fit')
-

From 5b772c7eda68e0da1e43d10ef80b736d48e571fd Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Wed, 5 May 2021 17:12:44 +0200
Subject: [PATCH 06/10] Bug fixes on use of classes_. Tests.

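---
Review note: in quapy/data/datasets.py the new return annotation on
fetch_UCILabelledCollection is "Dataset", but fetch_UCIDataset calls
split_stratified() on its result and wraps the two parts in Dataset, so the
function appears to return a LabelledCollection. The annotation should
probably be LabelledCollection - confirm against the function body.
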
 quapy/data/base.py           | 76 +++++++++++++++++-------------
 quapy/data/datasets.py       |  8 ++--
 quapy/data/preprocessing.py  | 20 ++++----
 quapy/functional.py          |  6 +--
 quapy/method/aggregative.py  | 89 +++++++++++++++++++-----------------
 quapy/method/base.py         |  2 +-
 quapy/tests/test_datasets.py | 17 +++++--
 quapy/tests/test_methods.py  | 63 ++++++++++++++++++++++---
 8 files changed, 177 insertions(+), 104 deletions(-)

diff --git a/quapy/data/base.py b/quapy/data/base.py
index 6b2ddec..e68bcfa 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -2,40 +2,52 @@ import numpy as np
 from scipy.sparse import issparse
 from scipy.sparse import vstack
 from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
+
 from quapy.functional import artificial_prevalence_sampling, strprev
 
 
 class LabelledCollection:
+    '''
+    A LabelledCollection is a set of objects each with a label associated to it.
+    '''
 
-    def __init__(self, instances, labels, n_classes=None):
+    def __init__(self, instances, labels, classes_=None):
+        """
+        :param instances: list of objects
+        :param labels: list of labels, same length of instances
+        :param classes_: optional, list of classes from which labels are taken. When used, must contain the set of values used in labels.
+        """
         if issparse(instances):
             self.instances = instances
-        elif isinstance(instances, list) and len(instances)>0 and isinstance(instances[0], str):
+        elif isinstance(instances, list) and len(instances) > 0 and isinstance(instances[0], str):
             # lists of strings occupy too much as ndarrays (although python-objects add a heavy overload)
             self.instances = np.asarray(instances, dtype=object)
         else:
             self.instances = np.asarray(instances)
-        self.labels = np.asarray(labels, dtype=int)
+        self.labels = np.asarray(labels)
         n_docs = len(self)
-        if n_classes is None:
+        if classes_ is None:
             self.classes_ = np.unique(self.labels)
             self.classes_.sort()
         else:
-            self.classes_ = np.arange(n_classes)
-        self.index = {class_i: np.arange(n_docs)[self.labels == class_i] for class_i in self.classes_}
+            self.classes_ = np.unique(np.asarray(classes_))
+            self.classes_.sort()
+            if len(set(self.labels).difference(set(classes_))) > 0:
+                raise ValueError('labels contains values not included in classes_')
+        self.index = {class_: np.arange(n_docs)[self.labels == class_] for class_ in self.classes_}
 
     @classmethod
-    def load(cls, path:str, loader_func:callable):
+    def load(cls, path: str, loader_func: callable):
         return LabelledCollection(*loader_func(path))
 
     def __len__(self):
         return self.instances.shape[0]
 
     def prevalence(self):
-        return self.counts()/len(self)
+        return self.counts() / len(self)
 
     def counts(self):
-        return np.asarray([len(self.index[ci]) for ci in self.classes_])
+        return np.asarray([len(self.index[class_]) for class_ in self.classes_])
 
     @property
     def n_classes(self):
@@ -48,21 +60,21 @@ class LabelledCollection:
     def sampling_index(self, size, *prevs, shuffle=True):
         if len(prevs) == 0:  # no prevalence was indicated; returns an index for uniform sampling
             return np.random.choice(len(self), size, replace=False)
-        if len(prevs) == self.n_classes-1:
-            prevs = prevs + (1-sum(prevs),)
+        if len(prevs) == self.n_classes - 1:
+            prevs = prevs + (1 - sum(prevs),)
         assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
         assert sum(prevs) == 1, f'prevalences ({prevs}) wrong range (sum={sum(prevs)})'
 
         taken = 0
         indexes_sample = []
-        for i, class_i in enumerate(self.classes_):
-            if i == self.n_classes-1:
+        for i, class_ in enumerate(self.classes_):
+            if i == self.n_classes - 1:
                 n_requested = size - taken
             else:
                 n_requested = int(size * prevs[i])
 
-            n_candidates = len(self.index[class_i])
-            index_sample = self.index[class_i][
+            n_candidates = len(self.index[class_])
+            index_sample = self.index[class_][
                 np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
             ] if n_requested > 0 else []
 
@@ -90,21 +102,22 @@ class LabelledCollection:
     def sampling_from_index(self, index):
         documents = self.instances[index]
         labels = self.labels[index]
-        return LabelledCollection(documents, labels, n_classes=self.n_classes)
+        return LabelledCollection(documents, labels, classes_=self.classes_)
 
     def split_stratified(self, train_prop=0.6, random_state=None):
         # with temp_seed(42):
         tr_docs, te_docs, tr_labels, te_labels = \
-            train_test_split(self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state)
+            train_test_split(self.instances, self.labels, train_size=train_prop, stratify=self.labels,
+                             random_state=random_state)
         return LabelledCollection(tr_docs, tr_labels), LabelledCollection(te_docs, te_labels)
 
     def artificial_sampling_generator(self, sample_size, n_prevalences=101, repeats=1):
-        dimensions=self.n_classes
+        dimensions = self.n_classes
         for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats):
             yield self.sampling(sample_size, *prevs)
 
     def artificial_sampling_index_generator(self, sample_size, n_prevalences=101, repeats=1):
-        dimensions=self.n_classes
+        dimensions = self.n_classes
         for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats):
             yield self.sampling_index(sample_size, *prevs)
 
@@ -142,10 +155,10 @@ class LabelledCollection:
         else:
             nfeats = '?'
         stats_ = {'instances': ninstances,
-                'type': instance_type,
-                'features': nfeats,
-                'classes': self.n_classes,
-                'prevs': strprev(self.prevalence())}
+                  'type': instance_type,
+                  'features': nfeats,
+                  'classes': self.classes_,
+                  'prevs': strprev(self.prevalence())}
         if show:
             print(f'#instances={stats_["instances"]}, type={stats_["type"]}, #features={stats_["features"]}, '
                   f'#classes={stats_["classes"]}, prevs={stats_["prevs"]}')
@@ -155,13 +168,14 @@ class LabelledCollection:
         kf = RepeatedStratifiedKFold(n_splits=nfolds, n_repeats=nrepeats, random_state=random_state)
         for train_index, test_index in kf.split(*self.Xy):
             train = self.sampling_from_index(train_index)
-            test  = self.sampling_from_index(test_index)
+            test = self.sampling_from_index(test_index)
             yield train, test
 
+
 class Dataset:
 
     def __init__(self, training: LabelledCollection, test: LabelledCollection, vocabulary: dict = None, name=''):
-        assert training.n_classes == test.n_classes, 'incompatible labels in training and test collections'
+        assert set(training.classes_) == set(test.classes_), 'incompatible labels in training and test collections'
         self.training = training
         self.test = test
         self.vocabulary = vocabulary
@@ -172,8 +186,8 @@ class Dataset:
         return Dataset(*collection.split_stratified(train_prop=train_size))
 
     @property
-    def n_classes(self):
-        return self.training.n_classes
+    def classes_(self):
+        return self.training.classes_
 
     @property
     def binary(self):
@@ -195,19 +209,15 @@ class Dataset:
         print(f'Dataset={self.name} #tr-instances={tr_stats["instances"]}, #te-instances={te_stats["instances"]}, '
               f'type={tr_stats["type"]}, #features={tr_stats["features"]}, #classes={tr_stats["classes"]}, '
               f'tr-prevs={tr_stats["prevs"]}, te-prevs={te_stats["prevs"]}')
-        return {'train': tr_stats ,'test':te_stats}
+        return {'train': tr_stats, 'test': te_stats}
 
     @classmethod
     def kFCV(cls, data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0):
         for i, (train, test) in enumerate(data.kFCV(nfolds=nfolds, nrepeats=nrepeats, random_state=random_state)):
-            yield Dataset(train, test, name=f'fold {(i%nfolds)+1}/{nfolds} (round={(i//nfolds)+1})')
+            yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
 
 
 def isbinary(data):
     if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
         return data.binary
     return False
-
-
-
-
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 79d0bbf..575ffca 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -47,7 +47,7 @@ UCI_DATASETS = ['acute.a', 'acute.b',
                 'yeast']
 
 
-def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False):
+def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) -> Dataset:
     """
     Load a Reviews dataset as a Dataset instance, as used in:
     Esuli, A., Moreo, A., and Sebastiani, F. "A recurrent neural network for sentiment quantification."
@@ -91,7 +91,7 @@ def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle
     return data
 
 
-def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False):
+def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False) -> Dataset:
     """
     Load a Twitter dataset as a Dataset instance, as used in:
     Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
@@ -162,12 +162,12 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
     return data
 
 
-def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False):
+def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
     data = fetch_UCILabelledCollection(dataset_name, data_home, verbose)
     return Dataset(*data.split_stratified(1 - test_split, random_state=0))
 
 
-def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False):
+def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
 
     assert dataset_name in UCI_DATASETS, \
         f'Name {dataset_name} does not match any known dataset from the UCI Machine Learning datasets repository. ' \
diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py
index 77752f0..ee1627e 100644
--- a/quapy/data/preprocessing.py
+++ b/quapy/data/preprocessing.py
@@ -29,13 +29,13 @@ def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kw
     test_documents = vectorizer.transform(dataset.test.instances)
 
     if inplace:
-        dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.n_classes)
-        dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.n_classes)
+        dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.classes_)
+        dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.classes_)
         dataset.vocabulary = vectorizer.vocabulary_
         return dataset
     else:
-        training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.n_classes)
-        test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.n_classes)
+        training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.classes_)
         return Dataset(training, test, vectorizer.vocabulary_)
 
 
@@ -66,8 +66,8 @@ def reduce_columns(dataset: Dataset, min_df=5, inplace=False):
         dataset.test.instances = Xte
         return dataset
     else:
-        training = LabelledCollection(Xtr, dataset.training.labels.copy(), dataset.n_classes)
-        test = LabelledCollection(Xte, dataset.test.labels.copy(), dataset.n_classes)
+        training = LabelledCollection(Xtr, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(Xte, dataset.test.labels.copy(), dataset.classes_)
         return Dataset(training, test)
 
 
@@ -100,13 +100,13 @@ def index(dataset: Dataset, min_df=5, inplace=False, **kwargs):
     test_index = indexer.transform(dataset.test.instances)
 
     if inplace:
-        dataset.training = LabelledCollection(training_index, dataset.training.labels, dataset.n_classes)
-        dataset.test = LabelledCollection(test_index, dataset.test.labels, dataset.n_classes)
+        dataset.training = LabelledCollection(training_index, dataset.training.labels, dataset.classes_)
+        dataset.test = LabelledCollection(test_index, dataset.test.labels, dataset.classes_)
         dataset.vocabulary = indexer.vocabulary_
         return dataset
     else:
-        training = LabelledCollection(training_index, dataset.training.labels.copy(), dataset.n_classes)
-        test = LabelledCollection(test_index, dataset.test.labels.copy(), dataset.n_classes)
+        training = LabelledCollection(training_index, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(test_index, dataset.test.labels.copy(), dataset.classes_)
         return Dataset(training, test, indexer.vocabulary_)
 
 
diff --git a/quapy/functional.py b/quapy/functional.py
index 726b214..39a867b 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -36,12 +36,12 @@ def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
     return p
 
 
-def prevalence_from_labels(labels, n_classes):
+def prevalence_from_labels(labels, classes_):
     if labels.ndim != 1:
         raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
     unique, counts = np.unique(labels, return_counts=True)
     by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
-    prevalences = np.asarray([by_class[ci] for ci in range(n_classes)], dtype=np.float)
+    prevalences = np.asarray([by_class[class_] for class_ in classes_], dtype=float)
     prevalences /= prevalences.sum()
     return prevalences
 
@@ -51,7 +51,7 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False):
         raise ValueError(f'param posteriors does not seem to be a ndarray of posteior probabilities')
     if binarize:
         predictions = np.argmax(posteriors, axis=-1)
-        return prevalence_from_labels(predictions, n_classes=posteriors.shape[1])
+        return prevalence_from_labels(predictions, np.arange(posteriors.shape[1]))
     else:
         prevalences = posteriors.mean(axis=0)
         prevalences /= prevalences.sum()
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 332fea0..ff94c21 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -1,6 +1,7 @@
 from abc import abstractmethod
 from copy import deepcopy
 from typing import Union
+
 import numpy as np
 from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
@@ -8,6 +9,7 @@ from sklearn.calibration import CalibratedClassifierCV
 from sklearn.metrics import confusion_matrix
 from sklearn.model_selection import StratifiedKFold
 from tqdm import tqdm
+
 import quapy as qp
 import quapy.functional as F
 from quapy.classification.svmperf import SVMperf
@@ -43,7 +45,7 @@ class AggregativeQuantifier(BaseQuantifier):
         return self.aggregate(classif_predictions)
 
     @abstractmethod
-    def aggregate(self, classif_predictions:np.ndarray): ...
+    def aggregate(self, classif_predictions: np.ndarray): ...
 
     def get_params(self, deep=True):
         return self.learner.get_params()
@@ -84,7 +86,7 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
 
     def set_params(self, **parameters):
         if isinstance(self.learner, CalibratedClassifierCV):
-            parameters = {'base_estimator__'+k:v for k,v in parameters.items()}
+            parameters = {'base_estimator__' + k: v for k, v in parameters.items()}
         self.learner.set_params(**parameters)
 
     @property
@@ -98,7 +100,7 @@ def training_helper(learner,
                     data: LabelledCollection,
                     fit_learner: bool = True,
                     ensure_probabilistic=False,
-                    val_split:Union[LabelledCollection, float]=None):
+                    val_split: Union[LabelledCollection, float] = None):
     """
     Training procedure common to all Aggregative Quantifiers.
     :param learner: the learner to be fit
@@ -122,13 +124,14 @@ def training_helper(learner,
             if isinstance(val_split, float):
                 if not (0 < val_split < 1):
                     raise ValueError(f'train/val split {val_split} out of range, must be in (0,1)')
-                train, unused = data.split_stratified(train_prop=1-val_split)
-            elif val_split.__class__.__name__ == LabelledCollection.__name__: #isinstance(val_split, LabelledCollection):
+                train, unused = data.split_stratified(train_prop=1 - val_split)
+            elif val_split.__class__.__name__ == LabelledCollection.__name__:  # isinstance(val_split, LabelledCollection):
                 train = data
                 unused = val_split
             else:
-                raise ValueError(f'param "val_split" ({type(val_split)}) not understood; use either a float indicating the split '
-                                 'proportion, or a LabelledCollection indicating the validation split')
+                raise ValueError(
+                    f'param "val_split" ({type(val_split)}) not understood; use either a float indicating the split '
+                    'proportion, or a LabelledCollection indicating the validation split')
         else:
             train, unused = data, None
 
@@ -153,7 +156,7 @@ class CC(AggregativeQuantifier):
     attributed each of the classes in order to compute class prevalence estimates.
     """
 
-    def __init__(self, learner:BaseEstimator):
+    def __init__(self, learner: BaseEstimator):
         self.learner = learner
 
     def fit(self, data: LabelledCollection, fit_learner=True):
@@ -167,16 +170,16 @@ class CC(AggregativeQuantifier):
         return self
 
     def aggregate(self, classif_predictions):
-        return F.prevalence_from_labels(classif_predictions, self.n_classes)
+        return F.prevalence_from_labels(classif_predictions, self.classes_)
 
 
 class ACC(AggregativeQuantifier):
 
-    def __init__(self, learner:BaseEstimator, val_split=0.4):
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
         self.learner = learner
         self.val_split = val_split
 
-    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection]=None):
+    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None):
         """
         Trains a ACC quantifier
         :param data: the training set
@@ -262,7 +265,7 @@ class PACC(AggregativeProbabilisticQuantifier):
         self.learner = learner
         self.val_split = val_split
 
-    def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, int, LabelledCollection]=None):
+    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None):
         """
         Trains a PACC quantifier
         :param data: the training set
@@ -294,7 +297,8 @@ class PACC(AggregativeProbabilisticQuantifier):
             y_ = np.vstack(y_)
 
             # fit the learner on all data
-            self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True, val_split=None)
+            self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True,
+                                              val_split=None)
 
         else:
             self.learner, val_data = training_helper(
@@ -307,8 +311,8 @@ class PACC(AggregativeProbabilisticQuantifier):
         # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
         # document that belongs to yj ends up being classified as belonging to yi
         confusion = np.empty(shape=(data.n_classes, data.n_classes))
-        for yi in range(data.n_classes):
-            confusion[yi] = y_[y==yi].mean(axis=0)
+        for i, class_ in enumerate(data.classes_):
+            confusion[i] = y_[y == class_].mean(axis=0)
 
         self.Pte_cond_estim_ = confusion.T
 
@@ -338,7 +342,7 @@ class EMQ(AggregativeProbabilisticQuantifier):
 
     def fit(self, data: LabelledCollection, fit_learner=True):
         self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
-        self.train_prevalence = F.prevalence_from_labels(data.labels, self.n_classes)
+        self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
         return self
 
     def aggregate(self, classif_posteriors, epsilon=EPSILON):
@@ -366,7 +370,7 @@ class EMQ(AggregativeProbabilisticQuantifier):
             # M-step:
             qs = ps.mean(axis=0)
 
-            if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s>10:
+            if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s > 10:
                 converged = True
 
             qs_prev_ = qs
@@ -389,7 +393,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
         self.learner = learner
         self.val_split = val_split
 
-    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection]=None):
+    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None):
         """
         Trains a HDy quantifier
         :param data: the training set
@@ -405,13 +409,15 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
         self._check_binary(data, self.__class__.__name__)
         self.learner, validation = training_helper(
             self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split)
-        Px = self.posterior_probabilities(validation.instances)[:,1]  # takes only the P(y=+1|x)
-        self.Pxy1 = Px[validation.labels == 1]
-        self.Pxy0 = Px[validation.labels == 0]
+        Px = self.posterior_probabilities(validation.instances)[:, 1]  # takes only the P(y=+1|x)
+        self.Pxy1 = Px[validation.labels == self.learner.classes_[1]]
+        self.Pxy0 = Px[validation.labels == self.learner.classes_[0]]
         # pre-compute the histogram for positive and negative examples
-        self.bins = np.linspace(10, 110, 11, dtype=int)  #[10, 20, 30, ..., 100, 110]
-        self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins}
-        self.Pxy0_density = {bins: np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins}
+        self.bins = np.linspace(10, 110, 11, dtype=int)  # [10, 20, 30, ..., 100, 110]
+        self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in
+                             self.bins}
+        self.Pxy0_density = {bins: np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)[0] for bins in
+                             self.bins}
         return self
 
     def aggregate(self, classif_posteriors):
@@ -419,12 +425,12 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
         # and the final estimated a priori probability was taken as the median of these 11 estimates."
         # (González-Castro, et al., 2013).
 
-        Px = classif_posteriors[:,1]  # takes only the P(y=+1|x)
+        Px = classif_posteriors[:, 1]  # takes only the P(y=+1|x)
 
         prev_estimations = []
-        #for bins in np.linspace(10, 110, 11, dtype=int):  #[10, 20, 30, ..., 100, 110]
-            #Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
-            #Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
+        # for bins in np.linspace(10, 110, 11, dtype=int):  #[10, 20, 30, ..., 100, 110]
+        # Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
+        # Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
         for bins in self.bins:
             Pxy0_density = self.Pxy0_density[bins]
             Pxy1_density = self.Pxy1_density[bins]
@@ -433,14 +439,14 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
 
             prev_selected, min_dist = None, None
             for prev in F.prevalence_linspace(n_prevalences=100, repeat=1, smooth_limits_epsilon=0.0):
-                Px_train = prev*Pxy1_density + (1 - prev)*Pxy0_density
+                Px_train = prev * Pxy1_density + (1 - prev) * Pxy0_density
                 hdy = F.HellingerDistance(Px_train, Px_test)
                 if prev_selected is None or hdy < min_dist:
                     prev_selected, min_dist = prev, hdy
             prev_estimations.append(prev_selected)
 
-        pos_class_prev = np.median(prev_estimations)
-        return np.asarray([1-pos_class_prev, pos_class_prev])
+        class1_prev = np.median(prev_estimations)
+        return np.asarray([1 - class1_prev, class1_prev])
 
 
 class ELM(AggregativeQuantifier, BinaryQuantifier):
@@ -457,8 +463,8 @@ class ELM(AggregativeQuantifier, BinaryQuantifier):
         self.learner.fit(data.instances, data.labels)
         return self
 
-    def aggregate(self, classif_predictions:np.ndarray):
-        return F.prevalence_from_labels(classif_predictions, self.learner.n_classes_)
+    def aggregate(self, classif_predictions: np.ndarray):
+        return F.prevalence_from_labels(classif_predictions, self.classes_)
 
     def classify(self, X, y=None):
         return self.learner.predict(X)
@@ -470,6 +476,7 @@ class SVMQ(ELM):
     Quantification-oriented learning based on reliable classifiers.
     Pattern Recognition, 48(2):591–604.
     """
+
     def __init__(self, svmperf_base=None, **kwargs):
         super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
 
@@ -480,6 +487,7 @@ class SVMKLD(ELM):
     Optimizing text quantifiers for multivariate loss functions.
     ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27.
     """
+
     def __init__(self, svmperf_base=None, **kwargs):
         super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
 
@@ -490,6 +498,7 @@ class SVMNKLD(ELM):
     Optimizing text quantifiers for multivariate loss functions.
     ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27.
     """
+
     def __init__(self, svmperf_base=None, **kwargs):
         super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
 
@@ -531,7 +540,7 @@ class OneVsAll(AggregativeQuantifier):
             f'{self.__class__.__name__} expect non-binary data'
         assert isinstance(self.binary_quantifier, BaseQuantifier), \
             f'{self.binary_quantifier} does not seem to be a Quantifier'
-        assert fit_learner==True, 'fit_learner must be True'
+        assert fit_learner == True, 'fit_learner must be True'
 
         self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
         self.__parallel(self._delayed_binary_fit, data)
@@ -559,11 +568,11 @@ class OneVsAll(AggregativeQuantifier):
 
     def aggregate(self, classif_predictions_bin):
         if self.probabilistic:
-            assert classif_predictions_bin.shape[1]==self.n_classes and classif_predictions_bin.shape[2]==2, \
+            assert classif_predictions_bin.shape[1] == self.n_classes and classif_predictions_bin.shape[2] == 2, \
                 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \
                 'probabilities (2 dimensions) for each document (row) and class (columns)'
         else:
-            assert set(np.unique(classif_predictions_bin)).issubset({0,1}), \
+            assert set(np.unique(classif_predictions_bin)).issubset({0, 1}), \
                 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \
                 'predictions for each document (row) and class (columns)'
         prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions_bin)
@@ -606,7 +615,7 @@ class OneVsAll(AggregativeQuantifier):
         return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1]
 
     def _delayed_binary_fit(self, c, data):
-        bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
+        bindata = LabelledCollection(data.instances, data.labels == c, classes_=[False, True])
         self.dict_binary_quantifiers[c].fit(bindata)
 
     @property
@@ -616,9 +625,3 @@ class OneVsAll(AggregativeQuantifier):
     @property
     def probabilistic(self):
         return self.binary_quantifier.probabilistic
-
-
-
-
-
-
diff --git a/quapy/method/base.py b/quapy/method/base.py
index 59a6bbf..0c2729f 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -19,8 +19,8 @@ class BaseQuantifier(metaclass=ABCMeta):
     @abstractmethod
     def get_params(self, deep=True): ...
 
-    @abstractmethod
     @property
+    @abstractmethod
     def classes_(self): ...
 
     # these methods allows meta-learners to reimplement the decision based on their constituents, and not
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
index 1358f71..88209e8 100644
--- a/quapy/tests/test_datasets.py
+++ b/quapy/tests/test_datasets.py
@@ -7,7 +7,11 @@ from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DA
 @pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS)
 def test_fetch_reviews(dataset_name):
     dataset = fetch_reviews(dataset_name)
-    print(dataset.n_classes, len(dataset.training), len(dataset.test))
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
 
 
 @pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN)
@@ -18,7 +22,11 @@ def test_fetch_twitter(dataset_name):
         if dataset_name == 'semeval' and ve.args[0].startswith(
                 'dataset "semeval" can only be used for model selection.'):
             dataset = fetch_twitter(dataset_name, for_model_selection=True)
-    print(dataset.n_classes, len(dataset.training), len(dataset.test))
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
 
 
 @pytest.mark.parametrize('dataset_name', UCI_DATASETS)
@@ -28,5 +36,10 @@ def test_fetch_UCIDataset(dataset_name):
     except FileNotFoundError as fnfe:
         if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
                 'If this is the first time you attempt to load this dataset') > 0:
+            print('The pageblocks.5 dataset requires some hand processing to be usable, skipping this test.')
             return
-    print(dataset.n_classes, len(dataset.training), len(dataset.test))
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index d32916d..c036692 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -1,23 +1,23 @@
 import numpy
 import pytest
 from sklearn.linear_model import LogisticRegression
-from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import LinearSVC
 
 import quapy as qp
+from quapy.data import Dataset, LabelledCollection
 from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS
 from quapy.method.meta import Ensemble
 
 datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
             pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
 
-learners = [LogisticRegression, MultinomialNB, LinearSVC]
+learners = [LogisticRegression, LinearSVC]
 
 
 @pytest.mark.parametrize('dataset', datasets)
 @pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
 @pytest.mark.parametrize('learner', learners)
-def test_aggregative_methods(dataset, aggregative_method, learner):
+def test_aggregative_methods(dataset: Dataset, aggregative_method, learner):
     model = aggregative_method(learner())
 
     if model.binary and not dataset.binary:
@@ -36,7 +36,7 @@ def test_aggregative_methods(dataset, aggregative_method, learner):
 
 @pytest.mark.parametrize('dataset', datasets)
 @pytest.mark.parametrize('elm_method', EXPLICIT_LOSS_MINIMIZATION_METHODS)
-def test_elm_methods(dataset, elm_method):
+def test_elm_methods(dataset: Dataset, elm_method):
     try:
         model = elm_method()
     except AssertionError as ae:
@@ -60,7 +60,7 @@ def test_elm_methods(dataset, elm_method):
 
 @pytest.mark.parametrize('dataset', datasets)
 @pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS)
-def test_non_aggregative_methods(dataset, non_aggregative_method):
+def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method):
     model = non_aggregative_method()
 
     if model.binary and not dataset.binary:
@@ -81,7 +81,7 @@ def test_non_aggregative_methods(dataset, non_aggregative_method):
 @pytest.mark.parametrize('learner', learners)
 @pytest.mark.parametrize('dataset', datasets)
 @pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES)
-def test_ensemble_method(base_method, learner, dataset, policy):
+def test_ensemble_method(base_method, learner, dataset: Dataset, policy):
     qp.environ['SAMPLE_SIZE'] = len(dataset.training)
     model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1)
     if model.binary and not dataset.binary:
@@ -100,10 +100,12 @@ def test_ensemble_method(base_method, learner, dataset, policy):
 
 def test_quanet_method():
     dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
+    dataset = Dataset(dataset.training.sampling(100, *dataset.training.prevalence()),
+                      dataset.test.sampling(100, *dataset.test.prevalence()))
     qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
 
     from quapy.classification.neural import CNNnet
-    cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
+    cnn = CNNnet(dataset.vocabulary_size, dataset.training.n_classes)
 
     from quapy.classification.neural import NeuralClassifierTrainer
     learner = NeuralClassifierTrainer(cnn, device='cuda')
@@ -123,3 +125,50 @@ def test_quanet_method():
     error = qp.error.mae(true_prevalences, estim_prevalences)
 
     assert type(error) == numpy.float64
+
+
+def models_to_test_for_str_label_names():
+    models = list()
+    learner = LogisticRegression
+    for method in AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS):
+        models.append(method(learner()))
+    for method in NON_AGGREGATIVE_METHODS:
+        models.append(method())
+    return models
+
+
+@pytest.mark.parametrize('model', models_to_test_for_str_label_names())
+def test_str_label_names(model):
+    dataset = qp.datasets.fetch_reviews('imdb', pickle=True)
+    dataset = Dataset(dataset.training.sampling(1000, *dataset.training.prevalence()),
+                      dataset.test.sampling(1000, *dataset.test.prevalence()))
+    qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)
+
+    model.fit(dataset.training)
+
+    int_estim_prevalences = model.quantify(dataset.test.instances)
+    true_prevalences = dataset.test.prevalence()
+
+    error = qp.error.mae(true_prevalences, int_estim_prevalences)
+    assert type(error) == numpy.float64
+
+    dataset_str = Dataset(LabelledCollection(dataset.training.instances,
+                                             ['one' if label == 1 else 'zero' for label in dataset.training.labels]),
+                          LabelledCollection(dataset.test.instances,
+                                             ['one' if label == 1 else 'zero' for label in dataset.test.labels]))
+
+    model.fit(dataset_str.training)
+
+    str_estim_prevalences = model.quantify(dataset_str.test.instances)
+    true_prevalences = dataset_str.test.prevalence()
+
+    error = qp.error.mae(true_prevalences, str_estim_prevalences)
+    assert type(error) == numpy.float64
+
+    print(true_prevalences)
+    print(int_estim_prevalences)
+    print(str_estim_prevalences)
+
+    numpy.testing.assert_almost_equal(int_estim_prevalences[1],
+                                      str_estim_prevalences[list(model.classes_).index('one')])
+

From 147b2f2212bc111a3241eb26f60ec3ab499b6a0b Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Thu, 6 May 2021 16:28:30 +0200
Subject: [PATCH 07/10] Added back n_classes to Dataset

---
 quapy/data/base.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/quapy/data/base.py b/quapy/data/base.py
index e68bcfa..ffd7e31 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -189,6 +189,10 @@ class Dataset:
     def classes_(self):
         return self.training.classes_
 
+    @property
+    def n_classes(self):
+        return self.training.n_classes
+
     @property
     def binary(self):
         return self.training.binary

From 32b25146c129e522653703975fa9827fa4492175 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Mon, 10 May 2021 10:26:51 +0200
Subject: [PATCH 08/10] Tests

---
 TODO.txt                    | 1 +
 quapy/tests/test_methods.py | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/TODO.txt b/TODO.txt
index 3d22651..2e153a2 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -20,6 +20,7 @@ an instance of single-label with 2 labels. Check
 Add automatic reindex of class labels in LabelledCollection (currently, class indexes should be ordered and with no gaps)
 OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers
 Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed
+Add random seed management to support replicability (see temp_seed in util.py).
 
 Improvements:
 ==========================================
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index c036692..186b7c0 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -6,6 +6,7 @@ from sklearn.svm import LinearSVC
 import quapy as qp
 from quapy.data import Dataset, LabelledCollection
 from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS
+from quapy.method.aggregative import ACC, PACC, HDy
 from quapy.method.meta import Ensemble
 
 datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
@@ -21,7 +22,7 @@ def test_aggregative_methods(dataset: Dataset, aggregative_method, learner):
     model = aggregative_method(learner())
 
     if model.binary and not dataset.binary:
-        print(f'skipping the test of binary model {model} on non-binary dataset {dataset}')
+        print(f'skipping the test of binary model {type(model)} on non-binary dataset {dataset}')
         return
 
     model.fit(dataset.training)
@@ -139,6 +140,11 @@ def models_to_test_for_str_label_names():
 
 @pytest.mark.parametrize('model', models_to_test_for_str_label_names())
 def test_str_label_names(model):
+    if type(model) in {ACC, PACC, HDy}:
+        print(
+            f'skipping the test of binary model {type(model)} because it currently does not support random seed control.')
+        return
+
     dataset = qp.datasets.fetch_reviews('imdb', pickle=True)
     dataset = Dataset(dataset.training.sampling(1000, *dataset.training.prevalence()),
                       dataset.test.sampling(1000, *dataset.test.prevalence()))
@@ -171,4 +177,3 @@ def test_str_label_names(model):
 
     numpy.testing.assert_almost_equal(int_estim_prevalences[1],
                                       str_estim_prevalences[list(model.classes_).index('one')])
-

From 79fbbd9d80d03fc121cb5331b51d54b286e085a3 Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Mon, 10 May 2021 13:36:35 +0200
Subject: [PATCH 09/10] pip package

---
 TODO.txt                    |   1 -
 quapy/__init__.py           |   2 +-
 quapy/method/meta.py        |  61 ++++++++------
 quapy/tests/test_methods.py |   6 ++
 setup.py                    | 164 ++++++++++++++++++++++++++++++++++++
 5 files changed, 204 insertions(+), 30 deletions(-)
 create mode 100644 setup.py

diff --git a/TODO.txt b/TODO.txt
index 2e153a2..2496a4b 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -2,7 +2,6 @@ Packaging:
 ==========================================
 Documentation with sphinx
 Document methods with paper references
-allow for "pip install"
 unit-tests
 
 New features:
diff --git a/quapy/__init__.py b/quapy/__init__.py
index cffae1c..7fea635 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -10,7 +10,7 @@ from . import model_selection
 from . import classification
 from quapy.method.base import isprobabilistic, isaggregative
 
-__version__ = '0.1'
+__version__ = '0.1.4'
 
 environ = {
     'SAMPLE_SIZE': None,
diff --git a/quapy/method/meta.py b/quapy/method/meta.py
index e74c969..fc3efe3 100644
--- a/quapy/method/meta.py
+++ b/quapy/method/meta.py
@@ -1,28 +1,32 @@
 from copy import deepcopy
 from typing import Union
 
+import numpy as np
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import f1_score, make_scorer, accuracy_score
+from sklearn.model_selection import GridSearchCV, cross_val_predict
 from tqdm import tqdm
 
-import numpy as np
-from joblib import Parallel, delayed
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV, cross_val_predict
-
 import quapy as qp
-from quapy.data import LabelledCollection
 from quapy import functional as F
+from quapy.data import LabelledCollection
 from quapy.evaluation import evaluate
 from quapy.model_selection import GridSearchQ
-from . import neural
-from .base import BaseQuantifier
-from quapy.method.aggregative import CC, ACC, PCC, PACC, HDy, EMQ
 
-QuaNet = neural.QuaNetTrainer
+try:
+    from . import neural
+except ModuleNotFoundError:
+    neural = None
+from .base import BaseQuantifier
+from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ
+
+if neural:
+    QuaNet = neural.QuaNetTrainer
+else:
+    QuaNet = "QuaNet is not available due to missing torch package"
 
 
 class Ensemble(BaseQuantifier):
-
     VALID_POLICIES = {'ave', 'ptr', 'ds'} | qp.error.QUANTIFICATION_ERROR_NAMES
 
     """
@@ -65,9 +69,9 @@ class Ensemble(BaseQuantifier):
         if self.verbose:
             print('[Ensemble]' + msg)
 
-    def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float]=None):
+    def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float] = None):
         self.sout('Fit')
-        if self.policy=='ds' and not data.binary:
+        if self.policy == 'ds' and not data.binary:
             raise ValueError(f'ds policy is only defined for binary quantification, but this dataset is not binary')
         if val_split is None:
             val_split = self.val_split
@@ -132,7 +136,7 @@ class Ensemble(BaseQuantifier):
         tests = [m[3] for m in self.ensemble]
         scores = []
         for i, model in enumerate(self.ensemble):
-            scores.append(evaluate(model[0], tests[:i] + tests[i+1:], error, self.n_jobs))
+            scores.append(evaluate(model[0], tests[:i] + tests[i + 1:], error, self.n_jobs))
         order = np.argsort(scores)
 
         self.ensemble = _select_k(self.ensemble, order, k=self.red_size)
@@ -168,7 +172,7 @@ class Ensemble(BaseQuantifier):
         lr_base = LogisticRegression(class_weight='balanced', max_iter=1000)
 
         optim = GridSearchCV(
-            lr_base, param_grid={'C': np.logspace(-4,4,9)}, cv=5, n_jobs=self.n_jobs, refit=True
+            lr_base, param_grid={'C': np.logspace(-4, 4, 9)}, cv=5, n_jobs=self.n_jobs, refit=True
         ).fit(X, y)
 
         posteriors = cross_val_predict(
@@ -204,8 +208,8 @@ class Ensemble(BaseQuantifier):
 
 
 def get_probability_distribution(posterior_probabilities, bins=8):
-    assert posterior_probabilities.shape[1]==2, 'the posterior probabilities do not seem to be for a binary problem'
-    posterior_probabilities = posterior_probabilities[:,1]  # take the positive posteriors only
+    assert posterior_probabilities.shape[1] == 2, 'the posterior probabilities do not seem to be for a binary problem'
+    posterior_probabilities = posterior_probabilities[:, 1]  # take the positive posteriors only
     distribution, _ = np.histogram(posterior_probabilities, bins=bins, range=(0, 1), density=True)
     return distribution
 
@@ -223,7 +227,7 @@ def _delayed_new_instance(args):
     if val_split is not None:
         if isinstance(val_split, float):
             assert 0 < val_split < 1, 'val_split should be in (0,1)'
-            data, val_split = data.split_stratified(train_prop=1-val_split)
+            data, val_split = data.split_stratified(train_prop=1 - val_split)
 
     sample_index = data.sampling_index(sample_size, *prev)
     sample = data.sampling_from_index(sample_index)
@@ -255,7 +259,7 @@ def _draw_simplex(ndim, min_val, max_trials=100):
     :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R where S(ndim) is the simplex
     and R is the simplex subset containing dimensions lower than min_val
     """
-    if min_val >= 1/ndim:
+    if min_val >= 1 / ndim:
         raise ValueError(f'no sample can be draw from the {ndim}-dimensional simplex so that '
                          f'all its values are >={min_val} (try with a larger value for min_pos)')
     trials = 0
@@ -300,14 +304,15 @@ def _check_error(error):
                          f'the name of an error function in {qp.error.ERROR_NAMES}')
 
 
-def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel:dict=None, **kwargs):
-        if optim is not None:
-            if param_grid is None:
-                raise ValueError(f'param_grid is None but optim was requested.')
-            if param_model_sel is None:
-                raise ValueError(f'param_model_sel is None but optim was requested.')
-        error = _check_error(optim)
-        return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs)
+def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict = None,
+                    **kwargs):
+    if optim is not None:
+        if param_grid is None:
+            raise ValueError(f'param_grid is None but optim was requested.')
+        if param_model_sel is None:
+            raise ValueError(f'param_model_sel is None but optim was requested.')
+    error = _check_error(optim)
+    return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs)
 
 
 def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
@@ -327,4 +332,4 @@ def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
 
 
 def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
-    return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs)
\ No newline at end of file
+    return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs)
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index 186b7c0..bcf721c 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -100,6 +100,12 @@ def test_ensemble_method(base_method, learner, dataset: Dataset, policy):
 
 
 def test_quanet_method():
+    try:
+        import quapy.classification.neural
+    except ModuleNotFoundError:
+        print('skipping QuaNet test due to missing torch package')
+        return
+
     dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
     dataset = Dataset(dataset.training.sampling(100, *dataset.training.prevalence()),
                       dataset.test.sampling(100, *dataset.test.prevalence()))
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..898ff46
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,164 @@
+from setuptools import setup, find_packages
+import pathlib
+
+here = pathlib.Path(__file__).parent.resolve()
+
+long_description = (here / 'README.md').read_text(encoding='utf-8')
+
+
+def get_version(rel_path):
+    """Return the quoted value from the first ``__version__`` line of ``here / rel_path``."""
+    init_content = (here / rel_path).read_text(encoding='utf-8')
+    for line in init_content.split('\n'):
+        if line.startswith('__version__'):
+            delim = '"' if '"' in line else "'"  # accept either quoting style
+            return line.split(delim)[1]
+    raise RuntimeError("Unable to find version string.")  # no line starts with __version__
+# Arguments marked as "Required" below must be included for upload to PyPI.
+# Fields marked as "Optional" may be commented out.
+
+setup(
+    # This is the name of your project. The first time you publish this
+    # package, this name will be registered for you. It will determine how
+    # users can install this project, e.g.:
+    #
+    # $ pip install sampleproject
+    #
+    # And where it will live on PyPI: https://pypi.org/project/sampleproject/
+    #
+    # There are some restrictions on what makes a valid project name
+    # specification here:
+    # https://packaging.python.org/specifications/core-metadata/#name
+    name='QuaPy',  # Required
+
+    # Versions should comply with PEP 440:
+    # https://www.python.org/dev/peps/pep-0440/
+    #
+    # For a discussion on single-sourcing the version across setup.py and the
+    # project code, see
+    # https://packaging.python.org/en/latest/single_source_version.html
+    version=get_version("quapy/__init__.py"),  # Required
+
+    # This is a one-line description or tagline of what your project does. This
+    # corresponds to the "Summary" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#summary
+    description='QuaPy: a framework for Quantification in Python',  # Optional
+
+    # This is an optional longer description of your project that represents
+    # the body of text which users will see when they visit PyPI.
+    #
+    # Often, this is the same as your README, so you can just read it in from
+    # that file directly (as we have already done above)
+    #
+    # This field corresponds to the "Description" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#description-optional
+    long_description=long_description,  # Optional
+
+    # Denotes that our long_description is in Markdown; valid values are
+    # text/plain, text/x-rst, and text/markdown
+    #
+    # Optional if long_description is written in reStructuredText (rst) but
+    # required for plain-text or Markdown; if unspecified, "applications should
+    # attempt to render [the long_description] as text/x-rst; charset=UTF-8 and
+    # fall back to text/plain if it is not valid rst" (see link below)
+    #
+    # This field corresponds to the "Description-Content-Type" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
+    long_description_content_type='text/markdown',  # Optional (see note above)
+
+    # This should be a valid link to your project's main homepage.
+    #
+    # This field corresponds to the "Home-Page" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#home-page-optional
+    url='https://github.com/HLT-ISTI/QuaPy',  # Optional
+
+    maintainer='Alejandro Moreo',
+
+    maintainer_email='alejandro.moreo@isti.cnr.it',
+
+    classifiers=[
+        'Development Status :: 4 - Beta',
+
+        'Intended Audience :: Developers',
+        'Intended Audience :: Science/Research',
+        'Programming Language :: Python',
+        'Topic :: Software Development',
+        'Topic :: Scientific/Engineering',
+
+        'License :: OSI Approved :: BSD License',
+
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3 :: Only',
+    ],
+
+    keywords='machine learning, quantification, classification, prevalence estimation, priors estimate',
+
+    # When your source code is in a subdirectory under the project root, e.g.
+    # `src/`, it is necessary to specify the `package_dir` argument.
+    #package_dir={'': 'src'},  # Optional
+
+    # You can just specify package directories manually here if your project is
+    # simple. Or you can use find_packages().
+    #
+    # Alternatively, if you just want to distribute a single Python file, use
+    # the `py_modules` argument instead as follows, which will expect a file
+    # called `my_module.py` to exist:
+    #
+    #   py_modules=["my_module"],
+    #
+    packages=find_packages(include=['quapy', 'quapy.*']),  # Required
+
+    python_requires='>=3.6, <4',
+
+    install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib'],
+
+    # List additional groups of dependencies here (e.g. development
+    # dependencies). Users will be able to install these using the "extras"
+    # syntax, for example:
+    #
+    #   $ pip install sampleproject[dev]
+    #
+    # Similar to `install_requires` above, these must be valid existing
+    # projects.
+    # extras_require={  # Optional
+    #     'dev': ['check-manifest'],
+    #     'test': ['coverage'],
+    # },
+
+    # If there are data files included in your packages that need to be
+    # installed, specify them here.
+    # package_data={  # Optional
+    #     'sample': ['package_data.dat'],
+    # },
+
+    # Although 'package_data' is the preferred approach, in some cases you may
+    # need to place data files outside of your packages. See:
+    # http://docs.python.org/distutils/setupscript.html#installing-additional-files
+    #
+    # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
+    # data_files=[('my_data', ['data/data_file'])],  # Optional
+
+    # To provide executable scripts, use entry points in preference to the
+    # "scripts" keyword. Entry points provide cross-platform support and allow
+    # `pip` to create the appropriate form of executable for the target
+    # platform.
+    #
+    # For example, the following would provide a command called `sample` which
+    # executes the function `main` from this package when invoked:
+    # entry_points={  # Optional
+    #     'console_scripts': [
+    #         'sample=sample:main',
+    #     ],
+    # },
+
+    project_urls={  # Optional
+        'Contributors': 'https://github.com/HLT-ISTI/QuaPy/graphs/contributors',
+        'Bug Reports': 'https://github.com/HLT-ISTI/QuaPy/issues',
+        'Documentation': 'https://github.com/HLT-ISTI/QuaPy/wiki',
+        'Source': 'https://github.com/HLT-ISTI/QuaPy/',
+    },
+)

From 29b9671fe14fe4e8f1df803d276cde72bf3626ee Mon Sep 17 00:00:00 2001
From: Andrea Esuli <andrea@esuli.it>
Date: Mon, 10 May 2021 13:40:40 +0200
Subject: [PATCH 10/10] install command in README.md

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index aea3000..9a64c4f 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,12 @@ used for evaluating quantification methods.
 QuaPy also integrates commonly used datasets and offers visualization tools 
 for facilitating the analysis and interpretation of results.
 
+### Installation
+
+```commandline
+pip install quapy
+```
+
 ## A quick example:
 
 The following script fetchs a Twitter dataset, trains and evaluates an