
testing baselines for lequa

This commit is contained in:
Alejandro Moreo Fernandez 2021-11-24 11:20:42 +01:00
parent 1a3755eb58
commit 7468519495
14 changed files with 508 additions and 148 deletions

View File

@ -1,11 +1,6 @@
2. tables?
3. fetch dataset (download, unzip, etc.)
4. model selection
5. plots
8. I'm not convinced that the samples (in the case where there is no ground truth) are read in random order
9. Experiment with dense vectors (e.g., PCA over tfidf)
10. If we change the format of the samples (e.g., from svmlight .txt files to PCA .dat files), things need to change
in the code; a glob(*.txt) is written in several places
11. Remove the categories as columns from the prevalence files
12. sample_size cannot be set to a non-integer in GridSearchQ with protocol="gen" (it could, but is not indicated in doc)
13. repair doc of GridSearchQ

View File

@ -2,13 +2,14 @@ import argparse
import pickle
from sklearn.linear_model import LogisticRegression as LR
from quapy.method.aggregative import *
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
import quapy.functional as F
from data import *
import os
import constants
# LeQua official baselines for task T1B (Multiclass/Vector)
# LeQua official baselines for task T1A (Binary/Vector) and T1B (Multiclass/Vector)
# =========================================================
def baselines():
@ -17,7 +18,8 @@ def baselines():
yield PCC(LR(n_jobs=-1)), "PCC"
yield PACC(LR(n_jobs=-1)), "PACC"
yield EMQ(CalibratedClassifierCV(LR(), n_jobs=-1)), "SLD"
yield HDy(LR(n_jobs=-1)) if args.task == 'T1A' else OneVsAll(HDy(LR()), n_jobs=-1), "HDy"
# yield HDy(LR(n_jobs=-1)) if args.task == 'T1A' else OneVsAll(HDy(LR()), n_jobs=-1), "HDy"
# yield MLPE(), "MLPE"
def main(args):
@ -30,7 +32,7 @@ def main(args):
qp.environ['SAMPLE_SIZE'] = constants.SAMPLE_SIZE[args.task]
train = LabelledCollection.load(path_train, load_binary_vectors)
train = LabelledCollection.load(path_train, load_vector_documents)
nF = train.instances.shape[1]
print(f'number of classes: {len(train.classes_)}')
@ -38,13 +40,19 @@ def main(args):
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'training matrix shape: {train.instances.shape}')
# param_grid = {
# 'C': np.logspace(-3, 3, 7),
# 'class_weight': ['balanced', None]
# }
param_grid = {
'C': np.logspace(-3,3,7),
'class_weight': ['balanced', None]
'C': [1],
'class_weight': ['balanced']
}
def gen_samples():
return gen_load_samples_T1(path_dev_vectors, nF, ground_truth_path=path_dev_prevs, return_id=False)
return gen_load_samples(path_dev_vectors, ground_truth_path=path_dev_prevs, return_id=False,
load_fn=load_vector_documents, nF=nF)
for quantifier, q_name in baselines():
print(f'{q_name}: Model selection')
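For context, the following is a hypothetical sketch (not the official baseline script) of how the pieces in this diff, a baseline quantifier from baselines(), the reduced param_grid, and the gen_samples() generator, might be plugged into GridSearchQ; arguments not shown in the diff (the error metric, refit, how sample_size is handled) are assumptions.

import quapy as qp
from quapy.model_selection import GridSearchQ

def model_select(quantifier, train, param_grid, gen_samples):
    # gen_samples is the callable defined above; GridSearchQ.fit accepts a Callable as val_split
    search = GridSearchQ(
        quantifier,
        param_grid,
        sample_size=qp.environ['SAMPLE_SIZE'],  # set earlier from constants.SAMPLE_SIZE[args.task]
        protocol='gen',                         # validation samples are produced by the generator
        error=qp.error.mae,                     # assumed error metric
        refit=False,                            # assumed
        verbose=True
    )
    search.fit(train, val_split=gen_samples)
    return search.best_model()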

View File

@ -13,7 +13,7 @@ SAMPLE_SIZE={
'T1A': T1A_SAMPLE_SIZE,
'T1B': T1B_SAMPLE_SIZE,
'T2A': T2A_SAMPLE_SIZE,
'T2A': T2B_SAMPLE_SIZE
'T2B': T2B_SAMPLE_SIZE
}
ERROR_TOL = 1E-3

View File

@ -12,17 +12,6 @@ from glob import glob
import constants
# def load_binary_raw_document(path):
# documents, labels = qp.data.from_text(path, verbose=0, class2int=True)
# labels = np.asarray(labels)
# labels[np.logical_or(labels == 1, labels == 2)] = 0
# labels[np.logical_or(labels == 4, labels == 5)] = 1
# return documents, labels
# def load_multiclass_raw_document(path):
# return qp.data.from_text(path, verbose=0, class2int=False)
def load_category_map(path):
cat2code = {}
with open(path, 'rt') as fin:
@ -33,7 +22,19 @@ def load_category_map(path):
return cat2code, code2cat
def load_binary_vectors(path, nF=None):
def load_raw_documents(path):
return qp.data.from_text(path, verbose=0, class2int=True)
def load_raw_unlabelled_documents(path, vectorizer=None):
with open(path, 'rt', encoding='utf-8') as file:
documents = [d.strip() for d in file.readlines()]
if vectorizer:
documents = vectorizer.transform(documents)
return documents, None
def load_vector_documents(path, nF=None):
X, y = sklearn.datasets.load_svmlight_file(path, n_features=nF)
y = y.astype(int)
return X, y
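The new load_vector_documents is a thin wrapper over sklearn's svmlight reader; a hypothetical call (the path and nF value are placeholders):

from data import load_vector_documents

# hypothetical sample file in svmlight format; nF fixes the feature-space dimensionality
X, y = load_vector_documents('dev_samples/0.txt', nF=30000)
print(X.shape)   # sparse matrix of shape (n_documents, nF)
print(y[:5])     # labels cast to int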
@ -53,13 +54,13 @@ def __gen_load_samples_without_groudtruth(path_dir:str, return_id:bool, load_fn,
yield (id, sample) if return_id else sample
def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
def gen_load_samples(path_dir:str, ground_truth_path:str = None, return_id=True, load_fn=load_vector_documents, **load_kwargs):
if ground_truth_path is None:
# the generator function returns tuples (filename:str, sample:csr_matrix)
gen_fn = __gen_load_samples_without_groudtruth(path_dir, return_id, load_binary_vectors, nF=nF)
# the generator function returns tuples (docid:str, sample:csr_matrix or str)
gen_fn = __gen_load_samples_without_groudtruth(path_dir, return_id, load_fn, **load_kwargs)
else:
# the generator function returns tuples (filename:str, sample:csr_matrix, prevalence:ndarray)
gen_fn = __gen_load_samples_with_groudtruth(path_dir, return_id, ground_truth_path, load_binary_vectors, nF=nF)
# the generator function returns tuples (docid:str, sample:csr_matrix or str, prevalence:ndarray)
gen_fn = __gen_load_samples_with_groudtruth(path_dir, return_id, ground_truth_path, load_fn, **load_kwargs)
for r in gen_fn:
yield r
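A hypothetical sketch of consuming the refactored generator (paths and nF are placeholders); only the unlabelled case is shown, since that is the one exercised by the prediction script below.

from data import gen_load_samples, load_vector_documents

# without ground truth, the generator yields (docid, sample) pairs by default
for docid, sample in gen_load_samples('dev_samples/', load_fn=load_vector_documents, nF=30000):
    print(docid, sample.shape)

# with ground_truth_path and return_id=False (as in the baselines script), each item
# presumably reduces to a (sample, prevalence) pair, suitable for GridSearchQ's 'gen' protocol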
@ -75,16 +76,6 @@ def genSVD_load_samples_T1(load_fn, path_dir:str, nF:int, ground_truth_path:str
yield r
def gen_load_samples_T2A(path_dir:str, ground_truth_path:str = None):
# for ... : yield
pass
def gen_load_samples_T2B(path_dir:str, ground_truth_path:str = None):
# for ... : yield
pass
class ResultSubmission:
def __init__(self):

View File

@ -5,7 +5,7 @@ import constants
import os
import pickle
from tqdm import tqdm
from data import gen_load_samples_T1
from data import gen_load_samples
from glob import glob
import constants
@ -27,7 +27,7 @@ def main(args):
# predictions
predictions = ResultSubmission()
for sampleid, sample in tqdm(gen_load_samples_T1(args.samples, args.nf), desc='predicting', total=nsamples):
for sampleid, sample in tqdm(gen_load_samples(args.samples, nF=args.nf), desc='predicting', total=nsamples):
predictions.add(sampleid, model.quantify(sample))
# saving

View File

@ -941,8 +941,6 @@
<li><a href="quapy.data.html#quapy.data.base.LabelledCollection.sampling_from_index">sampling_from_index() (quapy.data.base.LabelledCollection method)</a>
</li>
<li><a href="quapy.data.html#quapy.data.base.LabelledCollection.sampling_index">sampling_index() (quapy.data.base.LabelledCollection method)</a>
</li>
<li><a href="quapy.html#quapy.plot.save_or_show">save_or_show() (in module quapy.plot)</a>
</li>
<li><a href="quapy.html#quapy.util.save_text_file">save_text_file() (in module quapy.util)</a>
</li>

Binary file not shown.

View File

@ -721,12 +721,21 @@ being ignored, a TimeoutError exception is raised. If -1 (default) then no time
<dl class="py method">
<dt class="sig sig-object py" id="quapy.model_selection.GridSearchQ.best_model">
<span class="sig-name descname"><span class="pre">best_model</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.model_selection.GridSearchQ.best_model" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Returns the best model found after calling the <a class="reference internal" href="#quapy.model_selection.GridSearchQ.fit" title="quapy.model_selection.GridSearchQ.fit"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fit()</span></code></a> method, i.e., the one trained on the combination
of hyper-parameters that minimized the error function.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a trained quantifier</p>
</dd>
</dl>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="quapy.model_selection.GridSearchQ.classes_">
<em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">classes_</span></span><a class="headerlink" href="#quapy.model_selection.GridSearchQ.classes_" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Classes on which the quantifier has been trained.
:return: a ndarray of shape <cite>(n_classes)</cite> with the class identifiers</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.model_selection.GridSearchQ.fit">
@ -743,6 +752,9 @@ being ignored, a TimeoutError exception is raised. If -1 (default) then no time
a float in [0,1] indicating the proportion of labelled data to extract from the training set</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>self</p>
</dd>
</dl>
</dd></dl>
@ -763,11 +775,15 @@ a float in [0,1] indicating the proportion of labelled data to extract from the
<dl class="py method">
<dt class="sig sig-object py" id="quapy.model_selection.GridSearchQ.quantify">
<span class="sig-name descname"><span class="pre">quantify</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.model_selection.GridSearchQ.quantify" title="Permalink to this definition"></a></dt>
<dd><p>Estimate class prevalence values</p>
<dd><p>Estimate class prevalence values using the best model found after calling the <a class="reference internal" href="#quapy.model_selection.GridSearchQ.fit" title="quapy.model_selection.GridSearchQ.fit"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fit()</span></code></a> method.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>instances</strong> sample contanining the instances</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a ndarray of shape <cite>(n_classes)</cite> with class prevalence estimates as according to the best model found
by the model selection process.</p>
</dd>
</dl>
</dd></dl>
@ -790,7 +806,9 @@ a float in [0,1] indicating the proportion of labelled data to extract from the
<dl class="py function">
<dt class="sig sig-object py" id="quapy.plot.binary_bias_bins">
<span class="sig-prename descclassname"><span class="pre">quapy.plot.</span></span><span class="sig-name descname"><span class="pre">binary_bias_bins</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">method_names</span></em>, <em class="sig-param"><span class="pre">true_prevs</span></em>, <em class="sig-param"><span class="pre">estim_prevs</span></em>, <em class="sig-param"><span class="pre">pos_class=1</span></em>, <em class="sig-param"><span class="pre">title=None</span></em>, <em class="sig-param"><span class="pre">nbins=5</span></em>, <em class="sig-param"><span class="pre">colormap=&lt;matplotlib.colors.ListedColormap</span> <span class="pre">object&gt;</span></em>, <em class="sig-param"><span class="pre">vertical_xticks=False</span></em>, <em class="sig-param"><span class="pre">legend=True</span></em>, <em class="sig-param"><span class="pre">savepath=None</span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.plot.binary_bias_bins" title="Permalink to this definition"></a></dt>
<dd><dl class="field-list simple">
<dd><p>Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)
for different bins of (true) prevalence of the positive class, for each quantification method.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>method_names</strong> array-like with the method names for each experiment</p></li>
@ -802,7 +820,7 @@ for each experiment</p></li>
<li><p><strong>title</strong> the title to be displayed in the plot</p></li>
<li><p><strong>nbins</strong> number of bins</p></li>
<li><p><strong>colormap</strong> the matplotlib colormap to use (default cm.tab10)</p></li>
<li><p><strong>vertical_xticks</strong> </p></li>
<li><p><strong>vertical_xticks</strong> whether or not to add secondary grid (default is False)</p></li>
<li><p><strong>legend</strong> whether or not to display the legend (default is True)</p></li>
<li><p><strong>savepath</strong> path where to save the plot. If not indicated (as default), the plot is shown.</p></li>
</ul>
@ -865,17 +883,77 @@ listed in the legend and associated with matplotlib colors).</p></li>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.plot.brokenbar_supremacy_by_drift">
<span class="sig-prename descclassname"><span class="pre">quapy.plot.</span></span><span class="sig-name descname"><span class="pre">brokenbar_supremacy_by_drift</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">method_names</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">true_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">estim_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tr_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_bins</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">20</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">binning</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'isomerous'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">x_error</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_error</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ttest_alpha</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.005</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tail_density_threshold</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.005</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_order</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">savepath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.plot.brokenbar_supremacy_by_drift" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Displays (only) the top performing methods for different regions of the train-test shift in form of a broken
bar chart, in which each method has bars only for those regions in which either one of the following conditions
hold: (i) it is the best method (in average) for the bin, or (ii) it is not statistically significantly different
(in average) as according to a two-sided t-test on independent samples at confidence <cite>ttest_alpha</cite>.
The binning can be made “isometric” (same size), or “isomerous” (same number of experiments, the default). A second
plot is displayed on top, that displays the distribution of experiments for each bin (when binning=”isometric”) or
the percentiles points of the distribution (when binning=”isomerous”).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>method_names</strong> array-like with the method names for each experiment</p></li>
<li><p><strong>true_prevs</strong> array-like with the true prevalence values (each being a ndarray with n_classes components) for
each experiment</p></li>
<li><p><strong>estim_prevs</strong> array-like with the estimated prevalence values (each being a ndarray with n_classes components)
for each experiment</p></li>
<li><p><strong>tr_prevs</strong> training prevalence of each experiment</p></li>
<li><p><strong>n_bins</strong> number of bins in which the y-axis is to be divided (default is 20)</p></li>
<li><p><strong>binning</strong> type of binning, either “isomerous” (default) or “isometric”</p></li>
<li><p><strong>x_error</strong> a string representing the name of an error function (as defined in <cite>quapy.error</cite>) to be used for
measuring the amount of train-test shift (default is “ae”)</p></li>
<li><p><strong>y_error</strong> a string representing the name of an error function (as defined in <cite>quapy.error</cite>) to be used for
measuring the amount of error in the prevalence estimations (default is “ae”)</p></li>
<li><p><strong>ttest_alpha</strong> the confidence interval above which a p-value (two-sided t-test on independent samples) is
to be considered as an indicator that the two means are not statistically significantly different. Default is
0.005, meaning that a <cite>p-value &gt; 0.005</cite> indicates the two methods involved are to be considered similar</p></li>
<li><p><strong>tail_density_threshold</strong> sets a threshold on the density of experiments (over the total number of experiments)
below which a bin in the tail (i.e., the right-most ones) will be discarded. This is in order to avoid some
bins to be shown for train-test outliers.</p></li>
<li><p><strong>method_order</strong> if indicated (default is None), imposes the order in which the methods are processed (i.e.,
listed in the legend and associated with matplotlib colors).</p></li>
<li><p><strong>savepath</strong> path where to save the plot. If not indicated (as default), the plot is shown.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p></p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.plot.error_by_drift">
<span class="sig-prename descclassname"><span class="pre">quapy.plot.</span></span><span class="sig-name descname"><span class="pre">error_by_drift</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">method_names</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">true_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">estim_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tr_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_bins</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">20</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">show_std</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">show_density</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logscale</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">title</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'Quantification</span> <span class="pre">error</span> <span class="pre">as</span> <span class="pre">a</span> <span class="pre">function</span> <span class="pre">of</span> <span class="pre">distribution</span> <span class="pre">shift'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">savepath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vlines</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_order</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.plot.error_by_drift" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.plot.save_or_show">
<span class="sig-prename descclassname"><span class="pre">quapy.plot.</span></span><span class="sig-name descname"><span class="pre">save_or_show</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">savepath</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.plot.save_or_show" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.plot.</span></span><span class="sig-name descname"><span class="pre">error_by_drift</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">method_names</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">true_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">estim_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tr_prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_bins</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">20</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">show_std</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">show_density</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logscale</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">title</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'Quantification</span> <span class="pre">error</span> <span class="pre">as</span> <span class="pre">a</span> <span class="pre">function</span> <span class="pre">of</span> <span class="pre">distribution</span> <span class="pre">shift'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vlines</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_order</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">savepath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.plot.error_by_drift" title="Permalink to this definition"></a></dt>
<dd><p>Plots the error (along the x-axis, as measured in terms of <cite>error_name</cite>) as a function of the train-test shift
(along the y-axis, as measured in terms of <a class="reference internal" href="#quapy.error.ae" title="quapy.error.ae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.ae()</span></code></a>). This plot is useful especially for multiclass
problems, in which “diagonal plots” may be cumbersome, and in order to gain understanding about how methods
fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the
high-shift regime).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>method_names</strong> array-like with the method names for each experiment</p></li>
<li><p><strong>true_prevs</strong> array-like with the true prevalence values (each being a ndarray with n_classes components) for
each experiment</p></li>
<li><p><strong>estim_prevs</strong> array-like with the estimated prevalence values (each being a ndarray with n_classes components)
for each experiment</p></li>
<li><p><strong>tr_prevs</strong> training prevalence of each experiment</p></li>
<li><p><strong>n_bins</strong> number of bins in which the y-axis is to be divided (default is 20)</p></li>
<li><p><strong>error_name</strong> a string representing the name of an error function (as defined in <cite>quapy.error</cite>, default is “ae”)</p></li>
<li><p><strong>show_std</strong> whether or not to show standard deviations as color bands (default is False)</p></li>
<li><p><strong>show_density</strong> whether or not to display the distribution of experiments for each bin (default is True)</p></li>
<li><p><strong>logscale</strong> whether or not to log-scale the y-error measure (default is False)</p></li>
<li><p><strong>title</strong> title of the plot (default is “Quantification error as a function of distribution shift”)</p></li>
<li><p><strong>vlines</strong> array-like list of values (default is None). If indicated, highlights some regions of the space
using vertical dotted lines.</p></li>
<li><p><strong>method_order</strong> if indicated (default is None), imposes the order in which the methods are processed (i.e.,
listed in the legend and associated with matplotlib colors).</p></li>
<li><p><strong>savepath</strong> path where to save the plot. If not indicated (as default), the plot is shown.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</section>
<section id="module-quapy.util">
@ -884,82 +962,184 @@ listed in the legend and associated with matplotlib colors).</p></li>
<dt class="sig sig-object py" id="quapy.util.EarlyStop">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">EarlyStop</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">patience</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lower_is_better</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.EarlyStop" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>A class implementing the early-stopping condition typically used for training neural networks.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>patience</strong> the number of (consecutive) times that a monitored evaluation metric (typically obtaind in a</p>
</dd>
</dl>
<p>held-out validation split) can be found to be worse than the best one obtained so far, before flagging the
stopping condition. An instance of this class is <cite>callable</cite>, and is to be used as follows:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="n">patience</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="p">(</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="c1"># is True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="o">.</span><span class="n">STOP</span> <span class="c1"># is False (patience=1)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="o">.</span><span class="n">STOP</span> <span class="c1"># is True (patience=0)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="o">.</span><span class="n">best_epoch</span> <span class="c1"># is 1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">earlystop</span><span class="o">.</span><span class="n">best_score</span> <span class="c1"># is 0.7</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>lower_is_better</strong> if True (default) the metric is to be minimized.</p>
</dd>
<dt class="field-even">Variables</dt>
<dd class="field-even"><ul class="simple">
<li><p><strong>best_score</strong> keeps track of the best value seen so far</p></li>
<li><p><strong>best_epoch</strong> keeps track of the epoch in which the best score was set</p></li>
<li><p><strong>STOP</strong> flag (boolean) indicating the stopping condition</p></li>
<li><p><strong>IMPROVED</strong> flag (boolean) indicating whether there was an improvement in the last call</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.create_if_not_exist">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">create_if_not_exist</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.create_if_not_exist" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>An alias to <cite>os.makedirs(path, exist_ok=True)</cite> that also returns the path. This is useful in cases like, e.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">path</span> <span class="o">=</span> <span class="n">create_if_not_exist</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">dir</span><span class="p">,</span> <span class="n">subdir</span><span class="p">,</span> <span class="n">anotherdir</span><span class="p">))</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>path</strong> path to create</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>the path itself</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.create_parent_dir">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">create_parent_dir</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.create_parent_dir" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Creates the parent dir (if any) of a given path, if it does not exist. E.g., for <cite>./path/to/file.txt</cite>, the path <cite>./path/to</cite>
is created.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>path</strong> the path</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.download_file">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">download_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">archive_filename</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.download_file" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Downloads a file from a url</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>url</strong> the url</p></li>
<li><p><strong>archive_filename</strong> destination filename</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.download_file_if_not_exists">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">download_file_if_not_exists</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">archive_path</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.download_file_if_not_exists" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">download_file_if_not_exists</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">archive_filename</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.download_file_if_not_exists" title="Permalink to this definition"></a></dt>
<dd><p>Downloads a file (using <a class="reference internal" href="#quapy.util.download_file" title="quapy.util.download_file"><code class="xref py py-meth docutils literal notranslate"><span class="pre">download_file()</span></code></a>) if the file does not exist.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>url</strong> the url</p></li>
<li><p><strong>archive_filename</strong> destination filename</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.get_quapy_home">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">get_quapy_home</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.get_quapy_home" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a string representing the path</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.map_parallel">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">map_parallel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">func</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.map_parallel" title="Permalink to this definition"></a></dt>
<dd><p>Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then
func is applied in two parallel processes to args[0:50] and to args[50:99]</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>func</strong> function to be parallelized</p></li>
<li><p><strong>args</strong> array-like of arguments to be passed to the function in different parallel calls</p></li>
<li><p><strong>n_jobs</strong> the number of workers</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.parallel">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">parallel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">func</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.parallel" title="Permalink to this definition"></a></dt>
<dd><p>A wrapper of multiprocessing:
Parallel(n_jobs=n_jobs)(</p>
<blockquote>
<div><p>delayed(func)(args_i) for args_i in args</p>
</div></blockquote>
<p>)
that takes the quapy.environ variable as input silently</p>
<dd><p>A wrapper of multiprocessing:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)(</span>
<span class="gp">&gt;&gt;&gt; </span> <span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">args_i</span><span class="p">)</span> <span class="k">for</span> <span class="n">args_i</span> <span class="ow">in</span> <span class="n">args</span>
<span class="gp">&gt;&gt;&gt; </span><span class="p">)</span>
</pre></div>
</div>
<p>that takes the <cite>quapy.environ</cite> variable as input silently</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.pickled_resource">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">pickled_resource</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pickle_path</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">generation_func</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.pickled_resource" title="Permalink to this definition"></a></dt>
<dd><p>Allows for fast reuse of resources that are generated only once by calling generation_func(<a href="#id1"><span class="problematic" id="id2">*</span></a>args). The next times
this function is invoked, it loads the pickled resource. Example:
def some_array(n):</p>
<blockquote>
<div><p>return np.random.rand(n)</p>
</div></blockquote>
<p>pickled_resource(./my_array.pkl, some_array, 10) # the resource does not exist: it is created by some_array(10)
pickled_resource(./my_array.pkl, some_array, 10) # the resource exists: it is loaded from ./my_array.pkl
:param pickle_path: the path where to save (first time) and load (next times) the resource
:param generation_func: the function that generates the resource, in case it does not exist in pickle_path
:param args: any arg that generation_func uses for generating the resources
:return: the resource</p>
this function is invoked, it loads the pickled resource. Example:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">some_array</span><span class="p">(</span><span class="n">n</span><span class="p">):</span> <span class="c1"># a mock resource created with one parameter (`n`)</span>
<span class="gp">&gt;&gt;&gt; </span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pickled_resource</span><span class="p">(</span><span class="s1">&#39;./my_array.pkl&#39;</span><span class="p">,</span> <span class="n">some_array</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> <span class="c1"># the resource does not exist: it is created by calling some_array(10)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pickled_resource</span><span class="p">(</span><span class="s1">&#39;./my_array.pkl&#39;</span><span class="p">,</span> <span class="n">some_array</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> <span class="c1"># the resource exists; it is loaded from &#39;./my_array.pkl&#39;</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>pickle_path</strong> the path where to save (first time) and load (next times) the resource</p></li>
<li><p><strong>generation_func</strong> the function that generates the resource, in case it does not exist in pickle_path</p></li>
<li><p><strong>args</strong> any arg that generation_func uses for generating the resources</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>the resource</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.save_text_file">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">save_text_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">text</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.save_text_file" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Saves a text file to disk, given its full path, and creates the parent directory if missing.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> path where to save the text file.</p></li>
<li><p><strong>text</strong> text to save.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.temp_seed">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">temp_seed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">seed</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.temp_seed" title="Permalink to this definition"></a></dt>
<dd><p>Can be used in a “with” context to set a temporal seed without modifying the outer numpys current state. E.g.:
with temp_seed(random_seed):</p>
<blockquote>
<div><p># do any computation depending on np.random functionality</p>
</div></blockquote>
<dd><p>Can be used in a “with” context to set a temporary seed without modifying numpy’s current (outer) random state. E.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">temp_seed</span><span class="p">(</span><span class="n">random_seed</span><span class="p">):</span>
<span class="gp">&gt;&gt;&gt; </span> <span class="k">pass</span> <span class="c1"># do any computation depending on np.random functionality</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>seed</strong> the seed to set within the “with” context</p>

File diff suppressed because one or more lines are too long

View File

@ -18,13 +18,16 @@ def from_text(path, encoding='utf-8', verbose=1, class2int=True):
for line in file:
line = line.strip()
if line:
label, sentence = line.split('\t')
sentence = sentence.strip()
if class2int:
label = int(label)
if sentence:
all_sentences.append(sentence)
all_labels.append(label)
try:
label, sentence = line.split('\t')
sentence = sentence.strip()
if class2int:
label = int(label)
if sentence:
all_sentences.append(sentence)
all_labels.append(label)
except ValueError:
print(f'format error in {line}')
return all_sentences, all_labels
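As an illustration of the more tolerant parsing introduced by the try/except, a small self-contained example (the file content is made up):

import os, tempfile
import quapy as qp

content = "1\tgreat movie, loved it\nthis line has no tab-separated label\n0\tterrible film\n"
with tempfile.NamedTemporaryFile('wt', suffix='.txt', delete=False) as tmp:
    tmp.write(content)
    path = tmp.name

# the malformed second line is reported ("format error in ...") and skipped
sentences, labels = qp.data.from_text(path, verbose=0, class2int=True)
print(labels)   # [1, 0]
os.remove(path)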

View File

@ -5,6 +5,25 @@ import numpy as np
def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):
"""
Generates vectors of prevalence values artificially drawn from an exhaustive grid of prevalence values. The
number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example,
`n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only
valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each
valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be
implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained
to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to 1).
:param dimensions: the number of classes
:param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the grid
(default is 21)
:param repeat: number of copies for each valid prevalence vector (default is 1)
:param return_constrained_dim: set to True to return all dimensions, or to False (default) for omitting the
constrained dimension
:return: an ndarray of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape `(n, dimensions-1)`
if `return_constrained_dim=False`, where `n` is the number of valid combinations found in the grid multiplied
by `repeat`
"""
s = np.linspace(0., 1., n_prevalences, endpoint=True)
s = [s] * (dimensions - 1)
prevs = [p for p in itertools.product(*s, repeat=1) if sum(p)<=1]
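As a quick sanity check of the documented behaviour (the exact count assumes no grid points are lost to floating-point rounding at the simplex boundary):

import numpy as np
from quapy.functional import artificial_prevalence_sampling

grid = artificial_prevalence_sampling(dimensions=3, n_prevalences=11, return_constrained_dim=True)
print(grid.shape)                        # expected (66, 3): 11+10+...+1 valid combinations of the 0.1-step grid
print(np.allclose(grid.sum(axis=1), 1))  # True: only valid prevalence distributions are returned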
@ -18,9 +37,10 @@ def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, retur
def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
"""
Produces a uniformly separated values of prevalence. By default, produces an array 21 prevalences, with step 0.05
and with the limits smoothed, i.e.:
Produces uniformly separated prevalence values. By default, produces an array of 21 prevalence values, with
step 0.05 and with the limits smoothed, i.e.:
[0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]
:param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21)
:param repeat: number of times each prevalence is to be repeated (defaults to 1)
:param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1
@ -36,12 +56,20 @@ def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
return p
def prevalence_from_labels(labels, classes_):
def prevalence_from_labels(labels, classes):
"""
Computes the prevalence values from a vector of labels.
:param labels: array-like of shape `(n_instances)` with the label for each instance
:param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when
some classes have no examples.
:return: an ndarray of shape `(len(classes))` with the class prevalence values
"""
if labels.ndim != 1:
raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
unique, counts = np.unique(labels, return_counts=True)
by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
prevalences = np.asarray([by_class[class_] for class_ in classes_], dtype=np.float)
prevalences = np.asarray([by_class[class_] for class_ in classes], dtype=np.float)
prevalences /= prevalences.sum()
return prevalences
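A worked toy example of the renamed parameter; passing the full list of classes is what guarantees a slot (with prevalence 0) for classes that happen to have no examples in the sample:

import numpy as np
from quapy.functional import prevalence_from_labels

labels = np.array([0, 0, 0, 1])
print(prevalence_from_labels(labels, classes=[0, 1, 2]))   # [0.75 0.25 0.  ]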

View File

@ -151,9 +151,11 @@ class GridSearchQ(BaseQuantifier):
def fit(self, training: LabelledCollection, val_split: Union[LabelledCollection, float, Callable] = None):
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
the error metric.
:param training: the training set on which to optimize the hyperparameters
:param val_split: either a LabelledCollection on which to test the performance of the different settings, or
a float in [0,1] indicating the proportion of labelled data to extract from the training set
:return: self
"""
if val_split is None:
val_split = self.val_split
@ -213,15 +215,21 @@ class GridSearchQ(BaseQuantifier):
return self
def quantify(self, instances):
"""Estimate class prevalence values
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
:param instances: sample containing the instances
:return: a ndarray of shape `(n_classes)` with class prevalence estimates as according to the best model found
by the model selection process.
"""
assert hasattr(self, 'best_model_'), 'quantify called before fit'
return self.best_model().quantify(instances)
@property
def classes_(self):
"""
Classes on which the quantifier has been trained.
:return: a ndarray of shape `(n_classes)` with the class identifiers
"""
return self.best_model().classes_
def set_params(self, **parameters):
@ -240,6 +248,12 @@ class GridSearchQ(BaseQuantifier):
return self.param_grid
def best_model(self):
"""
Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination
of hyper-parameters that minimized the error function.
:return: a trained quantifier
"""
if hasattr(self, 'best_model_'):
return self.best_model_
raise ValueError('best_model called before fit')
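A hypothetical end-to-end usage of GridSearchQ consistent with the docstrings added here; the dataset, hyper-parameter grid and error metric are placeholders.

import numpy as np
import quapy as qp
from quapy.method.aggregative import PACC
from quapy.model_selection import GridSearchQ
from sklearn.linear_model import LogisticRegression

qp.environ['SAMPLE_SIZE'] = 100
data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)   # placeholder dataset

search = GridSearchQ(
    PACC(LogisticRegression()),
    param_grid={'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]},
    sample_size=100,
    error=qp.error.mae,
    refit=True,       # assumed
    verbose=True
).fit(data.training, val_split=0.4)     # fit returns self

best = search.best_model()              # the quantifier trained with the best hyper-parameters
estim_prev = search.quantify(data.test.instances)
print(search.classes_, estim_prev)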

View File

@ -82,7 +82,7 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
bbox_to_anchor=(1, -0.5),
ncol=(len(method_names)+1)//2)
save_or_show(savepath)
_save_or_show(savepath)
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
@ -116,12 +116,14 @@ def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title
plt.xticks(rotation=45)
ax.set(ylabel='error bias', title=title)
save_or_show(savepath)
_save_or_show(savepath)
def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=cm.tab10,
vertical_xticks=False, legend=True, savepath=None):
"""
Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)
for different bins of (true) prevalence of the positive class, for each quantification method.
:param method_names: array-like with the method names for each experiment
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
@ -132,7 +134,7 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N
:param title: the title to be displayed in the plot
:param nbins: number of bins
:param colormap: the matplotlib colormap to use (default cm.tab10)
:param vertical_xticks:
:param vertical_xticks: whether or not to add secondary grid (default is False)
:param legend: whether or not to display the legend (default is True)
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
"""
@ -202,39 +204,44 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N
# x-axis and y-axis labels and limits
ax.set(xlabel='prevalence', ylabel='error bias', title=title)
# ax.set_ylim(-1, 1)
ax.set_xlim(0, 1)
save_or_show(savepath)
_save_or_show(savepath)
def _merge(method_names, true_prevs, estim_prevs):
ndims = true_prevs[0].shape[1]
data = defaultdict(lambda: {'true': np.empty(shape=(0, ndims)), 'estim': np.empty(shape=(0, ndims))})
method_order=[]
for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
data[method]['true'] = np.concatenate([data[method]['true'], true_prev])
data[method]['estim'] = np.concatenate([data[method]['estim'], estim_prev])
if method not in method_order:
method_order.append(method)
true_prevs_ = [data[m]['true'] for m in method_order]
estim_prevs_ = [data[m]['estim'] for m in method_order]
return method_order, true_prevs_, estim_prevs_
def _set_colors(ax, n_methods):
NUM_COLORS = n_methods
cm = plt.get_cmap('tab20')
ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, error_name='ae', show_std=False,
def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
n_bins=20, error_name='ae', show_std=False,
show_density=True,
logscale=False,
title=f'Quantification error as a function of distribution shift',
savepath=None,
vlines=None,
method_order=None):
method_order=None,
savepath=None):
"""
Plots the error (along the x-axis, as measured in terms of `error_name`) as a function of the train-test shift
(along the y-axis, as measured in terms of :meth:`quapy.error.ae`). This plot is useful especially for multiclass
problems, in which "diagonal plots" may be cumbersome, and in order to gain understanding about how methods
fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the
high-shift regime).
:param method_names: array-like with the method names for each experiment
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
each experiment
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
for each experiment
:param tr_prevs: training prevalence of each experiment
:param n_bins: number of bins in which the y-axis is to be divided (default is 20)
:param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
:param show_std: whether or not to show standard deviations as color bands (default is False)
:param show_density: whether or not to display the distribution of experiments for each bin (default is True)
:param logscale: whether or not to log-scale the y-error measure (default is False)
:param title: title of the plot (default is "Quantification error as a function of distribution shift")
:param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space
using vertical dotted lines.
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
listed in the legend and associated with matplotlib colors).
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
"""
fig, ax = plt.subplots()
ax.grid()
@ -245,7 +252,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
data = __join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
_set_colors(ax, n_methods=len(method_order))
@ -302,13 +309,46 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
ax.set_xlim(0, max_x)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
save_or_show(savepath)
_save_or_show(savepath)
def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, binning='isomerous',
def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
n_bins=20, binning='isomerous',
x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005,
method_order=None,
savepath=None):
"""
Displays (only) the top-performing methods for different regions of the train-test shift in the form of a broken
bar chart, in which each method has bars only for those regions in which either of the following conditions
holds: (i) it is the best method (on average) for the bin, or (ii) it is not statistically significantly different
(on average) from the best one, according to a two-sided t-test on independent samples at confidence `ttest_alpha`.
The binning can be made "isometric" (same size) or "isomerous" (same number of experiments -- default). A second
plot is displayed on top, showing the distribution of experiments for each bin (when binning="isometric") or
the percentile points of the distribution (when binning="isomerous"). A sketch of the two binning strategies is
given below.
:param method_names: array-like with the method names for each experiment
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
each experiment
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
for each experiment
:param tr_prevs: training prevalence of each experiment
:param n_bins: number of bins in which the train-test shift (x-axis) is to be divided (default is 20)
:param binning: type of binning, either "isomerous" (default) or "isometric"
:param x_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for
measuring the amount of train-test shift (default is "ae")
:param y_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for
measuring the amount of error in the prevalence estimations (default is "ae")
:param ttest_alpha: the significance threshold above which a p-value (two-sided t-test on independent samples) is
taken to indicate that the two means are not statistically significantly different. Default is
0.005, meaning that a `p-value > 0.005` indicates the two methods involved are to be considered similar
:param tail_density_threshold: sets a threshold on the density of experiments (over the total number of experiments)
below which a bin in the tail (i.e., the right-most ones) will be discarded. This avoids displaying bins that
only contain train-test outliers.
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
listed in the legend and associated with matplotlib colors).
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
:return:
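A rough sketch of how the two binning strategies could partition the observed shift values (this mirrors the
idea only, not necessarily the exact implementation; `x` is a placeholder array of train-test shifts):
>>> import numpy as np
>>> x = np.random.rand(1000)  # hypothetical train-test shift values, one per experiment
>>> n_bins = 20
>>> isometric_edges = np.linspace(x.min(), x.max(), n_bins + 1)  # bins of equal width
>>> isomerous_edges = np.percentile(x, np.linspace(0, 100, n_bins + 1))  # bins with (roughly) equally many experiments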
"""
assert binning in ['isomerous', 'isometric'], 'unknown binning type; valid types are "isomerous" and "isometric"'
x_error = getattr(qp.error, x_error)
@ -317,7 +357,7 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
data = __join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
if binning == 'isomerous':
# take bins containing the same amount of examples
@ -449,10 +489,30 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
ax.get_xaxis().set_visible(False)
plt.subplots_adjust(wspace=0, hspace=0)
save_or_show(savepath)
_save_or_show(savepath)
def save_or_show(savepath):
def _merge(method_names, true_prevs, estim_prevs):
ndims = true_prevs[0].shape[1]
data = defaultdict(lambda: {'true': np.empty(shape=(0, ndims)), 'estim': np.empty(shape=(0, ndims))})
method_order=[]
for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
data[method]['true'] = np.concatenate([data[method]['true'], true_prev])
data[method]['estim'] = np.concatenate([data[method]['estim'], estim_prev])
if method not in method_order:
method_order.append(method)
true_prevs_ = [data[m]['true'] for m in method_order]
estim_prevs_ = [data[m]['estim'] for m in method_order]
return method_order, true_prevs_, estim_prevs_
def _set_colors(ax, n_methods):
NUM_COLORS = n_methods
cm = plt.get_cmap('tab20')
ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
def _save_or_show(savepath):
# if savepath is specified, then saves the plot in that path; otherwise the plot is shown
if savepath is not None:
qp.util.create_parent_dir(savepath)
@ -462,7 +522,7 @@ def save_or_show(savepath):
plt.show()
def __join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order):
def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order):
data = defaultdict(lambda: {'x': np.empty(shape=(0)), 'y': np.empty(shape=(0))})
if method_order is None:

View File

@ -23,6 +23,10 @@ def map_parallel(func, args, n_jobs):
"""
Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then
func is applied in two parallel processes to args[0:50] and to args[50:99]
:param func: function to be parallelized
:param args: array-like of arguments to be passed to the function in different parallel calls
:param n_jobs: the number of workers
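A hypothetical example (`process_chunk` is a placeholder function; it receives a contiguous slice of `args`):
>>> def process_chunk(chunk):
>>>     return [str(x) for x in chunk]
>>> map_parallel(process_chunk, np.arange(99), n_jobs=2)  # process_chunk is called on args[0:50] and on args[50:99]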
"""
args = np.asarray(args)
slices = _get_parallel_slices(len(args), n_jobs)
@ -35,10 +39,12 @@ def map_parallel(func, args, n_jobs):
def parallel(func, args, n_jobs):
"""
A wrapper of multiprocessing:
Parallel(n_jobs=n_jobs)(
delayed(func)(args_i) for args_i in args
)
that takes the quapy.environ variable as input silently
>>> Parallel(n_jobs=n_jobs)(
>>> delayed(func)(args_i) for args_i in args
>>> )
that takes the `quapy.environ` variable as input silently
"""
def func_dec(environ, *args):
qp.environ = environ
@ -52,8 +58,10 @@ def parallel(func, args, n_jobs):
def temp_seed(seed):
"""
Can be used in a "with" context to set a temporary seed without modifying numpy's outer random state. E.g.:
with temp_seed(random_seed):
# do any computation depending on np.random functionality
>>> with temp_seed(random_seed):
>>> pass # do any computation depending on np.random functionality
:param seed: the seed to set within the "with" context
"""
state = np.random.get_state()
@ -65,6 +73,12 @@ def temp_seed(seed):
def download_file(url, archive_filename):
"""
Downloads a file from a url
:param url: the url
:param archive_filename: destination filename
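A hypothetical example (the URL and destination path are placeholders):
>>> download_file('https://example.com/files/archive.zip', './downloads/archive.zip')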
"""
def progress(blocknum, bs, size):
total_sz_mb = '%.2f MB' % (size / 1e6)
current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6)
@ -74,31 +88,62 @@ def download_file(url, archive_filename):
print("")
def download_file_if_not_exists(url, archive_path):
if os.path.exists(archive_path):
def download_file_if_not_exists(url, archive_filename):
"""
Downloads a file (using :meth:`download_file`) if it does not already exist.
:param url: the url
:param archive_filename: destination filename
"""
if os.path.exists(archive_filename):
return
create_if_not_exist(os.path.dirname(archive_path))
download_file(url,archive_path)
create_if_not_exist(os.path.dirname(archive_filename))
download_file(url, archive_filename)
def create_if_not_exist(path):
"""
An alias to `os.makedirs(path, exist_ok=True)` that also returns the path. This is useful, e.g., in:
>>> path = create_if_not_exist(os.path.join(dir, subdir, anotherdir))
:param path: path to create
:return: the path itself
"""
os.makedirs(path, exist_ok=True)
return path
def get_quapy_home():
"""
Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets.
:return: a string representing the path
"""
home = os.path.join(str(Path.home()), 'quapy_data')
os.makedirs(home, exist_ok=True)
return home
def create_parent_dir(path):
"""
Creates the parent directory (if any) of a given path, if it does not exist. E.g., for `./path/to/file.txt`, the directory `./path/to`
is created.
:param path: the path
"""
parentdir = Path(path).parent
if parentdir:
os.makedirs(parentdir, exist_ok=True)
def save_text_file(path, text):
"""
Saves a text file to disk, given its full path, and creates the parent directory if missing.
:param path: path where to save the text file.
:param text: text to save.
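A hypothetical example (the path and content are placeholders):
>>> save_text_file('./outputs/notes.txt', 'some text to be persisted')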
"""
create_parent_dir(path)
with open(path, 'wt') as fout:
fout.write(text)
@ -108,10 +153,12 @@ def pickled_resource(pickle_path:str, generation_func:callable, *args):
"""
Allows for fast reuse of resources that are generated only once by calling generation_func(*args). On subsequent
invocations, this function loads the pickled resource. Example:
def some_array(n):
return np.random.rand(n)
pickled_resource('./my_array.pkl', some_array, 10) # the resource does not exist: it is created by some_array(10)
pickled_resource('./my_array.pkl', some_array, 10) # the resource exists: it is loaded from './my_array.pkl'
>>> def some_array(n): # a mock resource created with one parameter (`n`)
>>> return np.random.rand(n)
>>> pickled_resource('./my_array.pkl', some_array, 10) # the resource does not exist: it is created by calling some_array(10)
>>> pickled_resource('./my_array.pkl', some_array, 10) # the resource exists; it is loaded from './my_array.pkl'
:param pickle_path: the path where to save (first time) and load (next times) the resource
:param generation_func: the function that generates the resource, in case it does not exist in pickle_path
:param args: any arg that generation_func uses for generating the resources
@ -130,8 +177,36 @@ def pickled_resource(pickle_path:str, generation_func:callable, *args):
class EarlyStop:
"""
A class implementing the early-stopping condition typically used for training neural networks.
:param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtained in a
held-out validation split) can be found to be worse than the best one obtained so far, before flagging the
stopping condition. An instance of this class is `callable`, and is to be used as follows:
>>> earlystop = EarlyStop(patience=2, lower_is_better=True)
>>> earlystop(0.9, epoch=0)
>>> earlystop(0.7, epoch=1)
>>> earlystop.IMPROVED # is True
>>> earlystop(1.0, epoch=2)
>>> earlystop.STOP # is False (patience=1)
>>> earlystop(1.0, epoch=3)
>>> earlystop.STOP # is True (patience=0)
>>> earlystop.best_epoch # is 1
>>> earlystop.best_score # is 0.7
:param lower_is_better: if True (default) the metric is to be minimized.
:ivar best_score: keeps track of the best value seen so far
:ivar best_epoch: keeps track of the epoch in which the best score was set
:ivar STOP: flag (boolean) indicating the stopping condition
:ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call
"""
def __init__(self, patience, lower_is_better=True):
self.PATIENCE_LIMIT = patience
self.better = lambda a,b: a<b if lower_is_better else a>b
self.patience = patience
@ -141,6 +216,14 @@ class EarlyStop:
self.IMPROVED = False
def __call__(self, watch_score, epoch):
"""
Commits the new score found in epoch `epoch`. If the score improves over the best score found so far, then
the patience counter is reset. Otherwise, the patience counter is decreased and, if it reaches 0,
the flag STOP is set to True.
:param watch_score: the new score
:param epoch: the current epoch
"""
self.IMPROVED = (self.best_score is None or self.better(watch_score, self.best_score))
if self.IMPROVED:
self.best_score = watch_score