From d1b449d2e9c7657b02fc601392723e5e0907f335 Mon Sep 17 00:00:00 2001
From: Alex Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 7 Jan 2021 17:58:48 +0100
Subject: [PATCH] plot functionality added

---
 plot_example.py             |  48 +++++++++
 quapy/__init__.py           |   4 +-
 quapy/evaluation.py         |   4 +-
 quapy/method/__init__.py    |  14 +--
 quapy/method/aggregative.py |  70 ++++++-------
 quapy/method/base.py        |  12 +++
 quapy/method/neural.py      |  10 +-
 quapy/plot.py               | 202 ++++++++++++++++++++++++++++++++++++
 quapy/util.py               |   4 +
 9 files changed, 318 insertions(+), 50 deletions(-)
 create mode 100644 plot_example.py
 create mode 100644 quapy/plot.py

diff --git a/plot_example.py b/plot_example.py
new file mode 100644
index 0000000..346455e
--- /dev/null
+++ b/plot_example.py
@@ -0,0 +1,48 @@
+from sklearn.model_selection import GridSearchCV
+import numpy as np
+import quapy as qp
+from sklearn.linear_model import LogisticRegression
+
+sample_size = 500
+qp.environ['SAMPLE_SIZE'] = sample_size
+
+
+
+def gen_data():
+
+    data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
+
+    models = [
+        qp.method.aggregative.CC,
+        qp.method.aggregative.ACC,
+        qp.method.aggregative.PCC,
+        qp.method.aggregative.PACC,
+        qp.method.aggregative.HDy,
+        qp.method.aggregative.EMQ,
+        qp.method.meta.ECC,
+        qp.method.meta.EACC,
+        qp.method.meta.EHDy,
+    ]
+
+    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
+    for Quantifier in models:
+        print(f'training {Quantifier.__name__}')
+        lr = LogisticRegression(max_iter=1000, class_weight='balanced')
+        # lr = GridSearchCV(lr, param_grid={'C':np.logspace(-3,3,7)}, n_jobs=-1)
+        model = Quantifier(lr).fit(data.training)
+        true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(
+            model, data.test, sample_size, n_repetitions=20, n_prevpoints=11)
+
+        method_names.append(Quantifier.__name__)
+        true_prevs.append(true_prev)
+        estim_prevs.append(estim_prev)
+        tr_prevs.append(data.training.prevalence())
+
+    return method_names, true_prevs, estim_prevs, tr_prevs
+
+method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('./plots/plot_data.pkl', gen_data)
+
+qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=11, savepath='./plots/err_drift.png')
+qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots/bin_diag.png')
+qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
+qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, nbins=11, savepath='./plots/bin_bias_bin.png')
diff --git a/quapy/__init__.py b/quapy/__init__.py
index ac09f24..a2f98fd 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -4,6 +4,8 @@ from . import functional
 from . import method
 from . import data
 from . import evaluation
+from . import plot
+from . import util
 from method.aggregative import isaggregative, isprobabilistic
 
 
@@ -17,4 +19,4 @@ environ = {
 
 
 def isbinary(x):
-    return data.isbinary(x) or method.aggregative.isbinary(x)
\ No newline at end of file
+    return data.isbinary(x) or method.isbinary(x)
\ No newline at end of file
diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index 4c56b1e..3fc3907 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -30,9 +30,9 @@ def artificial_sampling_prediction(
     :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
     any other random process.
     :param verbose: if True, shows a progress bar
-    :return: two ndarrays of [m,n] with m the number of samples (n_prevpoints*n_repetitions) and n the
+    :return: two ndarrays of shape (m,n) with m the number of samples (n_prevpoints*n_repetitions) and n the
      number of classes. The first one contains the true prevalences for the samples generated while the second one
-     containing the the prevalences estimations
+     contains the the prevalence estimations
     """
 
     with temp_seed(random_seed):
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
index f6bc8d5..47d2b3a 100644
--- a/quapy/method/__init__.py
+++ b/quapy/method/__init__.py
@@ -5,13 +5,13 @@ from . import meta
 
 
 AGGREGATIVE_METHODS = {
-    aggregative.ClassifyAndCount,
-    aggregative.AdjustedClassifyAndCount,
-    aggregative.ProbabilisticClassifyAndCount,
-    aggregative.ProbabilisticAdjustedClassifyAndCount,
-    aggregative.ExplicitLossMinimisation,
-    aggregative.ExpectationMaximizationQuantifier,
-    aggregative.HellingerDistanceY
+    aggregative.CC,
+    aggregative.ACC,
+    aggregative.PCC,
+    aggregative.PACC,
+    aggregative.ELM,
+    aggregative.EMQ,
+    aggregative.HDy
 }
 
 NON_AGGREGATIVE_METHODS = {
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index c973613..2613d8e 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -1,5 +1,6 @@
 import numpy as np
 from copy import deepcopy
+from sklearn.base import BaseEstimator, clone
 import functional as F
 import error
 from method.base import BaseQuantifier, BinaryQuantifier
@@ -130,13 +131,13 @@ def training_helper(learner,
 
 # Methods
 # ------------------------------------
-class ClassifyAndCount(AggregativeQuantifier):
+class CC(AggregativeQuantifier):
     """
     The most basic Quantification method. One that simply classifies all instances and countes how many have been
     attributed each of the classes in order to compute class prevalence estimates.
     """
 
-    def __init__(self, learner):
+    def __init__(self, learner:BaseEstimator):
         self.learner = learner
 
     def fit(self, data: LabelledCollection, fit_learner=True):
@@ -153,9 +154,9 @@ class ClassifyAndCount(AggregativeQuantifier):
         return F.prevalence_from_labels(classif_predictions, self.n_classes)
 
 
-class AdjustedClassifyAndCount(AggregativeQuantifier):
+class ACC(AggregativeQuantifier):
 
-    def __init__(self, learner):
+    def __init__(self, learner:BaseEstimator):
         self.learner = learner
 
     def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3):
@@ -169,7 +170,7 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
         :return: self
         """
         self.learner, validation = training_helper(self.learner, data, fit_learner, val_split=val_split)
-        self.cc = ClassifyAndCount(self.learner)
+        self.cc = CC(self.learner)
         y_ = self.classify(validation.instances)
         y  = validation.labels
         # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
@@ -182,7 +183,7 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
 
     def aggregate(self, classif_predictions):
         prevs_estim = self.cc.aggregate(classif_predictions)
-        return AdjustedClassifyAndCount.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
+        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
 
     @classmethod
     def solve_adjustment(cls, PteCondEstim, prevs_estim):
@@ -198,8 +199,8 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
         return adjusted_prevs
 
 
-class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
-    def __init__(self, learner):
+class PCC(AggregativeProbabilisticQuantifier):
+    def __init__(self, learner:BaseEstimator):
         self.learner = learner
 
     def fit(self, data : LabelledCollection, fit_learner=True):
@@ -210,9 +211,9 @@ class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
         return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
 
 
-class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
+class PACC(AggregativeProbabilisticQuantifier):
 
-    def __init__(self, learner):
+    def __init__(self, learner:BaseEstimator):
         self.learner = learner
 
     def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3):
@@ -228,7 +229,7 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
         self.learner, validation = training_helper(
             self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split
         )
-        self.pcc = ProbabilisticClassifyAndCount(self.learner)
+        self.pcc = PCC(self.learner)
         y_ = self.soft_classify(validation.instances)
         y  = validation.labels
         confusion = np.empty(shape=(data.n_classes, data.n_classes))
@@ -246,7 +247,7 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
 
     def aggregate(self, classif_posteriors):
         prevs_estim = self.pcc.aggregate(classif_posteriors)
-        return AdjustedClassifyAndCount.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
+        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
 
     def classify(self, data):
         return self.pcc.classify(data)
@@ -255,12 +256,12 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
         return self.pcc.posterior_probabilities(data)
 
 
-class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
+class EMQ(AggregativeProbabilisticQuantifier):
 
     MAX_ITER = 1000
     EPSILON = 1e-4
 
-    def __init__(self, learner):
+    def __init__(self, learner:BaseEstimator):
         self.learner = learner
 
     def fit(self, data: LabelledCollection, fit_learner=True):
@@ -279,7 +280,7 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
 
         s, converged = 0, False
         qs_prev_ = None
-        while not converged and s < ExpectationMaximizationQuantifier.MAX_ITER:
+        while not converged and s < EMQ.MAX_ITER:
             # E-step: ps is Ps(y=+1|xi)
             ps_unnormalized = (qs / Ptr) * Px
             ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1,1)
@@ -299,14 +300,14 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
         return qs
 
 
-class HellingerDistanceY(AggregativeProbabilisticQuantifier, BinaryQuantifier):
+class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
     """
     Implementation of the method based on the Hellinger Distance y (HDy) proposed by
     González-Castro, V., Alaiz-Rodrı́guez, R., and Alegre, E. (2013). Class distribution
     estimation based on the Hellinger distance. Information Sciences, 218:146–164.
     """
 
-    def __init__(self, learner):
+    def __init__(self, learner:BaseEstimator):
         self.learner = learner
 
     def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3):
@@ -353,7 +354,7 @@ class HellingerDistanceY(AggregativeProbabilisticQuantifier, BinaryQuantifier):
         return np.asarray([1-pos_class_prev, pos_class_prev])
 
 
-class ExplicitLossMinimisation(AggregativeQuantifier, BinaryQuantifier):
+class ELM(AggregativeQuantifier, BinaryQuantifier):
 
     def __init__(self, svmperf_base, loss, **kwargs):
         self.svmperf_base = svmperf_base
@@ -374,38 +375,38 @@ class ExplicitLossMinimisation(AggregativeQuantifier, BinaryQuantifier):
 
 
 
-class SVMQ(ExplicitLossMinimisation):
+class SVMQ(ELM):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
 
 
-class SVMKLD(ExplicitLossMinimisation):
+class SVMKLD(ELM):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
 
 
-class SVMNKLD(ExplicitLossMinimisation):
+class SVMNKLD(ELM):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
 
 
-class SVMAE(ExplicitLossMinimisation):
+class SVMAE(ELM):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
 
 
-class SVMRAE(ExplicitLossMinimisation):
+class SVMRAE(ELM):
     def __init__(self, svmperf_base, **kwargs):
         super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
 
 
-CC = ClassifyAndCount
-ACC = AdjustedClassifyAndCount
-PCC = ProbabilisticClassifyAndCount
-PACC = ProbabilisticAdjustedClassifyAndCount
-ELM = ExplicitLossMinimisation
-EMQ = ExpectationMaximizationQuantifier
-HDy = HellingerDistanceY
+ClassifyAndCount = CC
+AdjustedClassifyAndCount = ACC
+ProbabilisticClassifyAndCount = PCC
+ProbabilisticAdjustedClassifyAndCount = PACC
+ExplicitLossMinimisation = ELM
+ExpectationMaximizationQuantifier = EMQ
+HellingerDistanceY = HDy
 
 
 class OneVsAll(AggregativeQuantifier):
@@ -414,7 +415,7 @@ class OneVsAll(AggregativeQuantifier):
     quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1.
     This variant was used, along with the ExplicitLossMinimization quantifier in
     Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
-    Social Network Analysis and Mining6(19), 1–22 (2016)
+    Social Network Analysis and Mining 6(19), 1–22 (2016)
     """
 
     def __init__(self, binary_quantifier, n_jobs=-1):
@@ -484,15 +485,14 @@ class OneVsAll(AggregativeQuantifier):
         self.dict_binary_quantifiers[c].fit(bindata)
 
 
-def isaggregative(model):
+def isaggregative(model:BaseQuantifier):
     return isinstance(model, AggregativeQuantifier)
 
 
-def isprobabilistic(model):
+def isprobabilistic(model:BaseQuantifier):
     return isinstance(model, AggregativeProbabilisticQuantifier)
 
 
-def isbinary(model):
-    return isinstance(model, BinaryQuantifier)
+
 
 
diff --git a/quapy/method/base.py b/quapy/method/base.py
index 6134dea..2627b87 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -20,12 +20,24 @@ class BaseQuantifier(metaclass=ABCMeta):
     @abstractmethod
     def get_params(self, deep=True): ...
 
+    @property
+    def binary(self):
+        return False
+
 
 class BinaryQuantifier(BaseQuantifier):
     def _check_binary(self, data: LabelledCollection, quantifier_name):
         assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \
                             f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.'
 
+    @property
+    def binary(self):
+        return True
+
+
+def isbinary(model:BaseQuantifier):
+    return model.binary
+
 
 # class OneVsAll(AggregativeQuantifier):
 #     """
diff --git a/quapy/method/neural.py b/quapy/method/neural.py
index da4b35c..9adf175 100644
--- a/quapy/method/neural.py
+++ b/quapy/method/neural.py
@@ -76,11 +76,11 @@ class QuaNetTrainer(BaseQuantifier):
         self.tr_prev = data.prevalence()
 
         self.quantifiers = {
-            'cc': ClassifyAndCount(self.learner).fit(data, fit_learner=False),
-            'acc': AdjustedClassifyAndCount(self.learner).fit(data, fit_learner=False),
-            'pcc': ProbabilisticClassifyAndCount(self.learner).fit(data, fit_learner=False),
-            'pacc': ProbabilisticAdjustedClassifyAndCount(self.learner).fit(data, fit_learner=False),
-            'emq': ExpectationMaximizationQuantifier(self.learner).fit(data, fit_learner=False),
+            'cc': CC(self.learner).fit(data, fit_learner=False),
+            'acc': ACC(self.learner).fit(data, fit_learner=False),
+            'pcc': PCC(self.learner).fit(data, fit_learner=False),
+            'pacc': PACC(self.learner).fit(data, fit_learner=False),
+            'emq': EMQ(self.learner).fit(data, fit_learner=False),
         }
 
         self.status = {
diff --git a/quapy/plot.py b/quapy/plot.py
new file mode 100644
index 0000000..2b9375b
--- /dev/null
+++ b/quapy/plot.py
@@ -0,0 +1,202 @@
+from collections import defaultdict
+import matplotlib.pyplot as plt
+from matplotlib import cm
+import numpy as np
+import quapy as qp
+
+
+plt.rcParams['figure.figsize'] = [12, 8]
+plt.rcParams['figure.dpi'] = 200
+
+
+def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
+    fig, ax = plt.subplots()
+    ax.set_aspect('equal')
+    ax.grid()
+    ax.plot([0, 1], [0, 1], '--k', label='ideal', zorder=1)
+
+    for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
+        true_prev = true_prev[:,pos_class]
+        estim_prev = estim_prev[:,pos_class]
+
+        x_ticks = np.unique(true_prev)
+        x_ticks.sort()
+        y_ave = np.asarray([estim_prev[true_prev == x].mean() for x in x_ticks])
+        y_std = np.asarray([estim_prev[true_prev == x].std() for x in x_ticks])
+
+        ax.errorbar(x_ticks, y_ave, fmt='-', marker='o', label=method, markersize=3, zorder=2)
+        ax.fill_between(x_ticks, y_ave - y_std, y_ave + y_std, alpha=0.25)
+
+    ax.set(xlabel='true prevalence', ylabel='estimated prevalence', title=title)
+    ax.set_ylim(0, 1)
+    ax.set_xlim(0, 1)
+
+    box = ax.get_position()
+    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+
+    save_or_show(savepath)
+
+
+def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
+    fig, ax = plt.subplots()
+    ax.grid()
+
+    data, labels = [], []
+    for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
+        true_prev = true_prev[:,pos_class]
+        estim_prev = estim_prev[:,pos_class]
+        data.append(estim_prev-true_prev)
+        labels.append(method)
+
+    ax.boxplot(data, labels=labels, patch_artist=False, showmeans=True)
+    ax.set(ylabel='error bias', title=title)
+
+    save_or_show(savepath)
+
+
+def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=21, colormap=cm.tab10,
+                     vertical_xticks=False, savepath=None):
+    from pylab import boxplot, plot, setp
+
+    fig, ax = plt.subplots()
+    ax.grid()
+
+    bins = np.linspace(0, 1, nbins)
+    binwidth = 1/(nbins - 1)
+    data = {}
+    for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
+        true_prev = true_prev[:,pos_class]
+        estim_prev = estim_prev[:,pos_class]
+
+        data[method] = []
+        inds = np.digitize(true_prev, bins, right=True)
+        for ind in range(len(bins)):
+            selected = inds==ind
+            data[method].append(estim_prev[selected] - true_prev[selected])
+
+    nmethods = len(method_names)
+    boxwidth = binwidth/(nmethods+1)
+    for i,bin in enumerate(bins[:-1]):
+        boxdata = [data[method][i] for method in method_names]
+        positions = [bin+(i*boxwidth)+boxwidth for i,_ in enumerate(method_names)]
+        box = boxplot(boxdata, showmeans=False, positions=positions, widths = boxwidth, sym='+', patch_artist=True)
+        for boxid in range(len(method_names)):
+            c = colormap.colors[boxid]
+            setp(box['fliers'][boxid], color=c, marker='+', markersize=3., markeredgecolor=c)
+            setp(box['boxes'][boxid], color=c)
+            setp(box['medians'][boxid], color='k')
+
+    major_xticks_positions, minor_xticks_positions = [], []
+    major_xticks_labels, minor_xticks_labels = [], []
+    for i,b in enumerate(bins[:-1]):
+        major_xticks_positions.append(b)
+        minor_xticks_positions.append(b + binwidth / 2)
+        major_xticks_labels.append('')
+        minor_xticks_labels.append(f'[{bins[i]:.2f}-{bins[i + 1]:.2f})')
+    ax.set_xticks(major_xticks_positions)
+    ax.set_xticks(minor_xticks_positions, minor=True)
+    ax.set_xticklabels(major_xticks_labels)
+    ax.set_xticklabels(minor_xticks_labels, minor=True, rotation='vertical' if vertical_xticks else 'horizontal')
+
+    if vertical_xticks:
+        # Pad margins so that markers don't get clipped by the axes
+        plt.margins(0.2)
+        # Tweak spacing to prevent clipping of tick-labels
+        plt.subplots_adjust(bottom=0.15)
+
+    # adds the legend to the list hs, initialized with the "ideal" quantifier (one that has 0 bias across all bins. i.e.
+    # a line from (0,0) to (1,0). The other elements are simply labelled dot-plots that are to be removed (setting
+    # set_visible to False for all but the first element) after the legend has been placed
+    hs=[ax.plot([0, 1], [0, 0], '-k', zorder=2)[0]]
+    for colorid in range(len(method_names)):
+        h, = plot([1, 1], '-s', markerfacecolor=colormap.colors[colorid], color='k',
+                  mec=colormap.colors[colorid], linewidth=1.)
+        hs.append(h)
+    box = ax.get_position()
+    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+    ax.legend(hs, ['ideal']+method_names, loc='center left', bbox_to_anchor=(1, 0.5))
+    [h.set_visible(False) for h in hs[1:]]
+
+    # x-axis and y-axis labels and limits
+    ax.set(xlabel='prevalence', ylabel='error bias', title=title)
+    # ax.set_ylim(-1, 1)
+    ax.set_xlim(0, 1)
+
+    save_or_show(savepath)
+
+
+def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=21, error_name='ae', show_std=True,
+                        title=f'Quantification error as a function of distribution shift',
+                        savepath=None):
+
+    fig, ax = plt.subplots()
+    ax.grid()
+
+    x_error = qp.error.ae
+    y_error = getattr(qp.error, error_name)
+    ndims = tr_prevs[0].shape[-1]
+
+    # join all data, and keep the order in which the methods appeared for the first time
+    data = defaultdict(lambda:{'x':np.empty(shape=(0)), 'y':np.empty(shape=(0))})
+    method_order = []
+    for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs):
+        tr_prev_i = np.repeat(tr_prev_i.reshape(1,-1), repeats=test_prevs_i.shape[0], axis=0)
+
+        tr_test_drifts = x_error(test_prevs_i, tr_prev_i)
+        data[method]['x'] = np.concatenate([data[method]['x'], tr_test_drifts])
+
+        method_drifts = y_error(test_prevs_i, estim_prevs_i)
+        data[method]['y'] = np.concatenate([data[method]['y'], method_drifts])
+
+        if method not in method_order:
+            method_order.append(method)
+
+    bins = np.linspace(0, 1, n_bins)
+    binwidth = 1 / (n_bins - 1)
+    min_x, max_x = None, None
+    for method in method_order:
+        tr_test_drifts = data[method]['x']
+        method_drifts = data[method]['y']
+
+        inds = np.digitize(tr_test_drifts, bins, right=True)
+        xs, ys, ystds = [], [], []
+        for ind in range(len(bins)):
+            selected = inds==ind
+            if selected.sum() > 0:
+                xs.append(ind*binwidth)
+                ys.append(np.mean(method_drifts[selected]))
+                ystds.append(np.std(method_drifts[selected]))
+
+        xs = np.asarray(xs)
+        ys = np.asarray(ys)
+        ystds = np.asarray(ystds)
+
+        min_x_method, max_x_method = xs.min(), xs.max()
+        min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
+        max_x = max_x_method if max_x is None or max_x_method > max_x else max_x
+
+        ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2)
+        if show_std:
+            ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)
+
+    ax.set(xlabel=f'Distribution shift between training set and test sample',
+           ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
+           title=title)
+    box = ax.get_position()
+    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+    ax.set_xlim(min_x, max_x)
+
+    save_or_show(savepath)
+
+
+def save_or_show(savepath):
+    # if savepath is specified, then saves the plot in that path; otherwise the plot is shown
+    if savepath is not None:
+        qp.util.create_parent_dir(savepath)
+        # plt.tight_layout()
+        plt.savefig(savepath)
+    else:
+        plt.show()
+
diff --git a/quapy/util.py b/quapy/util.py
index bdfbfb9..3bb9446 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -64,6 +64,10 @@ def get_quapy_home():
     return home
 
 
+def create_parent_dir(path):
+    os.makedirs(Path(path).parent, exist_ok=True)
+
+
 def pickled_resource(pickle_path:str, generation_func:callable, *args):
     if pickle_path is None:
         return generation_func(*args)