From aea65f4e2bac66e976baf7cb662eee2649aef6d1 Mon Sep 17 00:00:00 2001
From: Alex Moreo <alejandro.moreo@isti.cnr.it>
Date: Tue, 6 Jul 2021 10:18:08 +0200
Subject: [PATCH] adding density to error by drift plots

---
 TweetSentQuant/gen_plots.py | 10 +++++-----
 quapy/plot.py               | 24 ++++++++++++++----------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/TweetSentQuant/gen_plots.py b/TweetSentQuant/gen_plots.py
index dc5a0a9..360a96b 100644
--- a/TweetSentQuant/gen_plots.py
+++ b/TweetSentQuant/gen_plots.py
@@ -82,13 +82,13 @@ new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
 new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
 
 plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-# plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
+plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
 
-# diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-# diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
+diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
+diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
 
-# binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-# binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
+binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
+binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
 
 #binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
 #binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
diff --git a/quapy/plot.py b/quapy/plot.py
index d77b82a..28e9716 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -196,9 +196,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
     _set_colors(ax, n_methods=len(method_order))
 
     bins = np.linspace(0, 1, n_bins+1)
-    inds_histogram_global = np.zeros(n_bins, dtype=np.float)  # we use this to keep track of how many datapoits contribute to each bin
+    # inds_histogram_global = np.zeros(n_bins, dtype=np.float)  # we use this to keep track of how many datapoints contribute to each bin
     binwidth = 1 / n_bins
-    min_x, max_x = None, None
+    min_x, max_x, max_y = None, None, None
+    npoints = np.zeros(len(bins), dtype=float)
     for method in method_order:
         tr_test_drifts = data[method]['x']
         method_drifts = data[method]['y']
@@ -206,31 +207,34 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
             method_drifts=np.log(1+method_drifts)
 
         inds = np.digitize(tr_test_drifts, bins, right=True)
-        inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0]
+        # inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0]
 
-        xs, ys, ystds, npoints = [], [], [], []
-        for ind in range(len(bins)):
+        xs, ys, ystds = [], [], []
+        for p,ind in enumerate(range(len(bins))):
             selected = inds==ind
             if selected.sum() > 0:
-                xs.append(ind*binwidth)
+                xs.append(ind*binwidth-binwidth/2)
                 ys.append(np.mean(method_drifts[selected]))
                 ystds.append(np.std(method_drifts[selected]))
-                npoints.append(len(method_drifts[selected]))
+                npoints[p]+=len(method_drifts[selected])
 
         xs = np.asarray(xs)
         ys = np.asarray(ys)
         ystds = np.asarray(ystds)
 
-        min_x_method, max_x_method = xs.min(), xs.max()
+        min_x_method, max_x_method, max_y_method = xs.min(), xs.max(), ys.max()
         min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
         max_x = max_x_method if max_x is None or max_x_method > max_x else max_x
+        max_y = max_y_method if max_y is None or max_y_method > max_y else max_y
 
-        p = ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2)
-        ax.scatter(xs, ys, s=npoints, marker="h", color=p[-1].get_color())
+        ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2)
 
         if show_std:
             ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)
 
+    # ax.scatter([ind*binwidth for ind in range(len(bins))], [0]*len(npoints), s=npoints, marker="h", alpha=0.15, color='k')
+    ax.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))], max_y*npoints/np.max(npoints), alpha=0.15, color='k', width=binwidth, label='density')
+
     # xs = bins[:-1]
     # ys = inds_histogram_global
     # print(xs.shape, ys.shape)