From aea65f4e2bac66e976baf7cb662eee2649aef6d1 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Tue, 6 Jul 2021 10:18:08 +0200 Subject: [PATCH] adding density to error by drift plots --- TweetSentQuant/gen_plots.py | 10 +++++----- quapy/plot.py | 24 ++++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/TweetSentQuant/gen_plots.py b/TweetSentQuant/gen_plots.py index dc5a0a9..360a96b 100644 --- a/TweetSentQuant/gen_plots.py +++ b/TweetSentQuant/gen_plots.py @@ -82,13 +82,13 @@ new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet'] new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet'] plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir) -# plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir) +plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir) -# diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir) -# diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir) +diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir) +diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir) -# binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir) -# binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir) +binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir) +binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir) #binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir) #binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir) diff --git a/quapy/plot.py b/quapy/plot.py index d77b82a..28e9716 100644 --- a/quapy/plot.py +++ b/quapy/plot.py @@ -196,9 +196,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e _set_colors(ax, n_methods=len(method_order)) bins = np.linspace(0, 1, n_bins+1) - inds_histogram_global = np.zeros(n_bins, dtype=np.float) # we use this to keep track of how many datapoits contribute to each bin + # inds_histogram_global = np.zeros(n_bins, dtype=np.float) # we use this to keep track of how many datapoits contribute to each bin binwidth = 1 / n_bins - min_x, max_x = None, None + min_x, max_x, max_y = None, None, None + npoints = np.zeros(len(bins), dtype=float) for method in method_order: tr_test_drifts = data[method]['x'] method_drifts = data[method]['y'] @@ -206,31 +207,34 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e method_drifts=np.log(1+method_drifts) inds = np.digitize(tr_test_drifts, bins, right=True) - inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0] + # inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0] - xs, ys, ystds, npoints = [], [], [], [] - for ind in range(len(bins)): + xs, ys, ystds = [], [], [] + for p,ind in enumerate(range(len(bins))): selected = inds==ind if selected.sum() > 0: - xs.append(ind*binwidth) + xs.append(ind*binwidth-binwidth/2) ys.append(np.mean(method_drifts[selected])) ystds.append(np.std(method_drifts[selected])) - npoints.append(len(method_drifts[selected])) + npoints[p]+=len(method_drifts[selected]) xs = np.asarray(xs) ys = np.asarray(ys) ystds = np.asarray(ystds) - min_x_method, max_x_method = xs.min(), xs.max() + min_x_method, max_x_method, max_y_method = xs.min(), xs.max(), ys.max() min_x = min_x_method if min_x is None or min_x_method < min_x else min_x max_x = max_x_method if max_x is None or max_x_method > max_x else max_x + max_y = max_y_method if max_y is None or max_y_method > max_y else max_y - p = ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2) - ax.scatter(xs, ys, s=npoints, marker="h", color=p[-1].get_color()) + ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2) if show_std: ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25) + # ax.scatter([ind*binwidth for ind in range(len(bins))], [0]*len(npoints), s=npoints, marker="h", alpha=0.15, color='k') + ax.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))], max_y*npoints/np.max(npoints), alpha=0.15, color='k', width=binwidth, label='density') + # xs = bins[:-1] # ys = inds_histogram_global # print(xs.shape, ys.shape)