1
0
Fork 0

adding density to error by drift plots

This commit is contained in:
Alejandro Moreo Fernandez 2021-07-06 10:18:08 +02:00
parent fc228a722d
commit aea65f4e2b
2 changed files with 19 additions and 15 deletions

View File

@ -82,13 +82,13 @@ new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
# plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
# diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
# diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
# binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
# binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
#binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
#binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)

View File

@ -196,9 +196,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
_set_colors(ax, n_methods=len(method_order))
bins = np.linspace(0, 1, n_bins+1)
inds_histogram_global = np.zeros(n_bins, dtype=np.float) # we use this to keep track of how many datapoits contribute to each bin
# inds_histogram_global = np.zeros(n_bins, dtype=np.float) # we use this to keep track of how many datapoits contribute to each bin
binwidth = 1 / n_bins
min_x, max_x = None, None
min_x, max_x, max_y = None, None, None
npoints = np.zeros(len(bins), dtype=float)
for method in method_order:
tr_test_drifts = data[method]['x']
method_drifts = data[method]['y']
@ -206,31 +207,34 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
method_drifts=np.log(1+method_drifts)
inds = np.digitize(tr_test_drifts, bins, right=True)
inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0]
# inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0]
xs, ys, ystds, npoints = [], [], [], []
for ind in range(len(bins)):
xs, ys, ystds = [], [], []
for p,ind in enumerate(range(len(bins))):
selected = inds==ind
if selected.sum() > 0:
xs.append(ind*binwidth)
xs.append(ind*binwidth-binwidth/2)
ys.append(np.mean(method_drifts[selected]))
ystds.append(np.std(method_drifts[selected]))
npoints.append(len(method_drifts[selected]))
npoints[p]+=len(method_drifts[selected])
xs = np.asarray(xs)
ys = np.asarray(ys)
ystds = np.asarray(ystds)
min_x_method, max_x_method = xs.min(), xs.max()
min_x_method, max_x_method, max_y_method = xs.min(), xs.max(), ys.max()
min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
max_x = max_x_method if max_x is None or max_x_method > max_x else max_x
max_y = max_y_method if max_y is None or max_y_method > max_y else max_y
p = ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2)
ax.scatter(xs, ys, s=npoints, marker="h", color=p[-1].get_color())
ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2)
if show_std:
ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)
# ax.scatter([ind*binwidth for ind in range(len(bins))], [0]*len(npoints), s=npoints, marker="h", alpha=0.15, color='k')
ax.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))], max_y*npoints/np.max(npoints), alpha=0.15, color='k', width=binwidth, label='density')
# xs = bins[:-1]
# ys = inds_histogram_global
# print(xs.shape, ys.shape)