plot for avg on test added

This commit is contained in:
Lorenzo Volpi 2023-11-05 14:16:17 +01:00
parent 5f26bc7059
commit 2d73d583f4
2 changed files with 76 additions and 24 deletions

View File

@ -115,7 +115,7 @@ class CompReport:
shift_data = self._data.copy()
shift_data.index = pd.MultiIndex.from_arrays([shift_idx_0, shift_idx_1])
shift_data.sort_index(axis=0, level=0)
shift_data = shift_data.sort_index(axis=0, level=0)
_metric = _get_metric(metric)
_estimators = _get_estimators(estimators, shift_data.columns.unique(1))
@ -246,7 +246,7 @@ class DatasetReport:
)
_crs_train, _crs_data = zip(*_crs_sorted)
_data = pd.concat(_crs_data, axis=0, keys=_crs_train)
_data = pd.concat(_crs_data, axis=0, keys=np.around(_crs_train, decimals=2))
_data = _data.sort_index(axis=0, level=0)
return _data
@ -296,44 +296,90 @@ class DatasetReport:
_data = self.data(metric=metric, estimators=estimators)
_shift_data = self.shift_data(metric=metric, estimators=estimators)
avg_x_test = _data.groupby(level=1).mean()
prevs_x_test = np.sort(avg_x_test.index.unique(0))
stdev_x_test = _data.groupby(level=1).std() if stdev else None
avg_x_test_tbl = _data.groupby(level=1).mean()
avg_x_test_tbl.loc["avg", :] = _data.mean()
avg_x_shift = _shift_data.groupby(level=0).mean()
prevs_x_shift = np.sort(avg_x_shift.index.unique(0))
res += "## avg\n"
res += avg_x_test_tbl.to_html() + "\n\n"
######################## avg on train ########################
res += "### avg on train\n"
avg_on_train = _data.groupby(level=1).mean()
prevs_on_train = np.sort(avg_on_train.index.unique(0))
stdev_on_train = _data.groupby(level=1).std() if stdev else None
avg_on_train_tbl = _data.groupby(level=1).mean()
avg_on_train_tbl.loc["avg", :] = _data.mean()
res += avg_on_train_tbl.to_html() + "\n\n"
delta_op = plot.plot_delta(
base_prevs=np.around([(1.0 - p, p) for p in prevs_x_test], decimals=2),
columns=avg_x_test.columns.to_numpy(),
data=avg_x_test.T.to_numpy(),
base_prevs=np.around([(1.0 - p, p) for p in prevs_on_train], decimals=2),
columns=avg_on_train.columns.to_numpy(),
data=avg_on_train.T.to_numpy(),
metric=metric,
name=conf,
train_prev=None,
avg="train",
)
res += f"![plot_delta]({delta_op.relative_to(env.OUT_DIR).as_posix()})\n"
if stdev:
delta_stdev_op = plot.plot_delta(
base_prevs=np.around([(1.0 - p, p) for p in prevs_x_test], decimals=2),
columns=avg_x_test.columns.to_numpy(),
data=avg_x_test.T.to_numpy(),
base_prevs=np.around(
[(1.0 - p, p) for p in prevs_on_train], decimals=2
),
columns=avg_on_train.columns.to_numpy(),
data=avg_on_train.T.to_numpy(),
metric=metric,
name=conf,
train_prev=None,
stdevs=stdev_x_test.T.to_numpy(),
stdevs=stdev_on_train.T.to_numpy(),
avg="train",
)
res += f"![plot_delta_stdev]({delta_stdev_op.relative_to(env.OUT_DIR).as_posix()})\n"
######################## avg on test ########################
res += "### avg on test\n"
avg_on_test = _data.groupby(level=0).mean()
prevs_on_test = np.sort(avg_on_test.index.unique(0))
stdev_on_test = _data.groupby(level=0).std() if stdev else None
avg_on_test_tbl = _data.groupby(level=0).mean()
avg_on_test_tbl.loc["avg", :] = _data.mean()
res += avg_on_test_tbl.to_html() + "\n\n"
delta_op = plot.plot_delta(
base_prevs=np.around([(1.0 - p, p) for p in prevs_on_test], decimals=2),
columns=avg_on_test.columns.to_numpy(),
data=avg_on_test.T.to_numpy(),
metric=metric,
name=conf,
train_prev=None,
avg="test",
)
res += f"![plot_delta]({delta_op.relative_to(env.OUT_DIR).as_posix()})\n"
if stdev:
delta_stdev_op = plot.plot_delta(
base_prevs=np.around([(1.0 - p, p) for p in prevs_on_test], decimals=2),
columns=avg_on_test.columns.to_numpy(),
data=avg_on_test.T.to_numpy(),
metric=metric,
name=conf,
train_prev=None,
stdevs=stdev_on_test.T.to_numpy(),
avg="test",
)
res += f"![plot_delta_stdev]({delta_stdev_op.relative_to(env.OUT_DIR).as_posix()})\n"
######################## avg shift ########################
res += "### avg dataset shift\n"
avg_shift = _shift_data.groupby(level=0).mean()
prevs_shift = np.sort(avg_shift.index.unique(0))
shift_op = plot.plot_shift(
shift_prevs=np.around([(1.0 - p, p) for p in prevs_x_shift], decimals=2),
columns=avg_x_shift.columns.to_numpy(),
data=avg_x_shift.T.to_numpy(),
shift_prevs=np.around([(1.0 - p, p) for p in prevs_shift], decimals=2),
columns=avg_shift.columns.to_numpy(),
data=avg_shift.T.to_numpy(),
metric=metric,
name=conf,
train_prev=None,

View File

@ -29,13 +29,14 @@ def plot_delta(
train_prev=None,
fit_scores=None,
legend=True,
avg=None,
) -> Path:
_base_title = "delta_stdev" if stdevs is not None else "delta"
if train_prev is not None:
t_prev_pos = int(round(train_prev[pos_class] * 100))
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
else:
title = f"{_base_title}_{name}_avg_{metric}"
title = f"{_base_title}_{name}_avg_{avg}_{metric}"
fig, ax = plt.subplots()
ax.set_aspect("auto")
@ -83,7 +84,12 @@ def plot_delta(
markersize=0,
)
ax.set(xlabel="test prevalence", ylabel=metric, title=title)
x_label = "test" if avg is None or avg == "train" else "train"
ax.set(
xlabel=f"{x_label} prevalence",
ylabel=metric,
title=title,
)
if legend:
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))