tesi updated

This commit is contained in:
parent 6afe3ddb31
commit feb9e0a59b

conf.yaml | 34

@@ -72,13 +72,13 @@ test_conf: &test_conf
main:
  confs: &main_confs
    - DATASET_NAME: imdb
  other_confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: GCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT
  other_confs:

sld_lr_conf: &sld_lr_conf

@@ -423,22 +423,34 @@ timing_conf: &timing_conf
      - bin_kde_lr_a
      - mul_kde_lr_a
      - m3w_kde_lr_a
      - doc
      - atc_mc
      - rca
      - rca_star
      - mandoline
      - naive
    N_JOBS: 1
    PROTOCOL_REPEATS: 1

  confs: *main_confs

timing_gs_conf: &timing_gs_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/timing_gs
    DATASET_N_PREVS: 1
    COMP_ESTIMATORS:
      - bin_sld_lr_gs
      - mul_sld_lr_gs
      - m3w_sld_lr_gs
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
      - doc
      - atc_mc
      - rca
      - rca_star
      - mandoline
    N_JOBS: 1
    PROTOCOL_N_PREVS: 1,
    PROTOCOL_REPEATS: 1,
    SAMPLE_SIZE: 1000,
    N_JOBS: -1
    PROTOCOL_REPEATS: 1

  confs: *main_confs

exec: *baselines_conf
exec: *timing_gs_conf

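For context on the YAML above: `&main_confs` defines an anchor under `main:` that the later `confs: *main_confs` aliases reuse, so every experiment block runs on the same dataset list. A minimal standalone sketch of the mechanism (using PyYAML and toy keys, not the real conf.yaml):

import yaml  # assumes PyYAML is available

toy = """
main:
  confs: &main_confs
    - DATASET_NAME: imdb
timing_conf:
  confs: *main_confs
"""
cfg = yaml.safe_load(toy)
# the alias expands to the anchored list, so both blocks see the same confs
assert cfg["timing_conf"]["confs"] == cfg["main"]["confs"]
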
@@ -1,9 +1,9 @@
#!/bin/bash

# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/
# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/

# scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
# scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
# scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/

@@ -13,7 +13,7 @@ from dash import Dash, Input, Output, State, callback, ctx, dash_table, dcc, html
from dash.dash_table.Format import Align, Format, Scheme

from quacc import plot
from quacc.evaluation.estimators import CE
from quacc.evaluation.estimators import CE, _renames
from quacc.evaluation.report import CompReport, DatasetReport
from quacc.evaluation.stats import wilcoxon

@@ -26,6 +26,23 @@ def _get_prev_str(prev: np.ndarray):
    return str(tuple(np.around(prev, decimals=2)))


def rename_estimators(estimators, rev=False):
    _rnm = _renames
    if rev:
        _rnm = {v: k for k, v in _renames.items()}

    new_estimators = []
    for c in estimators:
        nc = c
        for old, new in _rnm.items():
            if c.startswith(old):
                nc = new + c[len(old) :]

        new_estimators.append(nc)

    return new_estimators


def get_datasets(root: str | Path) -> List[DatasetReport]:
    def load_dataset(dataset):
        dataset = Path(dataset)

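As a quick illustration of what rename_estimators does, here is a standalone sketch that copies the prefix-substitution loop and two entries of the _renames table from quacc/evaluation/estimators.py (shown further below), rather than importing the app module:

_renames = {"bin_sld_lr": "(2x2)_SLD_LR", "atc_mc": "ATC"}  # excerpt of the full table

def rename(estimators, rev=False):
    rnm = {v: k for k, v in _renames.items()} if rev else _renames
    out = []
    for c in estimators:
        nc = c
        for old, new in rnm.items():
            if c.startswith(old):
                nc = new + c[len(old):]  # keep any suffix such as "_gs"
        out.append(nc)
    return out

raw = ["bin_sld_lr_gs", "atc_mc"]
pretty = rename(raw)                    # ['(2x2)_SLD_LR_gs', 'ATC']
assert rename(pretty, rev=True) == raw  # rev=True maps display names back to raw ids
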
@@ -153,7 +170,7 @@ def get_DataTable(df, mode):
    columns = {
        c: dict(
            id=c,
            name=_index_name[mode] if c == "index" else c,
            name=_index_name[mode] if c == "index" else rename_estimators([c])[0],
            type="numeric",
            format=columns_format,
        )

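The column spec above keeps the raw dataframe key as the DataTable id while showing the renamed estimator as the header name. A minimal sketch of that idea with Dash's DataTable (the data value and the Format precision/scheme here are hypothetical, chosen only for illustration):

from dash import dash_table
from dash.dash_table.Format import Format, Scheme

raw_col = "bin_sld_lr"
table = dash_table.DataTable(
    columns=[dict(
        id=raw_col,                       # raw key, matches the dataframe column
        name="(2x2)_SLD_LR",              # renamed label shown to the user
        type="numeric",
        format=Format(precision=4, scheme=Scheme.fixed),
    )],
    data=[{raw_col: 0.1234}],
)
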
@@ -412,12 +429,13 @@ def update_estimators(href, dataset, metric, curr_estimators, root):
        old_estimators = json.loads(old_estimators)
    except JSONDecodeError:
        old_estimators = []
    old_estimators = rename_estimators(old_estimators, rev=True)
    valid_estimators: np.ndarray = dr.data(metric=metric).columns.unique(0).to_numpy()
    new_estimators = valid_estimators[
        np.isin(valid_estimators, old_estimators)
    ].tolist()
    valid_estimators = CE.name.sort(valid_estimators.tolist())
    return valid_estimators, new_estimators
    return rename_estimators(valid_estimators), rename_estimators(new_estimators)


@callback(

@@ -473,6 +491,7 @@ def update_content(dataset, metric, estimators, view, mode, root):
        quote_via=quote,
    )
    dr = get_dr(root, dataset)
    estimators = rename_estimators(estimators, rev=True)
    match mode:
        case m if m.endswith("table"):
            df = get_table(

@@ -78,3 +78,33 @@ class CompEstimator:


CE = CompEstimator()

_renames = {
    "bin_sld_lr": "(2x2)_SLD_LR",
    "mul_sld_lr": "(1x4)_SLD_LR",
    "m3w_sld_lr": "(1x3)_SLD_LR",
    "d_bin_sld_lr": "d_(2x2)_SLD_LR",
    "d_mul_sld_lr": "d_(1x4)_SLD_LR",
    "d_m3w_sld_lr": "d_(1x3)_SLD_LR",
    "d_bin_sld_rbf": "(2x2)_SLD_RBF",
    "d_mul_sld_rbf": "(1x4)_SLD_RBF",
    "d_m3w_sld_rbf": "(1x3)_SLD_RBF",
    "sld_lr": "SLD_LR",
    "bin_kde_lr": "(2x2)_KDEy_LR",
    "mul_kde_lr": "(1x4)_KDEy_LR",
    "m3w_kde_lr": "(1x3)_KDEy_LR",
    "d_bin_kde_lr": "d_(2x2)_KDEy_LR",
    "d_mul_kde_lr": "d_(1x4)_KDEy_LR",
    "d_m3w_kde_lr": "d_(1x3)_KDEy_LR",
    "bin_cc_lr": "(2x2)_CC_LR",
    "mul_cc_lr": "(1x4)_CC_LR",
    "m3w_cc_lr": "(1x3)_CC_LR",
    "kde_lr": "KDEy_LR",
    "cc_lr": "CC_LR",
    "atc_mc": "ATC",
    "doc": "DoC",
    "mandoline": "Mandoline",
    "rca": "RCA",
    "rca_star": "RCA*",
    "naive": "Naive",
}

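One detail worth noting about _renames: the rename loops above do not break on the first match, so the last matching prefix in insertion order wins. That is why the more specific "rca_star" entry is listed after "rca". A standalone check mirroring the loop (not an import of quacc):

renames = {"rca": "RCA", "rca_star": "RCA*"}  # same relative order as in _renames
name = "rca_star"
label = name
for old, new in renames.items():
    if name.startswith(old):
        label = new + name[len(old):]
print(label)  # RCA*, because the later "rca_star" entry overrides the "rca" match
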
@@ -39,8 +39,16 @@ def plot_delta(
    else:
        title = f"{_base_title}_{name}_avg_{avg}_{metric}"

    x_label = f"{'test' if avg is None or avg == 'train' else 'train'} prevalence"
    y_label = f"{metric} error"
    if avg is None or avg == "train":
        x_label = "Test Prevalence"
    else:
        x_label = "Train Prevalence"
    if metric == "acc":
        y_label = "Prediction Error for Vanilla Accuracy"
    elif metric == "f1":
        y_label = "Prediction Error for F1"
    else:
        y_label = f"{metric} error"
    fig = backend.plot_delta(
        base_prevs,
        columns,

@@ -81,8 +89,12 @@ def plot_diagonal(
    else:
        title = f"diagonal_{name}_{metric}"

    x_label = f"true {metric}"
    y_label = f"estim. {metric}"
    if metric == "acc":
        x_label = "True Vanilla Accuracy"
        y_label = "Estimated Vanilla Accuracy"
    else:
        x_label = f"true {metric}"
        y_label = f"estim. {metric}"
    fig = backend.plot_diagonal(
        reference,
        columns,

@@ -123,8 +135,13 @@ def plot_shift(
    else:
        title = f"shift_{name}_avg_{metric}"

    x_label = "dataset shift"
    y_label = f"{metric} error"
    x_label = "Amount of Prior Probability Shift"
    if metric == "acc":
        y_label = "Prediction Error for Vanilla Accuracy"
    elif metric == "f1":
        y_label = "Prediction Error for F1"
    else:
        y_label = f"{metric} error"
    fig = backend.plot_shift(
        shift_prevs,
        columns,

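Taken together, the three plotting functions now emit human-readable axis labels instead of the raw "{metric} error" strings. A small standalone mirror of the new plot_delta label logic (not an import of quacc), just to show the resulting labels for the accuracy metric:

def delta_labels(metric, avg=None):
    x = "Test Prevalence" if avg is None or avg == "train" else "Train Prevalence"
    if metric == "acc":
        y = "Prediction Error for Vanilla Accuracy"
    elif metric == "f1":
        y = "Prediction Error for F1"
    else:
        y = f"{metric} error"
    return x, y

print(delta_labels("acc"))  # ('Test Prevalence', 'Prediction Error for Vanilla Accuracy')
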
@@ -5,6 +5,7 @@ import numpy as np
import plotly
import plotly.graph_objects as go

from quacc.evaluation.estimators import _renames
from quacc.plot.base import BasePlot

@@ -50,6 +51,7 @@ class PlotlyPlot(BasePlot):

    def __init__(self, theme=None):
        self.theme = PlotlyPlot.__themes[theme]
        self.rename = True

    def hex_to_rgb(self, hex: str, t: float | None = None):
        hex = hex.lstrip("#")

@@ -85,6 +87,24 @@ class PlotlyPlot(BasePlot):
    def save_fig(self, fig, base_path, title) -> Path:
        return None

    def rename_plots(
        self,
        columns,
    ):
        if not self.rename:
            return columns

        new_columns = []
        for c in columns:
            nc = c
            for old, new in _renames.items():
                if c.startswith(old):
                    nc = new + c[len(old) :]

            new_columns.append(nc)

        return np.array(new_columns)

    def plot_delta(
        self,
        base_prevs,

@@ -102,6 +122,7 @@ class PlotlyPlot(BasePlot):
        if isinstance(base_prevs[0], float):
            base_prevs = np.around([(1 - bp, bp) for bp in base_prevs], decimals=4)
        x = [str(tuple(bp)) for bp in base_prevs]
        columns = self.rename_plots(columns)
        line_colors = self.get_colors(len(columns))
        for name, delta in zip(columns, data):
            color = next(line_colors)

@@ -150,6 +171,7 @@ class PlotlyPlot(BasePlot):
    ) -> go.Figure:
        fig = go.Figure()
        x = reference
        columns = self.rename_plots(columns)
        line_colors = self.get_colors(len(columns))

        _edges = (np.min([np.min(x), np.min(data)]), np.max([np.max(x), np.max(data)]))

@@ -211,6 +233,7 @@ class PlotlyPlot(BasePlot):
        fig = go.Figure()
        # x = shift_prevs[:, pos_class]
        x = shift_prevs
        columns = self.rename_plots(columns)
        line_colors = self.get_colors(len(columns))
        for name, delta in zip(columns, data):
            col_idx = (columns == name).nonzero()[0][0]

@@ -0,0 +1,48 @@
import numpy as np

from quacc.evaluation.report import DatasetReport

datasets = [
    "imdb/imdb.pickle",
    "rcv1_CCAT/rcv1_CCAT.pickle",
    "rcv1_GCAT/rcv1_GCAT.pickle",
    "rcv1_MCAT/rcv1_MCAT.pickle",
]

gs = {
    "sld_lr_gs": [
        "bin_sld_lr_gs",
        "mul_sld_lr_gs",
        "m3w_sld_lr_gs",
    ],
    "kde_lr_gs": [
        "bin_kde_lr_gs",
        "mul_kde_lr_gs",
        "m3w_kde_lr_gs",
    ],
}

for dst in datasets:
    dr = DatasetReport.unpickle("output/main/" + dst)
    print(f"{dst}\n")
    for name, methods in gs.items():
        print(f"{name}")
        sel_methods = [
            {k: v for k, v in cr.fit_scores.items() if k in methods} for cr in dr.crs
        ]

        best_methods = [
            list(ms.keys())[np.argmin(list(ms.values()))] for ms in sel_methods
        ]
        m_cnt = []
        for m in methods:
            m_cnt.append((np.array(best_methods) == m).nonzero()[0].shape[0])
        m_cnt = np.array(m_cnt)
        m_freq = m_cnt / len(best_methods)

        for n in methods:
            print(n, end="\t")
        print()
        for v in m_freq:
            print(f"{v*100:.2f}", end="\t")
        print("\n\n")
