Dataset refactored, training sampling added

This commit is contained in:
Lorenzo Volpi 2023-10-20 23:36:05 +02:00
parent 31e91c1626
commit d906502c29
11 changed files with 426 additions and 277 deletions

3
.gitignore vendored
View File

@@ -11,4 +11,5 @@ lipton_bbse/__pycache__/*
elsahar19_rca/__pycache__/*
*.coverage
.coverage
scp_sync.py
scp_sync.py
out/*

55
TODO.html Normal file
View File

@@ -0,0 +1,55 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title></title>
<style>
/* From extension vscode.github */
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
.vscode-dark img[src$=\#gh-light-mode-only],
.vscode-light img[src$=\#gh-dark-mode-only] {
display: none;
}
</style>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif;
font-size: 14px;
line-height: 1.6;
}
</style>
<style>
.task-list-item {
list-style-type: none;
}
.task-list-item-checkbox {
margin-left: -20px;
vertical-align: middle;
pointer-events: none;
}
</style>
</head>
<body class="vscode-body vscode-light">
<ul class="contains-task-list">
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> aggiungere media tabelle</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> plot; 3 tipi (appunti + email + garg)</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> sistemare kfcv baseline</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> aggiungere metodo con CC oltre SLD</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked=""type="checkbox"> prendere classe più popolosa di rcv1, togliere negativi fino a raggiungere 50/50; poi fare subsampling con 9 training prvalences (da 0.1-0.9 a 0.9-0.1)</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> variare parametro recalibration in SLD</li>
</ul>
</body>
</html>

14
TODO.md
View File

@@ -1,8 +1,6 @@
- add averages to the tables
- plots
- 3 types (see notes + garg)
- fix the kfcv baseline
- add a method using CC besides SLD
- take the most populous class of rcv1, remove negatives until reaching 50/50
then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
- vary the recalibration parameter of SLD
- [ ] add averages to the tables
- [ ] plots; 3 types (notes + email + garg)
- [ ] fix the kfcv baseline
- [ ] add a method using CC besides SLD
- [x] take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1) (see the sketch below)
- [ ] vary the recalibration parameter of SLD
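The checked item above is what the dataset refactor in this commit implements (see quacc/dataset.py below). A minimal standalone sketch of that sampling scheme, assuming a binary quapy `LabelledCollection` called `all_train`; the helper name is illustrative, while the `sampling`, `linspace` and size computation mirror the code in the diff:

```python
import math

import numpy as np
from quapy.data.base import LabelledCollection


def balanced_prevalence_samples(all_train: LabelledCollection, n_prevs: int = 9):
    # rebalance to a (0.5, 0.5) prevalence: keep at most twice the minority-class count
    positives = int(np.sum(all_train.y))
    balanced = all_train.sampling(
        min(positives, len(all_train) - positives) * 2, 0.5, random_state=0
    )
    # 9 training prevalences: 0.1, 0.2, ..., 0.9
    prevalences = np.linspace(0.0, 1.0, num=n_prevs + 1, endpoint=False)[1:]
    # largest sample size for which every prevalence is still feasible
    size = min(math.floor(len(balanced) * 0.5 / p) for p in prevalences)
    return [balanced.sampling(size, p, random_state=0) for p in prevalences]
```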

quacc/dataset.py
View File

@@ -1,26 +1,102 @@
from typing import Tuple
import math
from typing import List
import numpy as np
from quapy.data.base import LabelledCollection
import quapy as qp
from quapy.data.base import LabelledCollection
from sklearn.datasets import fetch_rcv1
TRAIN_VAL_PROP = 0.5
def get_imdb(**kwargs) -> Tuple[LabelledCollection]:
train, test = qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
train, validation = train.split_stratified(
train_prop=TRAIN_VAL_PROP, random_state=0
)
return train, validation, test
class DatasetSample:
def __init__(
self,
train: LabelledCollection,
validation: LabelledCollection,
test: LabelledCollection,
):
self.train = train
self.validation = validation
self.test = test
@property
def train_prev(self):
return self.train.prevalence()
@property
def validation_prev(self):
return self.validation.prevalence()
@property
def prevs(self):
return {"train": self.train_prev, "validation": self.validation_prev}
def get_spambase(**kwargs) -> Tuple[LabelledCollection]:
train, test = qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
train, validation = train.split_stratified(
train_prop=TRAIN_VAL_PROP, random_state=0
)
return train, validation, test
class Dataset:
def __init__(self, name, n_prevalences=9, target=None):
self._name = name
self._target = target
self.n_prevs = n_prevalences
def __spambase(self):
return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
def __imdb(self):
return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
def __rcv1(self):
n_train = 23149
available_targets = ["CCAT", "GCAT", "MCAT"]
if self._target is None or self._target not in available_targets:
raise ValueError("Invalid target")
dataset = fetch_rcv1()
target_index = np.where(dataset.target_names == self._target)[0]
all_train_d, test_d = dataset.data[:n_train, :], dataset.data[n_train:, :]
labels = dataset.target[:, target_index].toarray().flatten()
all_train_l, test_l = labels[:n_train], labels[n_train:]
all_train = LabelledCollection(all_train_d, all_train_l, classes=[0, 1])
test = LabelledCollection(test_d, test_l, classes=[0, 1])
return all_train, test
def get(self) -> List[DatasetSample]:
all_train, test = {
"spambase": self.__spambase,
"imdb": self.__imdb,
"rcv1": self.__rcv1,
}[self._name]()
# resample all_train set to have (0.5, 0.5) prevalence
at_positives = np.sum(all_train.y)
all_train = all_train.sampling(
min(at_positives, len(all_train) - at_positives) * 2, 0.5, random_state=0
)
# sample prevalences
prevalences = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevalences)
datasets = []
for p in prevalences:
all_train_sampled = all_train.sampling(at_size, p, random_state=0)
train, validation = all_train_sampled.split_stratified(
train_prop=TRAIN_VAL_PROP, random_state=0
)
datasets.append(DatasetSample(train, validation, test))
return datasets
def __call__(self):
return self.get()
@property
def name(self):
if self._name == "rcv1":
return f"{self._name}_{self._target}"
else:
return self._name
# >>> fetch_rcv1().target_names
@@ -39,33 +115,30 @@ def get_spambase(**kwargs) -> Tuple[LabelledCollection]:
# 'M142', 'M143', 'MCAT'], dtype=object)
def get_rcv1(target = "default", **kwargs):
sample_size = qp.environ["SAMPLE_SIZE"]
n_train = 23149
def rcv1_info():
dataset = fetch_rcv1()
n_train = 23149
if target == "default":
target = "C12"
if target not in dataset.target_names:
raise ValueError("Invalid target")
def dataset_split(data, labels, classes=[0, 1]) -> Tuple[LabelledCollection]:
all_train_d, test_d = data[:n_train, :], data[n_train:, :]
all_train_l, test_l = labels[:n_train], labels[n_train:]
all_train = LabelledCollection(all_train_d, all_train_l, classes=classes)
test = LabelledCollection(test_d, test_l, classes=classes)
train, validation = all_train.split_stratified(
train_prop=TRAIN_VAL_PROP, random_state=0
targets = []
for target in range(103):
train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
targets.append(
(
dataset.target_names[target],
{
"train": (1.0 - train_t_prev, train_t_prev),
"test": (1.0 - test_t_prev, test_t_prev),
},
)
)
return train, validation, test
target_index = np.where(dataset.target_names == target)[0]
target_labels = dataset.target[:, target_index].toarray().flatten()
targets.sort(key=lambda t: t[1]["train"][1])
for n, d in targets:
print(f"{n}:")
for k, (fp, tp) in d.items():
print(f"\t{k}: {fp:.4f}, {tp:.4f}")
if np.sum(target_labels[n_train:]) < sample_size:
raise ValueError("Target has too few positive samples")
d = dataset_split(dataset.data, target_labels, classes=[0, 1])
return d
if __name__ == "__main__":
rcv1_info()
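For reference, a minimal usage sketch of the refactored `Dataset` class defined above; the constructor arguments are illustrative and happen to match the defaults in quacc/environ.py below:

```python
from quacc.dataset import Dataset

# one DatasetSample per training prevalence in 0.1 ... 0.9
dataset = Dataset("rcv1", target="CCAT", n_prevalences=9)
for sample in dataset():  # __call__ delegates to get()
    print(dataset.name, sample.prevs)  # prevs: {"train": ..., "validation": ...}
    train, validation, test = sample.train, sample.validation, sample.test
```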

31
quacc/environ.py Normal file
View File

@@ -0,0 +1,31 @@
from pathlib import Path
default_env = {
"DATASET_NAME": "rcv1",
"DATASET_TARGET": "CCAT",
"COMP_ESTIMATORS": [
"OUR_BIN_SLD",
"OUR_MUL_SLD",
"KFCV",
"ATC_MC",
"ATC_NE",
"DOC_FEAT",
# "RCA",
# "RCA_STAR",
],
"DATASET_N_PREVS": 9,
"OUT_DIR": Path("out"),
"PLOT_OUT_DIR": Path("out/plot"),
"PROTOCOL_N_PREVS": 21,
"PROTOCOL_REPEATS": 100,
"SAMPLE_SIZE": 1000,
}
class Environ:
def __init__(self, **kwargs):
for k, v in kwargs.items():
self.__setattr__(k, v)
env = Environ(**default_env)
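A short sketch of how the new `env` object is consumed elsewhere in this commit (see quacc/evaluation/comp.py and the main script below); `Environ` simply exposes the dictionary entries as attributes, and the report file name here is illustrative:

```python
from quacc.environ import env

print(env.DATASET_NAME)                  # "rcv1"
print(env.SAMPLE_SIZE)                   # 1000
report_path = env.OUT_DIR / "report.md"  # OUT_DIR is a pathlib.Path, so "/" joins paths
```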

quacc/evaluation/baseline.py
View File

@@ -1,29 +1,28 @@
from statistics import mean
from typing import Dict
import numpy as np
from quapy.data import LabelledCollection
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate
import sklearn.metrics as metrics
from quapy.data import LabelledCollection
from quapy.protocol import (
AbstractStochasticSeededProtocol,
OnLabelledCollectionProtocol,
)
from .report import EvaluationReport
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate
import elsahar19_rca.rca as rca
import garg22_ATC.ATC_helper as atc
import guillory21_doc.doc as doc
import jiang18_trustscore.trustscore as trustscore
from .report import EvaluationReport
def kfcv(
c_model: BaseEstimator,
c_model: BaseEstimator,
validation: LabelledCollection,
protocol: AbstractStochasticSeededProtocol,
predict_method="predict"
predict_method="predict",
):
c_model_predict = getattr(c_model, predict_method)
@@ -42,12 +41,12 @@ def kfcv(
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
report.append_row(
test.prevalence(),
acc_score=(1. - acc_score),
acc_score=(1.0 - acc_score),
f1_score=f1_score,
acc=meta_acc,
f1=meta_f1,
)
return report
@@ -63,7 +62,7 @@ def reference(
test_probs = c_model_predict(test.X)
test_preds = np.argmax(test_probs, axis=-1)
report.append_row(
test.prevalence(),
test.prevalence(),
acc_score=(1 - metrics.accuracy_score(test.y, test_preds)),
f1_score=metrics.f1_score(test.y, test_preds),
)

91
quacc/evaluation/comp.py Normal file
View File

@@ -0,0 +1,91 @@
import multiprocessing
import time
from typing import List
import pandas as pd
import quapy as qp
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression
from quacc.dataset import Dataset
from quacc.environ import env
from quacc.evaluation import baseline, method
from quacc.evaluation.report import DatasetReport, EvaluationReport
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
pd.set_option("display.float_format", "{:.4f}".format)
class CompEstimator:
__dict = {
"OUR_BIN_SLD": method.evaluate_bin_sld,
"OUR_MUL_SLD": method.evaluate_mul_sld,
"KFCV": baseline.kfcv,
"ATC_MC": baseline.atc_mc,
"ATC_NE": baseline.atc_ne,
"DOC_FEAT": baseline.doc_feat,
"RCA": baseline.rca_score,
"RCA_STAR": baseline.rca_star_score,
}
def __class_getitem__(cls, e: str | List[str]):
if isinstance(e, str):
try:
return cls.__dict[e]
except KeyError:
raise KeyError(f"Invalid estimator: estimator {e} does not exist")
elif isinstance(e, list):
try:
return [cls.__dict[est] for est in e]
except KeyError as ke:
raise KeyError(
f"Invalid estimator: estimator {ke.args[0]} does not exist"
)
CE = CompEstimator
def fit_and_estimate(_estimate, train, validation, test):
model = LogisticRegression()
model.fit(*train.Xy)
protocol = APP(
test, n_prevalences=env.PROTOCOL_N_PREVS, repeats=env.PROTOCOL_REPEATS
)
start = time.time()
result = _estimate(model, validation, protocol)
end = time.time()
print(f"{_estimate.__name__}: {end-start:.2f}s")
return {
"name": _estimate.__name__,
"result": result,
"time": end - start,
}
def evaluate_comparison(
dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
) -> EvaluationReport:
with multiprocessing.Pool(8) as pool:
dr = DatasetReport(dataset.name)
for d in dataset():
print(f"train prev.: {d.train_prev}")
start = time.time()
tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
results = list(map(lambda r: r.get(), results))
er = EvaluationReport.combine_reports(
*list(map(lambda r: r["result"], results)), name=dataset.name
)
times = {r["name"]: r["time"] for r in results}
end = time.time()
times["tot"] = end - start
er.times = times
er.train_prevs = d.prevs
dr.add(er)
print()
return dr
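A brief sketch of how the `CompEstimator` lookup and `evaluate_comparison` above are meant to be combined; the estimator names come from the dictionary above, while the dataset arguments are illustrative:

```python
from quacc.dataset import Dataset
from quacc.evaluation.comp import CE, evaluate_comparison

if __name__ == "__main__":
    kfcv_fn = CE["KFCV"]              # single name -> one estimator callable
    fns = CE[["ATC_MC", "DOC_FEAT"]]  # list of names -> list of callables

    # runs every requested estimator on each training prevalence of the dataset
    dr = evaluate_comparison(
        Dataset("imdb", n_prevalences=9),
        estimators=["OUR_BIN_SLD", "KFCV"],
    )
    print(dr.to_md("acc"))
```

The `__main__` guard is only there because `evaluate_comparison` spawns a multiprocessing pool.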

quacc/evaluation/method.py
View File

@@ -1,20 +1,11 @@
import multiprocessing
import time
import pandas as pd
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import (
APP,
AbstractStochasticSeededProtocol,
OnLabelledCollectionProtocol,
)
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import quacc.error as error
import quacc.evaluation.baseline as baseline
from quacc.dataset import get_imdb, get_rcv1, get_spambase
from quacc.evaluation.report import EvaluationReport
from ..estimator import (
@@ -23,13 +14,6 @@ from ..estimator import (
MulticlassAccuracyEstimator,
)
qp.environ["SAMPLE_SIZE"] = 100
pd.set_option("display.float_format", "{:.4f}".format)
n_prevalences = 21
repreats = 100
def estimate(
estimator: AccuracyEstimator,
@@ -61,11 +45,11 @@ def evaluation_report(
acc_score = error.acc(estim_prev)
f1_score = error.f1(estim_prev)
report.append_row(
base_prev,
acc_score=1. - acc_score,
acc = abs(error.acc(true_prev) - acc_score),
base_prev,
acc_score=1.0 - acc_score,
acc=abs(error.acc(true_prev) - acc_score),
f1_score=f1_score,
f1=abs(error.f1(true_prev) - f1_score)
f1=abs(error.f1(true_prev) - f1_score),
)
return report
@@ -77,7 +61,7 @@ def evaluate(
protocol: AbstractStochasticSeededProtocol,
method: str,
):
estimator : AccuracyEstimator = {
estimator: AccuracyEstimator = {
"bin": BinaryQuantifierAccuracyEstimator,
"mul": MulticlassAccuracyEstimator,
}[method](c_model)
@@ -85,65 +69,17 @@
return evaluation_report(estimator, protocol, method)
def evaluate_binary(model, validation, protocol):
return evaluate(model, validation, protocol, "bin")
def evaluate_bin_sld(
c_model: BaseEstimator,
validation: LabelledCollection,
protocol: AbstractStochasticSeededProtocol,
) -> EvaluationReport:
return evaluate(c_model, validation, protocol, "bin")
def evaluate_multiclass(model, validation, protocol):
return evaluate(model, validation, protocol, "mul")
def fit_and_estimate(_estimate, train, validation, test):
model = LogisticRegression()
model.fit(*train.Xy)
protocol = APP(test, n_prevalences=n_prevalences, repeats=repreats)
start = time.time()
result = _estimate(model, validation, protocol)
end = time.time()
return {
"name": _estimate.__name__,
"result": result,
"time": end - start,
}
def evaluate_comparison(dataset: str, **kwargs) -> EvaluationReport:
train, validation, test = {
"spambase": get_spambase,
"imdb": get_imdb,
"rcv1": get_rcv1,
}[dataset](**kwargs)
for k,v in kwargs.items():
print(k, ":", v)
prevs = {
"train": train.prevalence(),
"validation": validation.prevalence(),
}
start = time.time()
with multiprocessing.Pool(8) as pool:
estimators = [
evaluate_binary,
evaluate_multiclass,
baseline.kfcv,
baseline.atc_mc,
baseline.atc_ne,
baseline.doc_feat,
baseline.rca_score,
baseline.rca_star_score,
]
tasks = [(estim, train, validation, test) for estim in estimators]
results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
results = list(map(lambda r: r.get(), results))
er = EvaluationReport.combine_reports(*list(map(lambda r: r["result"], results)))
times = {r["name"]:r["time"] for r in results}
end = time.time()
times["tot"] = end - start
er.times = times
er.prevs = prevs
return er
def evaluate_mul_sld(
c_model: BaseEstimator,
validation: LabelledCollection,
protocol: AbstractStochasticSeededProtocol,
) -> EvaluationReport:
return evaluate(c_model, validation, protocol, "mul")

quacc/evaluation/report.py
View File

@@ -1,143 +1,122 @@
from typing import Tuple
import math
import statistics as stats
from typing import List, Tuple
import numpy as np
import pandas as pd
def _fmt_line(s):
return f"> {s} \n"
from quacc import plot
from quacc.utils import fmt_line_md
class EvaluationReport:
def __init__(self, prefix=None):
self.base = []
self.dict = {}
self._grouped = False
self._grouped_base = []
self._grouped_dict = {}
self._dataframe = None
self.prefix = prefix if prefix is not None else "default"
self._times = {}
self._prevs = {}
self._target = "default"
self._prevs = []
self._dict = {}
self._g_prevs = None
self._g_dict = None
self.name = prefix if prefix is not None else "default"
self.times = {}
self.train_prevs = {}
self.target = "default"
def append_row(self, base: np.ndarray | Tuple, **row):
if isinstance(base, np.ndarray):
base = tuple(base.tolist())
self.base.append(base)
self._prevs.append(base)
for k, v in row.items():
if (k, self.prefix) in self.dict:
self.dict[(k, self.prefix)].append(v)
if (k, self.name) in self._dict:
self._dict[(k, self.name)].append(v)
else:
self.dict[(k, self.prefix)] = [v]
self._grouped = False
self._dataframe = None
self._dict[(k, self.name)] = [v]
self._g_prevs = None
@property
def columns(self):
return self.dict.keys()
return self._dict.keys()
@property
def grouped(self):
if self._grouped:
return self._grouped_dict
def groupby_prevs(self, metric: str = None):
if self._g_dict is None:
self._g_prevs = []
self._g_dict = {k: [] for k in self._dict.keys()}
self._grouped_base = []
self._grouped_dict = {k: [] for k in self.dict.keys()}
last_end = 0
for ind, bp in enumerate(self._prevs):
if ind < (len(self._prevs) - 1) and bp == self._prevs[ind + 1]:
continue
last_end = 0
for ind, bp in enumerate(self.base):
if ind < (len(self.base) - 1) and bp == self.base[ind + 1]:
continue
self._g_prevs.append(bp)
for col in self._dict.keys():
self._g_dict[col].append(
stats.mean(self._dict[col][last_end : ind + 1])
)
self._grouped_base.append(bp)
for col in self.dict.keys():
self._grouped_dict[col].append(
stats.mean(self.dict[col][last_end : ind + 1])
)
last_end = ind + 1
last_end = ind + 1
filtered_g_dict = self._g_dict
if metric is not None:
filtered_g_dict = {
c1: ls for ((c0, c1), ls) in self._g_dict.items() if c0 == metric
}
self._grouped = True
return self._grouped_dict
return self._g_prevs, filtered_g_dict
@property
def gbase(self):
self.grouped
return self._grouped_base
def get_dataframe(self, metrics=None):
if self._dataframe is None:
self_columns = sorted(self.columns, key=lambda c: c[0])
self._dataframe = pd.DataFrame(
self.grouped,
index=self.gbase,
columns=pd.MultiIndex.from_tuples(self_columns),
)
df = pd.DataFrame(self._dataframe)
if metrics is not None:
df = df.drop(
[(c0, c1) for (c0, c1) in df.columns if c0 not in metrics], axis=1
)
if len(set(k0 for k0, k1 in df.columns)) == 1:
df = df.droplevel(0, axis=1)
return df
def merge(self, other):
if not all(v1 == v2 for v1, v2 in zip(self.base, other.base)):
raise ValueError("other has not same base prevalences of self")
if len(set(self.dict.keys()).intersection(set(other.dict.keys()))) > 0:
raise ValueError("self and other have matching keys")
report = EvaluationReport()
report.base = self.base
report.dict = self.dict | other.dict
return report
@property
def times(self):
return self._times
@times.setter
def times(self, val):
self._times = val
@property
def prevs(self):
return self._prevs
@prevs.setter
def prevs(self, val):
self._prevs = val
@property
def target(self):
return self._target
@target.setter
def target(self, val):
self._target = val
def get_dataframe(self, metric="acc"):
g_prevs, g_dict = self.groupby_prevs(metric=metric)
return pd.DataFrame(
g_dict,
index=g_prevs,
columns=g_dict.keys(),
)
def to_md(self, *metrics):
res = _fmt_line("target: " + self.target)
for k, v in self.prevs.items():
res += _fmt_line(f"{k}: {str(v)}")
res = ""
for k, v in self.train_prevs.items():
res += fmt_line_md(f"{k}: {str(v)}")
for k, v in self.times.items():
res += _fmt_line(f"{k}: {v:.3f}s")
res += fmt_line_md(f"{k}: {v:.3f}s")
res += "\n"
for m in metrics:
res += self.get_dataframe(metrics=m).to_html() + "\n\n"
res += self.get_dataframe(metric=m).to_html() + "\n\n"
return res
def merge(self, other):
if not all(v1 == v2 for v1, v2 in zip(self._prevs, other._prevs)):
raise ValueError("other has not same base prevalences of self")
if len(set(self._dict.keys()).intersection(set(other._dict.keys()))) > 0:
raise ValueError("self and other have matching keys")
report = EvaluationReport()
report._prevs = self._prevs
report._dict = self._dict | other._dict
return report
@staticmethod
def combine_reports(*args):
def combine_reports(*args, name="default"):
er = args[0]
for r in args[1:]:
er = er.merge(r)
er.name = name
return er
class DatasetReport:
def __init__(self, name):
self.name = name
self.ers: List[EvaluationReport] = []
def add(self, er: EvaluationReport):
self.ers.append(er)
def to_md(self, *metrics):
res = f"{self.name}\n\n"
for er in self.ers:
res += f"{er.to_md(*metrics)}\n\n"
return res
def __iter__(self):
return (er for er in self.ers)
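A minimal sketch of the reworked `EvaluationReport` API; the row values are made up, the method names and signatures are the ones defined in this file:

```python
import numpy as np

from quacc.evaluation.report import EvaluationReport

er = EvaluationReport(prefix="OUR_BIN_SLD")
# consecutive rows sharing the same base prevalence are averaged by groupby_prevs
er.append_row(np.array([0.3, 0.7]), acc=0.02, f1=0.05)
er.append_row(np.array([0.3, 0.7]), acc=0.04, f1=0.07)
er.append_row(np.array([0.5, 0.5]), acc=0.03, f1=0.06)

g_prevs, g_dict = er.groupby_prevs(metric="acc")
# g_prevs == [(0.3, 0.7), (0.5, 0.5)], g_dict == {"OUR_BIN_SLD": [0.03, 0.03]}
print(er.get_dataframe(metric="acc").to_html())
```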

View File

@@ -1,33 +1,16 @@
import traceback
import quacc.evaluation.method as method
import quacc.evaluation.comp as comp
from quacc.dataset import Dataset
from quacc.environ import env
DATASET = "imdb"
OUTPUT_FILE = "out_" + DATASET + ".md"
TARGETS = {
"rcv1" : [
'C12',
'C13', 'C15', 'C151', 'C1511', 'C152', 'C17', 'C172',
'C18', 'C181', 'C21', 'C24', 'C31', 'C42', 'CCAT'
'E11', 'E12', 'E21', 'E211', 'E212', 'E41', 'E51', 'ECAT',
'G15', 'GCAT', 'GCRIM', 'GDIP', 'GPOL', 'GVIO', 'GVOTE', 'GWEA',
'GWELF', 'M11', 'M12', 'M13', 'M131', 'M132', 'M14', 'M141',
'M142', 'M143', 'MCAT'
],
"spambase": ["default"],
"imdb": ["default"],
}
def estimate_comparison():
open(OUTPUT_FILE, "w").close()
targets = TARGETS[DATASET]
for target in targets:
try:
er = method.evaluate_comparison(DATASET, target=target)
er.target = target
with open(OUTPUT_FILE, "a") as f:
f.write(er.to_md(["acc"], ["f1"]))
except Exception:
traceback.print_exc()
dataset = Dataset(
env.DATASET_NAME, target=env.DATASET_TARGET, n_prevalences=env.DATASET_N_PREVS
)
output_path = env.OUT_DIR / f"{dataset.name}.md"
with open(output_path, "w") as f:
dr = comp.evaluate_comparison(dataset, estimators=env.COMP_ESTIMATORS)
f.write(dr.to_md("acc"))
# print(df.to_latex(float_format="{:.4f}".format))
# print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))

quacc/utils.py
View File

@@ -1,7 +1,8 @@
import functools
import pandas as pd
def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
if len(dfs) < 1:
raise ValueError
@@ -10,15 +11,13 @@ def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
df = dfs[0]
for ndf in dfs[1:]:
df = df.join(ndf.set_index(df_index), on=df_index)
return df
def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
def _reduce_func(s1, s2):
return {
(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()
}
return {(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()}
lst = df.to_dict(orient="records")[1:-1]
summed_series = functools.reduce(_reduce_func, lst)
@@ -28,4 +27,8 @@ def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
for ((n1, n2), v) in summed_series.items()
if n1 != "base"
}
return pd.DataFrame([avg_report], columns=idx)
return pd.DataFrame([avg_report], columns=idx)
def fmt_line_md(s):
return f"> {s} \n"