Dataset refactored, training sampling added

This commit is contained in:
Lorenzo Volpi 2023-10-20 23:36:05 +02:00
parent 31e91c1626
commit d906502c29
11 changed files with 426 additions and 277 deletions

1
.gitignore vendored
View File

@@ -12,3 +12,4 @@ elsahar19_rca/__pycache__/*
 *.coverage
 .coverage
 scp_sync.py
+out/*

55
TODO.html Normal file
View File

@@ -0,0 +1,55 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title></title>
<style>
/* From extension vscode.github */
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
.vscode-dark img[src$=\#gh-light-mode-only],
.vscode-light img[src$=\#gh-dark-mode-only] {
display: none;
}
</style>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif;
font-size: 14px;
line-height: 1.6;
}
</style>
<style>
.task-list-item {
list-style-type: none;
}
.task-list-item-checkbox {
margin-left: -20px;
vertical-align: middle;
pointer-events: none;
}
</style>
</head>
<body class="vscode-body vscode-light">
<ul class="contains-task-list">
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> aggiungere media tabelle</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> plot; 3 tipi (appunti + email + garg)</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> sistemare kfcv baseline</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> aggiungere metodo con CC oltre SLD</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked=""type="checkbox"> prendere classe più popolosa di rcv1, togliere negativi fino a raggiungere 50/50; poi fare subsampling con 9 training prvalences (da 0.1-0.9 a 0.9-0.1)</li>
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> variare parametro recalibration in SLD</li>
</ul>
</body>
</html>

14
TODO.md
View File

@@ -1,8 +1,6 @@
-- add averages to the tables
-- plots
-  - 3 types (see notes + garg)
-- fix the kfcv baseline
-- add a method using CC besides SLD
-- take the most populous class of rcv1, remove negatives until reaching 50/50
-  then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
-- vary the recalibration parameter in SLD
+- [ ] add averages to the tables
+- [ ] plots; 3 types (notes + email + garg)
+- [ ] fix the kfcv baseline
+- [ ] add a method using CC besides SLD
+- [x] take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
+- [ ] vary the recalibration parameter in SLD

quacc/dataset.py
View File

@@ -1,26 +1,102 @@
-from typing import Tuple
+import math
+from typing import List
 
 import numpy as np
-from quapy.data.base import LabelledCollection
 import quapy as qp
+from quapy.data.base import LabelledCollection
 from sklearn.conftest import fetch_rcv1
 
 TRAIN_VAL_PROP = 0.5
 
 
-def get_imdb(**kwargs) -> Tuple[LabelledCollection]:
-    train, test = qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
-    train, validation = train.split_stratified(
-        train_prop=TRAIN_VAL_PROP, random_state=0
-    )
-    return train, validation, test
-
-
-def get_spambase(**kwargs) -> Tuple[LabelledCollection]:
-    train, test = qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
-    train, validation = train.split_stratified(
-        train_prop=TRAIN_VAL_PROP, random_state=0
-    )
-    return train, validation, test
+class DatasetSample:
+    def __init__(
+        self,
+        train: LabelledCollection,
+        validation: LabelledCollection,
+        test: LabelledCollection,
+    ):
+        self.train = train
+        self.validation = validation
+        self.test = test
+
+    @property
+    def train_prev(self):
+        return self.train.prevalence()
+
+    @property
+    def validation_prev(self):
+        return self.validation.prevalence()
+
+    @property
+    def prevs(self):
+        return {"train": self.train_prev, "validation": self.validation_prev}
+
+
+class Dataset:
+    def __init__(self, name, n_prevalences=9, target=None):
+        self._name = name
+        self._target = target
+        self.n_prevs = n_prevalences
+
+    def __spambase(self):
+        return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
+
+    def __imdb(self):
+        return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
+
+    def __rcv1(self):
+        n_train = 23149
+        available_targets = ["CCAT", "GCAT", "MCAT"]
+
+        if self._target is None or self._target not in available_targets:
+            raise ValueError("Invalid target")
+
+        dataset = fetch_rcv1()
+        target_index = np.where(dataset.target_names == self._target)[0]
+        all_train_d, test_d = dataset.data[:n_train, :], dataset.data[n_train:, :]
+        labels = dataset.target[:, target_index].toarray().flatten()
+        all_train_l, test_l = labels[:n_train], labels[n_train:]
+        all_train = LabelledCollection(all_train_d, all_train_l, classes=[0, 1])
+        test = LabelledCollection(test_d, test_l, classes=[0, 1])
+
+        return all_train, test
+
+    def get(self) -> List[DatasetSample]:
+        all_train, test = {
+            "spambase": self.__spambase,
+            "imdb": self.__imdb,
+            "rcv1": self.__rcv1,
+        }[self._name]()
+
+        # resample all_train set to have (0.5, 0.5) prevalence
+        at_positives = np.sum(all_train.y)
+        all_train = all_train.sampling(
+            min(at_positives, len(all_train) - at_positives) * 2, 0.5, random_state=0
+        )
+
+        # sample prevalences
+        prevalences = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
+        at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevalences)
+        datasets = []
+        for p in prevalences:
+            all_train_sampled = all_train.sampling(at_size, p, random_state=0)
+            train, validation = all_train_sampled.split_stratified(
+                train_prop=TRAIN_VAL_PROP, random_state=0
+            )
+            datasets.append(DatasetSample(train, validation, test))
+
+        return datasets
+
+    def __call__(self):
+        return self.get()
+
+    @property
+    def name(self):
+        if self._name == "rcv1":
+            return f"{self._name}_{self._target}"
+        else:
+            return self._name
 
 
 # >>> fetch_rcv1().target_names
@@ -39,33 +115,30 @@ def get_spambase(**kwargs) -> Tuple[LabelledCollection]:
 # 'M142', 'M143', 'MCAT'], dtype=object)
 
-def get_rcv1(target = "default", **kwargs):
-    sample_size = qp.environ["SAMPLE_SIZE"]
-    n_train = 23149
-    dataset = fetch_rcv1()
-
-    if target == "default":
-        target = "C12"
-
-    if target not in dataset.target_names:
-        raise ValueError("Invalid target")
-
-    def dataset_split(data, labels, classes=[0, 1]) -> Tuple[LabelledCollection]:
-        all_train_d, test_d = data[:n_train, :], data[n_train:, :]
-        all_train_l, test_l = labels[:n_train], labels[n_train:]
-        all_train = LabelledCollection(all_train_d, all_train_l, classes=classes)
-        test = LabelledCollection(test_d, test_l, classes=classes)
-        train, validation = all_train.split_stratified(
-            train_prop=TRAIN_VAL_PROP, random_state=0
-        )
-        return train, validation, test
-
-    target_index = np.where(dataset.target_names == target)[0]
-    target_labels = dataset.target[:, target_index].toarray().flatten()
-
-    if np.sum(target_labels[n_train:]) < sample_size:
-        raise ValueError("Target has too few positive samples")
-
-    d = dataset_split(dataset.data, target_labels, classes=[0, 1])
-
-    return d
+def rcv1_info():
+    dataset = fetch_rcv1()
+    n_train = 23149
+
+    targets = []
+    for target in range(103):
+        train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
+        test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
+        targets.append(
+            (
+                dataset.target_names[target],
+                {
+                    "train": (1.0 - train_t_prev, train_t_prev),
+                    "test": (1.0 - test_t_prev, test_t_prev),
+                },
+            )
+        )
+
+    targets.sort(key=lambda t: t[1]["train"][1])
+    for n, d in targets:
+        print(f"{n}:")
+        for k, (fp, tp) in d.items():
+            print(f"\t{k}: {fp:.4f}, {tp:.4f}")
+
+
+if __name__ == "__main__":
+    rcv1_info()
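For orientation, here is a minimal usage sketch of the refactored Dataset class above. It is illustrative only and not part of this commit; the "rcv1"/"CCAT" arguments simply mirror the defaults introduced in quacc/environ.py.

```python
from quacc.dataset import Dataset

# Build 9 train/validation samples at prevalences 0.1 ... 0.9,
# all sharing the same fixed test set.
dataset = Dataset("rcv1", target="CCAT", n_prevalences=9)
for sample in dataset():  # __call__ is a shortcut for get()
    # each DatasetSample carries one resampled train/validation split
    print(sample.prevs["train"], len(sample.train), len(sample.validation), len(sample.test))
```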

31
quacc/environ.py Normal file
View File

@@ -0,0 +1,31 @@
from pathlib import Path
default_env = {
"DATASET_NAME": "rcv1",
"DATASET_TARGET": "CCAT",
"COMP_ESTIMATORS": [
"OUR_BIN_SLD",
"OUR_MUL_SLD",
"KFCV",
"ATC_MC",
"ATC_NE",
"DOC_FEAT",
# "RCA",
# "RCA_STAR",
],
"DATASET_N_PREVS": 9,
"OUT_DIR": Path("out"),
"PLOT_OUT_DIR": Path("out/plot"),
"PROTOCOL_N_PREVS": 21,
"PROTOCOL_REPEATS": 100,
"SAMPLE_SIZE": 1000,
}
class Environ:
def __init__(self, **kwargs):
for k, v in kwargs.items():
self.__setattr__(k, v)
env = Environ(**default_env)
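A short, illustrative sketch of how the new configuration object is meant to be consumed (the attribute names come straight from the default_env mapping above; the print statements are not part of this commit):

```python
from quacc.environ import env

print(env.DATASET_NAME, env.DATASET_TARGET)  # "rcv1", "CCAT"
print(env.COMP_ESTIMATORS)                   # estimators enabled for the comparison
report_path = env.OUT_DIR / "rcv1_CCAT.md"   # OUT_DIR is a pathlib.Path
```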

quacc/evaluation/baseline.py
View File

@@ -1,29 +1,28 @@
 from statistics import mean
-from typing import Dict
 
 import numpy as np
-from quapy.data import LabelledCollection
-from sklearn.base import BaseEstimator
-from sklearn.model_selection import cross_validate
 import sklearn.metrics as metrics
+from quapy.data import LabelledCollection
 from quapy.protocol import (
     AbstractStochasticSeededProtocol,
     OnLabelledCollectionProtocol,
 )
-
-from .report import EvaluationReport
+from sklearn.base import BaseEstimator
+from sklearn.model_selection import cross_validate
 
 import elsahar19_rca.rca as rca
 import garg22_ATC.ATC_helper as atc
 import guillory21_doc.doc as doc
 import jiang18_trustscore.trustscore as trustscore
 
+from .report import EvaluationReport
+
 
 def kfcv(
     c_model: BaseEstimator,
     validation: LabelledCollection,
     protocol: AbstractStochasticSeededProtocol,
-    predict_method="predict"
+    predict_method="predict",
 ):
     c_model_predict = getattr(c_model, predict_method)
@@ -42,7 +41,7 @@ def kfcv(
         meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
         report.append_row(
             test.prevalence(),
-            acc_score=(1. - acc_score),
+            acc_score=(1.0 - acc_score),
            f1_score=f1_score,
            acc=meta_acc,
            f1=meta_f1,
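As a hedged, illustrative example of calling the kfcv baseline shown above (mirroring how fit_and_estimate in quacc/evaluation/comp.py drives it; the sample size and protocol parameters are taken from the defaults in quacc/environ.py, and none of this code is part of the commit):

```python
import quapy as qp
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

from quacc.dataset import Dataset
from quacc.evaluation import baseline

qp.environ["SAMPLE_SIZE"] = 1000                      # APP draws samples of this size
d = Dataset("spambase", n_prevalences=9).get()[0]     # one DatasetSample
model = LogisticRegression().fit(*d.train.Xy)
protocol = APP(d.test, n_prevalences=21, repeats=100)
report = baseline.kfcv(model, d.validation, protocol)  # -> EvaluationReport
```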

91
quacc/evaluation/comp.py Normal file
View File

@@ -0,0 +1,91 @@
import multiprocessing
import time
from typing import List
import pandas as pd
import quapy as qp
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression
from quacc.dataset import Dataset
from quacc.environ import env
from quacc.evaluation import baseline, method
from quacc.evaluation.report import DatasetReport, EvaluationReport
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
pd.set_option("display.float_format", "{:.4f}".format)
class CompEstimator:
__dict = {
"OUR_BIN_SLD": method.evaluate_bin_sld,
"OUR_MUL_SLD": method.evaluate_mul_sld,
"KFCV": baseline.kfcv,
"ATC_MC": baseline.atc_mc,
"ATC_NE": baseline.atc_ne,
"DOC_FEAT": baseline.doc_feat,
"RCA": baseline.rca_score,
"RCA_STAR": baseline.rca_star_score,
}
def __class_getitem__(cls, e: str | List[str]):
if isinstance(e, str):
try:
return cls.__dict[e]
except KeyError:
raise KeyError(f"Invalid estimator: estimator {e} does not exist")
elif isinstance(e, list):
try:
return [cls.__dict[est] for est in e]
except KeyError as ke:
raise KeyError(
f"Invalid estimator: estimator {ke.args[0]} does not exist"
)
CE = CompEstimator
def fit_and_estimate(_estimate, train, validation, test):
model = LogisticRegression()
model.fit(*train.Xy)
protocol = APP(
test, n_prevalences=env.PROTOCOL_N_PREVS, repeats=env.PROTOCOL_REPEATS
)
start = time.time()
result = _estimate(model, validation, protocol)
end = time.time()
print(f"{_estimate.__name__}: {end-start:.2f}s")
return {
"name": _estimate.__name__,
"result": result,
"time": end - start,
}
def evaluate_comparison(
dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
) -> EvaluationReport:
with multiprocessing.Pool(8) as pool:
dr = DatasetReport(dataset.name)
for d in dataset():
print(f"train prev.: {d.train_prev}")
start = time.time()
tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
results = list(map(lambda r: r.get(), results))
er = EvaluationReport.combine_reports(
*list(map(lambda r: r["result"], results)), name=dataset.name
)
times = {r["name"]: r["time"] for r in results}
end = time.time()
times["tot"] = end - start
er.times = times
er.train_prevs = d.prevs
dr.add(er)
print()
return dr
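For reference, a minimal sketch of how this module is driven (it mirrors estimate_comparison() in the updated entry script; the estimator list may be any subset of the keys registered in CompEstimator, and the snippet itself is illustrative rather than part of this commit):

```python
from quacc.dataset import Dataset
from quacc.environ import env
from quacc.evaluation import comp

dataset = Dataset(
    env.DATASET_NAME, target=env.DATASET_TARGET, n_prevalences=env.DATASET_N_PREVS
)
# runs each estimator on every DatasetSample and collects a DatasetReport
dr = comp.evaluate_comparison(dataset, estimators=["OUR_BIN_SLD", "KFCV"])
print(dr.to_md("acc"))
```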

quacc/evaluation/method.py
View File

@@ -1,20 +1,11 @@
-import multiprocessing
-import time
-
-import pandas as pd
-import quapy as qp
 from quapy.data import LabelledCollection
 from quapy.protocol import (
-    APP,
     AbstractStochasticSeededProtocol,
     OnLabelledCollectionProtocol,
 )
 from sklearn.base import BaseEstimator
-from sklearn.linear_model import LogisticRegression
 
 import quacc.error as error
-import quacc.evaluation.baseline as baseline
-from quacc.dataset import get_imdb, get_rcv1, get_spambase
 from quacc.evaluation.report import EvaluationReport
 
 from ..estimator import (
@@ -23,13 +14,6 @@ from ..estimator import (
     MulticlassAccuracyEstimator,
 )
 
-qp.environ["SAMPLE_SIZE"] = 100
-pd.set_option("display.float_format", "{:.4f}".format)
-
-n_prevalences = 21
-repreats = 100
-
 
 def estimate(
     estimator: AccuracyEstimator,
@@ -62,10 +46,10 @@ def evaluation_report(
         f1_score = error.f1(estim_prev)
         report.append_row(
             base_prev,
-            acc_score=1. - acc_score,
-            acc = abs(error.acc(true_prev) - acc_score),
+            acc_score=1.0 - acc_score,
+            acc=abs(error.acc(true_prev) - acc_score),
             f1_score=f1_score,
-            f1=abs(error.f1(true_prev) - f1_score)
+            f1=abs(error.f1(true_prev) - f1_score),
         )
 
     return report
@@ -77,7 +61,7 @@ def evaluate(
     protocol: AbstractStochasticSeededProtocol,
     method: str,
 ):
-    estimator : AccuracyEstimator = {
+    estimator: AccuracyEstimator = {
         "bin": BinaryQuantifierAccuracyEstimator,
         "mul": MulticlassAccuracyEstimator,
     }[method](c_model)
@@ -85,65 +69,17 @@ def evaluate(
     return evaluation_report(estimator, protocol, method)
 
 
-def evaluate_binary(model, validation, protocol):
-    return evaluate(model, validation, protocol, "bin")
-
-
-def evaluate_multiclass(model, validation, protocol):
-    return evaluate(model, validation, protocol, "mul")
-
-
-def fit_and_estimate(_estimate, train, validation, test):
-    model = LogisticRegression()
-    model.fit(*train.Xy)
-    protocol = APP(test, n_prevalences=n_prevalences, repeats=repreats)
-
-    start = time.time()
-    result = _estimate(model, validation, protocol)
-    end = time.time()
-
-    return {
-        "name": _estimate.__name__,
-        "result": result,
-        "time": end - start,
-    }
-
-
-def evaluate_comparison(dataset: str, **kwargs) -> EvaluationReport:
-    train, validation, test = {
-        "spambase": get_spambase,
-        "imdb": get_imdb,
-        "rcv1": get_rcv1,
-    }[dataset](**kwargs)
-    for k,v in kwargs.items():
-        print(k, ":", v)
-    prevs = {
-        "train": train.prevalence(),
-        "validation": validation.prevalence(),
-    }
-    start = time.time()
-    with multiprocessing.Pool(8) as pool:
-        estimators = [
-            evaluate_binary,
-            evaluate_multiclass,
-            baseline.kfcv,
-            baseline.atc_mc,
-            baseline.atc_ne,
-            baseline.doc_feat,
-            baseline.rca_score,
-            baseline.rca_star_score,
-        ]
-        tasks = [(estim, train, validation, test) for estim in estimators]
-        results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
-        results = list(map(lambda r: r.get(), results))
-        er = EvaluationReport.combine_reports(*list(map(lambda r: r["result"], results)))
-        times = {r["name"]:r["time"] for r in results}
-        end = time.time()
-        times["tot"] = end - start
-        er.times = times
-        er.prevs = prevs
-    return er
+def evaluate_bin_sld(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "bin")
+
+
+def evaluate_mul_sld(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "mul")

quacc/evaluation/report.py
View File

@@ -1,143 +1,122 @@
-from typing import Tuple
+import math
 import statistics as stats
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 
-
-def _fmt_line(s):
-    return f"> {s} \n"
+from quacc import plot
+from quacc.utils import fmt_line_md
 
 
 class EvaluationReport:
     def __init__(self, prefix=None):
-        self.base = []
-        self.dict = {}
-        self._grouped = False
-        self._grouped_base = []
-        self._grouped_dict = {}
-        self._dataframe = None
-        self.prefix = prefix if prefix is not None else "default"
-        self._times = {}
-        self._prevs = {}
-        self._target = "default"
+        self._prevs = []
+        self._dict = {}
+        self._g_prevs = None
+        self._g_dict = None
+        self.name = prefix if prefix is not None else "default"
+        self.times = {}
+        self.train_prevs = {}
+        self.target = "default"
 
     def append_row(self, base: np.ndarray | Tuple, **row):
         if isinstance(base, np.ndarray):
             base = tuple(base.tolist())
-        self.base.append(base)
+        self._prevs.append(base)
         for k, v in row.items():
-            if (k, self.prefix) in self.dict:
-                self.dict[(k, self.prefix)].append(v)
+            if (k, self.name) in self._dict:
+                self._dict[(k, self.name)].append(v)
             else:
-                self.dict[(k, self.prefix)] = [v]
-        self._grouped = False
-        self._dataframe = None
+                self._dict[(k, self.name)] = [v]
+        self._g_prevs = None
 
     @property
     def columns(self):
-        return self.dict.keys()
-
-    @property
-    def grouped(self):
-        if self._grouped:
-            return self._grouped_dict
-
-        self._grouped_base = []
-        self._grouped_dict = {k: [] for k in self.dict.keys()}
-
-        last_end = 0
-        for ind, bp in enumerate(self.base):
-            if ind < (len(self.base) - 1) and bp == self.base[ind + 1]:
-                continue
-
-            self._grouped_base.append(bp)
-            for col in self.dict.keys():
-                self._grouped_dict[col].append(
-                    stats.mean(self.dict[col][last_end : ind + 1])
-                )
-
-            last_end = ind + 1
-
-        self._grouped = True
-        return self._grouped_dict
-
-    @property
-    def gbase(self):
-        self.grouped
-        return self._grouped_base
-
-    def get_dataframe(self, metrics=None):
-        if self._dataframe is None:
-            self_columns = sorted(self.columns, key=lambda c: c[0])
-            self._dataframe = pd.DataFrame(
-                self.grouped,
-                index=self.gbase,
-                columns=pd.MultiIndex.from_tuples(self_columns),
-            )
-        df = pd.DataFrame(self._dataframe)
-        if metrics is not None:
-            df = df.drop(
-                [(c0, c1) for (c0, c1) in df.columns if c0 not in metrics], axis=1
-            )
-        if len(set(k0 for k0, k1 in df.columns)) == 1:
-            df = df.droplevel(0, axis=1)
-        return df
-
-    def merge(self, other):
-        if not all(v1 == v2 for v1, v2 in zip(self.base, other.base)):
-            raise ValueError("other has not same base prevalences of self")
-        if len(set(self.dict.keys()).intersection(set(other.dict.keys()))) > 0:
-            raise ValueError("self and other have matching keys")
-        report = EvaluationReport()
-        report.base = self.base
-        report.dict = self.dict | other.dict
-        return report
-
-    @property
-    def times(self):
-        return self._times
-
-    @times.setter
-    def times(self, val):
-        self._times = val
-
-    @property
-    def prevs(self):
-        return self._prevs
-
-    @prevs.setter
-    def prevs(self, val):
-        self._prevs = val
-
-    @property
-    def target(self):
-        return self._target
-
-    @target.setter
-    def target(self, val):
-        self._target = val
+        return self._dict.keys()
+
+    def groupby_prevs(self, metric: str = None):
+        if self._g_dict is None:
+            self._g_prevs = []
+            self._g_dict = {k: [] for k in self._dict.keys()}
+
+            last_end = 0
+            for ind, bp in enumerate(self._prevs):
+                if ind < (len(self._prevs) - 1) and bp == self._prevs[ind + 1]:
+                    continue
+
+                self._g_prevs.append(bp)
+                for col in self._dict.keys():
+                    self._g_dict[col].append(
+                        stats.mean(self._dict[col][last_end : ind + 1])
+                    )
+
+                last_end = ind + 1
+
+        filtered_g_dict = self._g_dict
+        if metric is not None:
+            filtered_g_dict = {
+                c1: ls for ((c0, c1), ls) in self._g_dict.items() if c0 == metric
+            }
+
+        return self._g_prevs, filtered_g_dict
+
+    def get_dataframe(self, metric="acc"):
+        g_prevs, g_dict = self.groupby_prevs(metric=metric)
+        return pd.DataFrame(
+            g_dict,
+            index=g_prevs,
+            columns=g_dict.keys(),
+        )
 
     def to_md(self, *metrics):
-        res = _fmt_line("target: " + self.target)
-        for k, v in self.prevs.items():
-            res += _fmt_line(f"{k}: {str(v)}")
+        res = ""
+        for k, v in self.train_prevs.items():
+            res += fmt_line_md(f"{k}: {str(v)}")
         for k, v in self.times.items():
-            res += _fmt_line(f"{k}: {v:.3f}s")
+            res += fmt_line_md(f"{k}: {v:.3f}s")
         res += "\n"
         for m in metrics:
-            res += self.get_dataframe(metrics=m).to_html() + "\n\n"
+            res += self.get_dataframe(metric=m).to_html() + "\n\n"
         return res
 
+    def merge(self, other):
+        if not all(v1 == v2 for v1, v2 in zip(self._prevs, other._prevs)):
+            raise ValueError("other has not same base prevalences of self")
+        if len(set(self._dict.keys()).intersection(set(other._dict.keys()))) > 0:
+            raise ValueError("self and other have matching keys")
+        report = EvaluationReport()
+        report._prevs = self._prevs
+        report._dict = self._dict | other._dict
+        return report
+
     @staticmethod
-    def combine_reports(*args):
+    def combine_reports(*args, name="default"):
         er = args[0]
         for r in args[1:]:
             er = er.merge(r)
+        er.name = name
         return er
+
+
+class DatasetReport:
+    def __init__(self, name):
+        self.name = name
+        self.ers: List[EvaluationReport] = []
+
+    def add(self, er: EvaluationReport):
+        self.ers.append(er)
+
+    def to_md(self, *metrics):
+        res = f"{self.name}\n\n"
+        for er in self.ers:
+            res += f"{er.to_md(*metrics)}\n\n"
+        return res
+
+    def __iter__(self):
+        return (er for er in self.ers)
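An illustrative sketch of the new report API, assuming the module layout above; the prevalence and score values are made up and the snippet is not part of this commit:

```python
import numpy as np
from quacc.evaluation.report import DatasetReport, EvaluationReport

er = EvaluationReport(prefix="OUR_BIN_SLD")
er.append_row(np.asarray([0.3, 0.7]), acc_score=0.91, acc=0.02)
er.append_row(np.asarray([0.3, 0.7]), acc_score=0.89, acc=0.04)

# rows sharing the same base prevalence are averaged together
g_prevs, g_dict = er.groupby_prevs(metric="acc")

dr = DatasetReport("rcv1_CCAT")
dr.add(er)
print(dr.to_md("acc"))
```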

View File

@@ -1,33 +1,16 @@
-import traceback
-
-import quacc.evaluation.method as method
-
-DATASET = "imdb"
-OUTPUT_FILE = "out_" + DATASET + ".md"
-
-TARGETS = {
-    "rcv1" : [
-        'C12',
-        'C13', 'C15', 'C151', 'C1511', 'C152', 'C17', 'C172',
-        'C18', 'C181', 'C21', 'C24', 'C31', 'C42', 'CCAT'
-        'E11', 'E12', 'E21', 'E211', 'E212', 'E41', 'E51', 'ECAT',
-        'G15', 'GCAT', 'GCRIM', 'GDIP', 'GPOL', 'GVIO', 'GVOTE', 'GWEA',
-        'GWELF', 'M11', 'M12', 'M13', 'M131', 'M132', 'M14', 'M141',
-        'M142', 'M143', 'MCAT'
-    ],
-    "spambase": ["default"],
-    "imdb": ["default"],
-}
+import quacc.evaluation.comp as comp
+from quacc.dataset import Dataset
+from quacc.environ import env
 
 
 def estimate_comparison():
-    open(OUTPUT_FILE, "w").close()
-    targets = TARGETS[DATASET]
-    for target in targets:
-        try:
-            er = method.evaluate_comparison(DATASET, target=target)
-            er.target = target
-            with open(OUTPUT_FILE, "a") as f:
-                f.write(er.to_md(["acc"], ["f1"]))
-        except Exception:
-            traceback.print_exc()
+    dataset = Dataset(
+        env.DATASET_NAME, target=env.DATASET_TARGET, n_prevalences=env.DATASET_N_PREVS
+    )
+    output_path = env.OUT_DIR / f"{dataset.name}.md"
+    with open(output_path, "w") as f:
+        dr = comp.evaluate_comparison(dataset, estimators=env.COMP_ESTIMATORS)
+        f.write(dr.to_md("acc"))
 
     # print(df.to_latex(float_format="{:.4f}".format))
     # print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))

quacc/utils.py
View File

@@ -1,7 +1,8 @@
 import functools
+
 import pandas as pd
 
 
 def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
     if len(dfs) < 1:
         raise ValueError
@@ -16,9 +17,7 @@ def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
 
 def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
     def _reduce_func(s1, s2):
-        return {
-            (n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()
-        }
+        return {(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()}
 
     lst = df.to_dict(orient="records")[1:-1]
     summed_series = functools.reduce(_reduce_func, lst)
@@ -29,3 +28,7 @@ def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
         if n1 != "base"
     }
     return pd.DataFrame([avg_report], columns=idx)
+
+
+def fmt_line_md(s):
+    return f"> {s} \n"