Dataset refactored, training sampling added
This commit is contained in:
parent
31e91c1626
commit
d906502c29
|
@ -12,3 +12,4 @@ elsahar19_rca/__pycache__/*
|
||||||
*.coverage
|
*.coverage
|
||||||
.coverage
|
.coverage
|
||||||
scp_sync.py
|
scp_sync.py
|
||||||
|
out/*
|
|
@ -0,0 +1,55 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title></title>
|
||||||
|
<style>
|
||||||
|
/* From extension vscode.github */
|
||||||
|
/*---------------------------------------------------------------------------------------------
|
||||||
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||||
|
*--------------------------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
.vscode-dark img[src$=\#gh-light-mode-only],
|
||||||
|
.vscode-light img[src$=\#gh-dark-mode-only] {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
</style>
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
|
||||||
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif;
|
||||||
|
font-size: 14px;
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<style>
|
||||||
|
.task-list-item {
|
||||||
|
list-style-type: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.task-list-item-checkbox {
|
||||||
|
margin-left: -20px;
|
||||||
|
vertical-align: middle;
|
||||||
|
pointer-events: none;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
||||||
|
</head>
|
||||||
|
<body class="vscode-body vscode-light">
|
||||||
|
<ul class="contains-task-list">
|
||||||
|
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> aggiungere media tabelle</li>
|
||||||
|
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> plot; 3 tipi (appunti + email + garg)</li>
|
||||||
|
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> sistemare kfcv baseline</li>
|
||||||
|
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> aggiungere metodo con CC oltre SLD</li>
|
||||||
|
<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked=""type="checkbox"> prendere classe più popolosa di rcv1, togliere negativi fino a raggiungere 50/50; poi fare subsampling con 9 training prevalences (da 0.1-0.9 a 0.9-0.1)</li>
|
||||||
|
<li class="task-list-item enabled"><input class="task-list-item-checkbox"type="checkbox"> variare parametro recalibration in SLD</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
14
TODO.md
14
TODO.md
|
@ -1,8 +1,6 @@
|
||||||
- aggiungere media tabelle
|
- [ ] aggiungere media tabelle
|
||||||
- plot
|
- [ ] plot; 3 tipi (appunti + email + garg)
|
||||||
- 3 tipi (vedi appunti + garg)
|
- [ ] sistemare kfcv baseline
|
||||||
- sistemare kfcv baseline
|
- [ ] aggiungere metodo con CC oltre SLD
|
||||||
- aggiungere metodo con CC oltre SLD
|
- [x] prendere classe più popolosa di rcv1, togliere negativi fino a raggiungere 50/50; poi fare subsampling con 9 training prevalences (da 0.1-0.9 a 0.9-0.1)
|
||||||
- prendere classe più popolosa di rcv1, togliere negativi fino a raggiungere 50/50
|
- [ ] variare parametro recalibration in SLD
|
||||||
poi fare subsampling con 9 training prvalences (da 0.1-0.9 a 0.9-0.1)
|
|
||||||
- variare parametro recalibration in SLD
|
|
145
quacc/dataset.py
145
quacc/dataset.py
|
@ -1,26 +1,102 @@
|
||||||
from typing import Tuple
|
import math
|
||||||
|
from typing import List
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from quapy.data.base import LabelledCollection
|
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
|
from quapy.data.base import LabelledCollection
|
||||||
from sklearn.conftest import fetch_rcv1
|
from sklearn.conftest import fetch_rcv1
|
||||||
|
|
||||||
TRAIN_VAL_PROP = 0.5
|
TRAIN_VAL_PROP = 0.5
|
||||||
|
|
||||||
|
|
||||||
def get_imdb(**kwargs) -> Tuple[LabelledCollection]:
|
class DatasetSample:
|
||||||
train, test = qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
|
def __init__(
|
||||||
train, validation = train.split_stratified(
|
self,
|
||||||
train_prop=TRAIN_VAL_PROP, random_state=0
|
train: LabelledCollection,
|
||||||
)
|
validation: LabelledCollection,
|
||||||
return train, validation, test
|
test: LabelledCollection,
|
||||||
|
):
|
||||||
|
self.train = train
|
||||||
|
self.validation = validation
|
||||||
|
self.test = test
|
||||||
|
|
||||||
|
@property
|
||||||
|
def train_prev(self):
|
||||||
|
return self.train.prevalence()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def validation_prev(self):
|
||||||
|
return self.validation.prevalence()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def prevs(self):
|
||||||
|
return {"train": self.train_prev, "validation": self.validation_prev}
|
||||||
|
|
||||||
|
|
||||||
def get_spambase(**kwargs) -> Tuple[LabelledCollection]:
|
class Dataset:
|
||||||
train, test = qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
|
def __init__(self, name, n_prevalences=9, target=None):
|
||||||
train, validation = train.split_stratified(
|
self._name = name
|
||||||
|
self._target = target
|
||||||
|
self.n_prevs = n_prevalences
|
||||||
|
|
||||||
|
def __spambase(self):
    """Load the "spambase" dataset.

    Returns:
        The ``train_test`` pair of the QuaPy UCI "spambase" dataset, which
        ``get`` unpacks as ``(all_train, test)``.
    """
    # The two loaders used to be swapped (this one fetched the IMDB reviews);
    # each loader now fetches the dataset it is named after.
    return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test

def __imdb(self):
    """Load the "imdb" reviews dataset with tf-idf features.

    Returns:
        The ``train_test`` pair of the QuaPy "imdb" reviews dataset, which
        ``get`` unpacks as ``(all_train, test)``.
    """
    return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
|
||||||
|
|
||||||
|
def __rcv1(self):
    """Load RCV1 as a binary problem for the configured target category.

    Returns:
        A ``(all_train, test)`` pair of ``LabelledCollection`` objects with
        classes ``[0, 1]``.

    Raises:
        ValueError: if the target is ``None`` or not one of the supported
            categories.
    """
    # Row index separating the training pool from the test rows
    # (presumably the standard RCV1 train/test boundary -- TODO confirm).
    split_at = 23149
    supported = ["CCAT", "GCAT", "MCAT"]

    target = self._target
    if target is None or target not in supported:
        raise ValueError("Invalid target")

    rcv1 = fetch_rcv1()

    # Select the label column of the requested category and flatten it
    # into a one-dimensional label vector.
    column = np.where(rcv1.target_names == target)[0]
    y = rcv1.target[:, column].toarray().flatten()
    X = rcv1.data

    train_pool = LabelledCollection(X[:split_at, :], y[:split_at], classes=[0, 1])
    held_out = LabelledCollection(X[split_at:, :], y[split_at:], classes=[0, 1])

    return train_pool, held_out
|
||||||
|
|
||||||
|
def get(self) -> List[DatasetSample]:
|
||||||
|
all_train, test = {
|
||||||
|
"spambase": self.__spambase,
|
||||||
|
"imdb": self.__imdb,
|
||||||
|
"rcv1": self.__rcv1,
|
||||||
|
}[self._name]()
|
||||||
|
|
||||||
|
# resample all_train set to have (0.5, 0.5) prevalence
|
||||||
|
at_positives = np.sum(all_train.y)
|
||||||
|
all_train = all_train.sampling(
|
||||||
|
min(at_positives, len(all_train) - at_positives) * 2, 0.5, random_state=0
|
||||||
|
)
|
||||||
|
|
||||||
|
# sample prevalences
|
||||||
|
prevalences = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
|
||||||
|
at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevalences)
|
||||||
|
datasets = []
|
||||||
|
for p in prevalences:
|
||||||
|
all_train_sampled = all_train.sampling(at_size, p, random_state=0)
|
||||||
|
train, validation = all_train_sampled.split_stratified(
|
||||||
train_prop=TRAIN_VAL_PROP, random_state=0
|
train_prop=TRAIN_VAL_PROP, random_state=0
|
||||||
)
|
)
|
||||||
return train, validation, test
|
datasets.append(DatasetSample(train, validation, test))
|
||||||
|
|
||||||
|
return datasets
|
||||||
|
|
||||||
|
def __call__(self):
|
||||||
|
return self.get()
|
||||||
|
|
||||||
|
@property
def name(self):
    """Display name of the dataset.

    For "rcv1" the target category is appended (``"rcv1_<target>"``);
    every other dataset is identified by its plain name.
    """
    if self._name != "rcv1":
        return self._name
    return f"{self._name}_{self._target}"
|
||||||
|
|
||||||
|
|
||||||
# >>> fetch_rcv1().target_names
|
# >>> fetch_rcv1().target_names
|
||||||
|
@ -39,33 +115,30 @@ def get_spambase(**kwargs) -> Tuple[LabelledCollection]:
|
||||||
# 'M142', 'M143', 'MCAT'], dtype=object)
|
# 'M142', 'M143', 'MCAT'], dtype=object)
|
||||||
|
|
||||||
|
|
||||||
def get_rcv1(target = "default", **kwargs):
|
def rcv1_info():
|
||||||
sample_size = qp.environ["SAMPLE_SIZE"]
|
|
||||||
n_train = 23149
|
|
||||||
dataset = fetch_rcv1()
|
dataset = fetch_rcv1()
|
||||||
|
n_train = 23149
|
||||||
|
|
||||||
if target == "default":
|
targets = []
|
||||||
target = "C12"
|
for target in range(103):
|
||||||
|
train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
|
||||||
if target not in dataset.target_names:
|
test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
|
||||||
raise ValueError("Invalid target")
|
targets.append(
|
||||||
|
(
|
||||||
def dataset_split(data, labels, classes=[0, 1]) -> Tuple[LabelledCollection]:
|
dataset.target_names[target],
|
||||||
all_train_d, test_d = data[:n_train, :], data[n_train:, :]
|
{
|
||||||
all_train_l, test_l = labels[:n_train], labels[n_train:]
|
"train": (1.0 - train_t_prev, train_t_prev),
|
||||||
all_train = LabelledCollection(all_train_d, all_train_l, classes=classes)
|
"test": (1.0 - test_t_prev, test_t_prev),
|
||||||
test = LabelledCollection(test_d, test_l, classes=classes)
|
},
|
||||||
train, validation = all_train.split_stratified(
|
)
|
||||||
train_prop=TRAIN_VAL_PROP, random_state=0
|
|
||||||
)
|
)
|
||||||
return train, validation, test
|
|
||||||
|
|
||||||
target_index = np.where(dataset.target_names == target)[0]
|
targets.sort(key=lambda t: t[1]["train"][1])
|
||||||
target_labels = dataset.target[:, target_index].toarray().flatten()
|
for n, d in targets:
|
||||||
|
print(f"{n}:")
|
||||||
|
for k, (fp, tp) in d.items():
|
||||||
|
print(f"\t{k}: {fp:.4f}, {tp:.4f}")
|
||||||
|
|
||||||
if np.sum(target_labels[n_train:]) < sample_size:
|
|
||||||
raise ValueError("Target has too few positive samples")
|
|
||||||
|
|
||||||
d = dataset_split(dataset.data, target_labels, classes=[0, 1])
|
if __name__ == "__main__":
|
||||||
|
rcv1_info()
|
||||||
return d
|
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Default configuration values; each key becomes an attribute of the
# module-level environment object built from this mapping.
default_env = {
    # Dataset selection.
    "DATASET_NAME": "rcv1",
    "DATASET_TARGET": "CCAT",
    # Identifiers of the estimators to compare.
    "COMP_ESTIMATORS": [
        "OUR_BIN_SLD",
        "OUR_MUL_SLD",
        "KFCV",
        "ATC_MC",
        "ATC_NE",
        "DOC_FEAT",
        # "RCA",
        # "RCA_STAR",
    ],
    "DATASET_N_PREVS": 9,
    # Output locations.
    "OUT_DIR": Path("out"),
    "PLOT_OUT_DIR": Path("out/plot"),
    # Evaluation protocol settings.
    "PROTOCOL_N_PREVS": 21,
    "PROTOCOL_REPEATS": 100,
    "SAMPLE_SIZE": 1000,
}

# Backward-compatible alias: the original, misspelled name is kept so that
# existing references to it keep working.
defalut_env = default_env
|
||||||
|
|
||||||
|
|
||||||
|
class Environ:
    """Attribute-style container for configuration values.

    Every keyword argument passed to the constructor is stored as an
    attribute with the same name.
    """

    def __init__(self, **kwargs):
        for key in kwargs:
            setattr(self, key, kwargs[key])
|
||||||
|
|
||||||
|
|
||||||
|
env = Environ(**defalut_env)
|
|
@ -1,29 +1,28 @@
|
||||||
from statistics import mean
|
from statistics import mean
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from quapy.data import LabelledCollection
|
|
||||||
from sklearn.base import BaseEstimator
|
|
||||||
from sklearn.model_selection import cross_validate
|
|
||||||
import sklearn.metrics as metrics
|
import sklearn.metrics as metrics
|
||||||
|
from quapy.data import LabelledCollection
|
||||||
from quapy.protocol import (
|
from quapy.protocol import (
|
||||||
AbstractStochasticSeededProtocol,
|
AbstractStochasticSeededProtocol,
|
||||||
OnLabelledCollectionProtocol,
|
OnLabelledCollectionProtocol,
|
||||||
)
|
)
|
||||||
|
from sklearn.base import BaseEstimator
|
||||||
from .report import EvaluationReport
|
from sklearn.model_selection import cross_validate
|
||||||
|
|
||||||
import elsahar19_rca.rca as rca
|
import elsahar19_rca.rca as rca
|
||||||
import garg22_ATC.ATC_helper as atc
|
import garg22_ATC.ATC_helper as atc
|
||||||
import guillory21_doc.doc as doc
|
import guillory21_doc.doc as doc
|
||||||
import jiang18_trustscore.trustscore as trustscore
|
import jiang18_trustscore.trustscore as trustscore
|
||||||
|
|
||||||
|
from .report import EvaluationReport
|
||||||
|
|
||||||
|
|
||||||
def kfcv(
|
def kfcv(
|
||||||
c_model: BaseEstimator,
|
c_model: BaseEstimator,
|
||||||
validation: LabelledCollection,
|
validation: LabelledCollection,
|
||||||
protocol: AbstractStochasticSeededProtocol,
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
predict_method="predict"
|
predict_method="predict",
|
||||||
):
|
):
|
||||||
c_model_predict = getattr(c_model, predict_method)
|
c_model_predict = getattr(c_model, predict_method)
|
||||||
|
|
||||||
|
@ -42,7 +41,7 @@ def kfcv(
|
||||||
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
|
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
|
||||||
report.append_row(
|
report.append_row(
|
||||||
test.prevalence(),
|
test.prevalence(),
|
||||||
acc_score=(1. - acc_score),
|
acc_score=(1.0 - acc_score),
|
||||||
f1_score=f1_score,
|
f1_score=f1_score,
|
||||||
acc=meta_acc,
|
acc=meta_acc,
|
||||||
f1=meta_f1,
|
f1=meta_f1,
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
import multiprocessing
|
||||||
|
import time
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import quapy as qp
|
||||||
|
from quapy.protocol import APP
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
|
from quacc.dataset import Dataset
|
||||||
|
from quacc.environ import env
|
||||||
|
from quacc.evaluation import baseline, method
|
||||||
|
from quacc.evaluation.report import DatasetReport, EvaluationReport
|
||||||
|
|
||||||
|
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
|
||||||
|
|
||||||
|
pd.set_option("display.float_format", "{:.4f}".format)
|
||||||
|
|
||||||
|
|
||||||
|
class CompEstimator:
    """Registry mapping estimator identifiers to their evaluation functions.

    The class is indexed directly: ``CompEstimator["KFCV"]`` returns one
    function, ``CompEstimator[["KFCV", "ATC_MC"]]`` returns a list of
    functions in the same order.
    """

    # Private (name-mangled) lookup table: identifier -> evaluation function.
    __dict = {
        "OUR_BIN_SLD": method.evaluate_bin_sld,
        "OUR_MUL_SLD": method.evaluate_mul_sld,
        "KFCV": baseline.kfcv,
        "ATC_MC": baseline.atc_mc,
        "ATC_NE": baseline.atc_ne,
        "DOC_FEAT": baseline.doc_feat,
        "RCA": baseline.rca_score,
        "RCA_STAR": baseline.rca_star_score,
    }

    def __class_getitem__(cls, e: str | List[str]):
        """Resolve one estimator name or a sequence of names.

        Args:
            e: an estimator identifier, or a list/tuple of identifiers.

        Returns:
            The matching function for a single name, or a list of functions
            for a sequence of names.

        Raises:
            KeyError: if a requested identifier is not registered.
            TypeError: if ``e`` is neither a string nor a list/tuple; the
                previous implementation silently returned ``None`` here.
        """
        if isinstance(e, str):
            return cls._resolve(e)
        if isinstance(e, (list, tuple)):
            return [cls._resolve(est) for est in e]
        raise TypeError(
            "Invalid estimator key: expected str or list of str, "
            f"got {type(e).__name__}"
        )

    @classmethod
    def _resolve(cls, name):
        """Return the function registered under ``name`` or raise KeyError."""
        try:
            return cls.__dict[name]
        except KeyError:
            # The lookup failure itself adds no information to the message.
            raise KeyError(
                f"Invalid estimator: estimator {name} does not exist"
            ) from None


CE = CompEstimator
|
||||||
|
|
||||||
|
|
||||||
|
def fit_and_estimate(_estimate, train, validation, test):
    """Train a classifier, run one estimator on it and time the estimation.

    A fresh ``LogisticRegression`` is fitted on ``train``; an ``APP``
    protocol is built over ``test`` using the configured number of
    prevalences and repeats; then ``_estimate(model, validation, protocol)``
    is executed. Only the estimator call is timed, not the model fitting.

    Args:
        _estimate: callable invoked as ``_estimate(model, validation, protocol)``.
        train: collection whose ``Xy`` pair is used to fit the classifier.
        validation: collection forwarded to the estimator.
        test: collection the sampling protocol is built on.

    Returns:
        A dict with keys ``"name"`` (the estimator's ``__name__``),
        ``"result"`` (whatever the estimator returned) and ``"time"``
        (elapsed seconds of the estimator call).
    """
    model = LogisticRegression()
    model.fit(*train.Xy)

    protocol = APP(
        test, n_prevalences=env.PROTOCOL_N_PREVS, repeats=env.PROTOCOL_REPEATS
    )

    # perf_counter is the clock intended for measuring durations; time.time()
    # can jump when the system clock is adjusted.
    start = time.perf_counter()
    result = _estimate(model, validation, protocol)
    elapsed = time.perf_counter() - start
    print(f"{_estimate.__name__}: {elapsed:.2f}s")

    return {
        "name": _estimate.__name__,
        "result": result,
        "time": elapsed,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_comparison(dataset: Dataset, estimators=None) -> DatasetReport:
    """Run the selected estimators on every sample of ``dataset``.

    For each sample produced by calling ``dataset``, one ``fit_and_estimate``
    task per estimator is submitted to a pool of 8 worker processes. The
    per-estimator reports are combined into a single evaluation report, which
    is annotated with the timings and the sample's prevalences and added to
    the dataset-level report.

    Args:
        dataset: the dataset to evaluate; calling it yields the samples.
        estimators: list of estimator identifiers understood by ``CE``.
            Defaults to ``["OUR_BIN_SLD", "OUR_MUL_SLD"]``.

    Returns:
        The ``DatasetReport`` collecting one evaluation report per sample.
    """
    # None replaces the former mutable list default; the effective default
    # is unchanged.
    if estimators is None:
        estimators = ["OUR_BIN_SLD", "OUR_MUL_SLD"]

    with multiprocessing.Pool(8) as pool:
        dr = DatasetReport(dataset.name)
        for d in dataset():
            print(f"train prev.: {d.train_prev}")
            start = time.perf_counter()

            tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
            pending = [pool.apply_async(fit_and_estimate, t) for t in tasks]
            # Block until every estimator has finished for this sample.
            results = [p.get() for p in pending]

            er = EvaluationReport.combine_reports(
                *[r["result"] for r in results], name=dataset.name
            )
            times = {r["name"]: r["time"] for r in results}
            # "tot" is the elapsed time for the whole sample, including
            # task dispatch and report merging.
            times["tot"] = time.perf_counter() - start
            er.times = times
            er.train_prevs = d.prevs
            dr.add(er)
            print()

    return dr
|
|
@ -1,20 +1,11 @@
|
||||||
import multiprocessing
|
|
||||||
import time
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import quapy as qp
|
|
||||||
from quapy.data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
from quapy.protocol import (
|
from quapy.protocol import (
|
||||||
APP,
|
|
||||||
AbstractStochasticSeededProtocol,
|
AbstractStochasticSeededProtocol,
|
||||||
OnLabelledCollectionProtocol,
|
OnLabelledCollectionProtocol,
|
||||||
)
|
)
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
from sklearn.linear_model import LogisticRegression
|
|
||||||
|
|
||||||
import quacc.error as error
|
import quacc.error as error
|
||||||
import quacc.evaluation.baseline as baseline
|
|
||||||
from quacc.dataset import get_imdb, get_rcv1, get_spambase
|
|
||||||
from quacc.evaluation.report import EvaluationReport
|
from quacc.evaluation.report import EvaluationReport
|
||||||
|
|
||||||
from ..estimator import (
|
from ..estimator import (
|
||||||
|
@ -23,13 +14,6 @@ from ..estimator import (
|
||||||
MulticlassAccuracyEstimator,
|
MulticlassAccuracyEstimator,
|
||||||
)
|
)
|
||||||
|
|
||||||
qp.environ["SAMPLE_SIZE"] = 100
|
|
||||||
|
|
||||||
pd.set_option("display.float_format", "{:.4f}".format)
|
|
||||||
|
|
||||||
n_prevalences = 21
|
|
||||||
repreats = 100
|
|
||||||
|
|
||||||
|
|
||||||
def estimate(
|
def estimate(
|
||||||
estimator: AccuracyEstimator,
|
estimator: AccuracyEstimator,
|
||||||
|
@ -62,10 +46,10 @@ def evaluation_report(
|
||||||
f1_score = error.f1(estim_prev)
|
f1_score = error.f1(estim_prev)
|
||||||
report.append_row(
|
report.append_row(
|
||||||
base_prev,
|
base_prev,
|
||||||
acc_score=1. - acc_score,
|
acc_score=1.0 - acc_score,
|
||||||
acc=abs(error.acc(true_prev) - acc_score),
|
acc=abs(error.acc(true_prev) - acc_score),
|
||||||
f1_score=f1_score,
|
f1_score=f1_score,
|
||||||
f1=abs(error.f1(true_prev) - f1_score)
|
f1=abs(error.f1(true_prev) - f1_score),
|
||||||
)
|
)
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
@ -85,65 +69,17 @@ def evaluate(
|
||||||
return evaluation_report(estimator, protocol, method)
|
return evaluation_report(estimator, protocol, method)
|
||||||
|
|
||||||
|
|
||||||
def evaluate_binary(model, validation, protocol):
|
def evaluate_bin_sld(
|
||||||
return evaluate(model, validation, protocol, "bin")
|
c_model: BaseEstimator,
|
||||||
|
validation: LabelledCollection,
|
||||||
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
|
) -> EvaluationReport:
|
||||||
|
return evaluate(c_model, validation, protocol, "bin")
|
||||||
|
|
||||||
|
|
||||||
def evaluate_multiclass(model, validation, protocol):
|
def evaluate_mul_sld(
|
||||||
return evaluate(model, validation, protocol, "mul")
|
c_model: BaseEstimator,
|
||||||
|
validation: LabelledCollection,
|
||||||
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
def fit_and_estimate(_estimate, train, validation, test):
|
) -> EvaluationReport:
|
||||||
model = LogisticRegression()
|
return evaluate(c_model, validation, protocol, "mul")
|
||||||
|
|
||||||
model.fit(*train.Xy)
|
|
||||||
protocol = APP(test, n_prevalences=n_prevalences, repeats=repreats)
|
|
||||||
start = time.time()
|
|
||||||
result = _estimate(model, validation, protocol)
|
|
||||||
end = time.time()
|
|
||||||
|
|
||||||
return {
|
|
||||||
"name": _estimate.__name__,
|
|
||||||
"result": result,
|
|
||||||
"time": end - start,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate_comparison(dataset: str, **kwargs) -> EvaluationReport:
|
|
||||||
train, validation, test = {
|
|
||||||
"spambase": get_spambase,
|
|
||||||
"imdb": get_imdb,
|
|
||||||
"rcv1": get_rcv1,
|
|
||||||
}[dataset](**kwargs)
|
|
||||||
|
|
||||||
for k,v in kwargs.items():
|
|
||||||
print(k, ":", v)
|
|
||||||
|
|
||||||
prevs = {
|
|
||||||
"train": train.prevalence(),
|
|
||||||
"validation": validation.prevalence(),
|
|
||||||
}
|
|
||||||
|
|
||||||
start = time.time()
|
|
||||||
with multiprocessing.Pool(8) as pool:
|
|
||||||
estimators = [
|
|
||||||
evaluate_binary,
|
|
||||||
evaluate_multiclass,
|
|
||||||
baseline.kfcv,
|
|
||||||
baseline.atc_mc,
|
|
||||||
baseline.atc_ne,
|
|
||||||
baseline.doc_feat,
|
|
||||||
baseline.rca_score,
|
|
||||||
baseline.rca_star_score,
|
|
||||||
]
|
|
||||||
tasks = [(estim, train, validation, test) for estim in estimators]
|
|
||||||
results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
|
|
||||||
results = list(map(lambda r: r.get(), results))
|
|
||||||
er = EvaluationReport.combine_reports(*list(map(lambda r: r["result"], results)))
|
|
||||||
times = {r["name"]:r["time"] for r in results}
|
|
||||||
end = time.time()
|
|
||||||
times["tot"] = end - start
|
|
||||||
er.times = times
|
|
||||||
er.prevs = prevs
|
|
||||||
|
|
||||||
return er
|
|
||||||
|
|
|
@ -1,143 +1,122 @@
|
||||||
from typing import Tuple
|
import math
|
||||||
import statistics as stats
|
import statistics as stats
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
from quacc import plot
|
||||||
def _fmt_line(s):
|
from quacc.utils import fmt_line_md
|
||||||
return f"> {s} \n"
|
|
||||||
|
|
||||||
|
|
||||||
class EvaluationReport:
|
class EvaluationReport:
|
||||||
def __init__(self, prefix=None):
|
def __init__(self, prefix=None):
|
||||||
self.base = []
|
self._prevs = []
|
||||||
self.dict = {}
|
self._dict = {}
|
||||||
self._grouped = False
|
self._g_prevs = None
|
||||||
self._grouped_base = []
|
self._g_dict = None
|
||||||
self._grouped_dict = {}
|
self.name = prefix if prefix is not None else "default"
|
||||||
self._dataframe = None
|
self.times = {}
|
||||||
self.prefix = prefix if prefix is not None else "default"
|
self.train_prevs = {}
|
||||||
self._times = {}
|
self.target = "default"
|
||||||
self._prevs = {}
|
|
||||||
self._target = "default"
|
|
||||||
|
|
||||||
def append_row(self, base: np.ndarray | Tuple, **row):
|
def append_row(self, base: np.ndarray | Tuple, **row):
|
||||||
if isinstance(base, np.ndarray):
|
if isinstance(base, np.ndarray):
|
||||||
base = tuple(base.tolist())
|
base = tuple(base.tolist())
|
||||||
self.base.append(base)
|
self._prevs.append(base)
|
||||||
for k, v in row.items():
|
for k, v in row.items():
|
||||||
if (k, self.prefix) in self.dict:
|
if (k, self.name) in self._dict:
|
||||||
self.dict[(k, self.prefix)].append(v)
|
self._dict[(k, self.name)].append(v)
|
||||||
else:
|
else:
|
||||||
self.dict[(k, self.prefix)] = [v]
|
self._dict[(k, self.name)] = [v]
|
||||||
self._grouped = False
|
self._g_prevs = None
|
||||||
self._dataframe = None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def columns(self):
|
def columns(self):
|
||||||
return self.dict.keys()
|
return self._dict.keys()
|
||||||
|
|
||||||
@property
|
def groupby_prevs(self, metric: str = None):
|
||||||
def grouped(self):
|
if self._g_dict is None:
|
||||||
if self._grouped:
|
self._g_prevs = []
|
||||||
return self._grouped_dict
|
self._g_dict = {k: [] for k in self._dict.keys()}
|
||||||
|
|
||||||
self._grouped_base = []
|
|
||||||
self._grouped_dict = {k: [] for k in self.dict.keys()}
|
|
||||||
|
|
||||||
last_end = 0
|
last_end = 0
|
||||||
for ind, bp in enumerate(self.base):
|
for ind, bp in enumerate(self._prevs):
|
||||||
if ind < (len(self.base) - 1) and bp == self.base[ind + 1]:
|
if ind < (len(self._prevs) - 1) and bp == self._prevs[ind + 1]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._grouped_base.append(bp)
|
self._g_prevs.append(bp)
|
||||||
for col in self.dict.keys():
|
for col in self._dict.keys():
|
||||||
self._grouped_dict[col].append(
|
self._g_dict[col].append(
|
||||||
stats.mean(self.dict[col][last_end : ind + 1])
|
stats.mean(self._dict[col][last_end : ind + 1])
|
||||||
)
|
)
|
||||||
|
|
||||||
last_end = ind + 1
|
last_end = ind + 1
|
||||||
|
|
||||||
self._grouped = True
|
filtered_g_dict = self._g_dict
|
||||||
return self._grouped_dict
|
if metric is not None:
|
||||||
|
filtered_g_dict = {
|
||||||
|
c1: ls for ((c0, c1), ls) in self._g_dict.items() if c0 == metric
|
||||||
|
}
|
||||||
|
|
||||||
@property
|
return self._g_prevs, filtered_g_dict
|
||||||
def gbase(self):
|
|
||||||
self.grouped
|
|
||||||
return self._grouped_base
|
|
||||||
|
|
||||||
def get_dataframe(self, metrics=None):
|
def get_dataframe(self, metric="acc"):
|
||||||
if self._dataframe is None:
|
g_prevs, g_dict = self.groupby_prevs(metric=metric)
|
||||||
self_columns = sorted(self.columns, key=lambda c: c[0])
|
return pd.DataFrame(
|
||||||
self._dataframe = pd.DataFrame(
|
g_dict,
|
||||||
self.grouped,
|
index=g_prevs,
|
||||||
index=self.gbase,
|
columns=g_dict.keys(),
|
||||||
columns=pd.MultiIndex.from_tuples(self_columns),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
df = pd.DataFrame(self._dataframe)
|
|
||||||
if metrics is not None:
|
|
||||||
df = df.drop(
|
|
||||||
[(c0, c1) for (c0, c1) in df.columns if c0 not in metrics], axis=1
|
|
||||||
)
|
|
||||||
|
|
||||||
if len(set(k0 for k0, k1 in df.columns)) == 1:
|
|
||||||
df = df.droplevel(0, axis=1)
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
|
||||||
def merge(self, other):
|
|
||||||
if not all(v1 == v2 for v1, v2 in zip(self.base, other.base)):
|
|
||||||
raise ValueError("other has not same base prevalences of self")
|
|
||||||
|
|
||||||
if len(set(self.dict.keys()).intersection(set(other.dict.keys()))) > 0:
|
|
||||||
raise ValueError("self and other have matching keys")
|
|
||||||
|
|
||||||
report = EvaluationReport()
|
|
||||||
report.base = self.base
|
|
||||||
report.dict = self.dict | other.dict
|
|
||||||
return report
|
|
||||||
|
|
||||||
@property
|
|
||||||
def times(self):
|
|
||||||
return self._times
|
|
||||||
|
|
||||||
@times.setter
|
|
||||||
def times(self, val):
|
|
||||||
self._times = val
|
|
||||||
|
|
||||||
@property
|
|
||||||
def prevs(self):
|
|
||||||
return self._prevs
|
|
||||||
|
|
||||||
@prevs.setter
|
|
||||||
def prevs(self, val):
|
|
||||||
self._prevs = val
|
|
||||||
|
|
||||||
@property
|
|
||||||
def target(self):
|
|
||||||
return self._target
|
|
||||||
|
|
||||||
@target.setter
|
|
||||||
def target(self, val):
|
|
||||||
self._target = val
|
|
||||||
|
|
||||||
def to_md(self, *metrics):
|
def to_md(self, *metrics):
|
||||||
res = _fmt_line("target: " + self.target)
|
res = ""
|
||||||
for k, v in self.prevs.items():
|
for k, v in self.train_prevs.items():
|
||||||
res += _fmt_line(f"{k}: {str(v)}")
|
res += fmt_line_md(f"{k}: {str(v)}")
|
||||||
for k, v in self.times.items():
|
for k, v in self.times.items():
|
||||||
res += _fmt_line(f"{k}: {v:.3f}s")
|
res += fmt_line_md(f"{k}: {v:.3f}s")
|
||||||
res += "\n"
|
res += "\n"
|
||||||
for m in metrics:
|
for m in metrics:
|
||||||
res += self.get_dataframe(metrics=m).to_html() + "\n\n"
|
res += self.get_dataframe(metric=m).to_html() + "\n\n"
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def merge(self, other):
|
||||||
|
if not all(v1 == v2 for v1, v2 in zip(self._prevs, other._prevs)):
|
||||||
|
raise ValueError("other has not same base prevalences of self")
|
||||||
|
|
||||||
|
if len(set(self._dict.keys()).intersection(set(other._dict.keys()))) > 0:
|
||||||
|
raise ValueError("self and other have matching keys")
|
||||||
|
|
||||||
|
report = EvaluationReport()
|
||||||
|
report._prevs = self._prevs
|
||||||
|
report._dict = self._dict | other._dict
|
||||||
|
return report
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def combine_reports(*args):
|
def combine_reports(*args, name="default"):
|
||||||
er = args[0]
|
er = args[0]
|
||||||
for r in args[1:]:
|
for r in args[1:]:
|
||||||
er = er.merge(r)
|
er = er.merge(r)
|
||||||
|
|
||||||
|
er.name = name
|
||||||
return er
|
return er
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetReport:
    """Named, ordered collection of evaluation reports for one dataset."""

    def __init__(self, name):
        self.name = name
        # Reports are kept in insertion order.
        self.ers: List[EvaluationReport] = []

    def add(self, er: EvaluationReport):
        """Append ``er`` to the collection."""
        self.ers.append(er)

    def to_md(self, *metrics):
        """Render the dataset name followed by every report's markdown.

        Each section (the name and each report rendered with the given
        metrics) is terminated by a blank line.
        """
        sections = [f"{self.name}\n\n"]
        sections.extend(f"{er.to_md(*metrics)}\n\n" for er in self.ers)
        return "".join(sections)

    def __iter__(self):
        """Iterate over the stored reports in insertion order."""
        yield from self.ers
|
||||||
|
|
|
@ -1,33 +1,16 @@
|
||||||
import traceback
|
import quacc.evaluation.comp as comp
|
||||||
import quacc.evaluation.method as method
|
from quacc.dataset import Dataset
|
||||||
|
from quacc.environ import env
|
||||||
|
|
||||||
DATASET = "imdb"
|
|
||||||
OUTPUT_FILE = "out_" + DATASET + ".md"
|
|
||||||
TARGETS = {
|
|
||||||
"rcv1" : [
|
|
||||||
'C12',
|
|
||||||
'C13', 'C15', 'C151', 'C1511', 'C152', 'C17', 'C172',
|
|
||||||
'C18', 'C181', 'C21', 'C24', 'C31', 'C42', 'CCAT'
|
|
||||||
'E11', 'E12', 'E21', 'E211', 'E212', 'E41', 'E51', 'ECAT',
|
|
||||||
'G15', 'GCAT', 'GCRIM', 'GDIP', 'GPOL', 'GVIO', 'GVOTE', 'GWEA',
|
|
||||||
'GWELF', 'M11', 'M12', 'M13', 'M131', 'M132', 'M14', 'M141',
|
|
||||||
'M142', 'M143', 'MCAT'
|
|
||||||
],
|
|
||||||
"spambase": ["default"],
|
|
||||||
"imdb": ["default"],
|
|
||||||
}
|
|
||||||
|
|
||||||
def estimate_comparison():
|
def estimate_comparison():
|
||||||
open(OUTPUT_FILE, "w").close()
|
dataset = Dataset(
|
||||||
targets = TARGETS[DATASET]
|
env.DATASET_NAME, target=env.DATASET_TARGET, n_prevalences=env.DATASET_N_PREVS
|
||||||
for target in targets:
|
)
|
||||||
try:
|
output_path = env.OUT_DIR / f"{dataset.name}.md"
|
||||||
er = method.evaluate_comparison(DATASET, target=target)
|
with open(output_path, "w") as f:
|
||||||
er.target = target
|
dr = comp.evaluate_comparison(dataset, estimators=env.COMP_ESTIMATORS)
|
||||||
with open(OUTPUT_FILE, "a") as f:
|
f.write(dr.to_md("acc"))
|
||||||
f.write(er.to_md(["acc"], ["f1"]))
|
|
||||||
except Exception:
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
# print(df.to_latex(float_format="{:.4f}".format))
|
# print(df.to_latex(float_format="{:.4f}".format))
|
||||||
# print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))
|
# print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
|
def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
|
||||||
if len(dfs) < 1:
|
if len(dfs) < 1:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
@ -16,9 +17,7 @@ def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
|
||||||
|
|
||||||
def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
|
def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
|
||||||
def _reduce_func(s1, s2):
|
def _reduce_func(s1, s2):
|
||||||
return {
|
return {(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()}
|
||||||
(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()
|
|
||||||
}
|
|
||||||
|
|
||||||
lst = df.to_dict(orient="records")[1:-1]
|
lst = df.to_dict(orient="records")[1:-1]
|
||||||
summed_series = functools.reduce(_reduce_func, lst)
|
summed_series = functools.reduce(_reduce_func, lst)
|
||||||
|
@ -29,3 +28,7 @@ def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
|
||||||
if n1 != "base"
|
if n1 != "base"
|
||||||
}
|
}
|
||||||
return pd.DataFrame([avg_report], columns=idx)
|
return pd.DataFrame([avg_report], columns=idx)
|
||||||
|
|
||||||
|
|
||||||
|
def fmt_line_md(s):
    """Format ``s`` as one markdown blockquote line ending with a newline."""
    return "> {} \n".format(s)
|
||||||
|
|
Loading…
Reference in New Issue