switching

Alejandro Moreo Fernandez 2024-04-11 11:33:48 +02:00
parent 6c3c604a43
commit d5b8d68f03
6 changed files with 91 additions and 591 deletions

.gitmodules (new file)

@@ -0,0 +1,3 @@
[submodule "result_table"]
path = result_table
url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git
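Note: after checking out this commit, the new submodule has to be fetched explicitly, e.g. by running git submodule update --init result_table (or by cloning with --recurse-submodules).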


@@ -1,5 +1,8 @@
from ClassifierAccuracy.gen_tables import gen_tables
from ClassifierAccuracy.util.commons import *
from ClassifierAccuracy.util.generators import *
from ClassifierAccuracy.util.plotting import plot_diagonal
from quapy.protocol import UPP

PROBLEM = 'multiclass'
ORACLE = False
@@ -53,7 +56,7 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
# instances of CAPContingencyTable instead are generic, and the evaluation measure can
# be nested to the predictions to speed up things
for (method_name, method) in gen_CAP_cont_table(h):
if not any_missing(basedir, cls_name, dataset_name, method_name):
if not any_missing(basedir, cls_name, dataset_name, method_name, acc_measures):
print(f'\t\tmethod {method_name} has all results already computed. Skipping.')
continue


@@ -1,3 +1,77 @@
from ClassifierAccuracy.util.commons import gen_tables

gen_tables()
def gen_tables(basedir, datasets):
    mock_h = LogisticRegression()
    methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
    classifiers = [classifier for classifier, _ in gen_classifiers()]

    os.makedirs('./tables', exist_ok=True)

    with_oracle = 'oracle' in basedir

    tex_doc = """
\\documentclass[10pt,a4paper]{article}
\\usepackage[utf8]{inputenc}
\\usepackage{amsmath}
\\usepackage{amsfonts}
\\usepackage{amssymb}
\\usepackage{graphicx}
\\usepackage{tabularx}
\\usepackage{color}
\\usepackage{colortbl}
\\usepackage{xcolor}
\\begin{document}
"""

    for classifier in classifiers:
        for metric in [measure for measure, _ in gen_acc_measure()]:
            table = Table(datasets, methods, prec_mean=5, clean_zero=True)
            for method, dataset in itertools.product(methods, datasets):
                path = getpath(basedir, classifier, metric, dataset, method)
                if not os.path.exists(path):
                    print('missing ', path)
                    continue
                results = json.load(open(path, 'r'))
                true_acc = results['true_acc']
                estim_acc = np.asarray(results['estim_acc'])
                if any(np.isnan(estim_acc)):
                    print(f'nan values found in {method=} {dataset=}')
                    continue
                if any(estim_acc > 1.00001):
                    print(f'values >1 found in {method=} {dataset=} [max={estim_acc.max()}]')
                    continue
                if any(estim_acc < -0.00001):
                    print(f'values <0 found in {method=} {dataset=} [min={estim_acc.min()}]')
                    continue
                errors = cap_errors(true_acc, estim_acc)
                table.add(dataset, method, errors)

            tex = table.latexTabular()
            table_name = f'{basedir}_{classifier}_{metric}.tex'
            table_name = table_name.replace('/', '_')
            with open(f'./tables/{table_name}', 'wt') as foo:
                foo.write('\\begin{table}[h]\n')
                foo.write('\\centering\n')
                foo.write('\\resizebox{\\textwidth}{!}{%\n')
                foo.write('\\begin{tabular}{c|' + ('c' * len(methods)) + '}\n')
                foo.write(tex)
                foo.write('\\end{tabular}%\n')
                foo.write('}\n')
                foo.write('\\caption{Classifier ' + classifier.replace('_', ' ') + ('(oracle)' if with_oracle else '') +
                          ' evaluated in terms of ' + metric.replace('_', ' ') + '}\n')
                foo.write('\\end{table}\n')

            tex_doc += "\\input{" + table_name + "}\n\n"

    tex_doc += """
\\end{document}
"""
    with open(f'./tables/main.tex', 'wt') as foo:
        foo.write(tex_doc)

    print("[Tables Done] running latex")
    os.chdir('./tables/')
    os.system('pdflatex main.tex')
    os.system('rm main.aux main.log')
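For context, a minimal invocation sketch of the new gen_tables entry point (assuming it remains importable from ClassifierAccuracy.gen_tables, as in the old driver line above; the basedir value is a hypothetical path, and the dataset names are the binary ones used elsewhere in this repository):

# illustrative driver, not part of this commit
from ClassifierAccuracy.gen_tables import gen_tables

datasets = ['imdb', 'CCAT', 'GCAT', 'MCAT']               # binary datasets, as in gen_bin_datasets
gen_tables(basedir='results/binary', datasets=datasets)   # 'results/binary' is a hypothetical path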


@@ -1,3 +1,6 @@
from sklearn.base import BaseEstimator
import quapy as qp
import itertools
import json
import os
@@ -6,139 +9,13 @@ from glob import glob
from pathlib import Path
from time import time
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.datasets import fetch_rcv1, fetch_20newsgroups
from sklearn.model_selection import GridSearchCV
from ClassifierAccuracy.models_multiclass import *
from ClassifierAccuracy.util.tabular import Table
from quapy.protocol import OnLabelledCollectionProtocol, AbstractStochasticSeededProtocol
from quapy.method.aggregative import EMQ, ACC, KDEyML
from quapy.data import LabelledCollection
from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS, fetch_lequa2022, TWITTER_SENTIMENT_DATASETS_TEST
from quapy.data.datasets import fetch_reviews
def gen_classifiers():
param_grid = {
'C': np.logspace(-4, -4, 9),
'class_weight': ['balanced', None]
}
yield 'LR', LogisticRegression()
#yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
#yield 'NB', GaussianNB()
#yield 'SVM(rbf)', SVC()
#yield 'SVM(linear)', LinearSVC()
def gen_multi_datasets(only_names=False)-> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
for dataset_name in UCI_MULTICLASS_DATASETS:
if dataset_name == 'wine-quality':
continue
if only_names:
yield dataset_name, None
else:
dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
yield dataset_name, split(dataset)
# yields the 20 newsgroups dataset
if only_names:
yield "20news", None
else:
train = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes'))
test = fetch_20newsgroups(subset='test', remove=('headers', 'footers', 'quotes'))
tfidf = TfidfVectorizer(min_df=5, sublinear_tf=True)
Xtr = tfidf.fit_transform(train.data)
Xte = tfidf.transform((test.data))
train = LabelledCollection(instances=Xtr, labels=train.target)
U = LabelledCollection(instances=Xte, labels=test.target)
T, V = train.split_stratified(train_prop=0.5, random_state=0)
yield "20news", (T, V, U)
# yields the T1B@LeQua2022 (training) dataset
if only_names:
yield "T1B-LeQua2022", None
else:
train, _, _ = fetch_lequa2022(task='T1B')
yield "T1B-LeQua2022", split(train)
def gen_tweet_datasets(only_names=False)-> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
if only_names:
yield dataset_name, None
else:
data = qp.datasets.fetch_twitter(dataset_name, min_df=3, pickle=True)
T, V = data.training.split_stratified(0.5, random_state=0)
U = data.test
yield dataset_name, (T, V, U)
def gen_bin_datasets(only_names=False) -> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
if only_names:
for dataset_name in ['imdb', 'CCAT', 'GCAT', 'MCAT']:
yield dataset_name, None
else:
train, U = fetch_reviews('imdb', tfidf=True, min_df=10, pickle=True).train_test
L, V = train.split_stratified(0.5, random_state=0)
yield 'imdb', (L, V, U)
training = fetch_rcv1(subset='train')
test = fetch_rcv1(subset='test')
class_names = training.target_names.tolist()
for cat in ['CCAT', 'GCAT', 'MCAT']:
class_idx = class_names.index(cat)
tr_labels = training.target[:,class_idx].toarray().flatten()
te_labels = test.target[:,class_idx].toarray().flatten()
tr = LabelledCollection(training.data, tr_labels)
U = LabelledCollection(test.data, te_labels)
L, V = tr.split_stratified(train_prop=0.5, random_state=0)
yield cat, (L, V, U)
def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
#yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
# yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
#yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
#yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
#yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
# yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
# yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
yield 'DoC', DoC(h, acc_fn, sample_size=qp.environ['SAMPLE_SIZE'])
def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
acc_fn = None
yield 'Naive', NaiveCAP(h, acc_fn)
yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
# yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
# yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
#yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
#yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
#yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
# yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
#yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
#yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
#yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
#yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
#yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
# yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
#yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)
def get_method_names():
mock_h = LogisticRegression()
return [m for m, _ in gen_CAP(mock_h, None)] + [m for m, _ in gen_CAP_cont_table(mock_h)]
def gen_acc_measure():
yield 'vanilla_accuracy', vanilla_acc_fn
yield 'macro-F1', macrof1_fn
def split(data: LabelledCollection):
@@ -177,8 +54,8 @@ def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
return estim_accs_dict, t_test_ave

def any_missing(basedir, cls_name, dataset_name, method_name):
def any_missing(basedir, cls_name, dataset_name, method_name, acc_measures):
for acc_name, _ in gen_acc_measure():
for acc_name in acc_measures():
if not os.path.exists(getpath(basedir, cls_name, acc_name, dataset_name, method_name)):
return True
return False
@@ -322,114 +199,5 @@ def get_dataset_stats(path, test_prot, L, V):
save_json_file(path, info)
def gen_tables(basedir, datasets):
mock_h = LogisticRegression(),
methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
classifiers = [classifier for classifier, _ in gen_classifiers()]
os.makedirs('./tables', exist_ok=True)
with_oracle = 'oracle' in basedir
tex_doc = """
\\documentclass[10pt,a4paper]{article}
\\usepackage[utf8]{inputenc}
\\usepackage{amsmath}
\\usepackage{amsfonts}
\\usepackage{amssymb}
\\usepackage{graphicx}
\\usepackage{tabularx}
\\usepackage{color}
\\usepackage{colortbl}
\\usepackage{xcolor}
\\begin{document}
"""
for classifier in classifiers:
for metric in [measure for measure, _ in gen_acc_measure()]:
table = Table(datasets, methods, prec_mean=5, clean_zero=True)
for method, dataset in itertools.product(methods, datasets):
path = getpath(basedir, classifier, metric, dataset, method)
if not os.path.exists(path):
print('missing ', path)
continue
results = json.load(open(path, 'r'))
true_acc = results['true_acc']
estim_acc = np.asarray(results['estim_acc'])
if any(np.isnan(estim_acc)):
print(f'nan values found in {method=} {dataset=}')
continue
if any(estim_acc>1.00001):
print(f'values >1 found in {method=} {dataset=} [max={estim_acc.max()}]')
continue
if any(estim_acc<-0.00001):
print(f'values <0 found in {method=} {dataset=} [min={estim_acc.min()}]')
continue
errors = cap_errors(true_acc, estim_acc)
table.add(dataset, method, errors)
tex = table.latexTabular()
table_name = f'{basedir}_{classifier}_{metric}.tex'
table_name = table_name.replace('/', '_')
with open(f'./tables/{table_name}', 'wt') as foo:
foo.write('\\begin{table}[h]\n')
foo.write('\\centering\n')
foo.write('\\resizebox{\\textwidth}{!}{%\n')
foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
foo.write(tex)
foo.write('\\end{tabular}%\n')
foo.write('}\n')
foo.write('\\caption{Classifier ' + classifier.replace('_', ' ') + ('(oracle)' if with_oracle else '') +
' evaluated in terms of ' + metric.replace('_', ' ') + '}\n')
foo.write('\\end{table}\n')
tex_doc += "\input{" + table_name + "}\n\n"
tex_doc += """
\\end{document}
"""
with open(f'./tables/main.tex', 'wt') as foo:
foo.write(tex_doc)
print("[Tables Done] runing latex")
os.chdir('./tables/')
os.system('pdflatex main.tex')
os.system('rm main.aux main.log')
class ArtificialAccuracyProtocol(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):

    def __init__(self, data: LabelledCollection, h: BaseEstimator, sample_size=None, n_prevalences=101, repeats=10, random_state=0):
        super(ArtificialAccuracyProtocol, self).__init__(random_state)
        self.data = data
        self.h = h
        self.sample_size = qp._get_sample_size(sample_size)
        self.n_prevalences = n_prevalences
        self.repeats = repeats
        self.collator = OnLabelledCollectionProtocol.get_collator('labelled_collection')

    def accuracy_grid(self):
        grid = np.linspace(0, 1, self.n_prevalences)
        grid = np.repeat(grid, self.repeats, axis=0)
        return grid

    def samples_parameters(self):
        # issue predictions
        label_predictions = self.h.predict(self.data.X)
        correct = label_predictions == self.data.y
        self.data_evaluated = LabelledCollection(self.data.X, labels=correct, classes=[0, 1])
        indexes = []
        for acc_value in self.accuracy_grid():
            index = self.data_evaluated.sampling_index(self.sample_size, acc_value)
            indexes.append(index)
        return indexes

    def sample(self, index):
        return self.data.sampling_from_index(index)

    def total(self):
        return self.n_prevalences * self.repeats
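A minimal usage sketch of the new ArtificialAccuracyProtocol; h (a fitted classifier) and val (a labelled validation collection) are illustrative names, as are the parameter values:

# sketch: draw validation samples whose classifier accuracy is driven along a grid
prot = ArtificialAccuracyProtocol(val, h, sample_size=500, n_prevalences=21, repeats=3)
for sample in prot():
    realized_acc = (h.predict(sample.X) == sample.y).mean()
    print(f'sample of {len(sample)} instances with accuracy {realized_acc:.3f}')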


@@ -1,349 +0,0 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon
class Table:
VALID_TESTS = [None, "wilcoxon", "ttest"]
def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--', color=True,
maxtone=50):
assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
self.benchmarks = np.asarray(benchmarks)
self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}
self.methods = np.asarray(methods)
self.method_index = {col: j for j, col in enumerate(methods)}
self.map = {}
# keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
self._addmap('values', dtype=object)
self.lower_is_better = lower_is_better
self.ttest = ttest
self.prec_mean = prec_mean
self.clean_zero = clean_zero
self.show_std = show_std
self.prec_std = prec_std
self.add_average = average
self.missing = missing
self.missing_str = missing_str
self.color = color
self.maxtone = maxtone
self.touch()
@property
def nbenchmarks(self):
return len(self.benchmarks)
@property
def nmethods(self):
return len(self.methods)
def touch(self):
self._modif = True
def update(self):
if self._modif:
self.compute()
def _getfilled(self):
return np.argwhere(self.map['fill'])
@property
def values(self):
return self.map['values']
def _indexes(self):
return itertools.product(range(self.nbenchmarks), range(self.nmethods))
def _addmap(self, map, dtype, func=None):
self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
if func is None:
return
m = self.map[map]
f = func
indexes = self._indexes() if map == 'fill' else self._getfilled()
for i, j in indexes:
m[i, j] = f(self.values[i, j])
def _addrank(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
if not self.lower_is_better:
ranked_cols_idx = ranked_cols_idx[::-1]
self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1)
def _addcolor(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if filled_cols_idx.size == 0:
continue
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
# col_means = [self.map['rank'][i, j] for j in filled_cols_idx]
minval = min(col_means)
maxval = max(col_means)
for col_idx in filled_cols_idx:
val = self.map['mean'][i, col_idx]
norm = (maxval - minval)
if norm > 0:
normval = (val - minval) / norm
else:
normval = 0.5
if self.lower_is_better:
normval = 1 - normval
normval = np.clip(normval, 0, 1)
self.map['color'][i, col_idx] = color_red2green_01(normval, self.maxtone)
def _run_ttest(self, row, col1, col2):
mean1 = self.map['mean'][row, col1]
std1 = self.map['std'][row, col1]
nobs1 = self.map['nobs'][row, col1]
mean2 = self.map['mean'][row, col2]
std2 = self.map['std'][row, col2]
nobs2 = self.map['nobs'][row, col2]
_, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
return p_val
def _run_wilcoxon(self, row, col1, col2):
values1 = self.map['values'][row, col1]
values2 = self.map['values'][row, col2]
try:
_, p_val = wilcoxon(values1, values2)
except ValueError:
p_val = 0
return p_val
def _add_statistical_test(self):
if self.ttest is None:
return
self.some_similar = [False] * self.nmethods
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if len(filled_cols_idx) <= 1:
continue
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
best_pos = filled_cols_idx[np.argmin(col_means)]
for j in filled_cols_idx:
if j == best_pos:
continue
if self.ttest == 'ttest':
p_val = self._run_ttest(i, best_pos, j)
else:
p_val = self._run_wilcoxon(i, best_pos, j)
pval_outcome = pval_interpretation(p_val)
self.map['ttest'][i, j] = pval_outcome
if pval_outcome != 'Diff':
self.some_similar[j] = True
def compute(self):
self._addmap('fill', dtype=bool, func=lambda x: x is not None)
self._addmap('mean', dtype=float, func=np.mean)
self._addmap('std', dtype=float, func=np.std)
self._addmap('nobs', dtype=float, func=len)
self._addmap('rank', dtype=int, func=None)
self._addmap('color', dtype=object, func=None)
self._addmap('ttest', dtype=object, func=None)
self._addmap('latex', dtype=object, func=None)
self._addrank()
self._addcolor()
self._add_statistical_test()
if self.add_average:
self._addave()
self._modif = False
def _is_column_full(self, col):
return all(self.map['fill'][:, self.method_index[col]])
def _addave(self):
ave = Table(['ave'], self.methods,
lower_is_better=self.lower_is_better,
ttest=self.ttest,
average=False,
missing=self.missing,
missing_str=self.missing_str,
prec_mean=self.prec_mean,
prec_std=self.prec_std,
clean_zero=self.clean_zero,
show_std=self.show_std,
color=self.color,
maxtone=self.maxtone)
for col in self.methods:
values = None
if self._is_column_full(col):
if self.ttest == 'ttest':
# values = np.asarray(self.map['mean'][:, self.method_index[col]])
values = np.concatenate(self.values[:, self.method_index[col]])
else: # wilcoxon
# values = np.asarray(self.map['mean'][:, self.method_index[col]])
values = np.concatenate(self.values[:, self.method_index[col]])
ave.add('ave', col, values)
self.average = ave
def add(self, benchmark, method, values):
if values is not None:
values = np.asarray(values)
if values.ndim == 0:
values = values.flatten()
rid, cid = self._coordinates(benchmark, method)
self.map['values'][rid, cid] = values
self.touch()
def get(self, benchmark, method, attr='mean'):
self.update()
assert attr in self.map, f'unknwon attribute {attr}'
rid, cid = self._coordinates(benchmark, method)
if self.map['fill'][rid, cid]:
v = self.map[attr][rid, cid]
if v is None or (isinstance(v, float) and np.isnan(v)):
return self.missing
return v
else:
return self.missing
def _coordinates(self, benchmark, method):
assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
assert method in self.method_index, f'method {method} out of range'
rid = self.benchmark_index[benchmark]
cid = self.method_index[method]
return rid, cid
def get_average(self, method, attr='mean'):
self.update()
if self.add_average:
return self.average.get('ave', method, attr=attr)
return None
def get_color(self, benchmark, method):
color = self.get(benchmark, method, attr='color')
if color is None:
return ''
return color
def latex(self, benchmark, method):
self.update()
i, j = self._coordinates(benchmark, method)
if self.map['fill'][i, j] == False:
return self.missing_str
mean = self.map['mean'][i, j]
l = f" {mean:.{self.prec_mean}f}"
if self.clean_zero:
l = l.replace(' 0.', '.')
isbest = self.map['rank'][i, j] == 1
if isbest:
l = "\\textbf{" + l.strip() + "}"
stat = '' if self.ttest is None else '^{\phantom{\ddag}}'
if self.ttest is not None and self.some_similar[j]:
test_label = self.map['ttest'][i, j]
if test_label == 'Sim':
stat = '^{\dag}'
elif test_label == 'Same':
stat = '^{\ddag}'
elif isbest or test_label == 'Diff':
stat = '^{\phantom{\ddag}}'
std = ''
if self.show_std:
std = self.map['std'][i, j]
std = f" {std:.{self.prec_std}f}"
if self.clean_zero:
std = std.replace(' 0.', '.')
std = f"\pm {std:{self.prec_std}}"
if stat != '' or std != '':
l = f'{l}${stat}{std}$'
if self.color:
l += ' ' + self.map['color'][i, j]
return l
def latexTabular(self, benchmark_replace={}, method_replace={}, aslines=False, endl='\\\\\hline'):
lines = []
l = '\multicolumn{1}{c|}{} & '
l += ' & '.join([method_replace.get(col, col) for col in self.methods])
l += ' \\\\\hline'
lines.append(l)
for row in self.benchmarks:
rowname = benchmark_replace.get(row, row)
l = rowname + ' & '
l += self.latexRow(row, endl=endl)
lines.append(l)
if self.add_average:
# l += '\hline\n'
l = '\hline \n \\textit{Average} & '
l += self.latexAverage(endl=endl)
lines.append(l)
if not aslines:
lines = '\n'.join(lines)
return lines
def latexRow(self, benchmark, endl='\\\\\hline\n'):
s = [self.latex(benchmark, col) for col in self.methods]
s = ' & '.join(s)
s += ' ' + endl
return s
def latexAverage(self, endl='\\\\\hline\n'):
if self.add_average:
return self.average.latexRow('ave', endl=endl)
def getRankTable(self, prec_mean=0):
t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=prec_mean, average=True,
maxtone=self.maxtone, ttest=None)
for rid, cid in self._getfilled():
row = self.benchmarks[rid]
col = self.methods[cid]
t.add(row, col, self.get(row, col, 'rank'))
t.compute()
return t
def dropMethods(self, methods):
drop_index = [self.method_index[m] for m in methods]
new_methods = np.delete(self.methods, drop_index)
new_index = {col: j for j, col in enumerate(new_methods)}
self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
self.methods = new_methods
self.method_index = new_index
self.touch()
def pval_interpretation(p_val):
if 0.005 >= p_val:
return 'Diff'
elif 0.05 >= p_val > 0.005:
return 'Sim'
elif p_val > 0.05:
return 'Same'
def color_red2green_01(val, maxtone=50):
if np.isnan(val): return None
assert 0 <= val <= 1, f'val {val} out of range [0,1]'
# rescale to [-1,1]
val = val * 2 - 1
if val < 0:
color = 'red'
tone = maxtone * (-val)
else:
color = 'green'
tone = maxtone * val
return '\cellcolor{' + color + f'!{int(tone)}' + '}'
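For reference, a minimal sketch of how this (now removed) Table class was typically used; the benchmark/method names and the random error scores below are purely illustrative:

import numpy as np

# two benchmarks, two methods, one array of error scores per cell
table = Table(benchmarks=['dataset-A', 'dataset-B'], methods=['method-1', 'method-2'], prec_mean=3)
rng = np.random.default_rng(0)
for benchmark in ['dataset-A', 'dataset-B']:
    for method in ['method-1', 'method-2']:
        table.add(benchmark, method, rng.random(10))
print(table.latexTabular())   # rows = benchmarks, columns = methods, plus an average row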

result_table (new submodule)

@@ -0,0 +1 @@
Subproject commit 2e0e3d7fc0464f9c9b50ace3c7785dd8d97710a6