tables complete; refactoring needed urgently
This commit is contained in:
parent
f820d36927
commit
5793484f70
|
@ -0,0 +1,89 @@
|
|||
AE RAE
|
||||
SemEval13 SVM-KLD 0.0722 0.1720
|
||||
SVM-NKLD 0.0714 0.2756
|
||||
SVM-QBETA2 0.0782 0.2775
|
||||
LR-CC 0.0996 0.3095
|
||||
LR-EM 0.1191 0.3923
|
||||
LR-PCC 0.0344 0.1506
|
||||
LR-ACC 0.0806 0.2479
|
||||
LR-PACC 0.0812 0.2626
|
||||
SemEval14 SVM-KLD 0.0843 0.2268
|
||||
SVM-NKLD 0.0836 0.3367
|
||||
SVM-QBETA2 0.1018 0.3680
|
||||
LR-CC 0.1043 0.3212
|
||||
LR-EM 0.0807 0.3517
|
||||
LR-PCC 0.1001 0.4277
|
||||
LR-ACC 0.0581 0.2360
|
||||
LR-PACC 0.0533 0.2573
|
||||
SemEval15 SVM-KLD 0.1185 0.3789
|
||||
SVM-NKLD 0.1155 0.4720
|
||||
SVM-QBETA2 0.1263 0.4762
|
||||
LR-CC 0.1101 0.2879
|
||||
LR-EM 0.1204 0.2949
|
||||
LR-PCC 0.0460 0.1973
|
||||
LR-ACC 0.1064 0.2971
|
||||
LR-PACC 0.1013 0.2729
|
||||
SemEval16 SVM-KLD 0.0385 0.1512
|
||||
SVM-NKLD 0.0830 0.3249
|
||||
SVM-QBETA2 0.1201 0.5156
|
||||
LR-CC 0.0500 0.1771
|
||||
LR-EM 0.0646 0.2126
|
||||
LR-PCC 0.0379 0.1553
|
||||
LR-ACC 0.0542 0.2246
|
||||
LR-PACC 0.0864 0.3504
|
||||
Sanders SVM-KLD 0.0134 0.0630
|
||||
SVM-NKLD 0.0950 0.3965
|
||||
SVM-QBETA2 0.1098 0.4360
|
||||
LR-CC 0.0671 0.2682
|
||||
LR-EM 0.0715 0.2849
|
||||
LR-PCC 0.0150 0.0602
|
||||
LR-ACC 0.0338 0.1306
|
||||
LR-PACC 0.0301 0.1173
|
||||
SST SVM-KLD 0.0413 0.1458
|
||||
SVM-NKLD 0.0749 0.2497
|
||||
SVM-QBETA2 0.0671 0.2343
|
||||
LR-CC 0.0330 0.1239
|
||||
LR-EM 0.0369 0.1190
|
||||
LR-PCC 0.0282 0.1068
|
||||
LR-ACC 0.0492 0.1689
|
||||
LR-PACC 0.0841 0.2302
|
||||
OMD SVM-KLD 0.0305 0.0999
|
||||
SVM-NKLD 0.0437 0.1279
|
||||
SVM-QBETA2 0.0624 0.1826
|
||||
LR-CC 0.0524 0.1527
|
||||
LR-EM 0.0648 0.1886
|
||||
LR-PCC 0.0046 0.0095
|
||||
LR-ACC 0.0239 0.0753
|
||||
LR-PACC 0.0100 0.0293
|
||||
HCR SVM-KLD 0.0414 0.2191
|
||||
SVM-NKLD 0.0604 0.2324
|
||||
SVM-QBETA2 0.1272 0.4600
|
||||
LR-CC 0.0525 0.1817
|
||||
LR-EM 0.0895 0.3093
|
||||
LR-PCC 0.0055 0.0202
|
||||
LR-ACC 0.0240 0.1026
|
||||
LR-PACC 0.0329 0.1436
|
||||
GASP SVM-KLD 0.0171 0.0529
|
||||
SVM-NKLD 0.0503 0.3416
|
||||
SVM-QBETA2 0.0640 0.4402
|
||||
LR-CC 0.0189 0.1297
|
||||
LR-EM 0.0231 0.1589
|
||||
LR-PCC 0.0097 0.0682
|
||||
LR-ACC 0.0150 0.1038
|
||||
LR-PACC 0.0087 0.0597
|
||||
WA SVM-KLD 0.0647 0.1957
|
||||
SVM-NKLD 0.0393 0.1357
|
||||
SVM-QBETA2 0.0798 0.2332
|
||||
LR-CC 0.0434 0.1270
|
||||
LR-EM 0.0391 0.1145
|
||||
LR-PCC 0.0338 0.0990
|
||||
LR-ACC 0.0407 0.1197
|
||||
LR-PACC 0.0277 0.0815
|
||||
WB SVM-KLD 0.0613 0.1791
|
||||
SVM-NKLD 0.0534 0.1756
|
||||
SVM-QBETA2 0.0249 0.0774
|
||||
LR-CC 0.0132 0.0399
|
||||
LR-EM 0.0244 0.0773
|
||||
LR-PCC 0.0123 0.0390
|
||||
LR-ACC 0.0230 0.0719
|
||||
LR-PACC 0.0165 0.0515
|
|
@ -1,15 +1,7 @@
|
|||
from scipy.stats import wilcoxon, ttest_ind_from_stats
|
||||
import numpy as np
|
||||
|
||||
"""
|
||||
class Table:
|
||||
def __init__(self):
|
||||
self.tab = {}
|
||||
|
||||
def add(self, col, key, x):
|
||||
if col not in self.tab:
|
||||
self.tab[col] = ResultSet(col)
|
||||
"""
|
||||
|
||||
|
||||
class ResultSet:
|
||||
VALID_TESTS = [None, "wilcoxon", "ttest_ind_from_stats"]
|
||||
|
@ -18,7 +10,7 @@ class ResultSet:
|
|||
TTEST_SAME = 'same'
|
||||
|
||||
def __init__(self, name, addfunc, compare='mean', lower_is_better=True, show_std=True, test="wilcoxon",
|
||||
remove_mean='0.', prec_mean=3, remove_std='0.', prec_std=3, maxtone=100, minval=None, maxval=None):
|
||||
remove_mean='', prec_mean=3, remove_std='', prec_std=3, maxtone=50, minval=None, maxval=None):
|
||||
"""
|
||||
|
||||
:param name: name of the result set (e.g., a Dataset)
|
||||
|
@ -65,13 +57,18 @@ class ResultSet:
|
|||
self.r[key]['nobs'] = len(vals)
|
||||
self.computed = False
|
||||
|
||||
def update(self):
    """
    Brings the derived statistics up to date.

    Calls compute() only while the "computed" flag is falsy; otherwise does nothing.
    """
    if self.computed:
        return
    self.compute()
|
||||
|
||||
def compute(self):
|
||||
keylist = np.asarray(list(self.r.keys()))
|
||||
vallist = [self.r[key][self.compare] for key in keylist]
|
||||
keylist = keylist[np.argsort(vallist)]
|
||||
|
||||
minval = min(vallist) if self.minval is None else self.minval
|
||||
maxval = max(vallist) if self.maxval is None else self.maxval
|
||||
print(vallist)
|
||||
self.range_minval = min(vallist) if self.minval is None else self.minval
|
||||
self.range_maxval = max(vallist) if self.maxval is None else self.maxval
|
||||
if not self.lower_is_better:
|
||||
keylist = keylist[::-1]
|
||||
|
||||
|
@ -88,10 +85,7 @@ class ResultSet:
|
|||
|
||||
#color
|
||||
val = self.r[key][self.compare]
|
||||
val = (val-minval)/(maxval-minval)
|
||||
if self.lower_is_better:
|
||||
val = 1-val
|
||||
self.r[key]['color'] = color_red2green_01(val, self.maxtone)
|
||||
self.r[key]['color'] = self.get_value_color(val, minval=self.range_minval, maxval=self.range_maxval)
|
||||
|
||||
if self.test is not None:
|
||||
if isbest:
|
||||
|
@ -115,11 +109,11 @@ class ResultSet:
|
|||
self.computed = True
|
||||
|
||||
def latex(self, key, missing='--', color=True):
|
||||
|
||||
if key not in self.r:
|
||||
return missing
|
||||
|
||||
if not self.computed:
|
||||
self.compute()
|
||||
self.update()
|
||||
|
||||
rd = self.r[key]
|
||||
s = f"{rd['mean']:.{self.prec_mean}f}"
|
||||
|
@ -148,29 +142,52 @@ class ResultSet:
|
|||
|
||||
return s
|
||||
|
||||
def mean(self, attr='mean', required: int = None, missing=np.nan):
    """
    Returns the mean value of the "attr" attribute across all stored results.

    :param attr: the attribute to average across results
    :param required: if specified, indicates the number of results that should be part of the mean; if the
        number of stored results is different, then the mean is not computed
    :param missing: the value to return when the mean cannot be computed, i.e., when there are no results,
        when some result lacks the "attr" attribute, or when the "required" condition is not satisfied
    :return: the mean of the "attr" attribute, or the "missing" value
    """
    vallist = [result.get(attr, None) for result in self.r.values()]
    if not vallist:
        # nothing to average; np.mean([]) would only produce nan plus a RuntimeWarning
        return missing
    # identity test instead of `None in vallist`: list membership falls back to ==, which is evaluated
    # elementwise (and is ambiguous as a truth value) when the attribute holds a numpy array
    if any(val is None for val in vallist):
        return missing
    if required is not None and len(vallist) != required:
        return missing
    return np.mean(vallist)
|
||||
|
||||
def get(self, key, attr, missing='--'):
    """
    Returns the "attr" attribute of the result stored under "key".

    :param key: the key of the result to look up
    :param attr: the name of the attribute to return
    :param missing: the value to return if either the key or the attribute is not present
    :return: the stored attribute, or the "missing" value
    """
    if key not in self.r:
        return missing
    # update() is called before the attribute is read, and only for known keys
    self.update()
    entry = self.r[key]
    return entry[attr] if attr in entry else missing
|
||||
|
||||
def get_color(self, key):
    """
    Returns the 'color' entry of the result stored under "key".

    :param key: the key of the result to look up
    :return: the stored 'color' value (read after calling update()), or an empty string for an unknown key
    """
    if key in self.r:
        self.update()
        return self.r[key]['color']
    return ''
|
||||
|
||||
def get_value_color(self, val, minval=None, maxval=None):
    """
    Rescales a value to the [0,1] interval and delegates to color_red2green_01 to obtain its color.

    :param val: the value to color
    :param minval: the lower end of the scale; if either minval or maxval is None, both are replaced by
        self.range_minval and self.range_maxval (after calling update())
    :param maxval: the upper end of the scale (see minval)
    :return: the result of color_red2green_01 for the rescaled value and self.maxtone
    """
    if minval is None or maxval is None:
        self.update()
        minval = self.range_minval
        maxval = self.range_maxval
    span = maxval - minval
    if span == 0:
        # degenerate scale (e.g., a single result, or all results tied): dividing would raise or produce
        # nan, which then fails the [0,1] assertion in color_red2green_01; use the midpoint instead
        val = 0.5
    else:
        val = (val - minval) / span
    if self.lower_is_better:
        # invert the scale so that lower values end up closer to 1
        val = 1 - val
    return color_red2green_01(val, self.maxtone)
|
||||
|
||||
def change_compare(self, attr):
    """
    Selects the attribute used for comparing results.

    :param attr: the name of the attribute to compare by
    """
    # clearing the flag makes the next update() call compute() again
    self.computed = False
    self.compare = attr
|
||||
|
||||
|
||||
|
||||
def color_red2green_01(val, maxtone=100):
|
||||
assert 0 <= val <= 1, f'val {val} out of range [0,1]'
|
||||
|
@ -185,24 +202,3 @@ def color_red2green_01(val, maxtone=100):
|
|||
tone = maxtone * val
|
||||
return '\cellcolor{' + color + f'!{int(tone)}' + '}'
|
||||
|
||||
|
||||
def add(x):
|
||||
r = np.random.rand(100)/2+x
|
||||
return {
|
||||
'values': r
|
||||
}
|
||||
|
||||
"""
|
||||
r = ResultSet('dataset1', addfunc=add, show_std=False, minval=0, maxval=1)
|
||||
for x in range(10):
|
||||
r.add(f'a{x}', np.random.randint(0,5) / 10)
|
||||
|
||||
print(r.name)
|
||||
for x in range(10):
|
||||
key = f'a{x}'
|
||||
print(r.latex(key), r.get(key, 'rank'))
|
||||
|
||||
print('----')
|
||||
print(f'ave: {r.mean():.3f}')
|
||||
print(f'averank: {r.mean("rank"):.3f}')
|
||||
"""
|
|
@ -1,4 +1,5 @@
|
|||
import quapy as qp
|
||||
import numpy as np
|
||||
from os import makedirs
|
||||
# from evaluate import evaluate_directory, statistical_significance, get_ranks_from_Gao_Sebastiani
|
||||
import sys, os
|
||||
|
@ -16,15 +17,6 @@ sample_size = 100
|
|||
qp.environ['SAMPLE_SIZE'] = sample_size
|
||||
|
||||
|
||||
|
||||
# results_dict = evaluate_directory('results/*.pkl', evaluation_measures)
|
||||
# stats = {
|
||||
# dataset : {
|
||||
# 'mae': statistical_significance(f'results/{dataset}-*-mae-run?.pkl', ae),
|
||||
# 'mrae': statistical_significance(f'results/{dataset}-*-mrae-run?.pkl', rae),
|
||||
# } for dataset in datasets
|
||||
# }
|
||||
|
||||
nice = {
|
||||
'mae':'AE',
|
||||
'mrae':'RAE',
|
||||
|
@ -45,7 +37,8 @@ nice = {
|
|||
'semeval13': 'SemEval13',
|
||||
'semeval14': 'SemEval14',
|
||||
'semeval15': 'SemEval15',
|
||||
'semeval16': 'SemEval16'
|
||||
'semeval16': 'SemEval16',
|
||||
'Average': 'Average'
|
||||
}
|
||||
|
||||
|
||||
|
@ -68,6 +61,51 @@ def color_from_abs_rank(abs_rank, n_methods, maxtone=100):
|
|||
return color_from_rel_rank(rel_rank, maxtone)
|
||||
|
||||
|
||||
def load_Gao_Sebastiani_previous_results(path='./Gao_Sebastiani_results.txt'):
    """
    Loads previously reported AE and RAE scores from a whitespace-separated text file.

    The first line of the file is treated as a header and skipped. Every other non-blank line has either
    four fields ("dataset learner-method ae rae") or three fields ("learner-method ae rae"), in which case
    the row belongs to the most recently named dataset. All fields are lower-cased, the learner prefix is
    dropped, and some method names are renamed (kld->svmkld, nkld->svmnkld, qbeta2->svmq, em->sld).

    :param path: the location of the results file
    :return: a dict mapping "<dataset>-<method>-ae" and "<dataset>-<method>-rae" to float scores
    :raises ValueError: if a line does not have 3 or 4 fields, if a 3-field row appears before any dataset
        has been named, or if the method field is not of the form "learner-method"
    """
    old2new = {
        'kld': 'svmkld',
        'nkld': 'svmnkld',
        'qbeta2': 'svmq',
        'em': 'sld',
    }

    gao_seb_results = {}
    dataset = None
    with open(path, 'rt') as fin:
        next(fin, None)  # skip the header line
        for lineno, line in enumerate(fin, start=2):
            parts = line.lower().split()
            if not parts:
                # tolerate blank lines (e.g., a trailing newline at the end of the file)
                continue
            if len(parts) == 4:
                dataset, method, ae, rae = parts
            elif len(parts) == 3:
                method, ae, rae = parts
            else:
                raise ValueError(f'{path}, line {lineno}: expected 3 or 4 fields, found {len(parts)}')
            if dataset is None:
                raise ValueError(f'{path}, line {lineno}: method row found before any dataset name')
            _learner, method = method.split('-')
            method = old2new.get(method, method)
            gao_seb_results[f'{dataset}-{method}-ae'] = float(ae)
            gao_seb_results[f'{dataset}-{method}-rae'] = float(rae)
    return gao_seb_results
|
||||
|
||||
|
||||
def get_ranks_from_Gao_Sebastiani():
    """
    Computes, for each metric ('ae' and 'rae') and each dataset, the rank position (1 = lowest score) of
    every method in the results returned by load_Gao_Sebastiani_previous_results(), plus the average rank
    of every method across datasets.

    :return: a tuple (ranks, gao_seb_results), where ranks maps "<dataset>-<method>-<metric>" to the rank
        position and "Average-<method>-<metric>" to the mean rank, and gao_seb_results is the loaded dict
    """
    gao_seb_results = load_Gao_Sebastiani_previous_results()
    # keys have the form "<dataset>-<method>-<metric>"
    key_fields = [key.split('-') for key in gao_seb_results.keys()]
    datasets = {fields[0] for fields in key_fields}
    methods = np.sort(np.unique([fields[1] for fields in key_fields]))

    ranks = {}
    for metric in ['ae', 'rae']:
        for dataset in datasets:
            scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods]
            ascending = methods[np.argsort(scores)]
            for position, method in enumerate(ascending, start=1):
                ranks[f'{dataset}-{method}-{metric}'] = position
        for method in methods:
            positions = [ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets]
            ranks[f'Average-{method}-{metric}'] = np.mean(positions)
    return ranks, gao_seb_results
|
||||
|
||||
|
||||
def save_table(path, table):
|
||||
print(f'saving results in {path}')
|
||||
with open(path, 'wt') as foo:
|
||||
|
@ -77,14 +115,12 @@ def save_table(path, table):
|
|||
# Tables evaluation scores for AE and RAE (two tables)
|
||||
# ----------------------------------------------------
|
||||
|
||||
|
||||
|
||||
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
|
||||
evaluation_measures = [qp.error.ae, qp.error.rae]
|
||||
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
|
||||
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
||||
new_methods = []
|
||||
|
||||
|
||||
results_dict = {}
|
||||
stats={}
|
||||
def addfunc(dataset, method, loss):
|
||||
path = result_path(dataset, method, 'm'+loss if not loss.startswith('m') else loss)
|
||||
if os.path.exists(path):
|
||||
|
@ -96,103 +132,116 @@ def addfunc(dataset, method, loss):
|
|||
}
|
||||
return None
|
||||
|
||||
def addave(method, tables):
    """
    Pools the 'values' of a method across several tables.

    :param method: the key to look up in every table
    :param tables: an iterable of objects exposing get(key, attr, missing=...)
    :return: a dict {'values': array} with the values of all tables concatenated, or None if there are no
        tables or if any table has no 'values' for the method
    """
    pooled = []
    for table in tables:
        values = table.get(method, 'values', missing=None)
        if values is None:
            return None
        pooled.append(values)
    if not pooled:
        # np.concatenate raises ValueError on an empty sequence; report "no result" instead
        return None
    return {
        'values': np.concatenate(pooled)
    }
|
||||
|
||||
def addrankave(method, tables):
    """
    Collects the 'rank' of a method across several tables.

    :param method: the key to look up in every table
    :param tables: an iterable of objects exposing get(key, attr, missing=...)
    :return: a dict {'values': array} with one rank per table, or None if there are no tables or if any
        table has no 'rank' for the method
    """
    collected = []
    for table in tables:
        rank = table.get(method, 'rank', missing=None)
        if rank is None:
            return None
        collected.append(rank)
    if not collected:
        # an empty array would only yield a nan average; report "no result" like the missing-rank case
        return None
    return {
        'values': np.asarray(collected)
    }
|
||||
|
||||
|
||||
TABLES = {eval_func.__name__:{} for eval_func in evaluation_measures}
|
||||
|
||||
for i, eval_func in enumerate(evaluation_measures):
|
||||
eval_name = eval_func.__name__
|
||||
added_methods = ['svm' + eval_name] # , 'quanet', 'dys']
|
||||
added_methods = ['svm' + eval_name] + new_methods
|
||||
methods = gao_seb_methods + added_methods
|
||||
nold_methods = len(gao_seb_methods)
|
||||
nnew_methods = len(added_methods)
|
||||
|
||||
# fill table
|
||||
TABLE = {}
|
||||
TABLE = TABLES[eval_name]
|
||||
for dataset in datasets:
|
||||
TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats", maxtone=50,
|
||||
remove_mean='0.' if eval_func == qp.error.ae else '')
|
||||
TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats")
|
||||
for method in methods:
|
||||
TABLE[dataset].add(method, dataset, method, eval_name)
|
||||
|
||||
TABLE['Average'] = ResultSet('ave', addave, show_std=False, test="ttest_ind_from_stats")
|
||||
for method in methods:
|
||||
TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets])
|
||||
|
||||
tabular = """
|
||||
\\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*len(gao_seb_methods))+ '|' + ('Y|'*len(added_methods)) + """} \hline
|
||||
& \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c||}{} \\\\ \hline
|
||||
& \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline
|
||||
"""
|
||||
|
||||
for method in methods:
|
||||
tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
|
||||
tabular += '\\\\\hline\n'
|
||||
|
||||
for dataset in datasets:
|
||||
for dataset in datasets + ['Average']:
|
||||
if dataset == 'Average': tabular+= '\line\n'
|
||||
tabular += nice.get(dataset, dataset.upper()) + ' '
|
||||
for method in methods:
|
||||
tabular += ' & ' + TABLE[dataset].latex(method)
|
||||
tabular += '\\\\\hline\n'
|
||||
|
||||
tabular += "\end{tabularx}"
|
||||
|
||||
save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
# gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
|
||||
gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
|
||||
|
||||
# Tables ranks for AE and RAE (two tables)
|
||||
# ----------------------------------------------------
|
||||
# for i, eval_func in enumerate(evaluation_measures):
|
||||
# eval_name = eval_func.__name__
|
||||
# methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
|
||||
# table = """
|
||||
# \\begin{table}[h]
|
||||
# """
|
||||
# if i == 0:
|
||||
# caption = """
|
||||
# \caption{Rank positions of the quantification methods in the AE
|
||||
# experiments, and (between parentheses) the rank positions
|
||||
# obtained in the evaluation of~\cite{Gao:2016uq}.}
|
||||
# """
|
||||
# else:
|
||||
# caption = "\caption{Same as Table~\\ref{tab:maeranks}, but with " + nice[eval_name] + " instead of AE.}"
|
||||
# table += caption + """
|
||||
# \\begin{center}
|
||||
# \\resizebox{\\textwidth}{!}{
|
||||
# """
|
||||
# tabular = """
|
||||
# \\begin{tabularx}{\\textwidth}{|c||Y|Y|Y|Y|Y|Y|Y|Y|} \hline
|
||||
# & \multicolumn{8}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
|
||||
# """
|
||||
#
|
||||
# for method in methods:
|
||||
# tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
|
||||
# tabular += '\\\\\hline\n'
|
||||
#
|
||||
# for dataset in datasets:
|
||||
# tabular += nice.get(dataset, dataset.upper()) + ' '
|
||||
# ranks_no_gap = []
|
||||
# for method in methods:
|
||||
# learner = 'lr' if not method.startswith('svm') else 'svmperf'
|
||||
# key = f'{dataset}-{method}-{learner}-{}-{eval_name}'
|
||||
# ranks_no_gap.append(stats[dataset][eval_name].get(key, (None, None, len(methods)))[2])
|
||||
# ranks_no_gap = sorted(ranks_no_gap)
|
||||
# ranks_no_gap = {rank:i+1 for i,rank in enumerate(ranks_no_gap)}
|
||||
# for method in methods:
|
||||
# learner = 'lr' if not method.startswith('svm') else 'svmperf'
|
||||
# key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}'
|
||||
# if key in stats[dataset][eval_name]:
|
||||
# _, _, abs_rank = stats[dataset][eval_name][key]
|
||||
# real_rank = ranks_no_gap[abs_rank]
|
||||
# tabular += f' & {real_rank}'
|
||||
# tabular += color_from_abs_rank(real_rank, len(methods), maxtone=MAXTONE)
|
||||
# else:
|
||||
# tabular += ' & --- '
|
||||
# old_rank = gao_seb_ranks.get(f'{dataset}-{method}-{eval_name}', 'error')
|
||||
# tabular += f' ({old_rank})'
|
||||
# tabular += '\\\\\hline\n'
|
||||
# tabular += "\end{tabularx}"
|
||||
# table += tabular + """
|
||||
# }
|
||||
# \end{center}
|
||||
# \label{tab:""" + eval_name + """ranks}
|
||||
# \end{table}
|
||||
# """
|
||||
# save_table(f'../tables/tab_rank_{eval_name}.tex', table)
|
||||
#
|
||||
#
|
||||
# print("[Done]")
|
||||
for i, eval_func in enumerate(evaluation_measures):
|
||||
eval_name = eval_func.__name__
|
||||
methods = gao_seb_methods
|
||||
nold_methods = len(gao_seb_methods)
|
||||
|
||||
TABLE = TABLES[eval_name]
|
||||
TABLE['Average'] = ResultSet('ave', addrankave, show_std=False, test="ttest_ind_from_stats")
|
||||
for method in methods:
|
||||
TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets])
|
||||
|
||||
|
||||
tabular = """
|
||||
\\begin{tabularx}{\\textwidth}{|c||""" + ('Y|' * len(gao_seb_methods)) + """} \hline
|
||||
& \multicolumn{""" + str(nold_methods) + """}{c||}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
|
||||
"""
|
||||
|
||||
for method in methods:
|
||||
tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
|
||||
tabular += '\\\\\hline\n'
|
||||
|
||||
for dataset in datasets + ['Average']:
|
||||
if dataset == 'Average':
|
||||
tabular += '\line\n'
|
||||
else:
|
||||
TABLE[dataset].change_compare('rank')
|
||||
tabular += nice.get(dataset, dataset.upper()) + ' '
|
||||
for method in gao_seb_methods:
|
||||
if dataset == 'Average':
|
||||
method_rank = TABLE[dataset].get(method, 'mean')
|
||||
else:
|
||||
method_rank = TABLE[dataset].get(method, 'rank')
|
||||
gao_seb_rank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}']
|
||||
if dataset == 'Average':
|
||||
if method_rank != '--':
|
||||
method_rank = f'{method_rank:.1f}'
|
||||
gao_seb_rank = f'{gao_seb_rank:.1f}'
|
||||
tabular += ' & ' + f'{method_rank}' + f' ({gao_seb_rank}) ' + TABLE[dataset].get_color(method)
|
||||
tabular += '\\\\\hline\n'
|
||||
tabular += "\end{tabularx}"
|
||||
|
||||
save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
|
||||
|
||||
|
||||
print("[Done]")
|
Loading…
Reference in New Issue