tables complete; refactoring needed urgently

This commit is contained in:
Alejandro Moreo Fernandez 2021-01-13 18:43:35 +01:00
parent f820d36927
commit 5793484f70
3 changed files with 260 additions and 126 deletions

View File

@ -0,0 +1,89 @@
AE RAE
SemEval13 SVM-KLD 0.0722 0.1720
SVM-NKLD 0.0714 0.2756
SVM-QBETA2 0.0782 0.2775
LR-CC 0.0996 0.3095
LR-EM 0.1191 0.3923
LR-PCC 0.0344 0.1506
LR-ACC 0.0806 0.2479
LR-PACC 0.0812 0.2626
SemEval14 SVM-KLD 0.0843 0.2268
SVM-NKLD 0.0836 0.3367
SVM-QBETA2 0.1018 0.3680
LR-CC 0.1043 0.3212
LR-EM 0.0807 0.3517
LR-PCC 0.1001 0.4277
LR-ACC 0.0581 0.2360
LR-PACC 0.0533 0.2573
SemEval15 SVM-KLD 0.1185 0.3789
SVM-NKLD 0.1155 0.4720
SVM-QBETA2 0.1263 0.4762
LR-CC 0.1101 0.2879
LR-EM 0.1204 0.2949
LR-PCC 0.0460 0.1973
LR-ACC 0.1064 0.2971
LR-PACC 0.1013 0.2729
SemEval16 SVM-KLD 0.0385 0.1512
SVM-NKLD 0.0830 0.3249
SVM-QBETA2 0.1201 0.5156
LR-CC 0.0500 0.1771
LR-EM 0.0646 0.2126
LR-PCC 0.0379 0.1553
LR-ACC 0.0542 0.2246
LR-PACC 0.0864 0.3504
Sanders SVM-KLD 0.0134 0.0630
SVM-NKLD 0.0950 0.3965
SVM-QBETA2 0.1098 0.4360
LR-CC 0.0671 0.2682
LR-EM 0.0715 0.2849
LR-PCC 0.0150 0.0602
LR-ACC 0.0338 0.1306
LR-PACC 0.0301 0.1173
SST SVM-KLD 0.0413 0.1458
SVM-NKLD 0.0749 0.2497
SVM-QBETA2 0.0671 0.2343
LR-CC 0.0330 0.1239
LR-EM 0.0369 0.1190
LR-PCC 0.0282 0.1068
LR-ACC 0.0492 0.1689
LR-PACC 0.0841 0.2302
OMD SVM-KLD 0.0305 0.0999
SVM-NKLD 0.0437 0.1279
SVM-QBETA2 0.0624 0.1826
LR-CC 0.0524 0.1527
LR-EM 0.0648 0.1886
LR-PCC 0.0046 0.0095
LR-ACC 0.0239 0.0753
LR-PACC 0.0100 0.0293
HCR SVM-KLD 0.0414 0.2191
SVM-NKLD 0.0604 0.2324
SVM-QBETA2 0.1272 0.4600
LR-CC 0.0525 0.1817
LR-EM 0.0895 0.3093
LR-PCC 0.0055 0.0202
LR-ACC 0.0240 0.1026
LR-PACC 0.0329 0.1436
GASP SVM-KLD 0.0171 0.0529
SVM-NKLD 0.0503 0.3416
SVM-QBETA2 0.0640 0.4402
LR-CC 0.0189 0.1297
LR-EM 0.0231 0.1589
LR-PCC 0.0097 0.0682
LR-ACC 0.0150 0.1038
LR-PACC 0.0087 0.0597
WA SVM-KLD 0.0647 0.1957
SVM-NKLD 0.0393 0.1357
SVM-QBETA2 0.0798 0.2332
LR-CC 0.0434 0.1270
LR-EM 0.0391 0.1145
LR-PCC 0.0338 0.0990
LR-ACC 0.0407 0.1197
LR-PACC 0.0277 0.0815
WB SVM-KLD 0.0613 0.1791
SVM-NKLD 0.0534 0.1756
SVM-QBETA2 0.0249 0.0774
LR-CC 0.0132 0.0399
LR-EM 0.0244 0.0773
LR-PCC 0.0123 0.0390
LR-ACC 0.0230 0.0719
LR-PACC 0.0165 0.0515

View File

@ -1,15 +1,7 @@
from scipy.stats import wilcoxon, ttest_ind_from_stats
import numpy as np
"""
class Table:
def __init__(self):
self.tab = {}
def add(self, col, key, x):
if col not in self.tab:
self.tab[col] = ResultSet(col)
"""
class ResultSet:
VALID_TESTS = [None, "wilcoxon", "ttest_ind_from_stats"]
@ -18,7 +10,7 @@ class ResultSet:
TTEST_SAME = 'same'
def __init__(self, name, addfunc, compare='mean', lower_is_better=True, show_std=True, test="wilcoxon",
remove_mean='0.', prec_mean=3, remove_std='0.', prec_std=3, maxtone=100, minval=None, maxval=None):
remove_mean='', prec_mean=3, remove_std='', prec_std=3, maxtone=50, minval=None, maxval=None):
"""
:param name: name of the result set (e.g., a Dataset)
@ -65,13 +57,18 @@ class ResultSet:
self.r[key]['nobs'] = len(vals)
self.computed = False
def update(self):
    """
    Brings the derived statistics up to date.

    Calls :meth:`compute` only when the ``computed`` flag is not set, so repeated
    calls are cheap once the result set has been computed.
    """
    if self.computed:
        return
    self.compute()
def compute(self):
keylist = np.asarray(list(self.r.keys()))
vallist = [self.r[key][self.compare] for key in keylist]
keylist = keylist[np.argsort(vallist)]
minval = min(vallist) if self.minval is None else self.minval
maxval = max(vallist) if self.maxval is None else self.maxval
print(vallist)
self.range_minval = min(vallist) if self.minval is None else self.minval
self.range_maxval = max(vallist) if self.maxval is None else self.maxval
if not self.lower_is_better:
keylist = keylist[::-1]
@ -88,10 +85,7 @@ class ResultSet:
#color
val = self.r[key][self.compare]
val = (val-minval)/(maxval-minval)
if self.lower_is_better:
val = 1-val
self.r[key]['color'] = color_red2green_01(val, self.maxtone)
self.r[key]['color'] = self.get_value_color(val, minval=self.range_minval, maxval=self.range_maxval)
if self.test is not None:
if isbest:
@ -115,11 +109,11 @@ class ResultSet:
self.computed = True
def latex(self, key, missing='--', color=True):
if key not in self.r:
return missing
if not self.computed:
self.compute()
self.update()
rd = self.r[key]
s = f"{rd['mean']:.{self.prec_mean}f}"
@ -148,29 +142,52 @@ class ResultSet:
return s
def mean(self, attr='mean', required:int=None, missing=np.nan):
    """
    Returns the mean value of the "attr" attribute across all the results stored in this set.

    :param attr: the attribute to average across results
    :param required: if specified, indicates the number of values that should be part of the mean; if this number
        is different, then the mean is not computed
    :param missing: the value to return in case the attribute is absent from some result or the required
        condition is not satisfied
    :return: the mean of the "attr" attribute, or `missing`
    """
    vallist = [result.get(attr, None) for result in self.r.values()]
    # Identity test on purpose: `None in vallist` compares with ==, which raises
    # ValueError as soon as an element is a multi-element numpy array (e.g. attr='values').
    if any(val is None for val in vallist):
        return missing
    if required is not None and len(vallist) != required:
        return missing
    return np.mean(vallist)
def get(self, key, attr, missing='--'):
    """
    Looks up one attribute of one result.

    :param key: the key of the result
    :param attr: the name of the attribute to read from that result
    :param missing: the value to return when either the key or the attribute is not present
    :return: the stored attribute value, or `missing`
    """
    if key not in self.r:
        return missing
    # derived attributes are only available after update() has run
    self.update()
    entry = self.r[key]
    return entry[attr] if attr in entry else missing
def get_color(self, key):
    """
    Returns the 'color' entry stored for a result.

    :param key: the key of the result
    :return: the value stored under 'color' for `key` (after bringing the result set up to date), or the
        empty string if `key` is unknown
    """
    if key in self.r:
        self.update()
        return self.r[key]['color']
    return ''
def get_value_color(self, val, minval=None, maxval=None):
    """
    Maps a raw value onto the color returned by `color_red2green_01`.

    The value is rescaled to [0, 1] with respect to the interval [minval, maxval] and flipped when lower
    values are better, then passed (together with `self.maxtone`) to `color_red2green_01`, which asserts
    that the rescaled value lies in [0, 1].

    :param val: the value to colorize
    :param minval: lower end of the scale; if either bound is None, BOTH bounds are taken from the range
        computed for this result set (`range_minval`, `range_maxval`)
    :param maxval: upper end of the scale (see `minval`)
    :return: whatever `color_red2green_01` returns for the rescaled value
    """
    if minval is None or maxval is None:
        self.update()
        minval = self.range_minval
        maxval = self.range_maxval
    span = maxval - minval
    if span == 0:
        # Degenerate scale (a single result, or all results tied): dividing would raise
        # ZeroDivisionError or yield NaN; no value is better than another, so use the midpoint.
        val = 0.5
    else:
        val = (val - minval) / span
        if self.lower_is_better:
            val = 1 - val
    return color_red2green_01(val, self.maxtone)
def change_compare(self, attr):
    """
    Selects a different attribute as the comparison criterion.

    Clears the ``computed`` flag so that the next call to :meth:`update` recomputes the
    derived statistics using the new criterion.

    :param attr: the name of the attribute to store in ``self.compare``
    """
    self.computed = False
    self.compare = attr
def color_red2green_01(val, maxtone=100):
assert 0 <= val <= 1, f'val {val} out of range [0,1]'
@ -185,24 +202,3 @@ def color_red2green_01(val, maxtone=100):
tone = maxtone * val
return '\cellcolor{' + color + f'!{int(tone)}' + '}'
def add(x):
    """Demo addfunc: returns 100 uniform random draws from [x, x + 0.5) under the 'values' key."""
    samples = np.random.rand(100) / 2 + x
    return dict(values=samples)
"""
r = ResultSet('dataset1', addfunc=add, show_std=False, minval=0, maxval=1)
for x in range(10):
r.add(f'a{x}', np.random.randint(0,5) / 10)
print(r.name)
for x in range(10):
key = f'a{x}'
print(r.latex(key), r.get(key, 'rank'))
print('----')
print(f'ave: {r.mean():.3f}')
print(f'averank: {r.mean("rank"):.3f}')
"""

View File

@ -1,4 +1,5 @@
import quapy as qp
import numpy as np
from os import makedirs
# from evaluate import evaluate_directory, statistical_significance, get_ranks_from_Gao_Sebastiani
import sys, os
@ -16,15 +17,6 @@ sample_size = 100
qp.environ['SAMPLE_SIZE'] = sample_size
# results_dict = evaluate_directory('results/*.pkl', evaluation_measures)
# stats = {
# dataset : {
# 'mae': statistical_significance(f'results/{dataset}-*-mae-run?.pkl', ae),
# 'mrae': statistical_significance(f'results/{dataset}-*-mrae-run?.pkl', rae),
# } for dataset in datasets
# }
nice = {
'mae':'AE',
'mrae':'RAE',
@ -45,7 +37,8 @@ nice = {
'semeval13': 'SemEval13',
'semeval14': 'SemEval14',
'semeval15': 'SemEval15',
'semeval16': 'SemEval16'
'semeval16': 'SemEval16',
'Average': 'Average'
}
@ -68,6 +61,51 @@ def color_from_abs_rank(abs_rank, n_methods, maxtone=100):
return color_from_rel_rank(rel_rank, maxtone)
def load_Gao_Sebastiani_previous_results(path='./Gao_Sebastiani_results.txt'):
    """
    Parses a whitespace-separated results file into a flat dictionary.

    The first line of the file is a header and is skipped. Every other non-blank line has either four
    fields (``dataset learner-method ae rae``) or three fields (``learner-method ae rae``); a three-field
    line belongs to the dataset named by the closest preceding four-field line. All text is lower-cased,
    the ``learner-`` prefix is dropped, and some method names are mapped to the names used in this script
    (kld->svmkld, nkld->svmnkld, qbeta2->svmq, em->sld).

    :param path: location of the results file (defaults to the path that was previously hard-coded)
    :return: a dict with keys '{dataset}-{method}-ae' and '{dataset}-{method}-rae' mapped to float scores
    :raises ValueError: if a line has an unexpected number of fields, lacks the ``learner-method`` form,
        contains a non-numeric score, or omits the dataset before any dataset has been named
    """
    old2new = {
        'kld': 'svmkld',
        'nkld': 'svmnkld',
        'qbeta2': 'svmq',
        'em': 'sld'
    }

    gao_seb_results = {}
    dataset = None
    with open(path, 'rt') as fin:
        next(fin, None)  # skip the header line
        for lineno, line in enumerate(fin, start=2):
            parts = line.lower().split()
            if not parts:
                # tolerate blank lines (e.g. a trailing newline at the end of the file)
                continue
            if len(parts) == 4:
                dataset, method, ae, rae = parts
            elif len(parts) == 3 and dataset is not None:
                method, ae, rae = parts
            else:
                raise ValueError(f'{path}:{lineno}: cannot parse line {line.strip()!r}')
            # only the quantification method is kept; the learner prefix is not part of the key
            _, method = method.split('-', 1)
            method = old2new.get(method, method)
            gao_seb_results[f'{dataset}-{method}-ae'] = float(ae)
            gao_seb_results[f'{dataset}-{method}-rae'] = float(rae)

    return gao_seb_results
def get_ranks_from_Gao_Sebastiani():
    """
    Derives rank positions from the scores returned by `load_Gao_Sebastiani_previous_results`.

    For each metric ('ae', 'rae') and each dataset, methods are ordered by ascending score and given
    ranks 1, 2, ...; in addition, the mean rank of every method across datasets is stored under the
    pseudo-dataset name 'Average'.

    :return: a tuple (ranks, gao_seb_results), where ranks maps '{dataset}-{method}-{metric}' (and
        'Average-{method}-{metric}') to the rank, and gao_seb_results is the dict of loaded scores
    """
    gao_seb_results = load_Gao_Sebastiani_previous_results()
    datasets = {key.split('-')[0] for key in gao_seb_results}
    # np.unique already returns its result sorted
    methods = np.unique([key.split('-')[1] for key in gao_seb_results])

    ranks = {}
    for metric in ('ae', 'rae'):
        for dataset in datasets:
            scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods]
            ascending = methods[np.argsort(scores)]
            for position, method in enumerate(ascending, start=1):
                ranks[f'{dataset}-{method}-{metric}'] = position
        # the averages need the ranks of this metric for all datasets, hence the second pass
        for method in methods:
            per_dataset = [ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets]
            ranks[f'Average-{method}-{metric}'] = np.mean(per_dataset)

    return ranks, gao_seb_results
def save_table(path, table):
print(f'saving results in {path}')
with open(path, 'wt') as foo:
@ -77,14 +115,12 @@ def save_table(path, table):
# Tables evaluation scores for AE and RAE (two tables)
# ----------------------------------------------------
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
evaluation_measures = [qp.error.ae, qp.error.rae]
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
new_methods = []
results_dict = {}
stats={}
def addfunc(dataset, method, loss):
path = result_path(dataset, method, 'm'+loss if not loss.startswith('m') else loss)
if os.path.exists(path):
@ -96,103 +132,116 @@ def addfunc(dataset, method, loss):
}
return None
def addave(method, tables):
    """
    Pools the 'values' of one method across several tables.

    :param method: the key to look up in every table
    :param tables: iterable of objects exposing ``get(key, attr, missing=...)``
    :return: a dict ``{'values': <concatenation of all the per-table values>}``, or None as soon as one
        table has no 'values' for `method`
    """
    lookups = (table.get(method, 'values', missing=None) for table in tables)
    pooled = []
    for table_values in lookups:
        if table_values is None:
            return None
        pooled.append(table_values)
    return {'values': np.concatenate(pooled)}
def addrankave(method, tables):
    """
    Collects the 'rank' of one method across several tables.

    :param method: the key to look up in every table
    :param tables: iterable of objects exposing ``get(key, attr, missing=...)``
    :return: a dict ``{'values': <array with one rank per table>}``, or None as soon as one table has no
        'rank' for `method`
    """
    lookups = (table.get(method, 'rank', missing=None) for table in tables)
    collected = []
    for table_rank in lookups:
        if table_rank is None:
            return None
        collected.append(table_rank)
    return {'values': np.asarray(collected)}
TABLES = {eval_func.__name__:{} for eval_func in evaluation_measures}
for i, eval_func in enumerate(evaluation_measures):
eval_name = eval_func.__name__
added_methods = ['svm' + eval_name] # , 'quanet', 'dys']
added_methods = ['svm' + eval_name] + new_methods
methods = gao_seb_methods + added_methods
nold_methods = len(gao_seb_methods)
nnew_methods = len(added_methods)
# fill table
TABLE = {}
TABLE = TABLES[eval_name]
for dataset in datasets:
TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats", maxtone=50,
remove_mean='0.' if eval_func == qp.error.ae else '')
TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats")
for method in methods:
TABLE[dataset].add(method, dataset, method, eval_name)
TABLE['Average'] = ResultSet('ave', addave, show_std=False, test="ttest_ind_from_stats")
for method in methods:
TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets])
tabular = """
\\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*len(gao_seb_methods))+ '|' + ('Y|'*len(added_methods)) + """} \hline
& \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c||}{} \\\\ \hline
& \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline
"""
for method in methods:
tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
tabular += '\\\\\hline\n'
for dataset in datasets:
for dataset in datasets + ['Average']:
if dataset == 'Average': tabular+= '\line\n'
tabular += nice.get(dataset, dataset.upper()) + ' '
for method in methods:
tabular += ' & ' + TABLE[dataset].latex(method)
tabular += '\\\\\hline\n'
tabular += "\end{tabularx}"
save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
sys.exit(0)
# gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
# Tables ranks for AE and RAE (two tables)
# ----------------------------------------------------
# for i, eval_func in enumerate(evaluation_measures):
# eval_name = eval_func.__name__
# methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
# table = """
# \\begin{table}[h]
# """
# if i == 0:
# caption = """
# \caption{Rank positions of the quantification methods in the AE
# experiments, and (between parentheses) the rank positions
# obtained in the evaluation of~\cite{Gao:2016uq}.}
# """
# else:
# caption = "\caption{Same as Table~\\ref{tab:maeranks}, but with " + nice[eval_name] + " instead of AE.}"
# table += caption + """
# \\begin{center}
# \\resizebox{\\textwidth}{!}{
# """
# tabular = """
# \\begin{tabularx}{\\textwidth}{|c||Y|Y|Y|Y|Y|Y|Y|Y|} \hline
# & \multicolumn{8}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
# """
#
# for method in methods:
# tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
# tabular += '\\\\\hline\n'
#
# for dataset in datasets:
# tabular += nice.get(dataset, dataset.upper()) + ' '
# ranks_no_gap = []
# for method in methods:
# learner = 'lr' if not method.startswith('svm') else 'svmperf'
# key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}'
# ranks_no_gap.append(stats[dataset][eval_name].get(key, (None, None, len(methods)))[2])
# ranks_no_gap = sorted(ranks_no_gap)
# ranks_no_gap = {rank:i+1 for i,rank in enumerate(ranks_no_gap)}
# for method in methods:
# learner = 'lr' if not method.startswith('svm') else 'svmperf'
# key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}'
# if key in stats[dataset][eval_name]:
# _, _, abs_rank = stats[dataset][eval_name][key]
# real_rank = ranks_no_gap[abs_rank]
# tabular += f' & {real_rank}'
# tabular += color_from_abs_rank(real_rank, len(methods), maxtone=MAXTONE)
# else:
# tabular += ' & --- '
# old_rank = gao_seb_ranks.get(f'{dataset}-{method}-{eval_name}', 'error')
# tabular += f' ({old_rank})'
# tabular += '\\\\\hline\n'
# tabular += "\end{tabularx}"
# table += tabular + """
# }
# \end{center}
# \label{tab:""" + eval_name + """ranks}
# \end{table}
# """
# save_table(f'../tables/tab_rank_{eval_name}.tex', table)
#
#
# print("[Done]")
# Build and save one LaTeX table of rank positions per evaluation measure. Each cell shows the rank
# obtained here followed, in parentheses, by the rank derived from the previously published scores.
# Backslashes in the LaTeX snippets are escaped explicitly: sequences such as "\h" or "\m" are invalid
# string escapes (deprecated; SyntaxWarning on recent Python versions). The emitted text is unchanged.
for i, eval_func in enumerate(evaluation_measures):
    eval_name = eval_func.__name__
    methods = gao_seb_methods
    nold_methods = len(gao_seb_methods)

    # Reuse the per-dataset result sets of this measure; the 'Average' row is replaced by one fed with
    # the per-dataset ranks. This must happen before change_compare('rank') is applied below.
    TABLE = TABLES[eval_name]
    TABLE['Average'] = ResultSet('ave', addrankave, show_std=False, test="ttest_ind_from_stats")
    for method in methods:
        TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets])

    tabular = (
        "\n\\begin{tabularx}{\\textwidth}{|c||" + ('Y|' * len(gao_seb_methods)) + "} \\hline\n"
        + "& \\multicolumn{" + str(nold_methods) + "}{c||}{Methods tested in~\\cite{Gao:2016uq}} \\\\ \\hline\n"
    )

    # header row: one rotated cell per method
    for method in methods:
        tabular += ' & \\side{' + nice.get(method, method.upper()) + '$^{' + nicerm(eval_name) + '}$} '
    tabular += '\\\\\\hline\n'

    for dataset in datasets + ['Average']:
        if dataset == 'Average':
            # NOTE(review): '\line' is emitted verbatim as in the original; it may be a typo for
            # '\hline' (extra rule before the Average row) -- confirm against the LaTeX preamble.
            tabular += '\\line\n'
        else:
            # color the cells of this row according to rank instead of score
            TABLE[dataset].change_compare('rank')
        tabular += nice.get(dataset, dataset.upper()) + ' '
        for method in gao_seb_methods:
            if dataset == 'Average':
                method_rank = TABLE[dataset].get(method, 'mean')
            else:
                method_rank = TABLE[dataset].get(method, 'rank')
            gao_seb_rank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}']
            if dataset == 'Average':
                # averages are fractional: show one decimal ('--' marks a missing value)
                if method_rank != '--':
                    method_rank = f'{method_rank:.1f}'
                gao_seb_rank = f'{gao_seb_rank:.1f}'
            tabular += ' & ' + f'{method_rank}' + f' ({gao_seb_rank}) ' + TABLE[dataset].get_color(method)
        tabular += '\\\\\\hline\n'
    tabular += "\\end{tabularx}"

    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)

print("[Done]")