forked from moreo/QuaPy
tables complete; refactoring needed urgently
parent f820d36927
commit 5793484f70
@@ -0,0 +1,89 @@
+                        AE      RAE
+SemEval13  SVM-KLD      0.0722  0.1720
+           SVM-NKLD     0.0714  0.2756
+           SVM-QBETA2   0.0782  0.2775
+           LR-CC        0.0996  0.3095
+           LR-EM        0.1191  0.3923
+           LR-PCC       0.0344  0.1506
+           LR-ACC       0.0806  0.2479
+           LR-PACC      0.0812  0.2626
+SemEval14  SVM-KLD      0.0843  0.2268
+           SVM-NKLD     0.0836  0.3367
+           SVM-QBETA2   0.1018  0.3680
+           LR-CC        0.1043  0.3212
+           LR-EM        0.0807  0.3517
+           LR-PCC       0.1001  0.4277
+           LR-ACC       0.0581  0.2360
+           LR-PACC      0.0533  0.2573
+SemEval15  SVM-KLD      0.1185  0.3789
+           SVM-NKLD     0.1155  0.4720
+           SVM-QBETA2   0.1263  0.4762
+           LR-CC        0.1101  0.2879
+           LR-EM        0.1204  0.2949
+           LR-PCC       0.0460  0.1973
+           LR-ACC       0.1064  0.2971
+           LR-PACC      0.1013  0.2729
+SemEval16  SVM-KLD      0.0385  0.1512
+           SVM-NKLD     0.0830  0.3249
+           SVM-QBETA2   0.1201  0.5156
+           LR-CC        0.0500  0.1771
+           LR-EM        0.0646  0.2126
+           LR-PCC       0.0379  0.1553
+           LR-ACC       0.0542  0.2246
+           LR-PACC      0.0864  0.3504
+Sanders    SVM-KLD      0.0134  0.0630
+           SVM-NKLD     0.0950  0.3965
+           SVM-QBETA2   0.1098  0.4360
+           LR-CC        0.0671  0.2682
+           LR-EM        0.0715  0.2849
+           LR-PCC       0.0150  0.0602
+           LR-ACC       0.0338  0.1306
+           LR-PACC      0.0301  0.1173
+SST        SVM-KLD      0.0413  0.1458
+           SVM-NKLD     0.0749  0.2497
+           SVM-QBETA2   0.0671  0.2343
+           LR-CC        0.0330  0.1239
+           LR-EM        0.0369  0.1190
+           LR-PCC       0.0282  0.1068
+           LR-ACC       0.0492  0.1689
+           LR-PACC      0.0841  0.2302
+OMD        SVM-KLD      0.0305  0.0999
+           SVM-NKLD     0.0437  0.1279
+           SVM-QBETA2   0.0624  0.1826
+           LR-CC        0.0524  0.1527
+           LR-EM        0.0648  0.1886
+           LR-PCC       0.0046  0.0095
+           LR-ACC       0.0239  0.0753
+           LR-PACC      0.0100  0.0293
+HCR        SVM-KLD      0.0414  0.2191
+           SVM-NKLD     0.0604  0.2324
+           SVM-QBETA2   0.1272  0.4600
+           LR-CC        0.0525  0.1817
+           LR-EM        0.0895  0.3093
+           LR-PCC       0.0055  0.0202
+           LR-ACC       0.0240  0.1026
+           LR-PACC      0.0329  0.1436
+GASP       SVM-KLD      0.0171  0.0529
+           SVM-NKLD     0.0503  0.3416
+           SVM-QBETA2   0.0640  0.4402
+           LR-CC        0.0189  0.1297
+           LR-EM        0.0231  0.1589
+           LR-PCC       0.0097  0.0682
+           LR-ACC       0.0150  0.1038
+           LR-PACC      0.0087  0.0597
+WA         SVM-KLD      0.0647  0.1957
+           SVM-NKLD     0.0393  0.1357
+           SVM-QBETA2   0.0798  0.2332
+           LR-CC        0.0434  0.1270
+           LR-EM        0.0391  0.1145
+           LR-PCC       0.0338  0.0990
+           LR-ACC       0.0407  0.1197
+           LR-PACC      0.0277  0.0815
+WB         SVM-KLD      0.0613  0.1791
+           SVM-NKLD     0.0534  0.1756
+           SVM-QBETA2   0.0249  0.0774
+           LR-CC        0.0132  0.0399
+           LR-EM        0.0244  0.0773
+           LR-PCC       0.0123  0.0390
+           LR-ACC       0.0230  0.0719
+           LR-PACC      0.0165  0.0515
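The block layout above (the dataset name appears only on the first row of each block and is implied for the rows that follow) is what the loader added further down in this commit expects. A minimal sanity-check sketch, assuming the table is saved as ./Gao_Sebastiani_results.txt (the path that load_Gao_Sebastiani_previous_results below opens); it only averages the AE column per method:

import numpy as np
from collections import defaultdict

ae_per_method = defaultdict(list)
with open('./Gao_Sebastiani_results.txt', 'rt') as fin:
    for line in fin.readlines()[1:]:        # skip the "AE  RAE" header row
        parts = line.split()
        if not parts:
            continue                        # tolerate stray blank lines
        if len(parts) == 4:                 # first row of a dataset block: dataset, method, AE, RAE
            dataset, method, ae, rae = parts
        else:                               # dataset name carried over from the previous row
            method, ae, rae = parts
        ae_per_method[method].append(float(ae))

for method, scores in sorted(ae_per_method.items()):
    print(f'{method}: mean AE = {np.mean(scores):.4f} over {len(scores)} datasets')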
@@ -1,15 +1,7 @@
 from scipy.stats import wilcoxon, ttest_ind_from_stats
 import numpy as np

-"""
-class Table:
-    def __init__(self):
-        self.tab = {}
-
-    def add(self, col, key, x):
-        if col not in self.tab:
-            self.tab[col] = ResultSet(col)
-"""

 class ResultSet:
     VALID_TESTS = [None, "wilcoxon", "ttest_ind_from_stats"]
@@ -18,7 +10,7 @@ class ResultSet:
     TTEST_SAME = 'same'

     def __init__(self, name, addfunc, compare='mean', lower_is_better=True, show_std=True, test="wilcoxon",
-                 remove_mean='0.', prec_mean=3, remove_std='0.', prec_std=3, maxtone=100, minval=None, maxval=None):
+                 remove_mean='', prec_mean=3, remove_std='', prec_std=3, maxtone=50, minval=None, maxval=None):
         """

         :param name: name of the result set (e.g., a Dataset)
@@ -65,13 +57,18 @@ class ResultSet:
             self.r[key]['nobs'] = len(vals)
         self.computed = False

+    def update(self):
+        if not self.computed:
+            self.compute()
+
     def compute(self):
         keylist = np.asarray(list(self.r.keys()))
         vallist = [self.r[key][self.compare] for key in keylist]
         keylist = keylist[np.argsort(vallist)]

-        minval = min(vallist) if self.minval is None else self.minval
-        maxval = max(vallist) if self.maxval is None else self.maxval
+        print(vallist)
+        self.range_minval = min(vallist) if self.minval is None else self.minval
+        self.range_maxval = max(vallist) if self.maxval is None else self.maxval
         if not self.lower_is_better:
             keylist = keylist[::-1]

@@ -88,10 +85,7 @@ class ResultSet:

             #color
             val = self.r[key][self.compare]
-            val = (val-minval)/(maxval-minval)
-            if self.lower_is_better:
-                val = 1-val
-            self.r[key]['color'] = color_red2green_01(val, self.maxtone)
+            self.r[key]['color'] = self.get_value_color(val, minval=self.range_minval, maxval=self.range_maxval)

             if self.test is not None:
                 if isbest:
@@ -115,11 +109,11 @@ class ResultSet:
         self.computed = True

     def latex(self, key, missing='--', color=True):

         if key not in self.r:
             return missing

-        if not self.computed:
-            self.compute()
+        self.update()

         rd = self.r[key]
         s = f"{rd['mean']:.{self.prec_mean}f}"
@@ -148,29 +142,52 @@ class ResultSet:

         return s

-    def mean(self, attr='mean', required:int=None):
+    def mean(self, attr='mean', required:int=None, missing=np.nan):
         """
-        returns the mean value for the "key" attribute
+        returns the mean value for the "attr" attribute
         :param attr: the attribute to average across results
         :param required: if specified, indicates the number of values that should be part of the mean; if this number
             is different, then the mean is not computed
+        :param missing: the value to return in case the required condition is not satisfied
         :return: the mean of the "key" attribute
         """
         keylist = list(self.r.keys())
         vallist = [self.r[key].get(attr, None) for key in keylist]
         if None in vallist:
-            return None
+            return missing
         if required is not None:
             if len(vallist) != required:
-                return None
+                return missing
         return np.mean(vallist)

     def get(self, key, attr, missing='--'):
         if key in self.r:
+            self.update()
             if attr in self.r[key]:
                 return self.r[key][attr]
         return missing

+    def get_color(self, key):
+        if key not in self.r:
+            return ''
+        self.update()
+        return self.r[key]['color']
+
+    def get_value_color(self, val, minval=None, maxval=None):
+        if minval is None or maxval is None:
+            self.update()
+            minval=self.range_minval
+            maxval=self.range_maxval
+        val = (val - minval) / (maxval - minval)
+        if self.lower_is_better:
+            val = 1 - val
+        return color_red2green_01(val, self.maxtone)
+
+    def change_compare(self, attr):
+        self.compare = attr
+        self.computed = False
+


 def color_red2green_01(val, maxtone=100):
     assert 0 <= val <= 1, f'val {val} out of range [0,1]'
@@ -185,24 +202,3 @@ def color_red2green_01(val, maxtone=100):
     tone = maxtone * val
     return '\cellcolor{' + color + f'!{int(tone)}' + '}'

-
-def add(x):
-    r = np.random.rand(100)/2+x
-    return {
-        'values': r
-    }
-
-"""
-r = ResultSet('dataset1', addfunc=add, show_std=False, minval=0, maxval=1)
-for x in range(10):
-    r.add(f'a{x}', np.random.randint(0,5) / 10)
-
-print(r.name)
-for x in range(10):
-    key = f'a{x}'
-    print(r.latex(key), r.get(key, 'rank'))
-
-print('----')
-print(f'ave: {r.mean():.3f}')
-print(f'averank: {r.mean("rank"):.3f}')
-"""
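The module-level demo that used to close this file is removed above. For reference, a minimal usage sketch of the refactored ResultSet, reconstructed from that removed demo and from the call sites in the script below; it assumes that add() simply forwards its extra arguments to the addfunc callable, which must return a dict holding a 'values' array (or None when the result is missing). The names demo_addfunc and method0..method9 are illustrative only:

import numpy as np

def demo_addfunc(x):
    # stand-in for the real loaders (addfunc/addave/addrankave in the script below)
    return {'values': np.random.rand(100) / 2 + x}

r = ResultSet('dataset1', addfunc=demo_addfunc, show_std=False, minval=0, maxval=1)
for i in range(10):
    r.add(f'method{i}', np.random.randint(0, 5) / 10)   # key first, then demo_addfunc's argument

for i in range(10):
    key = f'method{i}'
    print(r.latex(key), r.get(key, 'rank'), r.get_color(key))

print(f'ave: {r.mean():.3f}')
print(f'averank: {r.mean("rank"):.3f}')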
@@ -1,4 +1,5 @@
 import quapy as qp
+import numpy as np
 from os import makedirs
 # from evaluate import evaluate_directory, statistical_significance, get_ranks_from_Gao_Sebastiani
 import sys, os
@@ -16,15 +17,6 @@ sample_size = 100
 qp.environ['SAMPLE_SIZE'] = sample_size


-
-# results_dict = evaluate_directory('results/*.pkl', evaluation_measures)
-# stats = {
-#     dataset : {
-#         'mae': statistical_significance(f'results/{dataset}-*-mae-run?.pkl', ae),
-#         'mrae': statistical_significance(f'results/{dataset}-*-mrae-run?.pkl', rae),
-#     } for dataset in datasets
-# }
-
 nice = {
     'mae':'AE',
     'mrae':'RAE',
@@ -45,7 +37,8 @@ nice = {
     'semeval13': 'SemEval13',
     'semeval14': 'SemEval14',
     'semeval15': 'SemEval15',
-    'semeval16': 'SemEval16'
+    'semeval16': 'SemEval16',
+    'Average': 'Average'
 }

@@ -68,6 +61,51 @@ def color_from_abs_rank(abs_rank, n_methods, maxtone=100):
     return color_from_rel_rank(rel_rank, maxtone)


+def load_Gao_Sebastiani_previous_results():
+    def rename(method):
+        old2new = {
+            'kld': 'svmkld',
+            'nkld': 'svmnkld',
+            'qbeta2': 'svmq',
+            'em': 'sld'
+        }
+        return old2new.get(method, method)
+
+    gao_seb_results = {}
+    with open('./Gao_Sebastiani_results.txt', 'rt') as fin:
+        lines = fin.readlines()
+        for line in lines[1:]:
+            line = line.strip()
+            parts = line.lower().split()
+            if len(parts) == 4:
+                dataset, method, ae, rae = parts
+            else:
+                method, ae, rae = parts
+            learner, method = method.split('-')
+            method = rename(method)
+            gao_seb_results[f'{dataset}-{method}-ae'] = float(ae)
+            gao_seb_results[f'{dataset}-{method}-rae'] = float(rae)
+    return gao_seb_results
+
+
+def get_ranks_from_Gao_Sebastiani():
+    gao_seb_results = load_Gao_Sebastiani_previous_results()
+    datasets = set([key.split('-')[0] for key in gao_seb_results.keys()])
+    methods = np.sort(np.unique([key.split('-')[1] for key in gao_seb_results.keys()]))
+    ranks = {}
+    for metric in ['ae', 'rae']:
+        for dataset in datasets:
+            scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods]
+            order = np.argsort(scores)
+            sorted_methods = methods[order]
+            for i, method in enumerate(sorted_methods):
+                ranks[f'{dataset}-{method}-{metric}'] = i+1
+        for method in methods:
+            rankave = np.mean([ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets])
+            ranks[f'Average-{method}-{metric}'] = rankave
+    return ranks, gao_seb_results
+
+
 def save_table(path, table):
     print(f'saving results in {path}')
     with open(path, 'wt') as foo:
@@ -77,14 +115,12 @@ def save_table(path, table):
 # Tables evaluation scores for AE and RAE (two tables)
 # ----------------------------------------------------


 datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
 evaluation_measures = [qp.error.ae, qp.error.rae]
-gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
+gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
+new_methods = []

-results_dict = {}
-stats={}
 def addfunc(dataset, method, loss):
     path = result_path(dataset, method, 'm'+loss if not loss.startswith('m') else loss)
     if os.path.exists(path):
@@ -96,103 +132,116 @@ def addfunc(dataset, method, loss):
         }
     return None

+def addave(method, tables):
+    values = []
+    for table in tables:
+        mean = table.get(method, 'values', missing=None)
+        if mean is None:
+            return None
+        values.append(mean)
+    values = np.concatenate(values)
+    return {
+        'values': values
+    }
+
+def addrankave(method, tables):
+    values = []
+    for table in tables:
+        rank = table.get(method, 'rank', missing=None)
+        if rank is None:
+            return None
+        values.append(rank)
+    return {
+        'values': np.asarray(values)
+    }
+
+
+TABLES = {eval_func.__name__:{} for eval_func in evaluation_measures}
+
 for i, eval_func in enumerate(evaluation_measures):
     eval_name = eval_func.__name__
-    added_methods = ['svm' + eval_name] # , 'quanet', 'dys']
+    added_methods = ['svm' + eval_name] + new_methods
     methods = gao_seb_methods + added_methods
     nold_methods = len(gao_seb_methods)
     nnew_methods = len(added_methods)

     # fill table
-    TABLE = {}
+    TABLE = TABLES[eval_name]
     for dataset in datasets:
-        TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats", maxtone=50,
-                                   remove_mean='0.' if eval_func == qp.error.ae else '')
+        TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats")
         for method in methods:
             TABLE[dataset].add(method, dataset, method, eval_name)

+    TABLE['Average'] = ResultSet('ave', addave, show_std=False, test="ttest_ind_from_stats")
+    for method in methods:
+        TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets])
+
     tabular = """
     \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*len(gao_seb_methods))+ '|' + ('Y|'*len(added_methods)) + """} \hline
-      & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c||}{} \\\\ \hline
+      & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline
     """

     for method in methods:
         tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
     tabular += '\\\\\hline\n'

-    for dataset in datasets:
+    for dataset in datasets + ['Average']:
+        if dataset == 'Average': tabular+= '\line\n'
         tabular += nice.get(dataset, dataset.upper()) + ' '
         for method in methods:
             tabular += ' & ' + TABLE[dataset].latex(method)
         tabular += '\\\\\hline\n'

     tabular += "\end{tabularx}"

     save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)

-sys.exit(0)

-# gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
+gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()

 # Tables ranks for AE and RAE (two tables)
 # ----------------------------------------------------
-# for i, eval_func in enumerate(evaluation_measures):
-#     eval_name = eval_func.__name__
-#     methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
-#     table = """
-#     \\begin{table}[h]
-#     """
-#     if i == 0:
-#         caption = """
-#         \caption{Rank positions of the quantification methods in the AE
-#         experiments, and (between parentheses) the rank positions
-#         obtained in the evaluation of~\cite{Gao:2016uq}.}
-#         """
-#     else:
-#         caption = "\caption{Same as Table~\\ref{tab:maeranks}, but with " + nice[eval_name] + " instead of AE.}"
-#     table += caption + """
-#     \\begin{center}
-#     \\resizebox{\\textwidth}{!}{
-#     """
-#     tabular = """
-#     \\begin{tabularx}{\\textwidth}{|c||Y|Y|Y|Y|Y|Y|Y|Y|} \hline
-#     & \multicolumn{8}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
-#     """
-#
-#     for method in methods:
-#         tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
-#     tabular += '\\\\\hline\n'
-#
-#     for dataset in datasets:
-#         tabular += nice.get(dataset, dataset.upper()) + ' '
-#         ranks_no_gap = []
-#         for method in methods:
-#             learner = 'lr' if not method.startswith('svm') else 'svmperf'
-#             key = f'{dataset}-{method}-{learner}-{}-{eval_name}'
-#             ranks_no_gap.append(stats[dataset][eval_name].get(key, (None, None, len(methods)))[2])
-#         ranks_no_gap = sorted(ranks_no_gap)
-#         ranks_no_gap = {rank:i+1 for i,rank in enumerate(ranks_no_gap)}
-#         for method in methods:
-#             learner = 'lr' if not method.startswith('svm') else 'svmperf'
-#             key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}'
-#             if key in stats[dataset][eval_name]:
-#                 _, _, abs_rank = stats[dataset][eval_name][key]
-#                 real_rank = ranks_no_gap[abs_rank]
-#                 tabular += f' & {real_rank}'
-#                 tabular += color_from_abs_rank(real_rank, len(methods), maxtone=MAXTONE)
-#             else:
-#                 tabular += ' & --- '
-#             old_rank = gao_seb_ranks.get(f'{dataset}-{method}-{eval_name}', 'error')
-#             tabular += f' ({old_rank})'
-#         tabular += '\\\\\hline\n'
-#     tabular += "\end{tabularx}"
-#     table += tabular + """
-#     }
-#     \end{center}
-#     \label{tab:""" + eval_name + """ranks}
-#     \end{table}
-#     """
-#     save_table(f'../tables/tab_rank_{eval_name}.tex', table)
-#
-#
-# print("[Done]")
+for i, eval_func in enumerate(evaluation_measures):
+    eval_name = eval_func.__name__
+    methods = gao_seb_methods
+    nold_methods = len(gao_seb_methods)
+
+    TABLE = TABLES[eval_name]
+    TABLE['Average'] = ResultSet('ave', addrankave, show_std=False, test="ttest_ind_from_stats")
+    for method in methods:
+        TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets])
+
+    tabular = """
+    \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|' * len(gao_seb_methods)) + """} \hline
+      & \multicolumn{""" + str(nold_methods) + """}{c||}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
+    """
+
+    for method in methods:
+        tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
+    tabular += '\\\\\hline\n'
+
+    for dataset in datasets + ['Average']:
+        if dataset == 'Average':
+            tabular += '\line\n'
+        else:
+            TABLE[dataset].change_compare('rank')
+        tabular += nice.get(dataset, dataset.upper()) + ' '
+        for method in gao_seb_methods:
+            if dataset == 'Average':
+                method_rank = TABLE[dataset].get(method, 'mean')
+            else:
+                method_rank = TABLE[dataset].get(method, 'rank')
+            gao_seb_rank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}']
+            if dataset == 'Average':
+                if method_rank != '--':
+                    method_rank = f'{method_rank:.1f}'
+                gao_seb_rank = f'{gao_seb_rank:.1f}'
+            tabular += ' & ' + f'{method_rank}' + f' ({gao_seb_rank}) ' + TABLE[dataset].get_color(method)
+        tabular += '\\\\\hline\n'
+    tabular += "\end{tabularx}"

+    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
+
+
+print("[Done]")
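A worked example of the ranking convention used by get_ranks_from_Gao_Sebastiani above: lower error gets the better, 1-based rank, and ties are broken arbitrarily by argsort. The scores here are illustrative, echoing three of the SemEval13 AE values from the table added in this commit:

import numpy as np

methods = np.asarray(['cc', 'pcc', 'sld'])
scores = [0.0996, 0.0344, 0.1191]                  # AE values: lower is better
sorted_methods = methods[np.argsort(scores)]       # ['pcc', 'cc', 'sld']
ranks = {str(m): i + 1 for i, m in enumerate(sorted_methods)}
print(ranks)                                       # {'pcc': 1, 'cc': 2, 'sld': 3}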