From 5793484f70fb26d23b8b8e57cd3456198d707150 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Wed, 13 Jan 2021 18:43:35 +0100 Subject: [PATCH] tables complete; refactoring needed urgently --- TweetSentQuant/Gao_Sebastiani_results.txt | 89 +++++++++ TweetSentQuant/result_manager.py | 82 ++++----- TweetSentQuant/tables.py | 215 +++++++++++++--------- 3 files changed, 260 insertions(+), 126 deletions(-) create mode 100644 TweetSentQuant/Gao_Sebastiani_results.txt diff --git a/TweetSentQuant/Gao_Sebastiani_results.txt b/TweetSentQuant/Gao_Sebastiani_results.txt new file mode 100644 index 0000000..de0e6dd --- /dev/null +++ b/TweetSentQuant/Gao_Sebastiani_results.txt @@ -0,0 +1,89 @@ + AE RAE +SemEval13 SVM-KLD 0.0722 0.1720 + SVM-NKLD 0.0714 0.2756 + SVM-QBETA2 0.0782 0.2775 + LR-CC 0.0996 0.3095 + LR-EM 0.1191 0.3923 + LR-PCC 0.0344 0.1506 + LR-ACC 0.0806 0.2479 + LR-PACC 0.0812 0.2626 +SemEval14 SVM-KLD 0.0843 0.2268 + SVM-NKLD 0.0836 0.3367 + SVM-QBETA2 0.1018 0.3680 + LR-CC 0.1043 0.3212 + LR-EM 0.0807 0.3517 + LR-PCC 0.1001 0.4277 + LR-ACC 0.0581 0.2360 + LR-PACC 0.0533 0.2573 +SemEval15 SVM-KLD 0.1185 0.3789 + SVM-NKLD 0.1155 0.4720 + SVM-QBETA2 0.1263 0.4762 + LR-CC 0.1101 0.2879 + LR-EM 0.1204 0.2949 + LR-PCC 0.0460 0.1973 + LR-ACC 0.1064 0.2971 + LR-PACC 0.1013 0.2729 +SemEval16 SVM-KLD 0.0385 0.1512 + SVM-NKLD 0.0830 0.3249 + SVM-QBETA2 0.1201 0.5156 + LR-CC 0.0500 0.1771 + LR-EM 0.0646 0.2126 + LR-PCC 0.0379 0.1553 + LR-ACC 0.0542 0.2246 + LR-PACC 0.0864 0.3504 +Sanders SVM-KLD 0.0134 0.0630 + SVM-NKLD 0.0950 0.3965 + SVM-QBETA2 0.1098 0.4360 + LR-CC 0.0671 0.2682 + LR-EM 0.0715 0.2849 + LR-PCC 0.0150 0.0602 + LR-ACC 0.0338 0.1306 + LR-PACC 0.0301 0.1173 +SST SVM-KLD 0.0413 0.1458 + SVM-NKLD 0.0749 0.2497 + SVM-QBETA2 0.0671 0.2343 + LR-CC 0.0330 0.1239 + LR-EM 0.0369 0.1190 + LR-PCC 0.0282 0.1068 + LR-ACC 0.0492 0.1689 + LR-PACC 0.0841 0.2302 +OMD SVM-KLD 0.0305 0.0999 + SVM-NKLD 0.0437 0.1279 + SVM-QBETA2 0.0624 0.1826 + LR-CC 0.0524 0.1527 + LR-EM 0.0648 0.1886 + LR-PCC 0.0046 0.0095 + LR-ACC 0.0239 0.0753 + LR-PACC 0.0100 0.0293 +HCR SVM-KLD 0.0414 0.2191 + SVM-NKLD 0.0604 0.2324 + SVM-QBETA2 0.1272 0.4600 + LR-CC 0.0525 0.1817 + LR-EM 0.0895 0.3093 + LR-PCC 0.0055 0.0202 + LR-ACC 0.0240 0.1026 + LR-PACC 0.0329 0.1436 +GASP SVM-KLD 0.0171 0.0529 + SVM-NKLD 0.0503 0.3416 + SVM-QBETA2 0.0640 0.4402 + LR-CC 0.0189 0.1297 + LR-EM 0.0231 0.1589 + LR-PCC 0.0097 0.0682 + LR-ACC 0.0150 0.1038 + LR-PACC 0.0087 0.0597 +WA SVM-KLD 0.0647 0.1957 + SVM-NKLD 0.0393 0.1357 + SVM-QBETA2 0.0798 0.2332 + LR-CC 0.0434 0.1270 + LR-EM 0.0391 0.1145 + LR-PCC 0.0338 0.0990 + LR-ACC 0.0407 0.1197 + LR-PACC 0.0277 0.0815 +WB SVM-KLD 0.0613 0.1791 + SVM-NKLD 0.0534 0.1756 + SVM-QBETA2 0.0249 0.0774 + LR-CC 0.0132 0.0399 + LR-EM 0.0244 0.0773 + LR-PCC 0.0123 0.0390 + LR-ACC 0.0230 0.0719 + LR-PACC 0.0165 0.0515 \ No newline at end of file diff --git a/TweetSentQuant/result_manager.py b/TweetSentQuant/result_manager.py index bfb3aae..0f3f35e 100644 --- a/TweetSentQuant/result_manager.py +++ b/TweetSentQuant/result_manager.py @@ -1,15 +1,7 @@ from scipy.stats import wilcoxon, ttest_ind_from_stats import numpy as np -""" -class Table: - def __init__(self): - self.tab = {} - - def add(self, col, key, x): - if col not in self.tab: - self.tab[col] = ResultSet(col) -""" + class ResultSet: VALID_TESTS = [None, "wilcoxon", "ttest_ind_from_stats"] @@ -18,7 +10,7 @@ class ResultSet: TTEST_SAME = 'same' def __init__(self, name, addfunc, compare='mean', lower_is_better=True, show_std=True, test="wilcoxon", - 
remove_mean='0.', prec_mean=3, remove_std='0.', prec_std=3, maxtone=100, minval=None, maxval=None): + remove_mean='', prec_mean=3, remove_std='', prec_std=3, maxtone=50, minval=None, maxval=None): """ :param name: name of the result set (e.g., a Dataset) @@ -65,13 +57,18 @@ class ResultSet: self.r[key]['nobs'] = len(vals) self.computed = False + def update(self): + if not self.computed: + self.compute() + def compute(self): keylist = np.asarray(list(self.r.keys())) vallist = [self.r[key][self.compare] for key in keylist] keylist = keylist[np.argsort(vallist)] - minval = min(vallist) if self.minval is None else self.minval - maxval = max(vallist) if self.maxval is None else self.maxval + print(vallist) + self.range_minval = min(vallist) if self.minval is None else self.minval + self.range_maxval = max(vallist) if self.maxval is None else self.maxval if not self.lower_is_better: keylist = keylist[::-1] @@ -88,10 +85,7 @@ class ResultSet: #color val = self.r[key][self.compare] - val = (val-minval)/(maxval-minval) - if self.lower_is_better: - val = 1-val - self.r[key]['color'] = color_red2green_01(val, self.maxtone) + self.r[key]['color'] = self.get_value_color(val, minval=self.range_minval, maxval=self.range_maxval) if self.test is not None: if isbest: @@ -115,11 +109,11 @@ class ResultSet: self.computed = True def latex(self, key, missing='--', color=True): + if key not in self.r: return missing - if not self.computed: - self.compute() + self.update() rd = self.r[key] s = f"{rd['mean']:.{self.prec_mean}f}" @@ -148,29 +142,52 @@ class ResultSet: return s - def mean(self, attr='mean', required:int=None): + def mean(self, attr='mean', required:int=None, missing=np.nan): """ - returns the mean value for the "key" attribute + returns the mean value for the "attr" attribute :param attr: the attribute to average across results :param required: if specified, indicates the number of values that should be part of the mean; if this number is different, then the mean is not computed + :param missing: the value to return in case the required condition is not satisfied :return: the mean of the "key" attribute """ keylist = list(self.r.keys()) vallist = [self.r[key].get(attr, None) for key in keylist] if None in vallist: - return None + return missing if required is not None: if len(vallist) != required: - return None + return missing return np.mean(vallist) def get(self, key, attr, missing='--'): if key in self.r: + self.update() if attr in self.r[key]: return self.r[key][attr] return missing + def get_color(self, key): + if key not in self.r: + return '' + self.update() + return self.r[key]['color'] + + def get_value_color(self, val, minval=None, maxval=None): + if minval is None or maxval is None: + self.update() + minval=self.range_minval + maxval=self.range_maxval + val = (val - minval) / (maxval - minval) + if self.lower_is_better: + val = 1 - val + return color_red2green_01(val, self.maxtone) + + def change_compare(self, attr): + self.compare = attr + self.computed = False + + def color_red2green_01(val, maxtone=100): assert 0 <= val <= 1, f'val {val} out of range [0,1]' @@ -185,24 +202,3 @@ def color_red2green_01(val, maxtone=100): tone = maxtone * val return '\cellcolor{' + color + f'!{int(tone)}' + '}' - -def add(x): - r = np.random.rand(100)/2+x - return { - 'values': r - } - -""" -r = ResultSet('dataset1', addfunc=add, show_std=False, minval=0, maxval=1) -for x in range(10): - r.add(f'a{x}', np.random.randint(0,5) / 10) - -print(r.name) -for x in range(10): - key = f'a{x}' - print(r.latex(key), 
r.get(key, 'rank')) - -print('----') -print(f'ave: {r.mean():.3f}') -print(f'averank: {r.mean("rank"):.3f}') -""" \ No newline at end of file diff --git a/TweetSentQuant/tables.py b/TweetSentQuant/tables.py index a8f2b3c..38e5481 100644 --- a/TweetSentQuant/tables.py +++ b/TweetSentQuant/tables.py @@ -1,4 +1,5 @@ import quapy as qp +import numpy as np from os import makedirs # from evaluate import evaluate_directory, statistical_significance, get_ranks_from_Gao_Sebastiani import sys, os @@ -16,15 +17,6 @@ sample_size = 100 qp.environ['SAMPLE_SIZE'] = sample_size - -# results_dict = evaluate_directory('results/*.pkl', evaluation_measures) -# stats = { -# dataset : { -# 'mae': statistical_significance(f'results/{dataset}-*-mae-run?.pkl', ae), -# 'mrae': statistical_significance(f'results/{dataset}-*-mrae-run?.pkl', rae), -# } for dataset in datasets -# } - nice = { 'mae':'AE', 'mrae':'RAE', @@ -45,7 +37,8 @@ nice = { 'semeval13': 'SemEval13', 'semeval14': 'SemEval14', 'semeval15': 'SemEval15', - 'semeval16': 'SemEval16' + 'semeval16': 'SemEval16', + 'Average': 'Average' } @@ -68,6 +61,51 @@ def color_from_abs_rank(abs_rank, n_methods, maxtone=100): return color_from_rel_rank(rel_rank, maxtone) +def load_Gao_Sebastiani_previous_results(): + def rename(method): + old2new = { + 'kld': 'svmkld', + 'nkld': 'svmnkld', + 'qbeta2': 'svmq', + 'em': 'sld' + } + return old2new.get(method, method) + + gao_seb_results = {} + with open('./Gao_Sebastiani_results.txt', 'rt') as fin: + lines = fin.readlines() + for line in lines[1:]: + line = line.strip() + parts = line.lower().split() + if len(parts) == 4: + dataset, method, ae, rae = parts + else: + method, ae, rae = parts + learner, method = method.split('-') + method = rename(method) + gao_seb_results[f'{dataset}-{method}-ae'] = float(ae) + gao_seb_results[f'{dataset}-{method}-rae'] = float(rae) + return gao_seb_results + + +def get_ranks_from_Gao_Sebastiani(): + gao_seb_results = load_Gao_Sebastiani_previous_results() + datasets = set([key.split('-')[0] for key in gao_seb_results.keys()]) + methods = np.sort(np.unique([key.split('-')[1] for key in gao_seb_results.keys()])) + ranks = {} + for metric in ['ae', 'rae']: + for dataset in datasets: + scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods] + order = np.argsort(scores) + sorted_methods = methods[order] + for i, method in enumerate(sorted_methods): + ranks[f'{dataset}-{method}-{metric}'] = i+1 + for method in methods: + rankave = np.mean([ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets]) + ranks[f'Average-{method}-{metric}'] = rankave + return ranks, gao_seb_results + + def save_table(path, table): print(f'saving results in {path}') with open(path, 'wt') as foo: @@ -77,14 +115,12 @@ def save_table(path, table): # Tables evaluation scores for AE and RAE (two tables) # ---------------------------------------------------- - - datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST evaluation_measures = [qp.error.ae, qp.error.rae] -gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld'] +gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld'] +new_methods = [] + -results_dict = {} -stats={} def addfunc(dataset, method, loss): path = result_path(dataset, method, 'm'+loss if not loss.startswith('m') else loss) if os.path.exists(path): @@ -96,103 +132,116 @@ def addfunc(dataset, method, loss): } return None +def addave(method, tables): + values = [] + for table in tables: + mean = table.get(method, 'values', 
missing=None) + if mean is None: + return None + values.append(mean) + values = np.concatenate(values) + return { + 'values': values + } + +def addrankave(method, tables): + values = [] + for table in tables: + rank = table.get(method, 'rank', missing=None) + if rank is None: + return None + values.append(rank) + return { + 'values': np.asarray(values) + } + + +TABLES = {eval_func.__name__:{} for eval_func in evaluation_measures} for i, eval_func in enumerate(evaluation_measures): eval_name = eval_func.__name__ - added_methods = ['svm' + eval_name] # , 'quanet', 'dys'] + added_methods = ['svm' + eval_name] + new_methods methods = gao_seb_methods + added_methods nold_methods = len(gao_seb_methods) nnew_methods = len(added_methods) # fill table - TABLE = {} + TABLE = TABLES[eval_name] for dataset in datasets: - TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats", maxtone=50, - remove_mean='0.' if eval_func == qp.error.ae else '') + TABLE[dataset] = ResultSet(dataset, addfunc, show_std=False, test="ttest_ind_from_stats") for method in methods: TABLE[dataset].add(method, dataset, method, eval_name) + TABLE['Average'] = ResultSet('ave', addave, show_std=False, test="ttest_ind_from_stats") + for method in methods: + TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets]) + tabular = """ \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*len(gao_seb_methods))+ '|' + ('Y|'*len(added_methods)) + """} \hline - & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c||}{} \\\\ \hline + & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline """ for method in methods: tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' tabular += '\\\\\hline\n' - for dataset in datasets: + for dataset in datasets + ['Average']: + if dataset == 'Average': tabular+= '\line\n' tabular += nice.get(dataset, dataset.upper()) + ' ' for method in methods: tabular += ' & ' + TABLE[dataset].latex(method) tabular += '\\\\\hline\n' + tabular += "\end{tabularx}" save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular) -sys.exit(0) -# gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani() +gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani() # Tables ranks for AE and RAE (two tables) # ---------------------------------------------------- -# for i, eval_func in enumerate(evaluation_measures): -# eval_name = eval_func.__name__ -# methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld'] -# table = """ -# \\begin{table}[h] -# """ -# if i == 0: -# caption = """ -# \caption{Rank positions of the quantification methods in the AE -# experiments, and (between parentheses) the rank positions -# obtained in the evaluation of~\cite{Gao:2016uq}.} -# """ -# else: -# caption = "\caption{Same as Table~\\ref{tab:maeranks}, but with " + nice[eval_name] + " instead of AE.}" -# table += caption + """ -# \\begin{center} -# \\resizebox{\\textwidth}{!}{ -# """ -# tabular = """ -# \\begin{tabularx}{\\textwidth}{|c||Y|Y|Y|Y|Y|Y|Y|Y|} \hline -# & \multicolumn{8}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline -# """ -# -# for method in methods: -# tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' -# tabular += '\\\\\hline\n' -# -# for dataset in datasets: -# tabular += nice.get(dataset, dataset.upper()) + ' ' 
-# ranks_no_gap = [] -# for method in methods: -# learner = 'lr' if not method.startswith('svm') else 'svmperf' -# key = f'{dataset}-{method}-{learner}-{}-{eval_name}' -# ranks_no_gap.append(stats[dataset][eval_name].get(key, (None, None, len(methods)))[2]) -# ranks_no_gap = sorted(ranks_no_gap) -# ranks_no_gap = {rank:i+1 for i,rank in enumerate(ranks_no_gap)} -# for method in methods: -# learner = 'lr' if not method.startswith('svm') else 'svmperf' -# key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}' -# if key in stats[dataset][eval_name]: -# _, _, abs_rank = stats[dataset][eval_name][key] -# real_rank = ranks_no_gap[abs_rank] -# tabular += f' & {real_rank}' -# tabular += color_from_abs_rank(real_rank, len(methods), maxtone=MAXTONE) -# else: -# tabular += ' & --- ' -# old_rank = gao_seb_ranks.get(f'{dataset}-{method}-{eval_name}', 'error') -# tabular += f' ({old_rank})' -# tabular += '\\\\\hline\n' -# tabular += "\end{tabularx}" -# table += tabular + """ -# } -# \end{center} -# \label{tab:""" + eval_name + """ranks} -# \end{table} -# """ -# save_table(f'../tables/tab_rank_{eval_name}.tex', table) -# -# -# print("[Done]") \ No newline at end of file +for i, eval_func in enumerate(evaluation_measures): + eval_name = eval_func.__name__ + methods = gao_seb_methods + nold_methods = len(gao_seb_methods) + + TABLE = TABLES[eval_name] + TABLE['Average'] = ResultSet('ave', addrankave, show_std=False, test="ttest_ind_from_stats") + for method in methods: + TABLE['Average'].add(method, method, [TABLE[dataset] for dataset in datasets]) + + + tabular = """ + \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|' * len(gao_seb_methods)) + """} \hline + & \multicolumn{""" + str(nold_methods) + """}{c||}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline + """ + + for method in methods: + tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} ' + tabular += '\\\\\hline\n' + + for dataset in datasets + ['Average']: + if dataset == 'Average': + tabular += '\line\n' + else: + TABLE[dataset].change_compare('rank') + tabular += nice.get(dataset, dataset.upper()) + ' ' + for method in gao_seb_methods: + if dataset == 'Average': + method_rank = TABLE[dataset].get(method, 'mean') + else: + method_rank = TABLE[dataset].get(method, 'rank') + gao_seb_rank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}'] + if dataset == 'Average': + if method_rank != '--': + method_rank = f'{method_rank:.1f}' + gao_seb_rank = f'{gao_seb_rank:.1f}' + tabular += ' & ' + f'{method_rank}' + f' ({gao_seb_rank}) ' + TABLE[dataset].get_color(method) + tabular += '\\\\\hline\n' + tabular += "\end{tabularx}" + + save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular) + + +print("[Done]") \ No newline at end of file
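
Below is a minimal usage sketch of the ResultSet API that this patch refactors, adapted from the commented-out demo that the patch deletes from result_manager.py. The diff context does not show ResultSet.add itself, so the sketch assumes add(key, *args) forwards *args to the addfunc callback and stores the returned {'values': ...} dict under that key (as the calls in tables.py suggest); the import path and the toy key/offset names are likewise assumptions, not part of the patch.

import numpy as np
from TweetSentQuant.result_manager import ResultSet

def addfunc(offset):
    # the callback must return a dict holding a 'values' array, or None when
    # the result is missing (compare addfunc/addave/addrankave in tables.py)
    return {'values': np.random.rand(100) / 2 + offset}

# minval/maxval fix the color range explicitly, as in the removed demo
table = ResultSet('toy-dataset', addfunc, show_std=False, minval=0, maxval=1)
for i in range(5):
    table.add(f'method{i}', np.random.randint(0, 5) / 10)  # key first, then the addfunc args

for i in range(5):
    key = f'method{i}'
    # latex() lazily triggers compute() via update(); get() exposes derived
    # per-key attributes such as 'rank'
    print(table.latex(key), table.get(key, 'rank'))

print(f"ave: {table.mean():.3f}")            # mean of the 'mean' attribute across keys
print(f"averank: {table.mean('rank'):.3f}")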