import os
import pandas as pd
import numpy as np


class CSVlog:
    """Append-only log of experiment results backed by a tab-separated file.

    Each row describes one (method, language) evaluation. The rows are kept
    in ``self.df`` (a ``pandas.DataFrame``) and written to ``self.file`` by
    :meth:`flush`.
    """

    def __init__(self, file: str, autoflush: bool = True, verbose: bool = False) -> None:
        """Load the log stored at *file*, or start an empty one.

        Args:
            file: Path of the tab-separated results file.
            autoflush: When true, every :meth:`add_row` call rewrites *file*.
            verbose: When true, :meth:`tell` prints its messages.
        """
        self.file = file
        # Column order of the log; add_row() builds each row in this order.
        self.columns = [
            'method',
            'setting',
            'sif',
            'zscore',
            'l2',
            'dataset',
            'time_tr',
            'time_te',
            'lang',
            'macrof1',
            'microf1',
            'macrok',
            'microk',
            'notes',
        ]
        self.autoflush = autoflush
        self.verbose = verbose
        if os.path.exists(file):
            self.tell('Loading existing file from {}'.format(file))
            self.df = pd.read_csv(file, sep='\t')
        else:
            self.tell('File {} does not exist. Creating new frame.'.format(file))
            parent_dir = os.path.dirname(self.file)
            # dirname() is '' for a bare file name; makedirs('') would raise.
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            self.df = pd.DataFrame(columns=self.columns)

    def already_calculated(self, id) -> bool:
        """Return whether a row whose ``id`` column equals *id* is logged.

        The frames created by this class have no ``id`` column; in that case
        nothing can match, so the answer is ``False`` instead of a ``KeyError``.
        """
        if 'id' not in self.df.columns:
            return False
        return bool((self.df['id'] == id).any())

    def add_row(self, method, setting, sif, zscore, l2, dataset, time_tr, time_te, lang,
                macrof1, microf1, macrok=np.nan, microk=np.nan, notes='') -> None:
        """Append one result row and, if ``autoflush`` is set, write the file.

        The arguments map one-to-one, in order, onto ``self.columns``.
        ``macrok`` and ``microk`` default to NaN and ``notes`` to ``''``.
        """
        values = [method, setting, sif, zscore, l2, dataset, time_tr, time_te, lang,
                  macrof1, microf1, macrok, microk, notes]
        s = pd.Series(values, index=self.columns)
        new_row = pd.DataFrame([values], columns=self.columns)
        # DataFrame.append was removed in pandas 2.0; concat is the supported
        # equivalent. A zero-row frame is left out of the concat so that its
        # placeholder dtypes do not influence the result.
        frames = [new_row] if self.df.empty else [self.df, new_row]
        self.df = pd.concat(frames, ignore_index=True)
        if self.autoflush:
            self.flush()
        self.tell(s.to_string())

    def flush(self) -> None:
        """Write the whole frame to ``self.file`` as tab-separated text."""
        self.df.to_csv(self.file, index=False, sep='\t')

    def tell(self, msg) -> None:
        """Print *msg* when the log was created with ``verbose=True``."""
        if self.verbose:
            print(msg)