running comparison

This commit is contained in:
andrea 2021-02-11 17:29:29 +01:00
parent 7c8de936db
commit d5417691d5
4 changed files with 24 additions and 89 deletions

12
main.py
View File

@@ -25,12 +25,12 @@ def main(args):
lX, ly = data.training() lX, ly = data.training()
lXte, lyte = data.test() lXte, lyte = data.test()
# TODO: debug settings # # TODO: debug settings
print(f'\n[Running on DEBUG mode - samples per language are reduced to 50 max!]\n') # print(f'\n[Running on DEBUG mode - samples per language are reduced to 50 max!]\n')
lX = {k: v[:50] for k, v in lX.items()} # lX = {k: v[:50] for k, v in lX.items()}
ly = {k: v[:50] for k, v in ly.items()} # ly = {k: v[:50] for k, v in ly.items()}
lXte = {k: v[:50] for k, v in lXte.items()} # lXte = {k: v[:50] for k, v in lXte.items()}
lyte = {k: v[:50] for k, v in lyte.items()} # lyte = {k: v[:50] for k, v in lyte.items()}
# Init multilingualIndex - mandatory when deploying Neural View Generators... # Init multilingualIndex - mandatory when deploying Neural View Generators...

32
run.sh
View File

@@ -2,18 +2,22 @@
echo Running Zero-shot experiments [output at csv_logs/gfun/zero_shot_gfun.csv] echo Running Zero-shot experiments [output at csv_logs/gfun/zero_shot_gfun.csv]
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl --n_jobs 6 #python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt --n_jobs 6 #python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv --n_jobs 6 #python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da
#for i in {0..10..1} #python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de
#do #python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en
# python main.py --gpus 0 #python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es
#done #python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv

View File

@@ -60,52 +60,6 @@ class BertModel(pl.LightningModule):
self.log('train-macroK', macroK, on_step=False, on_epoch=True, prog_bar=False, logger=True) self.log('train-macroK', macroK, on_step=False, on_epoch=True, prog_bar=False, logger=True)
self.log('train-microK', microK, on_step=False, on_epoch=True, prog_bar=False, logger=True) self.log('train-microK', microK, on_step=False, on_epoch=True, prog_bar=False, logger=True)
return {'loss': loss} return {'loss': loss}
# lX, ly = self._reconstruct_dict(predictions, y, batch_langs)
# return {'loss': loss, 'pred': lX, 'target': ly}
"""
def training_epoch_end(self, outputs):
pass
langs = []
for output in outputs:
langs.extend(list(output['pred'].keys()))
langs = set(langs)
# outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
# here we save epoch level metric values and compute them specifically for each language
res_macroF1 = {lang: [] for lang in langs}
res_microF1 = {lang: [] for lang in langs}
res_macroK = {lang: [] for lang in langs}
res_microK = {lang: [] for lang in langs}
for output in outputs:
lX, ly = output['pred'], output['target']
for lang in lX.keys():
X, y = lX[lang], ly[lang]
lang_macroF1 = self.lang_macroF1(X, y)
lang_microF1 = self.lang_microF1(X, y)
lang_macroK = self.lang_macroK(X, y)
lang_microK = self.lang_microK(X, y)
res_macroF1[lang].append(lang_macroF1)
res_microF1[lang].append(lang_microF1)
res_macroK[lang].append(lang_macroK)
res_microK[lang].append(lang_microK)
for lang in langs:
avg_macroF1 = torch.mean(torch.Tensor(res_macroF1[lang]))
avg_microF1 = torch.mean(torch.Tensor(res_microF1[lang]))
avg_macroK = torch.mean(torch.Tensor(res_macroK[lang]))
avg_microK = torch.mean(torch.Tensor(res_microK[lang]))
self.logger.experiment.add_scalars('train-langs-macroF1', {f'{lang}': avg_macroF1}, self.current_epoch)
self.logger.experiment.add_scalars('train-langs-microF1', {f'{lang}': avg_microF1}, self.current_epoch)
self.logger.experiment.add_scalars('train-langs-macroK', {f'{lang}': avg_macroK}, self.current_epoch)
self.logger.experiment.add_scalars('train-langs-microK', {f'{lang}': avg_microK}, self.current_epoch)
if self.manual_log:
# Manual logging epoch loss
tr_epoch_loss = np.average([out['loss'].item() for out in outputs])
self.csv_metrics['tr_loss'].append(tr_epoch_loss)
self.save_manual_logs()
"""
def validation_step(self, val_batch, batch_idx): def validation_step(self, val_batch, batch_idx):
X, y, batch_langs = val_batch X, y, batch_langs = val_batch
@@ -123,22 +77,6 @@ class BertModel(pl.LightningModule):
self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True) self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
self.log('val-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True) self.log('val-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
return {'loss': loss} return {'loss': loss}
# return {'loss': loss, 'pred': predictions, 'target': y}
# def validation_epoch_end(self, outputs):
# all_pred = []
# all_tar = []
# for output in outputs:
# all_pred.append(output['pred'].cpu().numpy())
# all_tar.append(output['target'].cpu().numpy())
# all_pred = np.vstack(all_pred)
# all_tar = np.vstack(all_tar)
# all_pred = {'all': all_pred}
# all_tar = {'all': all_tar}
# res = evaluate(all_tar, all_pred)
# res = [elem for elem in res.values()]
# res = np.average(res, axis=0)
# print(f'\n{res}')
def test_step(self, test_batch, batch_idx): def test_step(self, test_batch, batch_idx):
X, y, batch_langs = test_batch X, y, batch_langs = test_batch

View File

@@ -369,13 +369,6 @@ class RecurrentGen(ViewGen):
callbacks=[self.early_stop_callback, self.lr_monitor], checkpoint_callback=False, callbacks=[self.early_stop_callback, self.lr_monitor], checkpoint_callback=False,
overfit_batches=0.01) overfit_batches=0.01)
# vanilla_torch_model = torch.load(
# '../_old_checkpoint/gru_viewgen_-rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle')
# self.model.linear0 = vanilla_torch_model.linear0
# self.model.linear1 = vanilla_torch_model.linear1
# self.model.linear2 = vanilla_torch_model.linear2
# self.model.rnn = vanilla_torch_model.rnn
if self.zero_shot: if self.zero_shot:
print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}') print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')