From d5417691d52f3f20fd31214fcbb99aa971e7ba82 Mon Sep 17 00:00:00 2001 From: andrea Date: Thu, 11 Feb 2021 17:29:29 +0100 Subject: [PATCH] running comparison --- main.py | 12 ++++---- run.sh | 32 ++++++++++++---------- src/models/pl_bert.py | 62 ------------------------------------------ src/view_generators.py | 7 ----- 4 files changed, 24 insertions(+), 89 deletions(-) diff --git a/main.py b/main.py index f98c430..534c432 100644 --- a/main.py +++ b/main.py @@ -25,12 +25,12 @@ def main(args): lX, ly = data.training() lXte, lyte = data.test() - # TODO: debug settings - print(f'\n[Running on DEBUG mode - samples per language are reduced to 50 max!]\n') - lX = {k: v[:50] for k, v in lX.items()} - ly = {k: v[:50] for k, v in ly.items()} - lXte = {k: v[:50] for k, v in lXte.items()} - lyte = {k: v[:50] for k, v in lyte.items()} + # # TODO: debug settings + # print(f'\n[Running on DEBUG mode - samples per language are reduced to 50 max!]\n') + # lX = {k: v[:50] for k, v in lX.items()} + # ly = {k: v[:50] for k, v in ly.items()} + # lXte = {k: v[:50] for k, v in lXte.items()} + # lyte = {k: v[:50] for k, v in lyte.items()} # Init multilingualIndex - mandatory when deploying Neural View Generators... diff --git a/run.sh b/run.sh index 9f24380..d570486 100644 --- a/run.sh +++ b/run.sh @@ -2,18 +2,22 @@ echo Running Zero-shot experiments [output at csv_logs/gfun/zero_shot_gfun.csv] -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt --n_jobs 6 -python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 10 --batch_bert 8 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv --n_jobs 6 +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it +#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv - -#for i in {0..10..1} -#do -# python main.py --gpus 0 -#done \ No newline at end of file +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it +#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl +python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt +python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv \ No newline at end of file diff --git a/src/models/pl_bert.py b/src/models/pl_bert.py index 37b3df4..51002b9 100644 --- a/src/models/pl_bert.py +++ b/src/models/pl_bert.py @@ -60,52 +60,6 @@ class BertModel(pl.LightningModule): self.log('train-macroK', macroK, on_step=False, on_epoch=True, prog_bar=False, logger=True) self.log('train-microK', microK, on_step=False, on_epoch=True, prog_bar=False, logger=True) return {'loss': loss} - # lX, ly = self._reconstruct_dict(predictions, y, batch_langs) - # return {'loss': loss, 'pred': lX, 'target': ly} - - """ - def training_epoch_end(self, outputs): - pass - - langs = [] - for output in outputs: - langs.extend(list(output['pred'].keys())) - langs = set(langs) - # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize. - # here we save epoch level metric values and compute them specifically for each language - res_macroF1 = {lang: [] for lang in langs} - res_microF1 = {lang: [] for lang in langs} - res_macroK = {lang: [] for lang in langs} - res_microK = {lang: [] for lang in langs} - for output in outputs: - lX, ly = output['pred'], output['target'] - for lang in lX.keys(): - X, y = lX[lang], ly[lang] - lang_macroF1 = self.lang_macroF1(X, y) - lang_microF1 = self.lang_microF1(X, y) - lang_macroK = self.lang_macroK(X, y) - lang_microK = self.lang_microK(X, y) - - res_macroF1[lang].append(lang_macroF1) - res_microF1[lang].append(lang_microF1) - res_macroK[lang].append(lang_macroK) - res_microK[lang].append(lang_microK) - for lang in langs: - avg_macroF1 = torch.mean(torch.Tensor(res_macroF1[lang])) - avg_microF1 = torch.mean(torch.Tensor(res_microF1[lang])) - avg_macroK = torch.mean(torch.Tensor(res_macroK[lang])) - avg_microK = torch.mean(torch.Tensor(res_microK[lang])) - self.logger.experiment.add_scalars('train-langs-macroF1', {f'{lang}': avg_macroF1}, self.current_epoch) - self.logger.experiment.add_scalars('train-langs-microF1', {f'{lang}': avg_microF1}, self.current_epoch) - self.logger.experiment.add_scalars('train-langs-macroK', {f'{lang}': avg_macroK}, self.current_epoch) - self.logger.experiment.add_scalars('train-langs-microK', {f'{lang}': avg_microK}, self.current_epoch) - - if self.manual_log: - # Manual logging epoch loss - tr_epoch_loss = np.average([out['loss'].item() for out in outputs]) - self.csv_metrics['tr_loss'].append(tr_epoch_loss) - self.save_manual_logs() - """ def validation_step(self, val_batch, batch_idx): X, y, batch_langs = val_batch @@ -123,22 +77,6 @@ class BertModel(pl.LightningModule): self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True) self.log('val-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True) return {'loss': loss} - # return {'loss': loss, 'pred': predictions, 'target': y} - - # def validation_epoch_end(self, outputs): - # all_pred = [] - # all_tar = [] - # for output in outputs: - # all_pred.append(output['pred'].cpu().numpy()) - # all_tar.append(output['target'].cpu().numpy()) - # all_pred = np.vstack(all_pred) - # all_tar = np.vstack(all_tar) - # all_pred = {'all': all_pred} - # all_tar = {'all': all_tar} - # res = evaluate(all_tar, all_pred) - # res = [elem for elem in res.values()] - # res = np.average(res, axis=0) - # print(f'\n{res}') def test_step(self, test_batch, batch_idx): X, y, batch_langs = test_batch diff --git a/src/view_generators.py b/src/view_generators.py index 9103f2b..d67d99b 100644 --- a/src/view_generators.py +++ b/src/view_generators.py @@ -369,13 +369,6 @@ class RecurrentGen(ViewGen): callbacks=[self.early_stop_callback, self.lr_monitor], checkpoint_callback=False, overfit_batches=0.01) - # vanilla_torch_model = torch.load( - # '../_old_checkpoint/gru_viewgen_-rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle') - # self.model.linear0 = vanilla_torch_model.linear0 - # self.model.linear1 = vanilla_torch_model.linear1 - # self.model.linear2 = vanilla_torch_model.linear2 - # self.model.rnn = vanilla_torch_model.rnn - if self.zero_shot: print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')