Implemented metrics logging

andrea 2021-01-21 15:41:56 +01:00
parent 5ce1203942
commit 472b64ee0e
4 changed files with 89 additions and 39 deletions

View File

@@ -108,6 +108,7 @@ class RecurrentDataModule(pl.LightningDataModule):
# Debug settings: reducing number of samples
# l_train_index = {l: train[:50] for l, train in l_train_index.items()}
# l_train_target = {l: target[:50] for l, target in l_train_target.items()}
self.training_dataset = RecurrentDataset(l_train_index, l_train_target,
lPad_index=self.multilingualIndex.l_pad())
@@ -115,6 +116,7 @@ class RecurrentDataModule(pl.LightningDataModule):
# Debug settings: reducing number of samples
# l_val_index = {l: val[:50] for l, val in l_val_index.items()}
# l_val_target = {l: target[:50] for l, target in l_val_target.items()}
self.val_dataset = RecurrentDataset(l_val_index, l_val_target,
lPad_index=self.multilingualIndex.l_pad())
if stage == 'test' or stage is None:
@@ -146,6 +148,7 @@ class BertDataModule(RecurrentDataModule):
# Debug settings: reducing number of samples
# l_train_raw = {l: train[:50] for l, train in l_train_raw.items()}
# l_train_target = {l: target[:50] for l, target in l_train_target.items()}
l_train_index = self.tokenize(l_train_raw, max_len=self.max_len)
self.training_dataset = RecurrentDataset(l_train_index, l_train_target,
lPad_index=self.multilingualIndex.l_pad())
@@ -154,6 +157,7 @@ class BertDataModule(RecurrentDataModule):
# Debug settings: reducing number of samples
# l_val_raw = {l: val[:50] for l, val in l_val_raw.items()}
# l_val_target = {l: target[:50] for l, target in l_val_target.items()}
l_val_index = self.tokenize(l_val_raw, max_len=self.max_len)
self.val_dataset = RecurrentDataset(l_val_index, l_val_target,
lPad_index=self.multilingualIndex.l_pad())
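Note: the `# Debug settings` comments above all follow the same pattern in every setup path: truncate each language's split to its first 50 samples so a full setup/fit cycle runs in seconds. A minimal sketch of the pattern, with hypothetical data:

# Hypothetical per-language splits: {language code: list of samples}.
l_train_index = {'it': list(range(1000)), 'fr': list(range(800))}
l_train_target = {'it': list(range(1000)), 'fr': list(range(800))}
# Debug subsetting: keep only the first 50 samples per language.
l_train_index = {l: idx[:50] for l, idx in l_train_index.items()}
l_train_target = {l: tgt[:50] for l, tgt in l_train_target.items()}
assert all(len(v) == 50 for v in l_train_index.values())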

View File

@@ -15,7 +15,7 @@ def main(args):
_DATASET = '/home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle'
EMBEDDINGS_PATH = '/home/andreapdr/gfun/embeddings'
data = MultilingualDataset.load(_DATASET)
data.set_view(languages=['it'], categories=[0, 1])
# data.set_view(languages=['it', 'fr'])
lX, ly = data.training()
lXte, lyte = data.test()
@@ -28,8 +28,8 @@ def main(args):
# gFun = VanillaFunGen(base_learner=get_learner(calibrate=True), n_jobs=N_JOBS)
# gFun = MuseGen(muse_dir='/home/andreapdr/funneling_pdr/embeddings', n_jobs=N_JOBS)
# gFun = WordClassGen(n_jobs=N_JOBS)
- gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=True, batch_size=128,
- nepochs=100, gpus=args.gpus, n_jobs=N_JOBS)
+ gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=False, batch_size=128,
+ nepochs=50, gpus=args.gpus, n_jobs=N_JOBS)
# gFun = BertGen(multilingualIndex, batch_size=4, nepochs=10, gpus=args.gpus, n_jobs=N_JOBS)
gFun.fit(lX, ly)

View File

@@ -6,21 +6,25 @@ from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from transformers import AdamW
import pytorch_lightning as pl
from pytorch_lightning.metrics import Accuracy
from models.helpers import init_embeddings
from util.pl_metrics import CustomF1, CustomK
from util.evaluation import evaluate
# TODO: it should also be possible to compute metrics independently for each language!
class RecurrentModel(pl.LightningModule):
"""
Check out for logging insight https://www.learnopencv.com/tensorboard-with-pytorch-lightning/
"""
def __init__(self, lPretrained, langs, output_size, hidden_size, lVocab_size, learnable_length,
drop_embedding_range, drop_embedding_prop, gpus=None):
"""
:param lPretrained:
:param langs:
:param output_size:
:param hidden_size:
:param lVocab_size:
:param learnable_length:
:param drop_embedding_range:
:param drop_embedding_prop:
:param gpus:
"""
super().__init__()
self.gpus = gpus
self.langs = langs
@@ -32,11 +36,16 @@ class RecurrentModel(pl.LightningModule):
self.drop_embedding_prop = drop_embedding_prop
self.loss = torch.nn.BCEWithLogitsLoss()
self.accuracy = Accuracy()
self.microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
# Language-specific metrics. TODO: check whether these must be separate instances
# or whether the global metric objects above could be reused (see the sketch below).
self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
self.lang_macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
self.lang_microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
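On the TODO above: metric objects in this API are stateful and accumulate counts across calls until compute()/reset(), so reusing the global microF1/macroF1 objects for per-language updates would mix state across languages. A minimal sketch of the difference, using the Accuracy metric imported above and assuming CustomF1/CustomK follow the same accumulate-then-compute contract:

import torch
from pytorch_lightning.metrics import Accuracy  # PL 1.x metrics API, as imported above

shared = Accuracy()                               # one stateful object for all calls
per_lang = {l: Accuracy() for l in ('it', 'fr')}  # one object per language
preds = {'it': torch.tensor([1, 0, 1]), 'fr': torch.tensor([0, 0, 1])}
target = {'it': torch.tensor([1, 1, 1]), 'fr': torch.tensor([0, 0, 0])}
for l in preds:
    shared(preds[l], target[l])       # 'it' and 'fr' counts end up mixed together
    per_lang[l](preds[l], target[l])  # counts stay separate per language
print(shared.compute())                               # accuracy over both languages
print({l: m.compute() for l, m in per_lang.items()})  # per-language accuracies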
self.lPretrained_embeddings = nn.ModuleDict()
self.lLearnable_embeddings = nn.ModuleDict()
@@ -103,22 +112,60 @@ class RecurrentModel(pl.LightningModule):
_ly = []
for lang in sorted(lX.keys()):
_ly.append(ly[lang])
- ly = torch.cat(_ly, dim=0)
- loss = self.loss(logits, ly)
+ y = torch.cat(_ly, dim=0)
+ loss = self.loss(logits, y)
# Squash the logits through a sigmoid to obtain confidence scores
predictions = torch.sigmoid(logits) > 0.5
- accuracy = self.accuracy(predictions, ly)
- microF1 = self.microF1(predictions, ly)
- macroF1 = self.macroF1(predictions, ly)
- microK = self.microK(predictions, ly)
- macroK = self.macroK(predictions, ly)
+ microF1 = self.microF1(predictions, y)
+ macroF1 = self.macroF1(predictions, y)
+ microK = self.microK(predictions, y)
+ macroK = self.macroK(predictions, y)
self.log('train-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
- self.log('train-accuracy', accuracy, on_step=True, on_epoch=True, prog_bar=False, logger=True)
self.log('train-macroF1', macroF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
self.log('train-microF1', microF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
self.log('train-macroK', macroK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
self.log('train-microK', microK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
- return {'loss': loss}
+ re_lX = self._reconstruct_dict(predictions, ly)
+ return {'loss': loss, 'pred': re_lX, 'target': ly}
def _reconstruct_dict(self, X, ly):
# Split the concatenated tensor X back into a per-language dict, assuming it was
# concatenated in sorted-language order (as done at the top of training_step).
reconstructed = {}
_start = 0
for lang in sorted(ly.keys()):
lang_batchsize = len(ly[lang])
reconstructed[lang] = X[_start:_start+lang_batchsize]
_start += lang_batchsize
return reconstructed
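_reconstruct_dict relies on the batch having been concatenated in sorted-language order, exactly as training_step does; a minimal round-trip check of that invariant (standalone re-implementation, hypothetical shapes):

import torch

def reconstruct_dict(X, ly):
    # Same slicing logic as RecurrentModel._reconstruct_dict.
    reconstructed, start = {}, 0
    for lang in sorted(ly.keys()):
        n = len(ly[lang])
        reconstructed[lang] = X[start:start + n]
        start += n
    return reconstructed

ly = {'fr': torch.zeros(3, 2), 'it': torch.ones(5, 2)}          # hypothetical targets
y = torch.cat([ly[lang] for lang in sorted(ly.keys())], dim=0)  # shape (8, 2)
out = reconstruct_dict(y, ly)
assert all(torch.equal(out[lang], ly[lang]) for lang in ly)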
def training_epoch_end(self, outputs):
# outputs is a list of n dicts, where n is the number of training steps in the epoch;
# each dict holds the step loss plus the per-language predictions and targets.
# Here we aggregate the epoch-level metric values, computing them independently for each language.
res_macroF1 = {lang: [] for lang in self.langs}
res_microF1 = {lang: [] for lang in self.langs}
res_macroK = {lang: [] for lang in self.langs}
res_microK = {lang: [] for lang in self.langs}
for output in outputs:
lX, ly = output['pred'], output['target']
for lang in lX.keys():
X, y = lX[lang], ly[lang]
lang_macroF1 = self.lang_macroF1(X, y)
lang_microF1 = self.lang_microF1(X, y)
lang_macroK = self.lang_macroK(X, y)
lang_microK = self.lang_microK(X, y)
res_macroF1[lang].append(lang_macroF1)
res_microF1[lang].append(lang_microF1)
res_macroK[lang].append(lang_macroK)
res_microK[lang].append(lang_microK)
for lang in self.langs:
avg_macroF1 = torch.mean(torch.Tensor(res_macroF1[lang]))
avg_microF1 = torch.mean(torch.Tensor(res_microF1[lang]))
avg_macroK = torch.mean(torch.Tensor(res_macroK[lang]))
avg_microK = torch.mean(torch.Tensor(res_microK[lang]))
self.logger.experiment.add_scalars('train-langs-macroF1', {f'{lang}': avg_macroF1}, self.current_epoch)
self.logger.experiment.add_scalars('train-langs-microF1', {f'{lang}': avg_microF1}, self.current_epoch)
self.logger.experiment.add_scalars('train-langs-macroK', {f'{lang}': avg_macroK}, self.current_epoch)
self.logger.experiment.add_scalars('train-langs-microK', {f'{lang}': avg_microK}, self.current_epoch)
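With the TensorBoard logger, self.logger.experiment is the underlying SummaryWriter, and add_scalars with a fixed main tag plus per-language sub-tags renders one chart with one curve per language. A minimal sketch outside Lightning (log directory and values hypothetical):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/demo')  # hypothetical log dir
for epoch, (f1_it, f1_fr) in enumerate([(0.60, 0.55), (0.71, 0.64)]):
    # One 'train-langs-macroF1' chart in TensorBoard, one curve per language.
    writer.add_scalars('train-langs-macroF1', {'it': f1_it, 'fr': f1_fr}, epoch)
writer.close()

Unlike self.log, add_scalars bypasses Lightning's own aggregation, which is why the epoch-level averaging is done by hand above.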
def validation_step(self, val_batch, batch_idx):
lX, ly = val_batch
@@ -129,13 +176,11 @@ class RecurrentModel(pl.LightningModule):
ly = torch.cat(_ly, dim=0)
loss = self.loss(logits, ly)
predictions = torch.sigmoid(logits) > 0.5
- accuracy = self.accuracy(predictions, ly)
microF1 = self.microF1(predictions, ly)
macroF1 = self.macroF1(predictions, ly)
microK = self.microK(predictions, ly)
macroK = self.macroK(predictions, ly)
self.log('val-loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
- self.log('val-accuracy', accuracy, on_step=False, on_epoch=True, prog_bar=False, logger=True)
self.log('val-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
self.log('val-microF1', microF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
@@ -150,12 +195,10 @@ class RecurrentModel(pl.LightningModule):
_ly.append(ly[lang])
ly = torch.cat(_ly, dim=0)
predictions = torch.sigmoid(logits) > 0.5
- accuracy = self.accuracy(predictions, ly)
microF1 = self.microF1(predictions, ly)
macroF1 = self.macroF1(predictions, ly)
- self.log('test-accuracy', accuracy, on_step=False, on_epoch=True, prog_bar=False, logger=True)
- self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
- self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+ self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=False)
+ self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=False)
return
def embed(self, X, lang):

View File

@@ -62,7 +62,8 @@ class MultilingualIndex:
for lang in self.langs:
# Init monolingual Index
- self.l_index[lang] = Index(l_devel_raw[lang], l_devel_target[lang], l_test_raw[lang], l_test_target[lang], lang)
+ self.l_index[lang] = Index(l_devel_raw[lang], l_devel_target[lang], l_test_raw[lang], l_test_target[lang],
+ lang)
# call to index() function of monolingual Index
self.l_index[lang].index(l_pretrained_vocabulary[lang], l_analyzer[lang], l_vocabulary[lang])
@@ -227,8 +228,10 @@ class Index:
# index documents and keep track of test terms outside the development vocabulary that are in Muse (if available)
self.out_of_vocabulary = dict()
- self.devel_index = index(self.devel_raw, self.word2index, known_words, analyzer, self.unk_index, self.out_of_vocabulary)
- self.test_index = index(self.test_raw, self.word2index, known_words, analyzer, self.unk_index, self.out_of_vocabulary)
+ self.devel_index = index(self.devel_raw, self.word2index, known_words, analyzer, self.unk_index,
+ self.out_of_vocabulary)
+ self.test_index = index(self.test_raw, self.word2index, known_words, analyzer, self.unk_index,
+ self.out_of_vocabulary)
self.vocabsize = len(self.word2index) + len(self.out_of_vocabulary)
@@ -248,7 +251,8 @@ class Index:
train_test_split(
devel, target, devel_raw, test_size=val_size, random_state=seed, shuffle=True)
- print(f'split lang {self.lang}: train={len(self.train_index)} val={len(self.val_index)} test={len(self.test_index)}')
+ print(
+ f'split lang {self.lang}: train={len(self.train_index)} val={len(self.val_index)} test={len(self.test_index)}')
def get_word_list(self):
def extract_word_list(word2index):
@@ -335,4 +339,3 @@ def is_true(tensor, device):
def is_false(tensor, device):
return torch.where(tensor == 0, torch.Tensor([1]).to(device), torch.Tensor([0]).to(device))
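is_false maps a binary tensor to a float indicator mask on the target device (and is_true presumably mirrors it with tensor == 1); a quick standalone check of the behavior shown above:

import torch

def is_false(tensor, device):
    # As above: 1.0 where the entry is 0, else 0.0.
    return torch.where(tensor == 0, torch.Tensor([1]).to(device), torch.Tensor([0]).to(device))

t = torch.tensor([1, 0, 1, 1])
print(is_false(t, 'cpu'))  # tensor([0., 1., 0., 0.])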