Implemented inference functions for bert (cpu and gpu)
parent 01bd85d156
commit 6e0b66e13e
@@ -105,16 +105,16 @@ class RecurrentDataModule(pl.LightningDataModule):
         if stage == 'fit' or stage is None:
             l_train_index, l_train_target = self.multilingualIndex.l_train()
             # Debug settings: reducing number of samples
-            # l_train_index = {l: train[:50] for l, train in l_train_index.items()}
-            # l_train_target = {l: target[:50] for l, target in l_train_target.items()}
+            l_train_index = {l: train[:50] for l, train in l_train_index.items()}
+            l_train_target = {l: target[:50] for l, target in l_train_target.items()}

             self.training_dataset = RecurrentDataset(l_train_index, l_train_target,
                                                      lPad_index=self.multilingualIndex.l_pad())

             l_val_index, l_val_target = self.multilingualIndex.l_val()
             # Debug settings: reducing number of samples
-            # l_val_index = {l: train[:50] for l, train in l_val_index.items()}
-            # l_val_target = {l: target[:50] for l, target in l_val_target.items()}
+            l_val_index = {l: train[:50] for l, train in l_val_index.items()}
+            l_val_target = {l: target[:50] for l, target in l_val_target.items()}

             self.val_dataset = RecurrentDataset(l_val_index, l_val_target,
                                                 lPad_index=self.multilingualIndex.l_pad())
@@ -163,7 +163,7 @@ class BertDataModule(RecurrentDataModule):

         if stage == 'test' or stage is None:
             l_test_raw, l_test_target = self.multilingualIndex.l_test_raw()
-            l_test_index = self.tokenize(l_val_raw, max_len=self.max_len)
+            l_test_index = self.tokenize(l_test_raw, max_len=self.max_len)
             self.test_dataset = RecurrentDataset(l_test_index, l_test_target,
                                                  lPad_index=self.multilingualIndex.l_pad())
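Note on the fix above: the old line tokenized l_val_raw inside the test branch, a copy-paste slip from the validation setup; it now tokenizes l_test_raw. The tokenize helper itself lives elsewhere in BertDataModule; below is a minimal sketch of what such a helper could look like, assuming it wraps the Hugging Face multilingual BERT tokenizer (the model name and signature are assumptions, not the repo's actual code):

    from transformers import BertTokenizer

    def tokenize(l_raw, max_len):
        # Map {lang: [doc, ...]} raw text to {lang: [input_ids, ...]},
        # truncated/padded to max_len (assumed behaviour of the real helper).
        tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
        return {lang: [tokenizer.encode(doc, truncation=True, max_length=max_len,
                                        padding='max_length')
                       for doc in docs]
                for lang, docs in l_raw.items()}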
@@ -28,15 +28,16 @@ def main(args):
     # gFun = VanillaFunGen(base_learner=get_learner(calibrate=True), n_jobs=N_JOBS)
     # gFun = MuseGen(muse_dir='/home/andreapdr/funneling_pdr/embeddings', n_jobs=N_JOBS)
     # gFun = WordClassGen(n_jobs=N_JOBS)
-    gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=False, batch_size=256,
-                        nepochs=50, gpus=args.gpus, n_jobs=N_JOBS)
-    # gFun = BertGen(multilingualIndex, batch_size=4, nepochs=10, gpus=args.gpus, n_jobs=N_JOBS)
+    # gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=False, batch_size=256,
+    #                     nepochs=50, gpus=args.gpus, n_jobs=N_JOBS)
+    gFun = BertGen(multilingualIndex, batch_size=4, nepochs=1, gpus=args.gpus, n_jobs=N_JOBS)

     time_init = time()
-    # gFun.fit(lX, ly)
+    gFun.fit(lX, ly)

+    # print('Projecting...')
+    # y_ = gFun.transform(lX)

-    print('Projecting...')
-    y_ = gFun.transform(lX)
     train_time = round(time() - time_init, 3)
     exit(f'Executed! Training time: {train_time}!')
@@ -2,23 +2,31 @@ import torch
 import pytorch_lightning as pl
 from torch.optim.lr_scheduler import StepLR
 from transformers import BertForSequenceClassification, AdamW
-from pytorch_lightning.metrics import Accuracy
-from util.pl_metrics import CustomF1
+from util.pl_metrics import CustomF1, CustomK


 class BertModel(pl.LightningModule):

     def __init__(self, output_size, stored_path, gpus=None):
+        """
+        Init Bert model.
+        :param output_size:
+        :param stored_path:
+        :param gpus:
+        """
         super().__init__()
         self.loss = torch.nn.BCEWithLogitsLoss()
         self.gpus = gpus
-        self.accuracy = Accuracy()
-        self.microF1_tr = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
-        self.macroF1_tr = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
-        self.microF1_va = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
-        self.macroF1_va = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
-        self.microF1_te = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
-        self.macroF1_te = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
+        self.microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
+        self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
+        self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
+        self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
+        # Language-specific metrics. I am not sure whether they should be initialized
+        # independently or whether the metrics initialized above can be reused. # TODO: check it
+        self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
+        self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
+        self.lang_macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
+        self.lang_microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)

         if stored_path:
             self.bert = BertForSequenceClassification.from_pretrained(stored_path,
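One thing worth keeping in mind about the metric objects above: Lightning metric instances are stateful and accumulate counts across update calls, which is why the previous version kept separate _tr/_va/_te instances per phase. A minimal sketch of that pattern (using the Accuracy class the old code imported; the repo's CustomF1/CustomK presumably follow the same protocol, which is an assumption):

    import torch
    from pytorch_lightning.metrics import Accuracy  # torchmetrics.Accuracy in newer stacks

    train_acc, val_acc = Accuracy(), Accuracy()  # one instance per phase keeps state separate
    preds, target = torch.tensor([1, 0, 1]), torch.tensor([1, 1, 1])
    train_acc(preds, target)         # update with a batch and get the batch value
    epoch_acc = train_acc.compute()  # value aggregated over all updates so far
    train_acc.reset()                # clear accumulated state between epochs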
@@ -37,51 +45,111 @@ class BertModel(pl.LightningModule):
     def training_step(self, train_batch, batch_idx):
         X, y, _, batch_langs = train_batch
         X = torch.cat(X).view([X[0].shape[0], len(X)])
-        y = y.type(torch.cuda.FloatTensor)
+        # y = y.type(torch.cuda.FloatTensor)
+        y = y.type(torch.FloatTensor)
+        y = y.to('cuda' if self.gpus else 'cpu')
         logits, _ = self.forward(X)
         loss = self.loss(logits, y)
         # Squashing logits through a sigmoid in order to get confidence scores
         predictions = torch.sigmoid(logits) > 0.5
-        accuracy = self.accuracy(predictions, y)
-        microF1 = self.microF1_tr(predictions, y)
-        macroF1 = self.macroF1_tr(predictions, y)
+        microF1 = self.microF1(predictions, y)
+        macroF1 = self.macroF1(predictions, y)
+        microK = self.microK(predictions, y)
+        macroK = self.macroK(predictions, y)
         self.log('train-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('train-accuracy', accuracy, on_step=True, on_epoch=True, prog_bar=False, logger=True)
         self.log('train-macroF1', macroF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
         self.log('train-microF1', microF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        return {'loss': loss}
+        self.log('train-macroK', macroK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        self.log('train-microK', microK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        lX, ly = self._reconstruct_dict(predictions, y, batch_langs)
+        return {'loss': loss, 'pred': lX, 'target': ly}

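The device handling above relies on Tensor.to being out-of-place: it returns a moved copy and leaves the original tensor where it was, so the result has to be rebound to the name. A minimal illustration:

    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    y = torch.zeros(4)
    y.to(device)      # out-of-place: the moved copy is discarded, y is unchanged
    y = y.to(device)  # rebinding the name is what actually moves the data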
+    def _reconstruct_dict(self, predictions, y, batch_langs):
+        reconstructed_x = {lang: [] for lang in set(batch_langs)}
+        reconstructed_y = {lang: [] for lang in set(batch_langs)}
+        for i, pred in enumerate(predictions):
+            reconstructed_x[batch_langs[i]].append(pred)
+            reconstructed_y[batch_langs[i]].append(y[i])
+        for k, v in reconstructed_x.items():
+            reconstructed_x[k] = torch.cat(v).view(-1, predictions.shape[1])
+        for k, v in reconstructed_y.items():
+            reconstructed_y[k] = torch.cat(v).view(-1, predictions.shape[1])
+        return reconstructed_x, reconstructed_y
+
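To make _reconstruct_dict concrete, here is a toy run with hypothetical values for a two-language multilabel batch (same regrouping logic, inlined):

    import torch

    predictions = torch.tensor([[1., 0.], [0., 1.], [1., 1.]])
    y = torch.tensor([[1., 0.], [0., 1.], [1., 0.]])
    batch_langs = ['en', 'it', 'en']

    lX = {lang: [] for lang in set(batch_langs)}
    for i, pred in enumerate(predictions):
        lX[batch_langs[i]].append(pred)
    lX = {k: torch.cat(v).view(-1, predictions.shape[1]) for k, v in lX.items()}
    # lX['en'] -> tensor([[1., 0.], [1., 1.]]); lX['it'] -> tensor([[0., 1.]])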
+    def training_epoch_end(self, outputs):
+        langs = []
+        for output in outputs:
+            langs.extend(list(output['pred'].keys()))
+        langs = set(langs)
+        # outputs is a list of n dicts, where n is the number of epoch steps and each dict
+        # holds one batch of per-language predictions and targets.
+        # Here we save epoch-level metric values, computing them separately for each language.
+        # TODO: this is horrible...
+        res_macroF1 = {lang: [] for lang in langs}
+        res_microF1 = {lang: [] for lang in langs}
+        res_macroK = {lang: [] for lang in langs}
+        res_microK = {lang: [] for lang in langs}
+        for output in outputs:
+            lX, ly = output['pred'], output['target']
+            for lang in lX.keys():
+                X, y = lX[lang], ly[lang]
+                lang_macroF1 = self.lang_macroF1(X, y)
+                lang_microF1 = self.lang_microF1(X, y)
+                lang_macroK = self.lang_macroK(X, y)
+                lang_microK = self.lang_microK(X, y)
+
+                res_macroF1[lang].append(lang_macroF1)
+                res_microF1[lang].append(lang_microF1)
+                res_macroK[lang].append(lang_macroK)
+                res_microK[lang].append(lang_microK)
+        for lang in langs:
+            avg_macroF1 = torch.mean(torch.Tensor(res_macroF1[lang]))
+            avg_microF1 = torch.mean(torch.Tensor(res_microF1[lang]))
+            avg_macroK = torch.mean(torch.Tensor(res_macroK[lang]))
+            avg_microK = torch.mean(torch.Tensor(res_microK[lang]))
+            self.logger.experiment.add_scalars('train-langs-macroF1', {f'{lang}': avg_macroF1}, self.current_epoch)
+            self.logger.experiment.add_scalars('train-langs-microF1', {f'{lang}': avg_microF1}, self.current_epoch)
+            self.logger.experiment.add_scalars('train-langs-macroK', {f'{lang}': avg_macroK}, self.current_epoch)
+            self.logger.experiment.add_scalars('train-langs-microK', {f'{lang}': avg_microK}, self.current_epoch)
+
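A caveat about the per-language averaging above: the mean of per-batch F1 scores is in general not the F1 computed over the pooled epoch predictions, so these curves are approximations (the metric objects' own epoch-level compute/reset cycle is the usual way to get exact values). A small numeric check:

    # Batch 1: 1 TP, 1 FP, 0 FN -> F1 = 2/3; Batch 2: 1 TP, 0 FP, 0 FN -> F1 = 1.0
    mean_of_batches = (2 / 3 + 1.0) / 2  # ~0.833
    pooled = 2 * 2 / (2 * 2 + 1 + 0)     # 2 TP, 1 FP, 0 FN -> 0.8
    assert round(mean_of_batches, 3) != round(pooled, 3)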
     def validation_step(self, val_batch, batch_idx):
         X, y, _, batch_langs = val_batch
         X = torch.cat(X).view([X[0].shape[0], len(X)])
-        y = y.type(torch.cuda.FloatTensor)
+        # y = y.type(torch.cuda.FloatTensor)
+        y = y.type(torch.FloatTensor)
+        y = y.to('cuda' if self.gpus else 'cpu')
         logits, _ = self.forward(X)
         loss = self.loss(logits, y)
         predictions = torch.sigmoid(logits) > 0.5
-        accuracy = self.accuracy(predictions, y)
-        microF1 = self.microF1_va(predictions, y)
-        macroF1 = self.macroF1_va(predictions, y)
-        self.log('val-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('val-accuracy', accuracy, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        microF1 = self.microF1(predictions, y)
+        macroF1 = self.macroF1(predictions, y)
+        microK = self.microK(predictions, y)
+        macroK = self.macroK(predictions, y)
+        self.log('val-loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
         self.log('val-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
         self.log('val-microF1', microF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('val-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
         return {'loss': loss}

-    # def test_step(self, test_batch, batch_idx):
-    #     lX, ly = test_batch
-    #     logits = self.forward(lX)
-    #     _ly = []
-    #     for lang in sorted(lX.keys()):
-    #         _ly.append(ly[lang])
-    #     ly = torch.cat(_ly, dim=0)
-    #     predictions = torch.sigmoid(logits) > 0.5
-    #     accuracy = self.accuracy(predictions, ly)
-    #     microF1 = self.microF1_te(predictions, ly)
-    #     macroF1 = self.macroF1_te(predictions, ly)
-    #     self.log('test-accuracy', accuracy, on_step=False, on_epoch=True, prog_bar=False, logger=True)
-    #     self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
-    #     self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
-    #     return
+    def test_step(self, test_batch, batch_idx):
+        X, y, _, batch_langs = test_batch
+        X = torch.cat(X).view([X[0].shape[0], len(X)])
+        # y = y.type(torch.cuda.FloatTensor)
+        y = y.type(torch.FloatTensor)
+        y = y.to('cuda' if self.gpus else 'cpu')
+        logits, _ = self.forward(X)
+        loss = self.loss(logits, y)
+        # Squashing logits through a sigmoid in order to get confidence scores
+        predictions = torch.sigmoid(logits) > 0.5
+        microF1 = self.microF1(predictions, y)
+        macroF1 = self.macroF1(predictions, y)
+        microK = self.microK(predictions, y)
+        macroK = self.macroK(predictions, y)
+        self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+        self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+        self.log('test-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('test-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        return

     def configure_optimizers(self, lr=3e-5, weight_decay=0.01):
         no_decay = ['bias', 'LayerNorm.weight']
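The hunk is truncated here; for context, no_decay lists the parameter names conventionally exempted from weight decay in the standard Hugging Face AdamW recipe. A sketch of that common idiom (the actual body of configure_optimizers in this repo may differ):

    from transformers import AdamW

    def configure_optimizers(self, lr=3e-5, weight_decay=0.01):
        no_decay = ['bias', 'LayerNorm.weight']
        grouped_params = [
            {'params': [p for n, p in self.bert.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': weight_decay},
            {'params': [p for n, p in self.bert.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        return AdamW(grouped_params, lr=lr)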
@@ -15,7 +15,7 @@ class RecurrentModel(pl.LightningModule):
     def __init__(self, lPretrained, langs, output_size, hidden_size, lVocab_size, learnable_length,
                  drop_embedding_range, drop_embedding_prop, gpus=None):
         """
+        Init RNN model.
         :param lPretrained:
         :param langs:
         :param output_size:
@@ -161,6 +161,9 @@ class MultilingualIndex:
     def l_val_raw_index(self):
         return {l: index.val_raw for l, index in self.l_index.items()}

+    def l_test_raw_index(self):
+        return {l: index.test_raw for l, index in self.l_index.items()}
+
     def l_val_target(self):
         return {l: index.val_target for l, index in self.l_index.items()}

@@ -170,10 +173,6 @@ class MultilingualIndex:
     def l_test_index(self):
         return {l: index.test_index for l, index in self.l_index.items()}

-    def l_test_raw(self):
-        print('TODO: implement MultilingualIndex method to return RAW test data!')
-        return {l: index.test_raw for l, index in self.l_index.items()}
-
     def l_devel_index(self):
         return {l: index.devel_index for l, index in self.l_index.items()}

@@ -195,6 +194,9 @@ class MultilingualIndex:
     def l_val_raw(self):
         return self.l_val_raw_index(), self.l_val_target()

+    def l_test_raw(self):
+        return self.l_test_raw_index(), self.l_test_target()
+
     def get_l_pad_index(self):
         return {l: index.get_pad_index() for l, index in self.l_index.items()}

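With these three hunks, l_test_raw now mirrors l_val_raw: the old stub (with its TODO print) is gone, and the method returns a (data, target) pair of per-language dictionaries. Hypothetical shape of the result:

    l_test_raw, l_test_target = multilingualIndex.l_test_raw()
    # l_test_raw    -> {'en': [raw_doc, ...], 'it': [raw_doc, ...], ...}
    # l_test_target -> {'en': target_matrix, 'it': target_matrix, ...}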
@@ -228,7 +228,6 @@ class RecurrentGen(ViewGen):
         """
         l_pad = self.multilingualIndex.l_pad()
         data = self.multilingualIndex.l_devel_index()
-        # trainer = Trainer(gpus=self.gpus)
         self.model.to('cuda' if self.gpus else 'cpu')
         self.model.eval()
         time_init = time()

@@ -238,7 +237,7 @@ class RecurrentGen(ViewGen):
         return l_embeds

     def fit_transform(self, lX, ly):
-        pass
+        return self.fit(lX, ly).transform(lX)


 class BertGen(ViewGen):

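Since fit returns self (see the return self context in the next hunk), the new fit_transform composes the two steps in the usual scikit-learn style. Hypothetical usage, reusing the names from main() above:

    gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=False,
                        batch_size=256, nepochs=50, gpus=args.gpus, n_jobs=N_JOBS)
    l_embeds = gFun.fit_transform(lX, ly)  # same as gFun.fit(lX, ly).transform(lX)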
@@ -268,7 +267,12 @@ class BertGen(ViewGen):
         return self

     def transform(self, lX):
-        # lX is raw text data. It has to be first indexed via multilingualIndex Vectorizer.
+        # lX is raw text data. It has to be indexed first via the Bert tokenizer.
+        data = 'TOKENIZE THIS'
+        self.model.to('cuda' if self.gpus else 'cpu')
+        self.model.eval()
+        time_init = time()
+        l_embeds = self.model.encode(data)
         pass

     def fit_transform(self, lX, ly):
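BertGen.transform is still a stub at this point (note the 'TOKENIZE THIS' placeholder and the trailing pass). A hedged sketch of how it might be completed, assuming a tokenize helper like BertDataModule's and that self.model.encode maps per-language index tensors to per-language document embeddings (both assumptions, not the repo's final code):

    import torch
    from time import time

    def transform(self, lX):
        # lX is raw text data: tokenize it first, then embed it in eval mode.
        l_index = self.tokenize(lX, max_len=512)   # assumed helper
        self.model.to('cuda' if self.gpus else 'cpu')
        self.model.eval()
        time_init = time()
        with torch.no_grad():                      # inference only
            l_embeds = self.model.encode(l_index)  # assumed signature
        print(f'Projected in {round(time() - time_init, 3)}s')
        return l_embeds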