typos + requirements.txt

parent 90e974f0a3
commit 5958df3e3c
@@ -46,7 +46,6 @@ def main(args):
     if args.bert_embedder:
         bertEmbedder = BertGen(multilingualIndex, batch_size=4, nepochs=10, gpus=args.gpus, n_jobs=args.n_jobs)
-        bertEmbedder.transform(lX)
         embedder_list.append(bertEmbedder)

     # Init DocEmbedderList (i.e., first-tier learners or view generators) and metaclassifier
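As a side note (not part of the commit): a minimal sketch of the pattern this hunk edits, i.e., conditionally building the list of first-tier view generators inside main(). BertGen, multilingualIndex and the argparse args come from the project and are assumed to be in scope; the helper name is hypothetical.

def build_embedder_list(args, multilingualIndex):
    # Collect the first-tier view generators requested on the command line.
    embedder_list = []
    if args.bert_embedder:
        # Same constructor arguments as in the hunk above; the eager
        # transform(lX) call is what this commit removes.
        bertEmbedder = BertGen(multilingualIndex, batch_size=4, nepochs=10,
                               gpus=args.gpus, n_jobs=args.n_jobs)
        embedder_list.append(bertEmbedder)
    return embedder_list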
@@ -22,8 +22,7 @@ class BertModel(pl.LightningModule):
         self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
         self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
-        # Language specific metrics - I am not really sure if they should be initialized
-        # independently or we can use the metrics init above... # TODO: check it
+        # Language specific metrics to compute metrics at epoch level
         self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
         self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
@@ -71,7 +70,6 @@ class BertModel(pl.LightningModule):
         langs = set(langs)
         # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
         # here we save epoch level metric values and compute them specifically for each language
-        # TODO: make this a function (reused in pl_gru epoch_end)
         res_macroF1 = {lang: [] for lang in langs}
         res_microF1 = {lang: [] for lang in langs}
         res_macroK = {lang: [] for lang in langs}
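As a side note (not part of the commit): a self-contained toy version of the per-language aggregation that the res_* dictionaries above prepare, assuming each entry of outputs maps a metric name to a dict of per-language batch values (the exact structure of outputs is not shown in this diff).

from collections import defaultdict
from statistics import mean

def per_language_epoch_means(outputs, metric_key):
    # outputs: one dict per training step; each dict is assumed to hold
    # {metric_key: {lang: value}} entries produced at batch level.
    res = defaultdict(list)
    for step in outputs:
        for lang, value in step.get(metric_key, {}).items():
            res[lang].append(value)
    # Epoch-level value per language = mean of its batch-level values.
    return {lang: mean(values) for lang, values in res.items()}

# per_language_epoch_means([{'macroF1': {'en': 0.7, 'it': 0.6}},
#                           {'macroF1': {'en': 0.8, 'it': 0.5}}], 'macroF1')
# -> {'en': 0.75, 'it': 0.55}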
@@ -41,8 +41,7 @@ class RecurrentModel(pl.LightningModule):
         self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
         self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
-        # Language specific metrics - I am not really sure if they should be initialized
-        # independently or we can use the metrics init above... # TODO: check it
+        # Language specific metrics to compute metrics at epoch level
         self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
         self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
@@ -110,7 +109,6 @@ class RecurrentModel(pl.LightningModule):
     def encode(self, lX, l_pad, batch_size=128):
         """
         Returns encoded data (i.e, RNN hidden state at second feed-forward layer - linear1). Dimensionality is 512.
-        # TODO: does not run on gpu..
         :param lX:
         :param l_pad:
         :param batch_size:
@@ -167,7 +165,6 @@ class RecurrentModel(pl.LightningModule):
     def training_epoch_end(self, outputs):
         # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
         # here we save epoch level metric values and compute them specifically for each language
-        # TODO: this is horrible...
         res_macroF1 = {lang: [] for lang in self.langs}
         res_microF1 = {lang: [] for lang in self.langs}
         res_macroK = {lang: [] for lang in self.langs}
@@ -0,0 +1,12 @@
+transformers==2.11.0
+pandas==0.25.3
+numpy==1.17.4
+joblib==0.14.0
+tqdm==4.50.2
+pytorch_lightning==1.1.2
+torch==1.3.1
+nltk==3.4.5
+scipy==1.3.3
+rdflib==4.2.2
+torchtext==0.4.0
+scikit_learn==0.24.1
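These pins can be installed in a fresh virtual environment with pip install -r requirements.txt; torch==1.3.1 and pytorch_lightning==1.1.2 are old releases, so an older Python interpreter (3.7 or earlier) is likely needed to find prebuilt wheels.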
@@ -102,10 +102,10 @@ class CustomK(Metric):
         specificity, recall = 0., 0.
         absolute_negatives = self.true_negative.sum() + self.false_positive.sum()
         if absolute_negatives != 0:
-            specificity = self.true_negative.sum()/absolute_negatives # Todo check if it is float
+            specificity = self.true_negative.sum()/absolute_negatives
         absolute_positives = self.true_positive.sum() + self.false_negative.sum()
         if absolute_positives != 0:
-            recall = self.true_positive.sum()/absolute_positives # Todo check if it is float
+            recall = self.true_positive.sum()/absolute_positives

         if absolute_positives == 0:
             return 2. * specificity - 1
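As a side note (not part of the commit): a scalar sketch of the computation in CustomK, assuming it implements the K measure (specificity + recall - 1 in the general case). Only the no-positives fallback 2*specificity - 1 is visible in the hunk above; the other branches are assumptions.

def k_measure(tp, fp, fn, tn):
    # Specificity = TN / (TN + FP); recall = TP / (TP + FN); both default to 0
    # when their denominator is empty, mirroring the guarded divisions above.
    specificity, recall = 0., 0.
    absolute_negatives = tn + fp
    if absolute_negatives != 0:
        specificity = tn / absolute_negatives
    absolute_positives = tp + fn
    if absolute_positives != 0:
        recall = tp / absolute_positives
    # Degenerate cases: no positives or no negatives in the ground truth.
    if absolute_positives == 0:
        return 2. * specificity - 1
    if absolute_negatives == 0:
        return 2. * recall - 1
    return specificity + recall - 1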
@@ -125,10 +125,10 @@ class CustomK(Metric):
             specificity, recall = 0., 0.
             absolute_negatives = class_tn + class_fp
             if absolute_negatives != 0:
-                specificity = class_tn / absolute_negatives # Todo check if it is float
+                specificity = class_tn / absolute_negatives
             absolute_positives = class_tp + class_fn
             if absolute_positives != 0:
-                recall = class_tp / absolute_positives # Todo check if it is float
+                recall = class_tp / absolute_positives

             if absolute_positives == 0:
                 class_specific.append(2. * specificity - 1)
@@ -1,18 +1,19 @@
 """
 This module contains the view generators that take care of computing the view specific document embeddings:

-- VanillaFunGen (-X) cast document representations encoded via TFIDF into posterior probabilities by means of SVM.
+- VanillaFunGen (-x) cast document representations encoded via TFIDF into posterior probabilities by means of SVM.

-- WordClassGen (-W): generates document representation via Word-Class-Embeddings.
+- WordClassGen (-w): generates document representation via Word-Class-Embeddings.
   Document embeddings are obtained via weighted sum of document's constituent embeddings.

-- MuseGen (-M):
+- MuseGen (-m): generates document representation via MUSE embeddings.
+  Document embeddings are obtained via weighted sum of document's constituent embeddings.

-- RecurrentGen (-G): generates document embedding by means of a Gated Recurrent Units. The model can be
+- RecurrentGen (-g): generates document embedding by means of a Gated Recurrent Units. The model can be
   initialized with different (multilingual/aligned) word representations (e.g., MUSE, WCE, ecc.,).
   Output dimension is (n_docs, 512).

-- View generator (-B): generates document embedding via mBERT model.
+- View generator (-b): generates document embedding via mBERT model.
 """
 from abc import ABC, abstractmethod
 from models.learners import *
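As a side note (not part of the commit): a hypothetical reduction of the interface the docstring above describes. The abstract-base-class machinery is suggested by the abc import; transform(lX) matches the usage seen in main(), while the fit signature is an assumption.

from abc import ABC, abstractmethod

class ViewGenSketch(ABC):
    """Each view generator turns language-indexed documents into
    view-specific document embeddings (posterior probabilities, WCE/MUSE
    weighted sums, GRU states or mBERT outputs, per the module docstring)."""

    @abstractmethod
    def fit(self, lX, ly):
        """Fit the generator on {lang: documents} / {lang: labels}."""

    @abstractmethod
    def transform(self, lX):
        """Return {lang: document embedding matrix}."""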
@@ -153,9 +154,6 @@ class WordClassGen(ViewGen):


 class RecurrentGen(ViewGen):
-    # TODO: save model https://forums.pytorchlightning.ai/t/how-to-save-hparams-when-not-provided-as-argument-apparently-assigning-to-hparams-is-not-recomended/339/5
-    # Problem: we are passing lPretrained to init the RecurrentModel -> incredible slow at saving (checkpoint).
-    # if we do not save it is impossible to init RecurrentModel by calling RecurrentModel.load_from_checkpoint()
     def __init__(self, multilingualIndex, pretrained_embeddings, wce, batch_size=512, nepochs=50,
                  gpus=0, n_jobs=-1, stored_path=None):
         """