typos + requirements.txt

parent 90e974f0a3
commit 5958df3e3c

@@ -46,7 +46,6 @@ def main(args):

    if args.bert_embedder:
        bertEmbedder = BertGen(multilingualIndex, batch_size=4, nepochs=10, gpus=args.gpus, n_jobs=args.n_jobs)
        bertEmbedder.transform(lX)
        embedder_list.append(bertEmbedder)

    # Init DocEmbedderList (i.e., first-tier learners or view generators) and metaclassifier
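Note (not part of the commit): the hunk above shows the view-generator pattern used throughout the project: a generator is built, transform(lX) produces its language-indexed document embeddings, and the object is appended to the list of first-tier learners. A minimal sketch of what such an interface might look like; the names and signatures here are illustrative assumptions, not the repository's actual API:

```python
from abc import ABC, abstractmethod

class ViewGen(ABC):
    """Hypothetical sketch: a view generator maps {lang: documents} to {lang: embeddings}."""

    @abstractmethod
    def fit(self, lX, ly):
        """Train on language-indexed documents lX and labels ly; assumed to return self."""

    @abstractmethod
    def transform(self, lX):
        """Return a {lang: array} of document embeddings."""

    def fit_transform(self, lX, ly):
        return self.fit(lX, ly).transform(lX)
```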

@@ -22,8 +22,7 @@ class BertModel(pl.LightningModule):
        self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
        self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
        self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
-       # Language specific metrics - I am not really sure if they should be initialized
-       # independently or we can use the metrics init above... # TODO: check it
+       # Language specific metrics to compute metrics at epoch level
        self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
        self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
        self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)

@@ -71,7 +70,6 @@ class BertModel(pl.LightningModule):
        langs = set(langs)
        # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
        # here we save epoch level metric values and compute them specifically for each language
        # TODO: make this a function (reused in pl_gru epoch_end)
        res_macroF1 = {lang: [] for lang in langs}
        res_microF1 = {lang: [] for lang in langs}
        res_macroK = {lang: [] for lang in langs}

@@ -41,8 +41,7 @@ class RecurrentModel(pl.LightningModule):
        self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
        self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
        self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
-       # Language specific metrics - I am not really sure if they should be initialized
-       # independently or we can use the metrics init above... # TODO: check it
+       # Language specific metrics to compute metrics at epoch level
        self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
        self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
        self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)

@@ -110,7 +109,6 @@ class RecurrentModel(pl.LightningModule):
    def encode(self, lX, l_pad, batch_size=128):
        """
        Returns encoded data (i.e, RNN hidden state at second feed-forward layer - linear1). Dimensionality is 512.
        # TODO: does not run on gpu..
        :param lX:
        :param l_pad:
        :param batch_size:

@@ -167,7 +165,6 @@ class RecurrentModel(pl.LightningModule):
    def training_epoch_end(self, outputs):
        # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
        # here we save epoch level metric values and compute them specifically for each language
        # TODO: this is horrible...
        res_macroF1 = {lang: [] for lang in self.langs}
        res_microF1 = {lang: [] for lang in self.langs}
        res_macroK = {lang: [] for lang in self.langs}
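The epoch_end hunks above (for both BertModel and RecurrentModel) describe the same bookkeeping: per-step metric values are buffered per language and reduced once at epoch end. A rough, self-contained sketch of that aggregation with hypothetical names, since the actual implementation lives inside the Lightning modules and may differ:

```python
def aggregate_language_metrics(outputs, langs):
    """Average per-language metric values collected over the epoch's steps.

    outputs: list of dicts, one per training step, e.g. {"en": 0.71, "it": 0.64, ...}
    """
    buffered = {lang: [] for lang in langs}
    for step_result in outputs:
        for lang, value in step_result.items():
            if lang in buffered:
                buffered[lang].append(value)
    # Reduce to one value per language for epoch-level logging.
    return {lang: sum(vals) / len(vals) for lang, vals in buffered.items() if vals}
```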

@@ -0,0 +1,12 @@
+transformers==2.11.0
+pandas==0.25.3
+numpy==1.17.4
+joblib==0.14.0
+tqdm==4.50.2
+pytorch_lightning==1.1.2
+torch==1.3.1
+nltk==3.4.5
+scipy==1.3.3
+rdflib==4.2.2
+torchtext==0.4.0
+scikit_learn==0.24.1
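Note (not part of the diff): these pins are presumably meant to be installed with pip install -r requirements.txt in a dedicated environment; torch and pytorch_lightning versions are tightly coupled, so upgrading one generally requires revisiting the other.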

@@ -102,10 +102,10 @@ class CustomK(Metric):
        specificity, recall = 0., 0.
        absolute_negatives = self.true_negative.sum() + self.false_positive.sum()
        if absolute_negatives != 0:
-           specificity = self.true_negative.sum()/absolute_negatives # Todo check if it is float
+           specificity = self.true_negative.sum()/absolute_negatives
        absolute_positives = self.true_positive.sum() + self.false_negative.sum()
        if absolute_positives != 0:
-           recall = self.true_positive.sum()/absolute_positives # Todo check if it is float
+           recall = self.true_positive.sum()/absolute_positives

        if absolute_positives == 0:
            return 2. * specificity - 1

@@ -125,10 +125,10 @@ class CustomK(Metric):
            specificity, recall = 0., 0.
            absolute_negatives = class_tn + class_fp
            if absolute_negatives != 0:
-               specificity = class_tn / absolute_negatives # Todo check if it is float
+               specificity = class_tn / absolute_negatives
            absolute_positives = class_tp + class_fn
            if absolute_positives != 0:
-               recall = class_tp / absolute_positives # Todo check if it is float
+               recall = class_tp / absolute_positives

            if absolute_positives == 0:
                class_specific.append(2. * specificity - 1)
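For context (not part of the diff): the branches above handle the degenerate cases of the K evaluation measure, which, if I read the surrounding class correctly, is specificity + recall - 1 in the general case, 2*specificity - 1 when there are no positive examples (the branch shown above), and, symmetrically, 2*recall - 1 when there are no negatives. A standalone sketch over raw confusion counts, with hypothetical names:

```python
def k_measure(tp, fp, tn, fn):
    """Sketch of the K measure from confusion counts (general case assumed, not shown in the hunk)."""
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.
    if tp + fn == 0:      # no positive examples: judge on specificity alone
        return 2. * specificity - 1
    if tn + fp == 0:      # no negative examples: judge on recall alone
        return 2. * recall - 1
    return specificity + recall - 1
```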

@@ -1,18 +1,19 @@
"""
This module contains the view generators that take care of computing the view specific document embeddings:

-- VanillaFunGen (-X) cast document representations encoded via TFIDF into posterior probabilities by means of SVM.
+- VanillaFunGen (-x) cast document representations encoded via TFIDF into posterior probabilities by means of SVM.

-- WordClassGen (-W): generates document representation via Word-Class-Embeddings.
+- WordClassGen (-w): generates document representation via Word-Class-Embeddings.
Document embeddings are obtained via weighted sum of document's constituent embeddings.

-- MuseGen (-M):
+- MuseGen (-m): generates document representation via MUSE embeddings.
+Document embeddings are obtained via weighted sum of document's constituent embeddings.

-- RecurrentGen (-G): generates document embedding by means of a Gated Recurrent Units. The model can be
+- RecurrentGen (-g): generates document embedding by means of a Gated Recurrent Units. The model can be
initialized with different (multilingual/aligned) word representations (e.g., MUSE, WCE, ecc.,).
Output dimension is (n_docs, 512).

-- View generator (-B): generates document embedding via mBERT model.
+- View generator (-b): generates document embedding via mBERT model.
"""
from abc import ABC, abstractmethod
from models.learners import *
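The "weighted sum of document's constituent embeddings" mentioned for the WCE and MUSE generators amounts to a matrix product between a document-term weight matrix and the word-embedding matrix. A minimal sketch under that assumption (TFIDF-like weights; the L2 normalization is added for illustration and is not stated in the docstring):

```python
import numpy as np

def weighted_doc_embeddings(doc_term_weights, word_embeddings):
    """doc_term_weights: (n_docs, vocab), e.g. TFIDF; word_embeddings: (vocab, dim), e.g. MUSE/WCE."""
    doc_emb = doc_term_weights @ word_embeddings            # weighted sum of constituent word vectors
    norms = np.linalg.norm(doc_emb, axis=1, keepdims=True)
    return doc_emb / np.clip(norms, 1e-12, None)            # illustrative L2 normalization
```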

@@ -153,9 +154,6 @@ class WordClassGen(ViewGen):


class RecurrentGen(ViewGen):
    # TODO: save model https://forums.pytorchlightning.ai/t/how-to-save-hparams-when-not-provided-as-argument-apparently-assigning-to-hparams-is-not-recomended/339/5
    # Problem: we are passing lPretrained to init the RecurrentModel -> incredible slow at saving (checkpoint).
    # if we do not save it is impossible to init RecurrentModel by calling RecurrentModel.load_from_checkpoint()
    def __init__(self, multilingualIndex, pretrained_embeddings, wce, batch_size=512, nepochs=50,
                 gpus=0, n_jobs=-1, stored_path=None):
        """
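On the checkpointing TODO above (not addressed by this commit): a common workaround is to keep the heavy lPretrained matrix out of the saved hyperparameters and supply it again at restore time, relying on the fact that PyTorch Lightning forwards extra keyword arguments of load_from_checkpoint to the module constructor. A hypothetical usage sketch (checkpoint path and embedding_matrix are placeholders):

```python
# Hypothetical: reload without having checkpointed the pretrained embedding matrix.
model = RecurrentModel.load_from_checkpoint("recurrent_gen.ckpt", lPretrained=embedding_matrix)
```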