Compare commits

23 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 0fdb39532c | |
| | 4cbef64e28 | |
| | d5417691d5 | |
| | 7c8de936db | |
| | 421d7660f6 | |
| | 612e90a584 | |
| | 7b6938459f | |
| | f579a1a7f2 | |
| | b2be446446 | |
| | 80d0693cb1 | |
| | ec6886dbab | |
| | a6be7857a3 | |
| | 495a0b6af9 | |
| | f3fafd0f00 | |
| | 8968570d82 | |
| | 7affa1fab4 | |
| | c65c91fc27 | |
| | ab3bacb29c | |
| | 6361a4eba0 | |
| | ee98c5f610 | |
| | 5821325c86 | |
| | 7f493da0f8 | |
| | 10bed81916 | |

main.py (84)
@@ -7,19 +7,31 @@ from src.util.evaluation import evaluate
from src.util.results_csv import CSVlog
from src.view_generators import *

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main(args):
    assert args.post_embedder or args.muse_embedder or args.wce_embedder or args.gru_embedder or args.bert_embedder, \
        'empty set of document embeddings is not allowed!'
    assert not (args.zero_shot and (args.zscl_langs is None)), \
        '--zscl_langs cannot be empty when setting --zero_shot to True'

    print('Running generalized funnelling...')

    data = MultilingualDataset.load(args.dataset)
    # data.set_view(languages=['it', 'da'])
    # data.set_view(languages=['da', 'nl', 'it'])
    data.show_dimensions()
    lX, ly = data.training()
    lXte, lyte = data.test()

    # TODO: debug settings
    # print(f'\n[Running on DEBUG mode - samples per language are reduced to 5 max!]\n')
    # lX = {k: v[:5] for k, v in lX.items()}
    # ly = {k: v[:5] for k, v in ly.items()}
    # lXte = {k: v[:5] for k, v in lXte.items()}
    # lyte = {k: v[:5] for k, v in lyte.items()}

    # Init multilingualIndex - mandatory when deploying Neural View Generators...
    if args.gru_embedder or args.bert_embedder:
        multilingualIndex = MultilingualIndex()
@@ -29,36 +41,65 @@ def main(args):
    # Init ViewGenerators and append them to embedder_list
    embedder_list = []
    if args.post_embedder:
        posteriorEmbedder = VanillaFunGen(base_learner=get_learner(calibrate=True), n_jobs=args.n_jobs)
        posteriorEmbedder = VanillaFunGen(base_learner=get_learner(calibrate=True),
                                          zero_shot=args.zero_shot,
                                          train_langs=args.zscl_langs,
                                          n_jobs=args.n_jobs)

        embedder_list.append(posteriorEmbedder)

    if args.muse_embedder:
        museEmbedder = MuseGen(muse_dir=args.muse_dir, n_jobs=args.n_jobs)
        museEmbedder = MuseGen(muse_dir=args.muse_dir,
                               zero_shot=args.zero_shot,
                               train_langs=args.zscl_langs,
                               n_jobs=args.n_jobs)

        embedder_list.append(museEmbedder)

    if args.wce_embedder:
        wceEmbedder = WordClassGen(n_jobs=args.n_jobs)
        wceEmbedder = WordClassGen(zero_shot=args.zero_shot,
                                   train_langs=args.zscl_langs,
                                   n_jobs=args.n_jobs)

        embedder_list.append(wceEmbedder)

    if args.gru_embedder:
        rnnEmbedder = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=args.rnn_wce,
                                   batch_size=args.batch_rnn, nepochs=args.nepochs_rnn, patience=args.patience_rnn,
                                   gpus=args.gpus, n_jobs=args.n_jobs)
        rnnEmbedder = RecurrentGen(multilingualIndex,
                                   pretrained_embeddings=lMuse,
                                   wce=args.rnn_wce,
                                   batch_size=args.batch_rnn,
                                   nepochs=args.nepochs_rnn,
                                   patience=args.patience_rnn,
                                   zero_shot=args.zero_shot,
                                   train_langs=args.zscl_langs,
                                   gpus=args.gpus,
                                   n_jobs=args.n_jobs)

        embedder_list.append(rnnEmbedder)

    if args.bert_embedder:
        bertEmbedder = BertGen(multilingualIndex, batch_size=args.batch_bert, nepochs=args.nepochs_bert,
                               patience=args.patience_bert, gpus=args.gpus, n_jobs=args.n_jobs)
        bertEmbedder.transform(lX)
        bertEmbedder = BertGen(multilingualIndex,
                               batch_size=args.batch_bert,
                               nepochs=args.nepochs_bert,
                               patience=args.patience_bert,
                               zero_shot=args.zero_shot,
                               train_langs=args.zscl_langs,
                               gpus=args.gpus,
                               n_jobs=args.n_jobs)

        embedder_list.append(bertEmbedder)

    # Init DocEmbedderList (i.e., first-tier learners or view generators) and metaclassifier
    docEmbedders = DocEmbedderList(embedder_list=embedder_list, probabilistic=True)

    meta = MetaClassifier(meta_learner=get_learner(calibrate=False, kernel='rbf'),
                          meta_parameters=get_params(optimc=args.optimc))
                          meta_parameters=get_params(optimc=args.optimc),
                          n_jobs=args.n_jobs)

    # Init Funnelling Architecture
    gfun = Funnelling(first_tier=docEmbedders, meta_classifier=meta)
    gfun = Funnelling(first_tier=docEmbedders,
                      meta_classifier=meta,
                      n_jobs=args.n_jobs)

    # Training ---------------------------------------
    print('\n[Training Generalized Funnelling]')
@@ -70,14 +111,16 @@ def main(args):
    # Testing ----------------------------------------
    print('\n[Testing Generalized Funnelling]')
    time_te = time.time()
    if args.zero_shot:
        gfun.set_zero_shot(val=False)
    ly_ = gfun.predict(lXte)
    l_eval = evaluate(ly_true=lyte, ly_pred=ly_)
    l_eval = evaluate(ly_true=lyte, ly_pred=ly_, n_jobs=args.n_jobs)
    time_te = round(time.time() - time_te, 3)
    print(f'Testing completed in {time_te} seconds!')

    # Logging ---------------------------------------
    print('\n[Results]')
    results = CSVlog(args.csv_dir)
    results = CSVlog(f'csv_logs/gfun/{args.csv_dir}')
    metrics = []
    for lang in lXte.keys():
        macrof1, microf1, macrok, microk = l_eval[lang]

@@ -99,7 +142,7 @@ def main(args):
                        microf1=microf1,
                        macrok=macrok,
                        microk=microk,
                        notes='')
                        notes=f'Train langs: {sorted(args.zscl_langs)}' if args.zero_shot else '')
    print('Averages: MF1, mF1, MK, mK', np.round(np.mean(np.array(metrics), axis=0), 3))

    overall_time = round(time.time() - time_init, 3)
@@ -112,8 +155,8 @@ if __name__ == '__main__':
    parser.add_argument('dataset', help='Path to the dataset')

    parser.add_argument('-o', '--output', dest='csv_dir', metavar='',
                        help='Result file (default ../csv_logs/gfun/gfun_results.csv)', type=str,
                        default='../csv_logs/gfun/gfun_results.csv')
                        help='Result file saved in csv_logs/gfun/dir, default is gfun_results.csv)', type=str,
                        default='gfun_results.csv')

    parser.add_argument('-x', '--post_embedder', dest='post_embedder', action='store_true',
                        help='deploy posterior probabilities embedder to compute document embeddings',

@@ -186,5 +229,12 @@ if __name__ == '__main__':
    parser.add_argument('--gpus', metavar='', help='specifies how many GPUs to use per node',
                        default=None)

    parser.add_argument('--zero_shot', dest='zero_shot', action='store_true',
                        help='run zero-shot experiments',
                        default=False)

    parser.add_argument('--zscl_langs', dest='zscl_langs', metavar='', nargs='*',
                        help='set the languages to be used in training in zero shot experiments')

    args = parser.parse_args()
    main(args)
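A quick note on how the two new options behave on the command line. The snippet below is only an illustrative argparse sketch restating standard argparse semantics; it is not part of the diff, and the argument definitions simply mirror the ones added above.

```python
import argparse

# Minimal sketch of the new flags (names mirror the arguments added above)
parser = argparse.ArgumentParser()
parser.add_argument('--zero_shot', dest='zero_shot', action='store_true', default=False)
parser.add_argument('--zscl_langs', dest='zscl_langs', metavar='', nargs='*')

args = parser.parse_args('--zero_shot --zscl_langs da de en'.split())
print(args.zero_shot)    # True
print(args.zscl_langs)   # ['da', 'de', 'en']

args = parser.parse_args(['--zero_shot'])
print(args.zscl_langs)   # None: together with --zero_shot, this is the combination the new assert in main() rejects
```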
run.sh (25)

@@ -1,8 +1,23 @@
#!/usr/bin/env bash

python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -g --gpus 0
echo Running Zero-shot experiments [output at csv_logs/gfun/zero_shot_gfun.csv]

#for i in {0..10..1}
#do
# python main.py --gpus 0
#done
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it
#python main.py /home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv

#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it
#python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt
python main.py ../datasets/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle -x -m -w -b -c --nepochs_bert 25 --n_jobs 6 --gpus 0 --muse_dir ../embeddings/MUSE/ -o zero_shot_gfun.csv --zero_shot --zscl_langs da de en es fr it nl pt sv
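For orientation, the zero-shot protocol these commits wire together can be summarised as follows. This is a sketch distilled from the main.py hunks above; it reuses the names defined there and is not itself part of the diff.

```python
# Sketch only: Funnelling, docEmbedders, meta, lX, ly, lXte, lyte, args come from main.py above.
gfun = Funnelling(first_tier=docEmbedders, meta_classifier=meta, n_jobs=args.n_jobs)

# Training: with --zero_shot, each view generator restricts itself to args.zscl_langs internally.
gfun.fit(lX, ly)

# Testing: set_zero_shot(False) re-enables all languages, so documents in languages never seen
# by the first tier are still projected into the common space and classified by the meta-classifier.
if args.zero_shot:
    gfun.set_zero_shot(val=False)
ly_pred = gfun.predict(lXte)
l_eval = evaluate(ly_true=lyte, ly_pred=ly_pred, n_jobs=args.n_jobs)
```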
|
@ -92,7 +92,7 @@ class RecurrentDataModule(pl.LightningDataModule):
|
|||
Pytorch Lightning Datamodule to be deployed with RecurrentGen.
|
||||
https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
|
||||
"""
|
||||
def __init__(self, multilingualIndex, batchsize=64, n_jobs=-1):
|
||||
def __init__(self, multilingualIndex, batchsize=64, n_jobs=-1, zero_shot=False, zscl_langs=None):
|
||||
"""
|
||||
Init RecurrentDataModule.
|
||||
:param multilingualIndex: MultilingualIndex, it is a dictionary of training and test documents
|
||||
|
@ -103,6 +103,11 @@ class RecurrentDataModule(pl.LightningDataModule):
|
|||
self.multilingualIndex = multilingualIndex
|
||||
self.batchsize = batchsize
|
||||
self.n_jobs = n_jobs
|
||||
# Zero shot arguments
|
||||
if zscl_langs is None:
|
||||
zscl_langs = []
|
||||
self.zero_shot = zero_shot
|
||||
self.train_langs = zscl_langs
|
||||
super().__init__()
|
||||
|
||||
def prepare_data(self, *args, **kwargs):
|
||||
|
@ -110,6 +115,9 @@ class RecurrentDataModule(pl.LightningDataModule):
|
|||
|
||||
def setup(self, stage=None):
|
||||
if stage == 'fit' or stage is None:
|
||||
if self.zero_shot:
|
||||
l_train_index, l_train_target = self.multilingualIndex.l_train_zero_shot(langs=self.train_langs)
|
||||
else:
|
||||
l_train_index, l_train_target = self.multilingualIndex.l_train()
|
||||
# Debug settings: reducing number of samples
|
||||
# l_train_index = {l: train[:5] for l, train in l_train_index.items()}
|
||||
|
@ -118,6 +126,9 @@ class RecurrentDataModule(pl.LightningDataModule):
|
|||
self.training_dataset = RecurrentDataset(l_train_index, l_train_target,
|
||||
lPad_index=self.multilingualIndex.l_pad())
|
||||
|
||||
if self.zero_shot:
|
||||
l_val_index, l_val_target = self.multilingualIndex.l_val_zero_shot(langs=self.train_langs)
|
||||
else:
|
||||
l_val_index, l_val_target = self.multilingualIndex.l_val()
|
||||
# Debug settings: reducing number of samples
|
||||
# l_val_index = {l: train[:5] for l, train in l_val_index.items()}
|
||||
|
@ -126,6 +137,9 @@ class RecurrentDataModule(pl.LightningDataModule):
|
|||
self.val_dataset = RecurrentDataset(l_val_index, l_val_target,
|
||||
lPad_index=self.multilingualIndex.l_pad())
|
||||
if stage == 'test' or stage is None:
|
||||
if self.zero_shot:
|
||||
l_test_index, l_test_target = self.multilingualIndex.l_test_zero_shot(langs=self.train_langs)
|
||||
else:
|
||||
l_test_index, l_test_target = self.multilingualIndex.l_test()
|
||||
# Debug settings: reducing number of samples
|
||||
# l_test_index = {l: train[:5] for l, train in l_test_index.items()}
|
||||
|
@ -136,7 +150,7 @@ class RecurrentDataModule(pl.LightningDataModule):
|
|||
|
||||
def train_dataloader(self):
|
||||
return DataLoader(self.training_dataset, batch_size=self.batchsize, num_workers=N_WORKERS,
|
||||
collate_fn=self.training_dataset.collate_fn)
|
||||
collate_fn=self.training_dataset.collate_fn, shuffle=True)
|
||||
|
||||
def val_dataloader(self):
|
||||
return DataLoader(self.val_dataset, batch_size=self.batchsize, num_workers=N_WORKERS,
|
||||
|
@ -167,7 +181,8 @@ class BertDataModule(RecurrentDataModule):
|
|||
Pytorch Lightning Datamodule to be deployed with BertGen.
|
||||
https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
|
||||
"""
|
||||
def __init__(self, multilingualIndex, batchsize=64, max_len=512):
|
||||
def __init__(self, multilingualIndex, batchsize=64, max_len=512, zero_shot=False, zscl_langs=None, debug=False,
|
||||
max_samples=50):
|
||||
"""
|
||||
Init BertDataModule.
|
||||
:param multilingualIndex: MultilingualIndex, it is a dictionary of training and test documents
|
||||
|
@ -177,32 +192,53 @@ class BertDataModule(RecurrentDataModule):
|
|||
"""
|
||||
super().__init__(multilingualIndex, batchsize)
|
||||
self.max_len = max_len
|
||||
# Zero shot arguments
|
||||
if zscl_langs is None:
|
||||
zscl_langs = []
|
||||
self.zero_shot = zero_shot
|
||||
self.train_langs = zscl_langs
|
||||
self.debug = debug
|
||||
self.max_samples = max_samples
|
||||
if self.debug:
|
||||
print(f'\n[Running on DEBUG mode - samples per language are reduced to {self.max_samples} max!]\n')
|
||||
|
||||
def setup(self, stage=None):
|
||||
if stage == 'fit' or stage is None:
|
||||
if self.zero_shot:
|
||||
l_train_raw, l_train_target = self.multilingualIndex.l_train_raw_zero_shot(langs=self.train_langs)
|
||||
else:
|
||||
l_train_raw, l_train_target = self.multilingualIndex.l_train_raw()
|
||||
if self.debug:
|
||||
# Debug settings: reducing number of samples
|
||||
# l_train_raw = {l: train[:5] for l, train in l_train_raw.items()}
|
||||
# l_train_target = {l: target[:5] for l, target in l_train_target.items()}
|
||||
l_train_raw = {l: train[:self.max_samples] for l, train in l_train_raw.items()}
|
||||
l_train_target = {l: target[:self.max_samples] for l, target in l_train_target.items()}
|
||||
|
||||
l_train_index = tokenize(l_train_raw, max_len=self.max_len)
|
||||
self.training_dataset = RecurrentDataset(l_train_index, l_train_target,
|
||||
lPad_index=self.multilingualIndex.l_pad())
|
||||
|
||||
if self.zero_shot:
|
||||
l_val_raw, l_val_target = self.multilingualIndex.l_val_raw_zero_shot(langs=self.train_langs)
|
||||
else:
|
||||
l_val_raw, l_val_target = self.multilingualIndex.l_val_raw()
|
||||
if self.debug:
|
||||
# Debug settings: reducing number of samples
|
||||
# l_val_raw = {l: train[:5] for l, train in l_val_raw.items()}
|
||||
# l_val_target = {l: target[:5] for l, target in l_val_target.items()}
|
||||
l_val_raw = {l: train[:self.max_samples] for l, train in l_val_raw.items()}
|
||||
l_val_target = {l: target[:self.max_samples] for l, target in l_val_target.items()}
|
||||
|
||||
l_val_index = tokenize(l_val_raw, max_len=self.max_len)
|
||||
self.val_dataset = RecurrentDataset(l_val_index, l_val_target,
|
||||
lPad_index=self.multilingualIndex.l_pad())
|
||||
|
||||
if stage == 'test' or stage is None:
|
||||
if self.zero_shot:
|
||||
l_test_raw, l_test_target = self.multilingualIndex.l_test_raw_zero_shot(langs=self.train_langs)
|
||||
else:
|
||||
l_test_raw, l_test_target = self.multilingualIndex.l_test_raw()
|
||||
if self.debug:
|
||||
# Debug settings: reducing number of samples
|
||||
# l_test_raw = {l: train[:5] for l, train in l_test_raw.items()}
|
||||
# l_test_target = {l: target[:5] for l, target in l_test_target.items()}
|
||||
l_test_raw = {l: train[:self.max_samples] for l, train in l_test_raw.items()}
|
||||
l_test_target = {l: target[:self.max_samples] for l, target in l_test_target.items()}
|
||||
|
||||
l_test_index = tokenize(l_test_raw, max_len=self.max_len)
|
||||
self.test_dataset = RecurrentDataset(l_test_index, l_test_target,
|
||||
|
@ -213,10 +249,17 @@ class BertDataModule(RecurrentDataModule):
|
|||
NB: Setting n_workers to > 0 will cause "OSError: [Errno 24] Too many open files"
|
||||
:return:
|
||||
"""
|
||||
return DataLoader(self.training_dataset, batch_size=self.batchsize)
|
||||
return DataLoader(self.training_dataset, batch_size=self.batchsize, collate_fn=self.collate_fn_bert,
|
||||
shuffle=True)
|
||||
|
||||
def val_dataloader(self):
|
||||
return DataLoader(self.val_dataset, batch_size=self.batchsize)
|
||||
return DataLoader(self.val_dataset, batch_size=self.batchsize, collate_fn=self.collate_fn_bert)
|
||||
|
||||
def test_dataloader(self):
|
||||
return DataLoader(self.test_dataset, batch_size=self.batchsize)
|
||||
return DataLoader(self.test_dataset, batch_size=self.batchsize, collate_fn=self.collate_fn_bert)
|
||||
|
||||
def collate_fn_bert(self, data):
|
||||
x_batch = np.vstack([elem[0] for elem in data])
|
||||
y_batch = np.vstack([elem[1] for elem in data])
|
||||
lang_batch = [elem[2] for elem in data]
|
||||
return torch.LongTensor(x_batch), torch.FloatTensor(y_batch), lang_batch
|
||||
|
@@ -23,7 +23,7 @@ class DocEmbedderList:
            if isinstance(embedder, VanillaFunGen):
                _tmp.append(embedder)
            else:
                _tmp.append(FeatureSet2Posteriors(embedder))
                _tmp.append(FeatureSet2Posteriors(embedder, n_jobs=embedder.n_jobs))
        self.embedders = _tmp

    def fit(self, lX, ly):

@@ -43,23 +43,38 @@ class DocEmbedderList:
        :param lX:
        :return: common latent space (averaged).
        """
        langs = sorted(lX.keys())
        lZparts = {lang: None for lang in langs}
        self.langs = sorted(lX.keys())
        lZparts = {lang: None for lang in self.langs}

        for embedder in self.embedders:
            lZ = embedder.transform(lX)
            for lang in langs:
            for lang in sorted(lZ.keys()):
                Z = lZ[lang]
                if lZparts[lang] is None:
                    lZparts[lang] = Z
                else:
                    lZparts[lang] += Z
        n_embedders = len(self.embedders)
        return {lang: lZparts[lang]/n_embedders for lang in langs}  # Averaging feature spaces
        # Zero shot experiments: removing k:v if v is None (i.e, it is a lang that will be used in zero shot setting)
        lZparts = {k: v for k, v in lZparts.items() if v is not None}
        lang_number_embedders = self.get_number_embedders_zeroshot()
        return {lang: lZparts[lang]/lang_number_embedders[lang] for lang in sorted(lZparts.keys())}  # Averaging feature spaces

    def fit_transform(self, lX, ly):
        return self.fit(lX, ly).transform(lX)

    def get_number_embedders_zeroshot(self):
        lang_number_embedders = {lang: len(self.embedders) for lang in self.langs}
        for lang in self.langs:
            for embedder in self.embedders:
                if isinstance(embedder, VanillaFunGen):
                    if lang not in embedder.train_langs:
                        lang_number_embedders[lang] = 2  # todo: number of view gen is hard-codede
                else:
                    if lang not in embedder.embedder.train_langs:
                        lang_number_embedders[lang] = 2  # todo: number of view gen is hard-codede
        return lang_number_embedders


class FeatureSet2Posteriors:
    """
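The divisor used in the averaging above is hard-coded to 2 for languages outside an embedder's training set (see the todo comments in the hunk). Purely as an illustration of the idea, and not as part of these commits, the same average can be obtained by counting how many view generators actually contributed a projection for each language:

```python
import numpy as np


def average_views(view_projections):
    """Hypothetical helper: average per-language projections over however many view
    generators actually produced one, instead of a fixed constant."""
    summed, counts = {}, {}
    for lZ in view_projections:              # one dict {lang: matrix} per view generator
        for lang, Z in lZ.items():
            summed[lang] = Z if lang not in summed else summed[lang] + Z
            counts[lang] = counts.get(lang, 0) + 1
    return {lang: summed[lang] / counts[lang] for lang in sorted(summed)}


# Toy usage: 'en' is covered by two views, 'it' only by one.
views = [{'en': np.ones((2, 3)), 'it': np.ones((2, 3))}, {'en': 3 * np.ones((2, 3))}]
averaged = average_views(views)              # averaged['en'] == 2.0 everywhere, averaged['it'] == 1.0
```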
@ -77,7 +92,7 @@ class FeatureSet2Posteriors:
|
|||
self.l2 = l2
|
||||
self.n_jobs = n_jobs
|
||||
self.prob_classifier = MetaClassifier(
|
||||
SVC(kernel='rbf', gamma='auto', probability=True, cache_size=1000, random_state=1), n_jobs=n_jobs)
|
||||
SVC(kernel='rbf', gamma='auto', probability=True, cache_size=1000, random_state=1), n_jobs=self.n_jobs)
|
||||
|
||||
def fit(self, lX, ly):
|
||||
lZ = self.embedder.fit_transform(lX, ly)
|
||||
|
@ -113,12 +128,21 @@ class Funnelling:
|
|||
self.n_jobs = n_jobs
|
||||
|
||||
def fit(self, lX, ly):
|
||||
print('## Fitting first-tier learners!')
|
||||
print('\n## Fitting first-tier learners!')
|
||||
lZ = self.first_tier.fit_transform(lX, ly)
|
||||
print('## Fitting meta-learner!')
|
||||
print('\n## Fitting meta-learner!')
|
||||
self.meta.fit(lZ, ly)
|
||||
|
||||
def predict(self, lX):
|
||||
lZ = self.first_tier.transform(lX)
|
||||
ly = self.meta.predict(lZ)
|
||||
return ly
|
||||
|
||||
def set_zero_shot(self, val: bool):
|
||||
for embedder in self.first_tier.embedders:
|
||||
if isinstance(embedder, VanillaFunGen):
|
||||
embedder.set_zero_shot(val)
|
||||
else:
|
||||
embedder.embedder.set_zero_shot(val)
|
||||
return
|
||||
|
||||
|
|
|
@@ -1,4 +1,5 @@
import time
import src.util.disable_sklearn_warnings

import numpy as np
from joblib import Parallel, delayed
@ -74,7 +75,7 @@ class NaivePolylingualClassifier:
|
|||
_sort_if_sparse(lX[lang])
|
||||
|
||||
models = Parallel(n_jobs=self.n_jobs)\
|
||||
(delayed(MonolingualClassifier(self.base_learner, parameters=self.parameters).fit)((lX[lang]), ly[lang]) for
|
||||
(delayed(MonolingualClassifier(self.base_learner, parameters=self.parameters, n_jobs=self.n_jobs).fit)((lX[lang]), ly[lang]) for
|
||||
lang in langs)
|
||||
|
||||
self.model = {lang: models[i] for i, lang in enumerate(langs)}
|
||||
|
|
|
@ -1,112 +0,0 @@
|
|||
#taken from https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM.py
|
||||
from models.helpers import *
|
||||
from torch.autograd import Variable
|
||||
|
||||
|
||||
class RNNMultilingualClassifier(nn.Module):
|
||||
|
||||
def __init__(self, output_size, hidden_size, lvocab_size, learnable_length, lpretrained=None,
|
||||
drop_embedding_range=None, drop_embedding_prop=0, post_probabilities=True, only_post=False,
|
||||
bert_embeddings=False):
|
||||
|
||||
super(RNNMultilingualClassifier, self).__init__()
|
||||
self.output_size = output_size
|
||||
self.hidden_size = hidden_size
|
||||
self.drop_embedding_range = drop_embedding_range
|
||||
self.drop_embedding_prop = drop_embedding_prop
|
||||
self.post_probabilities = post_probabilities
|
||||
self.bert_embeddings = bert_embeddings
|
||||
assert 0 <= drop_embedding_prop <= 1, 'drop_embedding_prop: wrong range'
|
||||
|
||||
self.lpretrained_embeddings = nn.ModuleDict()
|
||||
self.llearnable_embeddings = nn.ModuleDict()
|
||||
self.embedding_length = None
|
||||
self.langs = sorted(lvocab_size.keys())
|
||||
self.only_post = only_post
|
||||
|
||||
self.n_layers = 1
|
||||
self.n_directions = 1
|
||||
|
||||
self.dropout = nn.Dropout(0.6)
|
||||
|
||||
lstm_out = 256
|
||||
ff1 = 512
|
||||
ff2 = 256
|
||||
|
||||
lpretrained_embeddings = {}
|
||||
llearnable_embeddings = {}
|
||||
if only_post==False:
|
||||
for l in self.langs:
|
||||
pretrained = lpretrained[l] if lpretrained else None
|
||||
pretrained_embeddings, learnable_embeddings, embedding_length = init_embeddings(
|
||||
pretrained, lvocab_size[l], learnable_length
|
||||
)
|
||||
lpretrained_embeddings[l] = pretrained_embeddings
|
||||
llearnable_embeddings[l] = learnable_embeddings
|
||||
self.embedding_length = embedding_length
|
||||
|
||||
# self.lstm = nn.LSTM(self.embedding_length, hidden_size, dropout=0.2 if self.n_layers>1 else 0, num_layers=self.n_layers, bidirectional=(self.n_directions==2))
|
||||
self.rnn = nn.GRU(self.embedding_length, hidden_size)
|
||||
self.linear0 = nn.Linear(hidden_size * self.n_directions, lstm_out)
|
||||
self.lpretrained_embeddings.update(lpretrained_embeddings)
|
||||
self.llearnable_embeddings.update(llearnable_embeddings)
|
||||
|
||||
self.linear1 = nn.Linear(lstm_out, ff1)
|
||||
self.linear2 = nn.Linear(ff1, ff2)
|
||||
|
||||
if only_post:
|
||||
self.label = nn.Linear(output_size, output_size)
|
||||
elif post_probabilities and not bert_embeddings:
|
||||
self.label = nn.Linear(ff2 + output_size, output_size)
|
||||
elif bert_embeddings and not post_probabilities:
|
||||
self.label = nn.Linear(ff2 + 768, output_size)
|
||||
elif post_probabilities and bert_embeddings:
|
||||
self.label = nn.Linear(ff2 + output_size + 768, output_size)
|
||||
else:
|
||||
self.label = nn.Linear(ff2, output_size)
|
||||
|
||||
def forward(self, input, post, bert_embed, lang):
|
||||
if self.only_post:
|
||||
doc_embedding = post
|
||||
else:
|
||||
doc_embedding = self.transform(input, lang)
|
||||
if self.post_probabilities:
|
||||
doc_embedding = torch.cat([doc_embedding, post], dim=1)
|
||||
if self.bert_embeddings:
|
||||
doc_embedding = torch.cat([doc_embedding, bert_embed], dim=1)
|
||||
|
||||
logits = self.label(doc_embedding)
|
||||
return logits
|
||||
|
||||
def transform(self, input, lang):
|
||||
batch_size = input.shape[0]
|
||||
input = embed(self, input, lang)
|
||||
input = embedding_dropout(input, drop_range=self.drop_embedding_range, p_drop=self.drop_embedding_prop,
|
||||
training=self.training)
|
||||
input = input.permute(1, 0, 2)
|
||||
h_0 = Variable(torch.zeros(self.n_layers*self.n_directions, batch_size, self.hidden_size).cuda())
|
||||
# c_0 = Variable(torch.zeros(self.n_layers*self.n_directions, batch_size, self.hidden_size).cuda())
|
||||
# output, (_, _) = self.lstm(input, (h_0, c_0))
|
||||
output, _ = self.rnn(input, h_0)
|
||||
output = output[-1, :, :]
|
||||
output = F.relu(self.linear0(output))
|
||||
output = self.dropout(F.relu(self.linear1(output)))
|
||||
output = self.dropout(F.relu(self.linear2(output)))
|
||||
return output
|
||||
|
||||
def finetune_pretrained(self):
|
||||
for l in self.langs:
|
||||
self.lpretrained_embeddings[l].requires_grad = True
|
||||
self.lpretrained_embeddings[l].weight.requires_grad = True
|
||||
|
||||
def get_embeddings(self, input, lang):
|
||||
batch_size = input.shape[0]
|
||||
input = embed(self, input, lang)
|
||||
input = embedding_dropout(input, drop_range=self.drop_embedding_range, p_drop=self.drop_embedding_prop,
|
||||
training=self.training)
|
||||
input = input.permute(1, 0, 2)
|
||||
h_0 = Variable(torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size).cuda())
|
||||
output, _ = self.rnn(input, h_0)
|
||||
output = output[-1, :, :]
|
||||
return output.cpu().detach().numpy()
|
||||
|
|
@ -23,7 +23,7 @@ class BertModel(pl.LightningModule):
|
|||
self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
|
||||
self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
|
||||
self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
|
||||
# Language specific metrics to compute metrics at epoch level
|
||||
# Language specific metrics to compute at epoch level
|
||||
self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
|
||||
self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
|
||||
self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
|
||||
|
@ -44,9 +44,7 @@ class BertModel(pl.LightningModule):
|
|||
return logits
|
||||
|
||||
def training_step(self, train_batch, batch_idx):
|
||||
X, y, _, batch_langs = train_batch
|
||||
X = torch.cat(X).view([X[0].shape[0], len(X)])
|
||||
y = y.type(torch.FloatTensor)
|
||||
X, y, batch_langs = train_batch
|
||||
y = y.to('cuda' if self.gpus else 'cpu')
|
||||
logits, _ = self.forward(X)
|
||||
loss = self.loss(logits, y)
|
||||
|
@ -56,52 +54,15 @@ class BertModel(pl.LightningModule):
|
|||
macroF1 = self.macroF1(predictions, y)
|
||||
microK = self.microK(predictions, y)
|
||||
macroK = self.macroK(predictions, y)
|
||||
self.log('train-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-macroF1', macroF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-microF1', microF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-macroK', macroK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-microK', microK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
|
||||
lX, ly = self._reconstruct_dict(predictions, y, batch_langs)
|
||||
return {'loss': loss, 'pred': lX, 'target': ly}
|
||||
|
||||
def training_epoch_end(self, outputs):
|
||||
langs = []
|
||||
for output in outputs:
|
||||
langs.extend(list(output['pred'].keys()))
|
||||
langs = set(langs)
|
||||
# outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
|
||||
# here we save epoch level metric values and compute them specifically for each language
|
||||
res_macroF1 = {lang: [] for lang in langs}
|
||||
res_microF1 = {lang: [] for lang in langs}
|
||||
res_macroK = {lang: [] for lang in langs}
|
||||
res_microK = {lang: [] for lang in langs}
|
||||
for output in outputs:
|
||||
lX, ly = output['pred'], output['target']
|
||||
for lang in lX.keys():
|
||||
X, y = lX[lang], ly[lang]
|
||||
lang_macroF1 = self.lang_macroF1(X, y)
|
||||
lang_microF1 = self.lang_microF1(X, y)
|
||||
lang_macroK = self.lang_macroK(X, y)
|
||||
lang_microK = self.lang_microK(X, y)
|
||||
|
||||
res_macroF1[lang].append(lang_macroF1)
|
||||
res_microF1[lang].append(lang_microF1)
|
||||
res_macroK[lang].append(lang_macroK)
|
||||
res_microK[lang].append(lang_microK)
|
||||
for lang in langs:
|
||||
avg_macroF1 = torch.mean(torch.Tensor(res_macroF1[lang]))
|
||||
avg_microF1 = torch.mean(torch.Tensor(res_microF1[lang]))
|
||||
avg_macroK = torch.mean(torch.Tensor(res_macroK[lang]))
|
||||
avg_microK = torch.mean(torch.Tensor(res_microK[lang]))
|
||||
self.logger.experiment.add_scalars('train-langs-macroF1', {f'{lang}': avg_macroF1}, self.current_epoch)
|
||||
self.logger.experiment.add_scalars('train-langs-microF1', {f'{lang}': avg_microF1}, self.current_epoch)
|
||||
self.logger.experiment.add_scalars('train-langs-macroK', {f'{lang}': avg_macroK}, self.current_epoch)
|
||||
self.logger.experiment.add_scalars('train-langs-microK', {f'{lang}': avg_microK}, self.current_epoch)
|
||||
self.log('train-loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
self.log('train-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-macroK', macroK, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('train-microK', microK, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
return {'loss': loss}
|
||||
|
||||
def validation_step(self, val_batch, batch_idx):
|
||||
X, y, _, batch_langs = val_batch
|
||||
X = torch.cat(X).view([X[0].shape[0], len(X)])
|
||||
y = y.type(torch.FloatTensor)
|
||||
X, y, batch_langs = val_batch
|
||||
y = y.to('cuda' if self.gpus else 'cpu')
|
||||
logits, _ = self.forward(X)
|
||||
loss = self.loss(logits, y)
|
||||
|
@ -110,7 +71,7 @@ class BertModel(pl.LightningModule):
|
|||
macroF1 = self.macroF1(predictions, y)
|
||||
microK = self.microK(predictions, y)
|
||||
macroK = self.macroK(predictions, y)
|
||||
self.log('val-loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('val-loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
self.log('val-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
self.log('val-microF1', microF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
|
@ -118,12 +79,10 @@ class BertModel(pl.LightningModule):
|
|||
return {'loss': loss}
|
||||
|
||||
def test_step(self, test_batch, batch_idx):
|
||||
X, y, _, batch_langs = test_batch
|
||||
X = torch.cat(X).view([X[0].shape[0], len(X)])
|
||||
y = y.type(torch.FloatTensor)
|
||||
X, y, batch_langs = test_batch
|
||||
y = y.to('cuda' if self.gpus else 'cpu')
|
||||
logits, _ = self.forward(X)
|
||||
loss = self.loss(logits, y)
|
||||
# loss = self.loss(logits, y)
|
||||
# Squashing logits through Sigmoid in order to get confidence score
|
||||
predictions = torch.sigmoid(logits) > 0.5
|
||||
microF1 = self.microF1(predictions, y)
|
||||
|
@ -132,11 +91,11 @@ class BertModel(pl.LightningModule):
|
|||
macroK = self.macroK(predictions, y)
|
||||
self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('test-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
self.log('test-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
|
||||
self.log('test-macroK', macroK, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
self.log('test-microK', microK, on_step=False, on_epoch=True, prog_bar=False, logger=True)
|
||||
return
|
||||
|
||||
def configure_optimizers(self, lr=3e-5, weight_decay=0.01):
|
||||
def configure_optimizers(self, lr=1e-5, weight_decay=0.01):
|
||||
no_decay = ['bias', 'LayerNorm.weight']
|
||||
optimizer_grouped_parameters = [
|
||||
{'params': [p for n, p in self.bert.named_parameters()
|
||||
|
@ -147,7 +106,8 @@ class BertModel(pl.LightningModule):
|
|||
'weight_decay': weight_decay}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=lr)
|
||||
scheduler = StepLR(optimizer, step_size=25, gamma=0.1)
|
||||
scheduler = {'scheduler': StepLR(optimizer, step_size=25, gamma=0.1),
|
||||
'interval': 'epoch'}
|
||||
return [optimizer], [scheduler]
|
||||
|
||||
def encode(self, lX, batch_size=64):
|
||||
|
|
|
@ -42,7 +42,7 @@ class RecurrentModel(pl.LightningModule):
|
|||
self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
|
||||
self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
|
||||
self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
|
||||
# Language specific metrics to compute metrics at epoch level
|
||||
# Language specific metrics to compute at epoch level
|
||||
self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
|
||||
self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
|
||||
self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
|
||||
|
|
|
@ -149,33 +149,60 @@ class MultilingualIndex:
|
|||
def l_train_index(self):
|
||||
return {l: index.train_index for l, index in self.l_index.items()}
|
||||
|
||||
def l_train_index_zero_shot(self, langs):
|
||||
return {l: index.train_index for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_train_raw_index(self):
|
||||
return {l: index.train_raw for l, index in self.l_index.items()}
|
||||
|
||||
def l_train_raw_index_zero_shot(self, langs):
|
||||
return {l: index.train_raw for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_train_target(self):
|
||||
return {l: index.train_target for l, index in self.l_index.items()}
|
||||
|
||||
def l_train_target_zero_shot(self, langs):
|
||||
return {l: index.train_target for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_val_index(self):
|
||||
return {l: index.val_index for l, index in self.l_index.items()}
|
||||
|
||||
def l_val_index_zero_shot(self, langs):
|
||||
return {l: index.val_index for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_val_raw_index(self):
|
||||
return {l: index.val_raw for l, index in self.l_index.items()}
|
||||
|
||||
def l_val_raw_index_zero_shot(self, langs):
|
||||
return {l: index.val_raw for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_test_raw_index(self):
|
||||
return {l: index.test_raw for l, index in self.l_index.items()}
|
||||
|
||||
def l_test_raw_index_zero_shot(self, langs):
|
||||
return {l: index.test_raw for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_devel_raw_index(self):
|
||||
return {l: index.devel_raw for l, index in self.l_index.items()}
|
||||
|
||||
def l_val_target(self):
|
||||
return {l: index.val_target for l, index in self.l_index.items()}
|
||||
|
||||
def l_val_target_zero_shot(self, langs):
|
||||
return {l: index.val_target for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_test_target(self):
|
||||
return {l: index.test_target for l, index in self.l_index.items()}
|
||||
|
||||
def l_test_index(self):
|
||||
return {l: index.test_index for l, index in self.l_index.items()}
|
||||
|
||||
def l_test_target_zero_shot(self, langs):
|
||||
return {l: index.test_target for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_test_index_zero_shot(self, langs):
|
||||
return {l: index.test_index for l, index in self.l_index.items() if l in langs}
|
||||
|
||||
def l_devel_index(self):
|
||||
return {l: index.devel_index for l, index in self.l_index.items()}
|
||||
|
||||
|
@ -191,15 +218,33 @@ class MultilingualIndex:
|
|||
def l_test(self):
|
||||
return self.l_test_index(), self.l_test_target()
|
||||
|
||||
def l_test_zero_shot(self, langs):
|
||||
return self.l_test_index_zero_shot(langs), self.l_test_target_zero_shot(langs)
|
||||
|
||||
def l_train_zero_shot(self, langs):
|
||||
return self.l_train_index_zero_shot(langs), self.l_train_target_zero_shot(langs)
|
||||
|
||||
def l_val_zero_shot(self, langs):
|
||||
return self.l_val_index_zero_shot(langs), self.l_val_target_zero_shot(langs)
|
||||
|
||||
def l_train_raw(self):
|
||||
return self.l_train_raw_index(), self.l_train_target()
|
||||
|
||||
def l_train_raw_zero_shot(self, langs):
|
||||
return self.l_train_raw_index_zero_shot(langs), self.l_train_target_zero_shot(langs)
|
||||
|
||||
def l_val_raw(self):
|
||||
return self.l_val_raw_index(), self.l_val_target()
|
||||
|
||||
def l_val_raw_zero_shot(self, langs):
|
||||
return self.l_val_raw_index_zero_shot(langs), self.l_val_target_zero_shot(langs)
|
||||
|
||||
def l_test_raw(self):
|
||||
return self.l_test_raw_index(), self.l_test_target()
|
||||
|
||||
def l_test_raw_zero_shot(self, langs):
|
||||
return self.l_test_raw_index_zero_shot(langs), self.l_test_target_zero_shot(langs)
|
||||
|
||||
def l_devel_raw(self):
|
||||
return self.l_devel_raw_index(), self.l_devel_target()
|
||||
|
||||
|
@ -317,7 +362,6 @@ def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
|
|||
unk_count = 0
|
||||
knw_count = 0
|
||||
out_count = 0
|
||||
# pbar = tqdm(data, desc=f'indexing')
|
||||
for text in data:
|
||||
words = analyzer(text)
|
||||
index = []
|
||||
|
@ -336,8 +380,6 @@ def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
|
|||
index.append(idx)
|
||||
indexes.append(index)
|
||||
knw_count += len(index)
|
||||
# pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]'
|
||||
# f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]')
|
||||
return indexes
|
||||
|
||||
|
||||
|
@ -378,7 +420,7 @@ def get_method_name(args):
|
|||
for i, conf in enumerate(_id_conf):
|
||||
if conf:
|
||||
_id += _id_name[i]
|
||||
_id = _id if not args.gru_wce else _id + '_wce'
|
||||
_id = _id if not args.rnn_wce else _id + '_wce'
|
||||
_dataset_path = args.dataset.split('/')[-1].split('_')
|
||||
dataset_id = _dataset_path[0] + _dataset_path[-1]
|
||||
return _id, dataset_id
|
||||
|
|
|
@@ -0,0 +1,8 @@
import warnings


def warn(*args, **kwargs):
    pass


warnings.warn = warn
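The new module above works purely by side effect: importing it replaces warnings.warn with a no-op for the whole process. A small usage sketch, assuming the module lands at src/util/disable_sklearn_warnings.py as the import hunks elsewhere in this diff suggest:

```python
import src.util.disable_sklearn_warnings  # noqa: F401  (side effect: warnings.warn becomes a no-op)
import warnings

warnings.warn('deprecated behaviour')  # silently dropped from here on, sklearn warnings included
```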
@@ -118,6 +118,7 @@ def hard_single_metric_statistics(true_labels, predicted_labels):

def macro_average(true_labels, predicted_labels, metric, metric_statistics=hard_single_metric_statistics):
    true_labels, predicted_labels, nC = __check_consistency_and_adapt(true_labels, predicted_labels)
    _tmp = [metric(metric_statistics(true_labels[:, c], predicted_labels[:, c])) for c in range(nC)]
    return np.mean([metric(metric_statistics(true_labels[:, c], predicted_labels[:, c])) for c in range(nC)])
@@ -68,7 +68,7 @@ class CustomF1(Metric):
            if den > 0:
                class_specific.append(num / den)
            else:
                class_specific.append(1.)
                class_specific.append(torch.FloatTensor([1.]))
        average = torch.sum(torch.Tensor(class_specific))/self.num_classes
        return average.to(self.device)
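For context, the convention implemented above scores a class as 1.0 when its denominator is zero, i.e. the class has no true positives, false positives, or false negatives. A minimal sketch of that convention follows; it assumes the usual decomposition num = 2*TP and den = 2*TP + FP + FN, which the hunk itself does not show, so treat the helper as illustrative only.

```python
import torch


def class_f1(tp, fp, fn):
    # Hypothetical per-class F1 mirroring the convention in the hunk above:
    # a class that is never predicted and never present (den == 0) counts as 1.0.
    num, den = 2.0 * tp, 2.0 * tp + fp + fn
    return num / den if den > 0 else 1.0


scores = torch.tensor([class_f1(3, 1, 0), class_f1(0, 0, 0)])  # second class never occurs -> 1.0
macro = scores.sum() / scores.numel()                          # analogous to the 'average' line above
```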
@@ -1,4 +1,5 @@
import numpy as np
import src.util.disable_sklearn_warnings


class StandardizeTransformer:
@ -15,11 +15,12 @@ This module contains the view generators that take care of computing the view sp
|
|||
|
||||
- View generator (-b): generates document embedding via mBERT model.
|
||||
"""
|
||||
import torch
|
||||
from abc import ABC, abstractmethod
|
||||
# from time import time
|
||||
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.loggers import TensorBoardLogger
|
||||
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger
|
||||
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
||||
from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor
|
||||
|
||||
|
@ -56,7 +57,7 @@ class VanillaFunGen(ViewGen):
|
|||
View Generator (x): original funnelling architecture proposed by Moreo, Esuli and
|
||||
Sebastiani in DOI: https://doi.org/10.1145/3326065
|
||||
"""
|
||||
def __init__(self, base_learner, first_tier_parameters=None, n_jobs=-1):
|
||||
def __init__(self, base_learner, first_tier_parameters=None, zero_shot=False, train_langs: list = None, n_jobs=-1):
|
||||
"""
|
||||
Init Posterior Probabilities embedder (i.e., VanillaFunGen)
|
||||
:param base_learner: naive monolingual learners to be deployed as first-tier learners. Should be able to
|
||||
|
@ -69,13 +70,26 @@ class VanillaFunGen(ViewGen):
|
|||
self.first_tier_parameters = first_tier_parameters
|
||||
self.n_jobs = n_jobs
|
||||
self.doc_projector = NaivePolylingualClassifier(base_learner=self.learners,
|
||||
parameters=self.first_tier_parameters, n_jobs=self.n_jobs)
|
||||
parameters=self.first_tier_parameters,
|
||||
n_jobs=self.n_jobs)
|
||||
self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
|
||||
# Zero shot parameters
|
||||
self.zero_shot = zero_shot
|
||||
if train_langs is None:
|
||||
train_langs = ['it']
|
||||
self.train_langs = train_langs
|
||||
|
||||
def fit(self, lX, lY):
|
||||
print('# Fitting VanillaFunGen (X)...')
|
||||
def fit(self, lX, ly):
|
||||
print('\n# Fitting VanillaFunGen (X)...')
|
||||
if self.zero_shot:
|
||||
print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')
|
||||
self.langs = sorted(self.train_langs)
|
||||
lX = self.zero_shot_experiments(lX)
|
||||
ly = self.zero_shot_experiments(ly)
|
||||
lX = self.vectorizer.fit_transform(lX)
|
||||
self.doc_projector.fit(lX, lY)
|
||||
else:
|
||||
lX = self.vectorizer.fit_transform(lX)
|
||||
self.doc_projector.fit(lX, ly)
|
||||
return self
|
||||
|
||||
def transform(self, lX):
|
||||
|
@ -93,13 +107,27 @@ class VanillaFunGen(ViewGen):
|
|||
def fit_transform(self, lX, ly):
|
||||
return self.fit(lX, ly).transform(lX)
|
||||
|
||||
def zero_shot_experiments(self, lX):
|
||||
_lX = {}
|
||||
for lang in self.langs:
|
||||
if lang in self.train_langs:
|
||||
_lX[lang] = lX[lang]
|
||||
else:
|
||||
_lX[lang] = None
|
||||
lX = _lX
|
||||
return lX
|
||||
|
||||
def set_zero_shot(self, val: bool):
|
||||
self.zero_shot = val
|
||||
return
|
||||
|
||||
|
||||
class MuseGen(ViewGen):
|
||||
"""
|
||||
View Generator (m): generates document representation via MUSE embeddings (Fasttext multilingual word
|
||||
embeddings). Document embeddings are obtained via weighted sum of document's constituent embeddings.
|
||||
"""
|
||||
def __init__(self, muse_dir='../embeddings', n_jobs=-1):
|
||||
def __init__(self, muse_dir='../embeddings', zero_shot=False, train_langs: list = None, n_jobs=-1):
|
||||
"""
|
||||
Init the MuseGen.
|
||||
:param muse_dir: string, path to folder containing muse embeddings
|
||||
|
@ -111,6 +139,11 @@ class MuseGen(ViewGen):
|
|||
self.langs = None
|
||||
self.lMuse = None
|
||||
self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
|
||||
# Zero shot parameters
|
||||
self.zero_shot = zero_shot
|
||||
if train_langs is None:
|
||||
train_langs = ['it']
|
||||
self.train_langs = train_langs
|
||||
|
||||
def fit(self, lX, ly):
|
||||
"""
|
||||
|
@ -119,7 +152,9 @@ class MuseGen(ViewGen):
|
|||
:param ly: dict {lang: target vectors}
|
||||
:return: self.
|
||||
"""
|
||||
print('# Fitting MuseGen (M)...')
|
||||
print('\n# Fitting MuseGen (M)...')
|
||||
if self.zero_shot:
|
||||
print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')
|
||||
self.vectorizer.fit(lX)
|
||||
self.langs = sorted(lX.keys())
|
||||
self.lMuse = MuseLoader(langs=self.langs, cache=self.muse_dir)
|
||||
|
@ -135,23 +170,42 @@ class MuseGen(ViewGen):
|
|||
:param lX: dict {lang: indexed documents}
|
||||
:return: document projection to the common latent space.
|
||||
"""
|
||||
# Testing zero-shot experiments
|
||||
if self.zero_shot:
|
||||
lX = self.zero_shot_experiments(lX)
|
||||
lX = {l: self.vectorizer.vectorizer[l].transform(lX[l]) for l in self.langs if lX[l] is not None}
|
||||
else:
|
||||
lX = self.vectorizer.transform(lX)
|
||||
XdotMUSE = Parallel(n_jobs=self.n_jobs)(
|
||||
delayed(XdotM)(lX[lang], self.lMuse[lang], sif=True) for lang in self.langs)
|
||||
lZ = {lang: XdotMUSE[i] for i, lang in enumerate(self.langs)}
|
||||
delayed(XdotM)(lX[lang], self.lMuse[lang], sif=True) for lang in sorted(lX.keys()))
|
||||
lZ = {lang: XdotMUSE[i] for i, lang in enumerate(sorted(lX.keys()))}
|
||||
lZ = _normalize(lZ, l2=True)
|
||||
return lZ
|
||||
|
||||
def fit_transform(self, lX, ly):
|
||||
return self.fit(lX, ly).transform(lX)
|
||||
|
||||
def zero_shot_experiments(self, lX):
|
||||
_lX = {}
|
||||
for lang in self.langs:
|
||||
if lang in self.train_langs:
|
||||
_lX[lang] = lX[lang]
|
||||
else:
|
||||
_lX[lang] = None
|
||||
lX = _lX
|
||||
return lX
|
||||
|
||||
def set_zero_shot(self, val: bool):
|
||||
self.zero_shot = val
|
||||
return
|
||||
|
||||
|
||||
class WordClassGen(ViewGen):
|
||||
"""
|
||||
View Generator (w): generates document representation via Word-Class-Embeddings.
|
||||
Document embeddings are obtained via weighted sum of document's constituent embeddings.
|
||||
"""
|
||||
def __init__(self, n_jobs=-1):
|
||||
def __init__(self, zero_shot=False, train_langs: list = None, n_jobs=-1):
|
||||
"""
|
||||
Init WordClassGen.
|
||||
:param n_jobs: int, number of concurrent workers
|
||||
|
@ -161,6 +215,11 @@ class WordClassGen(ViewGen):
|
|||
self.langs = None
|
||||
self.lWce = None
|
||||
self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
|
||||
# Zero shot parameters
|
||||
self.zero_shot = zero_shot
|
||||
if train_langs is None:
|
||||
train_langs = ['it']
|
||||
self.train_langs = train_langs
|
||||
|
||||
def fit(self, lX, ly):
|
||||
"""
|
||||
|
@ -169,9 +228,16 @@ class WordClassGen(ViewGen):
|
|||
:param ly: dict {lang: target vectors}
|
||||
:return: self.
|
||||
"""
|
||||
print('# Fitting WordClassGen (W)...')
|
||||
print('\n# Fitting WordClassGen (W)...')
|
||||
if self.zero_shot:
|
||||
print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')
|
||||
self.langs = sorted(self.train_langs)
|
||||
lX = self.zero_shot_experiments(lX)
|
||||
lX = self.vectorizer.fit_transform(lX)
|
||||
else:
|
||||
lX = self.vectorizer.fit_transform(lX)
|
||||
self.langs = sorted(lX.keys())
|
||||
|
||||
wce = Parallel(n_jobs=self.n_jobs)(
|
||||
delayed(wce_matrix)(lX[lang], ly[lang]) for lang in self.langs)
|
||||
self.lWce = {l: wce[i] for i, l in enumerate(self.langs)}
|
||||
|
@ -187,14 +253,28 @@ class WordClassGen(ViewGen):
|
|||
"""
|
||||
lX = self.vectorizer.transform(lX)
|
||||
XdotWce = Parallel(n_jobs=self.n_jobs)(
|
||||
delayed(XdotM)(lX[lang], self.lWce[lang], sif=True) for lang in self.langs)
|
||||
lWce = {l: XdotWce[i] for i, l in enumerate(self.langs)}
|
||||
delayed(XdotM)(lX[lang], self.lWce[lang], sif=True) for lang in sorted(lX.keys()) if lang in self.lWce.keys())
|
||||
lWce = {l: XdotWce[i] for i, l in enumerate(sorted(lX.keys())) if l in self.lWce.keys()}
|
||||
lWce = _normalize(lWce, l2=True)
|
||||
return lWce
|
||||
|
||||
def fit_transform(self, lX, ly):
|
||||
return self.fit(lX, ly).transform(lX)
|
||||
|
||||
def zero_shot_experiments(self, lX):
|
||||
_lX = {}
|
||||
for lang in self.langs:
|
||||
if lang in self.train_langs:
|
||||
_lX[lang] = lX[lang]
|
||||
else:
|
||||
_lX[lang] = None
|
||||
lX = _lX
|
||||
return lX
|
||||
|
||||
def set_zero_shot(self, val: bool):
|
||||
self.zero_shot = val
|
||||
return
|
||||
|
||||
|
||||
class RecurrentGen(ViewGen):
|
||||
"""
|
||||
|
@ -204,7 +284,7 @@ class RecurrentGen(ViewGen):
|
|||
the network internal state at the second feed-forward layer level. Training metrics are logged via TensorBoard.
|
||||
"""
|
||||
def __init__(self, multilingualIndex, pretrained_embeddings, wce, batch_size=512, nepochs=50,
|
||||
gpus=0, n_jobs=-1, patience=20, stored_path=None):
|
||||
gpus=0, n_jobs=-1, patience=20, stored_path=None, zero_shot=False, train_langs: list = None):
|
||||
"""
|
||||
Init RecurrentGen.
|
||||
:param multilingualIndex: MultilingualIndex, it is a dictionary of training and test documents
|
||||
|
@ -238,11 +318,17 @@ class RecurrentGen(ViewGen):
|
|||
self.multilingualIndex.train_val_split(val_prop=0.2, max_val=2000, seed=1)
|
||||
self.multilingualIndex.embedding_matrices(self.pretrained, supervised=self.wce)
|
||||
self.model = self._init_model()
|
||||
self.logger = TensorBoardLogger(save_dir='../tb_logs', name='rnn', default_hp_metric=False)
|
||||
self.logger = TensorBoardLogger(save_dir='tb_logs', name='rnn', default_hp_metric=False)
|
||||
self.early_stop_callback = EarlyStopping(monitor='val-macroF1', min_delta=0.00,
|
||||
patience=self.patience, verbose=False, mode='max')
|
||||
self.lr_monitor = LearningRateMonitor(logging_interval='epoch')
|
||||
|
||||
# Zero shot parameters
|
||||
self.zero_shot = zero_shot
|
||||
if train_langs is None:
|
||||
train_langs = ['it']
|
||||
self.train_langs = train_langs
|
||||
|
||||
def _init_model(self):
|
||||
if self.stored_path:
|
||||
lpretrained = self.multilingualIndex.l_embeddings()
|
||||
|
@ -275,18 +361,16 @@ class RecurrentGen(ViewGen):
|
|||
:param ly: dict {lang: target vectors}
|
||||
:return: self.
|
||||
"""
|
||||
print('# Fitting RecurrentGen (G)...')
|
||||
print('\n# Fitting RecurrentGen (G)...')
|
||||
create_if_not_exist(self.logger.save_dir)
|
||||
recurrentDataModule = RecurrentDataModule(self.multilingualIndex, batchsize=self.batch_size, n_jobs=self.n_jobs)
|
||||
recurrentDataModule = RecurrentDataModule(self.multilingualIndex, batchsize=self.batch_size, n_jobs=self.n_jobs,
|
||||
zero_shot=self.zero_shot, zscl_langs=self.train_langs)
|
||||
trainer = Trainer(gradient_clip_val=1e-1, gpus=self.gpus, logger=self.logger, max_epochs=self.nepochs,
|
||||
callbacks=[self.early_stop_callback, self.lr_monitor], checkpoint_callback=False)
|
||||
callbacks=[self.early_stop_callback, self.lr_monitor], checkpoint_callback=False,
|
||||
overfit_batches=0.01)
|
||||
|
||||
# vanilla_torch_model = torch.load(
|
||||
# '../_old_checkpoint/gru_viewgen_-rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle')
|
||||
# self.model.linear0 = vanilla_torch_model.linear0
|
||||
# self.model.linear1 = vanilla_torch_model.linear1
|
||||
# self.model.linear2 = vanilla_torch_model.linear2
|
||||
# self.model.rnn = vanilla_torch_model.rnn
|
||||
if self.zero_shot:
|
||||
print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')
|
||||
|
||||
trainer.fit(self.model, datamodule=recurrentDataModule)
|
||||
trainer.test(self.model, datamodule=recurrentDataModule)
|
||||
|
@ -298,6 +382,8 @@ class RecurrentGen(ViewGen):
|
|||
:param lX: dict {lang: indexed documents}
|
||||
:return: documents projected to the common latent space.
|
||||
"""
|
||||
if self.zero_shot:
|
||||
lX = self.zero_shot_experiments(lX)
|
||||
data = {}
|
||||
for lang in lX.keys():
|
||||
indexed = index(data=lX[lang],
|
||||
|
@ -316,6 +402,16 @@ class RecurrentGen(ViewGen):
|
|||
def fit_transform(self, lX, ly):
|
||||
return self.fit(lX, ly).transform(lX)
|
||||
|
||||
def zero_shot_experiments(self, lX):
|
||||
for lang in sorted(lX.keys()):
|
||||
if lang not in self.train_langs:
|
||||
lX.pop(lang)
|
||||
return lX
|
||||
|
||||
def set_zero_shot(self, val: bool):
|
||||
self.zero_shot = val
|
||||
return
|
||||
|
||||
|
||||
class BertGen(ViewGen):
|
||||
"""
|
||||
|
@ -323,7 +419,8 @@ class BertGen(ViewGen):
|
|||
At inference time, the model returns the network internal state at the last original layer (i.e. 12th). Document
|
||||
embeddings are the state associated with the "start" token. Training metrics are logged via TensorBoard.
|
||||
"""
|
||||
def __init__(self, multilingualIndex, batch_size=128, nepochs=50, gpus=0, n_jobs=-1, patience=5, stored_path=None):
|
||||
def __init__(self, multilingualIndex, batch_size=128, nepochs=50, gpus=0, n_jobs=-1, patience=5, stored_path=None,
|
||||
zero_shot=False, train_langs: list = None):
|
||||
"""
|
||||
Init Bert model
|
||||
:param multilingualIndex: MultilingualIndex, it is a dictionary of training and test documents
|
||||
|
@ -344,10 +441,20 @@ class BertGen(ViewGen):
|
|||
self.stored_path = stored_path
|
||||
self.model = self._init_model()
|
||||
self.patience = patience
|
||||
self.logger = TensorBoardLogger(save_dir='../tb_logs', name='bert', default_hp_metric=False)
|
||||
# self.logger = TensorBoardLogger(save_dir='tb_logs', name='bert', default_hp_metric=False)
|
||||
self.logger = CSVLogger(save_dir='csv_logs', name='bert')
|
||||
self.early_stop_callback = EarlyStopping(monitor='val-macroF1', min_delta=0.00,
|
||||
patience=self.patience, verbose=False, mode='max')
|
||||
|
||||
# modifying EarlyStopping global var in order to compute >= with respect to the best score
|
||||
self.early_stop_callback.mode_dict['max'] = torch.ge
|
||||
|
||||
# Zero shot parameters
|
||||
self.zero_shot = zero_shot
|
||||
if train_langs is None:
|
||||
train_langs = ['it']
|
||||
self.train_langs = train_langs
|
||||
|
||||
def _init_model(self):
|
||||
output_size = self.multilingualIndex.get_target_dim()
|
||||
return BertModel(output_size=output_size, stored_path=self.stored_path, gpus=self.gpus)
|
||||
|
@ -361,12 +468,21 @@ class BertGen(ViewGen):
|
|||
:param ly: dict {lang: target vectors}
|
||||
:return: self.
|
||||
"""
|
||||
print('# Fitting BertGen (M)...')
|
||||
print('\n# Fitting BertGen (B)...')
|
||||
create_if_not_exist(self.logger.save_dir)
|
||||
self.multilingualIndex.train_val_split(val_prop=0.2, max_val=2000, seed=1)
|
||||
bertDataModule = BertDataModule(self.multilingualIndex, batchsize=self.batch_size, max_len=512)
|
||||
trainer = Trainer(gradient_clip_val=1e-1, max_epochs=self.nepochs, gpus=self.gpus,
|
||||
logger=self.logger, callbacks=[self.early_stop_callback], checkpoint_callback=False)
|
||||
bertDataModule = BertDataModule(self.multilingualIndex, batchsize=self.batch_size, max_len=512,
|
||||
zero_shot=self.zero_shot, zscl_langs=self.train_langs,
|
||||
debug=False, max_samples=50)
|
||||
|
||||
if self.zero_shot:
|
||||
print(f'# Zero-shot setting! Training langs will be set to: {sorted(self.train_langs)}')
|
||||
|
||||
trainer = Trainer(max_epochs=self.nepochs, gpus=self.gpus,
|
||||
logger=self.logger,
|
||||
callbacks=[self.early_stop_callback],
|
||||
checkpoint_callback=False)
|
||||
|
||||
trainer.fit(self.model, datamodule=bertDataModule)
|
||||
trainer.test(self.model, datamodule=bertDataModule)
|
||||
return self
|
||||
|
@ -377,6 +493,8 @@ class BertGen(ViewGen):
|
|||
:param lX: dict {lang: indexed documents}
|
||||
:return: documents projected to the common latent space.
|
||||
"""
|
||||
if self.zero_shot:
|
||||
lX = self.zero_shot_experiments(lX)
|
||||
data = tokenize(lX, max_len=512)
|
||||
self.model.to('cuda' if self.gpus else 'cpu')
|
||||
self.model.eval()
|
||||
|
@ -386,3 +504,13 @@ class BertGen(ViewGen):
|
|||
def fit_transform(self, lX, ly):
|
||||
# we can assume that we have already indexed data for transform() since we are first calling fit()
|
||||
return self.fit(lX, ly).transform(lX)
|
||||
|
||||
def zero_shot_experiments(self, lX):
|
||||
for lang in sorted(lX.keys()):
|
||||
if lang not in self.train_langs:
|
||||
lX.pop(lang)
|
||||
return lX
|
||||
|
||||
def set_zero_shot(self, val: bool):
|
||||
self.zero_shot = val
|
||||
return