optimized imports
This commit is contained in:
parent
5958df3e3c
commit
2a8075bbc2
|
|
@ -1,7 +1,7 @@
|
|||
import torch
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
import numpy as np
|
||||
import pytorch_lightning as pl
|
||||
import torch
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from transformers import BertTokenizer
|
||||
|
||||
N_WORKERS = 8
|
||||
|
|
|
|||
|
|
@ -1,19 +1,21 @@
|
|||
from os.path import join, exists
|
||||
from nltk.corpus import stopwords
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||
from sklearn.preprocessing import MultiLabelBinarizer
|
||||
from data.reader.jrcacquis_reader import *
|
||||
from data.languages import lang_set, NLTK_LANGMAP, RCV2_LANGS_WITH_NLTK_STEMMING
|
||||
from data.reader.rcv_reader import fetch_RCV1, fetch_RCV2, fetch_topic_hierarchy
|
||||
from data.text_preprocessor import NLTKStemTokenizer, preprocess_documents
|
||||
import pickle
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
from scipy.sparse import issparse
|
||||
import itertools
|
||||
from tqdm import tqdm
|
||||
import pickle
|
||||
import re
|
||||
from os.path import exists
|
||||
|
||||
import numpy as np
|
||||
from nltk.corpus import stopwords
|
||||
from scipy.sparse import csr_matrix
|
||||
from scipy.sparse import issparse
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import MultiLabelBinarizer
|
||||
from tqdm import tqdm
|
||||
|
||||
from data.languages import NLTK_LANGMAP, RCV2_LANGS_WITH_NLTK_STEMMING
|
||||
from data.reader.jrcacquis_reader import *
|
||||
from data.reader.rcv_reader import fetch_RCV1, fetch_RCV2
|
||||
from data.text_preprocessor import NLTKStemTokenizer, preprocess_documents
|
||||
|
||||
|
||||
class MultilingualDataset:
|
||||
|
|
|
|||
|
|
@ -1,19 +1,22 @@
|
|||
from __future__ import print_function
|
||||
import os, sys
|
||||
from os.path import join
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
import tarfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from sklearn.datasets import get_data_home
|
||||
import pickle
|
||||
from util.file import download_file, list_dirs, list_files
|
||||
import zipfile
|
||||
from collections import Counter
|
||||
from os.path import join
|
||||
from random import shuffle
|
||||
|
||||
import rdflib
|
||||
from rdflib.namespace import RDF, SKOS
|
||||
from rdflib import URIRef
|
||||
import zipfile
|
||||
from sklearn.datasets import get_data_home
|
||||
|
||||
from data.languages import JRC_LANGS
|
||||
from collections import Counter
|
||||
from random import shuffle
|
||||
from data.languages import lang_set
|
||||
from util.file import download_file, list_dirs, list_files
|
||||
|
||||
"""
|
||||
JRC Acquis' Nomenclature:
|
||||
|
|
|
|||
|
|
@ -1,15 +1,12 @@
|
|||
from zipfile import ZipFile
|
||||
import xml.etree.ElementTree as ET
|
||||
from data.languages import RCV2_LANGS_WITH_NLTK_STEMMING, RCV2_LANGS
|
||||
from util.file import list_files
|
||||
from sklearn.datasets import get_data_home
|
||||
import gzip
|
||||
from os.path import join, exists
|
||||
from util.file import download_file_if_not_exists
|
||||
import re
|
||||
from collections import Counter
|
||||
import xml.etree.ElementTree as ET
|
||||
from os.path import join, exists
|
||||
from zipfile import ZipFile
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
from util.file import download_file_if_not_exists
|
||||
from util.file import list_files
|
||||
|
||||
"""
|
||||
RCV2's Nomenclature:
|
||||
|
|
|
|||
|
|
@ -1,16 +1,19 @@
|
|||
from __future__ import print_function
|
||||
|
||||
# import ijson
|
||||
# from ijson.common import ObjectBuilder
|
||||
import os, sys
|
||||
from os.path import join
|
||||
from bz2 import BZ2File
|
||||
import os
|
||||
import pickle
|
||||
from util.file import list_dirs, list_files, makedirs_if_not_exist
|
||||
from itertools import islice
|
||||
import re
|
||||
from bz2 import BZ2File
|
||||
from itertools import islice
|
||||
from os.path import join
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
import numpy as np
|
||||
|
||||
from util.file import list_dirs, list_files
|
||||
|
||||
policies = ["IN_ALL_LANGS", "IN_ANY_LANG"]
|
||||
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
from nltk.corpus import stopwords
|
||||
from data.languages import NLTK_LANGMAP
|
||||
from nltk import word_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.stem import SnowballStemmer
|
||||
|
||||
from data.languages import NLTK_LANGMAP
|
||||
|
||||
|
||||
def preprocess_documents(documents, lang):
|
||||
tokens = NLTKStemTokenizer(lang, verbose=True)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import math
|
||||
|
||||
import numpy as np
|
||||
from scipy.stats import t
|
||||
from joblib import Parallel, delayed
|
||||
from scipy.sparse import csr_matrix, csc_matrix
|
||||
from scipy.stats import t
|
||||
|
||||
|
||||
def get_probs(tpr, fpr, pc):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from models.learners import *
|
||||
from view_generators import VanillaFunGen
|
||||
from util.common import _normalize
|
||||
from view_generators import VanillaFunGen
|
||||
|
||||
|
||||
class DocEmbedderList:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
from argparse import ArgumentParser
|
||||
from funnelling import *
|
||||
from view_generators import *
|
||||
|
||||
from data.dataset_builder import MultilingualDataset
|
||||
from funnelling import *
|
||||
from util.common import MultilingualIndex, get_params, get_method_name
|
||||
from util.evaluation import evaluate
|
||||
from util.results_csv import CSVlog
|
||||
from time import time
|
||||
from view_generators import *
|
||||
|
||||
|
||||
def main(args):
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import numpy as np
|
||||
import time
|
||||
from scipy.sparse import issparse
|
||||
from sklearn.multiclass import OneVsRestClassifier
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.svm import SVC
|
||||
|
||||
import numpy as np
|
||||
from joblib import Parallel, delayed
|
||||
from scipy.sparse import issparse
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.multiclass import OneVsRestClassifier
|
||||
from sklearn.svm import SVC
|
||||
|
||||
from util.standardizer import StandardizeTransformer
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#taken from https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM.py
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.autograd import Variable
|
||||
|
||||
from models.helpers import *
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
import torch
|
||||
import pytorch_lightning as pl
|
||||
import torch
|
||||
from torch.optim.lr_scheduler import StepLR
|
||||
from transformers import BertForSequenceClassification, AdamW
|
||||
from util.pl_metrics import CustomF1, CustomK
|
||||
|
||||
from util.common import define_pad_length, pad
|
||||
from util.pl_metrics import CustomF1, CustomK
|
||||
|
||||
|
||||
class BertModel(pl.LightningModule):
|
||||
|
|
|
|||
|
|
@ -1,14 +1,15 @@
|
|||
# Lightning modules, see https://pytorch-lightning.readthedocs.io/en/latest/lightning_module.html
|
||||
import pytorch_lightning as pl
|
||||
import torch
|
||||
from torch import nn
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from torch.autograd import Variable
|
||||
from torch.optim.lr_scheduler import StepLR
|
||||
from transformers import AdamW
|
||||
import pytorch_lightning as pl
|
||||
|
||||
from models.helpers import init_embeddings
|
||||
from util.pl_metrics import CustomF1, CustomK
|
||||
from util.common import define_pad_length, pad
|
||||
from util.pl_metrics import CustomF1, CustomK
|
||||
|
||||
|
||||
class RecurrentModel(pl.LightningModule):
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import numpy as np
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.preprocessing import normalize
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import normalize
|
||||
|
||||
from util.embeddings_manager import supervised_embeddings_tfidf
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
from torchtext.vocab import Vectors
|
||||
import torch
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from torchtext.vocab import Vectors
|
||||
|
||||
from util.SIF_embed import remove_pc
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from joblib import Parallel, delayed
|
||||
from util.metrics import *
|
||||
import numpy as np
|
||||
from joblib import Parallel, delayed
|
||||
|
||||
from util.metrics import *
|
||||
|
||||
|
||||
def evaluation_metrics(y, y_):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import urllib
|
||||
from os import listdir, makedirs
|
||||
from os.path import isdir, isfile, join, exists, dirname
|
||||
import urllib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import torch
|
||||
from pytorch_lightning.metrics import Metric
|
||||
|
||||
from util.common import is_false, is_true
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import pandas as pd
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class CSVlog:
|
||||
|
|
|
|||
|
|
@ -16,16 +16,18 @@ This module contains the view generators that take care of computing the view sp
|
|||
- View generator (-b): generates document embedding via mBERT model.
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from models.learners import *
|
||||
from util.embeddings_manager import MuseLoader, XdotM, wce_matrix
|
||||
from util.common import TfidfVectorizerMultilingual, _normalize
|
||||
from models.pl_gru import RecurrentModel
|
||||
from models.pl_bert import BertModel
|
||||
from pytorch_lightning import Trainer
|
||||
from data.datamodule import RecurrentDataModule, BertDataModule, tokenize
|
||||
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger
|
||||
from time import time
|
||||
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.loggers import TensorBoardLogger
|
||||
|
||||
from data.datamodule import RecurrentDataModule, BertDataModule, tokenize
|
||||
from models.learners import *
|
||||
from models.pl_bert import BertModel
|
||||
from models.pl_gru import RecurrentModel
|
||||
from util.common import TfidfVectorizerMultilingual, _normalize
|
||||
from util.embeddings_manager import MuseLoader, XdotM, wce_matrix
|
||||
|
||||
|
||||
class ViewGen(ABC):
|
||||
@abstractmethod
|
||||
|
|
|
|||
Loading…
Reference in New Issue