Refactoring embed method into class StorageEmbeddings; refactoring class EmbeddingsAligned.
tSVD and t-SNE for supervised embeddings.
parent cf29826a32
commit 4de6b3e250
@@ -11,7 +11,8 @@ from sklearn.svm import SVC
 parser = OptionParser()

 parser.add_option("-d", "--dataset", dest="dataset",
-                  help="Path to the multilingual dataset processed and stored in .pickle format")
+                  help="Path to the multilingual dataset processed and stored in .pickle format",
+                  default="/home/moreo/CLESA/rcv2/rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle")

 parser.add_option("-o", "--output", dest="output",
                   help="Result file", type=str, default='./results/results.csv')
@@ -23,7 +24,7 @@ parser.add_option("-w", "--we-path", dest="we_path",
                   help="Path to the polylingual word embeddings", default='../embeddings/')

 parser.add_option('-t', "--we-type", dest="we_type", help="Aligned embeddings to use [FastText, MUSE]", type=str,
-                  default='FastText')
+                  default='MUSE')

 parser.add_option("-s", "--set_c", dest="set_c", type=float,
                   help="Set the C parameter", default=1)
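A note for orientation: the options above use the standard library's optparse. A standalone sketch of how two of them parse (values here are placeholders, not taken from the commit):

```python
from optparse import OptionParser

# Hypothetical replay of two of the options defined in the hunk above.
parser = OptionParser()
parser.add_option('-t', "--we-type", dest="we_type", type=str, default='MUSE')
parser.add_option("-s", "--set_c", dest="set_c", type=float, default=1)
(op, args) = parser.parse_args(['-t', 'FastText', '-s', '0.5'])
print(op.we_type, op.set_c)  # -> FastText 0.5
```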
@@ -36,7 +37,7 @@ parser.add_option("-j", "--n_jobs", dest="n_jobs", type=int,


 def get_learner(calibrate=False, kernel='linear'):
-    return SVC(kernel=kernel, probability=calibrate, cache_size=1000, C=op.set_c, random_state=1, class_weight='balanced')
+    return SVC(kernel=kernel, probability=calibrate, cache_size=1000, C=op.set_c, random_state=1, class_weight='balanced', gamma='auto')


 def get_params(dense=False):
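On the `gamma='auto'` addition: scikit-learn 0.20 started warning that SVC's default gamma would switch from 'auto' (1/n_features) to 'scale' in 0.22, so pinning it keeps results reproducible across versions; note that gamma only affects the rbf, poly and sigmoid kernels, not the default linear one used here. A minimal standalone sketch (C hard-coded in place of op.set_c):

```python
from sklearn.svm import SVC

# Pinning gamma avoids the sklearn 0.20-era FutureWarning and freezes the
# 1 / n_features behaviour; it is a no-op while kernel stays 'linear'.
clf = SVC(kernel='linear', probability=False, cache_size=1000,
          C=1.0, random_state=1, class_weight='balanced', gamma='auto')
```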
@@ -64,6 +65,7 @@ if __name__ == '__main__':
     data.show_dimensions()

     # data.set_view(languages=['en','it'], categories=list(range(10)))
+    # data.set_view(languages=['en','it'])
     lXtr, lytr = data.training()
     lXte, lyte = data.test()

@@ -100,6 +102,10 @@ if __name__ == '__main__':
               'we_type': op.we_type}
     _config_id = 'M_and_F'

+    ##### TODO - config dict is redundant - we have already op argparse ...
+    config['reduction'] = 'tSVD'
+    config['max_label_space'] = 50
+
     result_id = dataset_file + 'PolyEmbedd_andrea_' + _config_id + ('_optimC' if op.optimc else '')

     print(f'### PolyEmbedd_andrea_{_config_id}\n')
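The config dict now also carries the reduction settings consumed by get_supervised_embeddings via StorageEmbeddings (see the embeddings hunks below). Pieced together from the keys read elsewhere in this commit, it plausibly looks like this sketch (values illustrative):

```python
# Illustrative only: these keys are the ones read by StorageEmbeddings.fit
# and AndreaCLF.fit in the hunks below; the values are placeholders.
config = {
    'unsupervised': True,       # build the pretrained (U) matrices
    'supervised': True,         # build the word-class (S) matrices
    'we_type': 'MUSE',          # op.we_type: 'FastText' or 'MUSE'
    'reduction': 'tSVD',        # 'PCA' | 'TSNE' | 'tSVD'
    'max_label_space': 50,      # target dimensionality for S
}
```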
@@ -114,7 +120,7 @@ if __name__ == '__main__':
     print('# Fitting ...')
     classifier.fit(lXtr, lytr)

-    print('# Evaluating ...')
+    print('\n# Evaluating ...')
     l_eval = evaluate_method(classifier, lXte, lyte)

     metrics = []

@@ -5,6 +5,7 @@ from torchtext.vocab import Vectors
 import torch
 from abc import ABC, abstractmethod
 from data.supervised import get_supervised_embeddings
+from sklearn.decomposition import PCA


 class PretrainedEmbeddings(ABC):
@@ -157,16 +158,41 @@ class FastTextWikiNews(Vectors):
         super(FastTextWikiNews, self).__init__(name, cache=cache, url=url, **kwargs)


+# class EmbeddingsAligned(Vectors):
+#
+#     def __init__(self, type, path, lang):
+#
+#         self.name = '/embeddings/wiki.multi.{}.vec' if type == 'MUSE' else '/embeddings_polyFASTTEXT/wiki.{}.align.vec'
+#         # todo - rewrite as relative path
+#         self.cache_path = '/home/andreapdr/CLESA/embeddings' if type == 'MUSE' else '/home/andreapdr/CLESA/embeddings_polyFASTTEXT'
+#         self.path = path + self.name.format(lang)
+#         assert os.path.exists(path), f'pre-trained vectors not found in {path}'
+#         super(EmbeddingsAligned, self).__init__(self.path, cache=self.cache_path)
+#         # self.vectors = self.extract(voc)
+#
+#     def vocabulary(self):
+#         return set(self.stoi.keys())
+#
+#     def dim(self):
+#         return self.dim
+#
+#     def extract(self, words):
+#         source_idx, target_idx = PretrainedEmbeddings.reindex(words, self.stoi)
+#         extraction = torch.zeros((len(words), self.dim))
+#         extraction[source_idx] = self.vectors[target_idx]
+#         return extraction
+

 class EmbeddingsAligned(Vectors):

-    def __init__(self, type, path, lang):
+    def __init__(self, type, path, lang, voc):

-        self.name = '/embeddings/wiki.multi.{}.vec' if type == 'MUSE' else '/embeddings_polyFASTTEXT/wiki.{}.align.vec'
         # todo - rewrite as relative path
+        self.name = '/embeddings/wiki.multi.{}.vec' if type == 'MUSE' else '/embeddings_polyFASTTEXT/wiki.{}.align.vec'
         self.cache_path = '/home/andreapdr/CLESA/embeddings' if type == 'MUSE' else '/home/andreapdr/CLESA/embeddings_polyFASTTEXT'
         self.path = path + self.name.format(lang)
         assert os.path.exists(path), f'pre-trained vectors not found in {path}'
         super(EmbeddingsAligned, self).__init__(self.path, cache=self.cache_path)
+        self.vectors = self.extract(voc)

     def vocabulary(self):
         return set(self.stoi.keys())
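The substantive change here: EmbeddingsAligned now receives the corpus vocabulary voc and immediately restricts its vector table to it (self.vectors = self.extract(voc)). As the commented-out extract above shows, reindex maps each corpus word to its row in the pretrained matrix, and out-of-vocabulary words keep all-zero rows. A self-contained sketch of that index-mapped copy with toy data:

```python
import torch

# Toy stand-ins: 4 pretrained vectors of dim 3; a corpus vocabulary of 3
# words, one of which ('xyz') is out-of-vocabulary.
pretrained = torch.arange(12, dtype=torch.float32).reshape(4, 3)
stoi = {'the': 0, 'cat': 1, 'sat': 2, 'mat': 3}
corpus_vocab = ['cat', 'xyz', 'mat']

# Same idea as PretrainedEmbeddings.reindex: for every corpus word present
# in stoi, pair (row in extraction, row in pretrained).
pairs = [(i, stoi[w]) for i, w in enumerate(corpus_vocab) if w in stoi]
source_idx, target_idx = map(list, zip(*pairs))

extraction = torch.zeros(len(corpus_vocab), pretrained.shape[1])
extraction[source_idx] = pretrained[target_idx]
# Row 1 ('xyz') stays all-zero; rows 0 and 2 hold the 'cat'/'mat' vectors.
```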
@@ -203,20 +229,69 @@ class FastTextMUSE(PretrainedEmbeddings):
         return extraction


-def embedding_matrix(type, path, voc, lang):
-    vocabulary = np.asarray(list(zip(*sorted(voc.items(), key=lambda x: x[1])))[0])
-
-    print('[embedding matrix]')
-    print(f'# [pretrained-matrix: {type} {lang}]')
-    pretrained = EmbeddingsAligned(type, path, lang)
-    P = pretrained.extract(vocabulary).numpy()
-    del pretrained
-    print(f'[embedding matrix done] of shape={P.shape}\n')
-
-    return vocabulary, P
+class StorageEmbeddings:
+    def __init__(self, path):
+        self.path = path
+        self.lang_U = dict()
+        self.lang_S = dict()
+
+    def _add_embeddings_unsupervised(self, type, docs, vocs):
+        for lang in docs.keys():
+            print(f'# [unsupervised-matrix {type}] for {lang}')
+            voc = np.asarray(list(zip(*sorted(vocs[lang].items(), key=lambda x: x[1])))[0])
+            self.lang_U[lang] = EmbeddingsAligned(type, self.path, lang, voc).vectors
+            print(f'Matrix U (weighted sum) of shape {self.lang_U[lang].shape}\n')
+        return
+
+    def _add_emebeddings_supervised(self, docs, labels, reduction, max_label_space):
+        for lang in docs.keys():
+            print(f'# [supervised-matrix] for {lang}')
+            # should also pass max_label_space and reduction techniques
+            self.lang_S[lang] = get_supervised_embeddings(docs[lang], labels[lang], reduction, max_label_space)
+            print(f'[embedding matrix done] of shape={self.lang_S[lang].shape}\n')
+        return
+
+    def _concatenate_embeddings(self, docs):
+        _r = dict()
+        for lang in self.lang_U.keys():
+            _r[lang] = np.hstack((docs[lang].dot(self.lang_U[lang]), docs[lang].dot(self.lang_S[lang])))
+        return _r
+
+    def fit(self, config, docs, vocs, labels):
+        if config['unsupervised']:
+            self._add_embeddings_unsupervised(config['we_type'], docs, vocs)
+        if config['supervised']:
+            self._add_emebeddings_supervised(docs, labels, config['reduction'], config['max_label_space'])
+        return self
+
+    def predict(self, config, docs):
+        if config['supervised'] and config['unsupervised']:
+            return self._concatenate_embeddings(docs)
+        elif config['supervised']:
+            _r = dict()
+            for lang in docs.keys():
+                _r[lang] = docs[lang].dot(self.lang_S[lang])
+        else:
+            _r = dict()
+            for lang in docs.keys():
+                _r[lang] = docs[lang].dot(self.lang_U[lang])
+        return _r
+
+
+# def embedding_matrix(type, path, voc, lang):
+#     vocabulary = np.asarray(list(zip(*sorted(voc.items(), key=lambda x: x[1])))[0])
+#
+#     print('[embedding matrix]')
+#     print(f'# [pretrained-matrix: {type} {lang}]')
+#     pretrained = EmbeddingsAligned(type, path, lang)
+#     P = pretrained.extract(vocabulary).numpy()
+#     del pretrained
+#     print(f'[embedding matrix done] of shape={P.shape}\n')
+#
+#     return vocabulary, P


-def WCE_matrix(Xtr, Ytr, lang):
+def WCE_matrix(Xtr, Ytr, lang, reduction=None, n_components=50):
     print('\n# [supervised-matrix]')
     S = get_supervised_embeddings(Xtr[lang], Ytr[lang])
     print(f'[embedding matrix done] of shape={S.shape}\n')
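StorageEmbeddings centralizes what the (now commented-out) embed method did inline: fit builds, per language, the unsupervised matrix U (aligned pretrained vectors restricted to the corpus vocabulary) and/or the supervised matrix S (word-class embeddings), and predict projects tf-idf document matrices through them. The projection is a plain matrix product: a row of tf-idf weights times a vocab-by-dim matrix yields a weighted sum of word vectors. A toy sketch of the shapes involved:

```python
import numpy as np

# Toy shapes: 10 documents, 5-word vocabulary, 3-dim embeddings.
X = np.random.rand(10, 5)   # tf-idf document-term matrix for one language
U = np.random.rand(5, 3)    # per-word embedding matrix (vocab x dim)
doc_vectors = X.dot(U)      # (10, 3): tf-idf-weighted sum of word vectors

# StorageEmbeddings applies exactly this product per language in predict();
# with both flags on it returns np.hstack((X.dot(U), X.dot(S))). Usage as
# wired up in AndreaCLF.fit below:
#   storage = StorageEmbeddings(we_path).fit(config, lX, lang_word2idx, ly)
#   lX_emb = storage.predict(config, lX)   # dict: lang -> (docs x dims)
```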
@@ -1,6 +1,6 @@
 from data.tsr_function__ import get_supervised_matrix, get_tsr_matrix, information_gain, chi_square
-# from util.common import *
-from sklearn.decomposition import PCA
+from sklearn.decomposition import PCA, TruncatedSVD
+from sklearn.manifold import TSNE
 import numpy as np

@@ -40,7 +40,7 @@ def supervised_embeddings_tsr(X,Y, tsr_function=information_gain, max_documents=
     return F


-def get_supervised_embeddings(X, Y, max_label_space=300, binary_structural_problems=-1, method='dotn', dozscore=True):
+def get_supervised_embeddings(X, Y, reduction, max_label_space=300, binary_structural_problems=-1, method='dotn', dozscore=True):
     print('computing supervised embeddings...')

     nC = Y.shape[1]
@@ -60,10 +60,21 @@ def get_supervised_embeddings(X, Y, max_label_space=300, binary_structural_probl
         F = zscores(F, axis=0)

     if nC > max_label_space:
-        print(f'supervised matrix has more dimensions ({nC}) than the allowed limit {max_label_space}. '
-              f'Applying PCA(n_components={max_label_space})')
-        pca = PCA(n_components=max_label_space)
-        F = pca.fit(F).transform(F)
+        if reduction == 'PCA':
+            print(f'supervised matrix has more dimensions ({nC}) than the allowed limit {max_label_space}. '
+                  f'Applying PCA(n_components={max_label_space})')
+            pca = PCA(n_components=max_label_space)
+            F = pca.fit(F).transform(F)
+        elif reduction == 'TSNE':
+            print(f'supervised matrix has more dimensions ({nC}) than the allowed limit {max_label_space}. '
+                  f'Applying t-SNE(n_components={max_label_space})')
+            tsne = TSNE(n_components=max_label_space)
+            F = tsne.fit(F).fit_transform(F)
+        elif reduction == 'tSVD':
+            print(f'supervised matrix has more dimensions ({nC}) than the allowed limit {max_label_space}. '
+                  f'Applying truncatedSVD(n_components={max_label_space})')
+            tSVD = TruncatedSVD(n_components=max_label_space)
+            F = tSVD.fit(F).fit_transform(F)

     return F
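Two caveats on the new reduction branches, shown corrected in the standalone sketch below (which is not the repository's code): fit(F).fit_transform(F) fits the estimator twice and discards the first fit, so fit_transform(F) alone suffices; and scikit-learn's TSNE only supports n_components up to 3 under its default barnes_hut method, so max_label_space=50 would require method='exact' (TSNE also has no transform for unseen data, unlike PCA and TruncatedSVD):

```python
import numpy as np
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import TSNE

def reduce_label_space(F, reduction, n_components):
    """Standalone sketch of the dispatch above, with the double fit removed."""
    if reduction == 'PCA':
        return PCA(n_components=n_components).fit_transform(F)
    elif reduction == 'tSVD':
        return TruncatedSVD(n_components=n_components).fit_transform(F)
    elif reduction == 'TSNE':
        # the default barnes_hut method only allows n_components <= 3
        return TSNE(n_components=n_components, method='exact').fit_transform(F)
    return F

F = np.random.rand(200, 80)                      # e.g. vocabulary x labels
print(reduce_label_space(F, 'tSVD', 50).shape)   # (200, 50)
```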
@@ -1,6 +1,6 @@
 import numpy as np
 import time
-from data.embeddings import WordEmbeddings, embedding_matrix, WCE_matrix
+from data.embeddings import WordEmbeddings, WCE_matrix, StorageEmbeddings
 from scipy.sparse import issparse
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.model_selection import GridSearchCV
@@ -458,8 +458,9 @@ class AndreaCLF(FunnellingPolylingualClassifier):
         self.lang_word2idx = dict()
         self.languages = []
         self.lang_tfidf = {}
-        self.word_embeddings = {}
-        self.supervised_embeddings = {}
+        # self.word_embeddings = {}
+        # self.supervised_embeddings = {}
+        self.embedding_space = None
         self.model = None
         self.time = None

@@ -492,42 +493,42 @@ class AndreaCLF(FunnellingPolylingualClassifier):

         return lZ, lYtr

-    def embed(self, lX, ly, unsupervised=False, supervised=False, prediction=False):
-        """
-        build embedding matrix for given language and returns its weighted sum wrt tf-idf score
-        """
-        _r = dict()
-        languages = list(lX.keys())
-
-        if prediction:
-            for lang in languages:
-                if unsupervised:  # If unsupervised embeddings ...
-                    M = self.word_embeddings[lang]
-                    if supervised:  # and also unsupervised --> get both (M) and (S) weighted sum matrices and hstack them
-                        S = self.supervised_embeddings[lang]
-                        _r[lang] = np.hstack((lX[lang].dot(M), lX[lang].dot(S)))
-                        continue
-                    _r[lang] = lX[lang].dot(M)  # if not supervised --> just get weighted sum of unsupervised (M) embeddings
-                else:  # If not unsupervised --> get (S) matrix and its weighted sum
-                    S = self.supervised_embeddings[lang]
-                    _r[lang] = lX[lang].dot(S)
-            return _r
-
-        if unsupervised:
-            for lang in languages:
-                _, M = embedding_matrix(self.config['we_type'], self.we_path, self.lang_word2idx[lang], lang)
-                self.word_embeddings[lang] = M
-                _r[lang] = lX[lang].dot(M)
-
-        if supervised:
-            for lang in languages:
-                S = WCE_matrix(lX, ly, lang)
-                self.supervised_embeddings[lang] = S
-                if unsupervised:
-                    _r[lang] = np.hstack((_r[lang], lX[lang].dot(S)))
-                else:
-                    _r[lang] = lX[lang].dot(S)
-        return _r
+    # def embed(self, lX, ly, unsupervised=False, supervised=False, prediction=False):
+    #     """
+    #     build embedding matrix for given language and returns its weighted sum wrt tf-idf score
+    #     """
+    #     _r = dict()
+    #     languages = list(lX.keys())
+    #
+    #     if prediction:
+    #         for lang in languages:
+    #             if unsupervised:  # If unsupervised embeddings ...
+    #                 M = self.word_embeddings[lang]
+    #                 if supervised:  # and also unsupervised --> get both (M) and (S) weighted sum matrices and hstack them
+    #                     S = self.supervised_embeddings[lang]
+    #                     _r[lang] = np.hstack((lX[lang].dot(M), lX[lang].dot(S)))
+    #                     continue
+    #                 _r[lang] = lX[lang].dot(M)  # if not supervised --> just get weighted sum of unsupervised (M) embeddings
+    #             else:  # If not unsupervised --> get (S) matrix and its weighted sum
+    #                 S = self.supervised_embeddings[lang]
+    #                 _r[lang] = lX[lang].dot(S)
+    #         return _r
+    #
+    #     if unsupervised:
+    #         for lang in languages:
+    #             _, M = embedding_matrix(self.config['we_type'], self.we_path, self.lang_word2idx[lang], lang)
+    #             self.word_embeddings[lang] = M
+    #             _r[lang] = lX[lang].dot(M)
+    #
+    #     if supervised:
+    #         for lang in languages:
+    #             S = WCE_matrix(lX, ly, lang)
+    #             self.supervised_embeddings[lang] = S
+    #             if unsupervised:
+    #                 _r[lang] = np.hstack((_r[lang], lX[lang].dot(S)))
+    #             else:
+    #                 _r[lang] = lX[lang].dot(S)
+    #     return _r

     # @override std class method
     def fit(self, lX, ly):
@@ -541,17 +542,11 @@ class AndreaCLF(FunnellingPolylingualClassifier):
         Z, zy = self._get_zspace(lX, ly)

         if self.config['supervised'] or self.config['unsupervised']:
-            # Z vectors is concatenated with doc's embedding weighted sum
-            Z_embedded = dict()
-            l_weighted_em = self.embed(lX, ly,
-                                       unsupervised=self.config['unsupervised'],
-                                       supervised=self.config['supervised'])
-
-            # stacking Z space horizontally with unsupervised (M) and/or supervised (F) embeddings
-            for lang in list(lX.keys()):
-                Z_embedded[lang] = np.hstack((Z[lang], l_weighted_em[lang]))
-            Z = Z_embedded
-
+            self.embedding_space = StorageEmbeddings(self.we_path).fit(self.config, lX, self.lang_word2idx, ly)
+            _embedding_space = self.embedding_space.predict(self.config, lX)
+            # h_stacking posterior probabilities with (U) and/or (S) matrices
+            for lang in self.languages:
+                Z[lang] = np.hstack((Z[lang], _embedding_space[lang]))

         # stacking Z space vertically
         _vertical_Z = np.vstack([Z[lang] for lang in self.languages])
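The funnelling step in fit now reads in two moves: per language, the first-tier posterior matrix Z[lang] is stacked horizontally with the document-embedding matrix from StorageEmbeddings, then the per-language blocks are stacked vertically into a single training matrix for the meta-classifier. A toy sketch of the two stackings (shapes illustrative):

```python
import numpy as np

langs = ['en', 'it']
n_docs, n_cats, emb_dim = 4, 3, 5

# Z[lang]: first-tier posterior probabilities (docs x categories);
# E[lang]: weighted-sum document embeddings (docs x emb_dim).
Z = {l: np.random.rand(n_docs, n_cats) for l in langs}
E = {l: np.random.rand(n_docs, emb_dim) for l in langs}

for l in langs:
    Z[l] = np.hstack((Z[l], E[l]))               # (4, 3 + 5) per language

vertical_Z = np.vstack([Z[l] for l in langs])    # (8, 8) across languages
print(vertical_Z.shape)
```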
@@ -573,14 +568,15 @@ class AndreaCLF(FunnellingPolylingualClassifier):
         lZ = self._projection(self.doc_projector, lX)

         if self.config['supervised'] or self.config['unsupervised']:
-            l_weighted_em = self.embed(lX, ly,
-                                       unsupervised=self.config['unsupervised'],
-                                       supervised=self.config['supervised'],
-                                       prediction=True)
-            Z_embedded = dict()
+            _embedding_space = self.embedding_space.predict(self.config, lX)
+            # l_weighted_em = self.embed(lX, ly,
+            #                            unsupervised=self.config['unsupervised'],
+            #                            supervised=self.config['supervised'],
+            #                            prediction=True)
+            # Z_embedded = dict()
             for lang in lX.keys():
-                Z_embedded[lang] = np.hstack((lZ[lang], l_weighted_em[lang]))
-            lZ = Z_embedded
+                lZ[lang] = np.hstack((lZ[lang], _embedding_space[lang]))
+            # lZ = Z_embedded

         for lang in lZ.keys():
             print(lZ[lang].shape)

@@ -12,7 +12,7 @@ class StandardizeTransformer:
         self.std = np.clip(std, 1e-5, None)
         self.mean = np.mean(X, axis=self.axis)
         self.yetfit = True
-        print('done')
+        print('done\n')
         return self

     def predict(self, X):
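The StandardizeTransformer hunk only touches a log message, but the surrounding lines show the detail that matters: the per-feature std is clipped to 1e-5 before being used as a divisor, so constant features cannot trigger a division by zero. A minimal sketch of that z-scoring (assuming predict divides by the clipped std, which this excerpt does not show):

```python
import numpy as np

X = np.array([[1.0, 5.0], [1.0, 7.0], [1.0, 9.0]])   # first column constant
mean = np.mean(X, axis=0)
std = np.clip(np.std(X, axis=0), 1e-5, None)  # clip guards the division below
Xz = (X - mean) / std                         # assumed use, mirroring predict
print(Xz[:, 0])                               # zeros, not NaN/inf
```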