Implemented funnelling architecture
This commit is contained in:
parent
a5af2134bf
commit
108f423d41
|
|
@ -1,6 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.decomposition import TruncatedSVD
|
from sklearn.decomposition import TruncatedSVD
|
||||||
|
|
||||||
|
|
||||||
def get_weighted_average(We, x, w):
|
def get_weighted_average(We, x, w):
|
||||||
"""
|
"""
|
||||||
Compute the weighted average vectors
|
Compute the weighted average vectors
|
||||||
|
|
@ -15,6 +16,7 @@ def get_weighted_average(We, x, w):
|
||||||
emb[i,:] = w[i,:].dot(We[x[i,:],:]) / np.count_nonzero(w[i,:])
|
emb[i,:] = w[i,:].dot(We[x[i,:],:]) / np.count_nonzero(w[i,:])
|
||||||
return emb
|
return emb
|
||||||
|
|
||||||
|
|
||||||
def compute_pc(X,npc=1):
|
def compute_pc(X,npc=1):
|
||||||
"""
|
"""
|
||||||
Compute the principal components.
|
Compute the principal components.
|
||||||
|
|
@ -26,6 +28,7 @@ def compute_pc(X,npc=1):
|
||||||
svd.fit(X)
|
svd.fit(X)
|
||||||
return svd.components_
|
return svd.components_
|
||||||
|
|
||||||
|
|
||||||
def remove_pc(X, npc=1):
|
def remove_pc(X, npc=1):
|
||||||
"""
|
"""
|
||||||
Remove the projection on the principal components
|
Remove the projection on the principal components
|
||||||
|
|
@ -34,7 +37,7 @@ def remove_pc(X, npc=1):
|
||||||
:return: XX[i, :] is the data point after removing its projection
|
:return: XX[i, :] is the data point after removing its projection
|
||||||
"""
|
"""
|
||||||
pc = compute_pc(X, npc)
|
pc = compute_pc(X, npc)
|
||||||
if npc==1:
|
if npc == 1:
|
||||||
XX = X - X.dot(pc.transpose()) * pc
|
XX = X - X.dot(pc.transpose()) * pc
|
||||||
else:
|
else:
|
||||||
XX = X - X.dot(pc.transpose()).dot(pc)
|
XX = X - X.dot(pc.transpose()).dot(pc)
|
||||||
|
|
|
||||||
|
|
@ -368,4 +368,4 @@ def get_params(optimc=False):
|
||||||
return None
|
return None
|
||||||
c_range = [1e4, 1e3, 1e2, 1e1, 1, 1e-1]
|
c_range = [1e4, 1e3, 1e2, 1e1, 1, 1e-1]
|
||||||
kernel = 'rbf'
|
kernel = 'rbf'
|
||||||
return [{'kernel': [kernel], 'C': c_range, 'gamma':['auto']}]
|
return [{'kernel': [kernel], 'C': c_range, 'gamma':['auto']}]
|
||||||
|
|
|
||||||
|
|
@ -41,17 +41,20 @@ class ViewGen(ABC):
|
||||||
|
|
||||||
|
|
||||||
class VanillaFunGen(ViewGen):
|
class VanillaFunGen(ViewGen):
|
||||||
def __init__(self, base_learner, n_jobs=-1):
|
def __init__(self, base_learner, first_tier_parameters=None, n_jobs=-1):
|
||||||
"""
|
"""
|
||||||
Original funnelling architecture proposed by Moreo, Esuli and Sebastiani in DOI: https://doi.org/10.1145/3326065
|
Original funnelling architecture proposed by Moreo, Esuli and Sebastiani in DOI: https://doi.org/10.1145/3326065
|
||||||
:param base_learner: naive monolingual learners to be deployed as first-tier learners. Should be able to
|
:param base_learner: naive monolingual learners to be deployed as first-tier learners. Should be able to
|
||||||
return posterior probabilities.
|
return posterior probabilities.
|
||||||
|
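:param first_tier_parameters: parameters for the first-tier learners, passed on to NaivePolylingualClassifier as `parameters` (default None).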
|
||||||
:param n_jobs: integer, number of concurrent workers
|
:param n_jobs: integer, number of concurrent workers
|
||||||
"""
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.learners = base_learner
|
self.learners = base_learner
|
||||||
|
self.first_tier_parameters = first_tier_parameters
|
||||||
self.n_jobs = n_jobs
|
self.n_jobs = n_jobs
|
||||||
self.doc_projector = NaivePolylingualClassifier(self.learners)
|
self.doc_projector = NaivePolylingualClassifier(base_learner=self.learners,
|
||||||
|
parameters=self.first_tier_parameters, n_jobs=self.n_jobs)
|
||||||
self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
|
self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
|
||||||
|
|
||||||
def fit(self, lX, lY):
|
def fit(self, lX, lY):
|
||||||
|
|
@ -61,8 +64,16 @@ class VanillaFunGen(ViewGen):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def transform(self, lX):
|
def transform(self, lX):
|
||||||
|
"""
|
||||||
|
(1) Vectorize documents
|
||||||
|
(2) Project them via the posterior probabilities predicted by the learners' SVMs
|
||||||
|
(3) Apply L2 normalization to the projection
|
||||||
|
:param lX: documents to be vectorized and projected
|
||||||
|
:return: lZ, the L2-normalized projection of lX
|
||||||
|
"""
|
||||||
lX = self.vectorizer.transform(lX)
|
lX = self.vectorizer.transform(lX)
|
||||||
lZ = self.doc_projector.predict_proba(lX)
|
lZ = self.doc_projector.predict_proba(lX)
|
||||||
|
lZ = _normalize(lZ, l2=True)
|
||||||
return lZ
|
return lZ
|
||||||
|
|
||||||
def fit_transform(self, lX, ly):
|
def fit_transform(self, lX, ly):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue