From 108f423d415c79e0e58b306f66ad191d85ec640d Mon Sep 17 00:00:00 2001
From: andrea <andrea.pdr@hotmail.it>
Date: Tue, 26 Jan 2021 10:15:55 +0100
Subject: [PATCH] Implemented funnelling architecture

---
 refactor/util/SIF_embed.py  |  5 ++++-
 refactor/util/common.py     |  2 +-
 refactor/view_generators.py | 15 +++++++++++++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/refactor/util/SIF_embed.py b/refactor/util/SIF_embed.py
index cfe096e..4a3d712 100644
--- a/refactor/util/SIF_embed.py
+++ b/refactor/util/SIF_embed.py
@@ -1,6 +1,7 @@
 import numpy as np
 from sklearn.decomposition import TruncatedSVD
 
+
 def get_weighted_average(We, x, w):
     """
     Compute the weighted average vectors
@@ -15,6 +16,7 @@ def get_weighted_average(We, x, w):
         emb[i,:] = w[i,:].dot(We[x[i,:],:]) / np.count_nonzero(w[i,:])
     return emb
 
+
 def compute_pc(X,npc=1):
     """
     Compute the principal components.
@@ -26,6 +28,7 @@ def compute_pc(X,npc=1):
     svd.fit(X)
     return svd.components_
 
+
 def remove_pc(X, npc=1):
     """
     Remove the projection on the principal components
@@ -34,7 +37,7 @@ def remove_pc(X, npc=1):
     :return: XX[i, :] is the data point after removing its projection
     """
     pc = compute_pc(X, npc)
-    if npc==1:
+    if npc == 1:
         XX = X - X.dot(pc.transpose()) * pc
     else:
         XX = X - X.dot(pc.transpose()).dot(pc)
diff --git a/refactor/util/common.py b/refactor/util/common.py
index 3ffda78..a624528 100644
--- a/refactor/util/common.py
+++ b/refactor/util/common.py
@@ -368,4 +368,4 @@ def get_params(optimc=False):
         return None
     c_range = [1e4, 1e3, 1e2, 1e1, 1, 1e-1]
     kernel = 'rbf'
-    return [{'kernel': [kernel], 'C': c_range, 'gamma':['auto']}]
\ No newline at end of file
+    return [{'kernel': [kernel], 'C': c_range, 'gamma':['auto']}]
diff --git a/refactor/view_generators.py b/refactor/view_generators.py
index 579b8f1..2d82a20 100644
--- a/refactor/view_generators.py
+++ b/refactor/view_generators.py
@@ -41,17 +41,20 @@ class ViewGen(ABC):
 
 
 class VanillaFunGen(ViewGen):
-    def __init__(self, base_learner, n_jobs=-1):
+    def __init__(self, base_learner, first_tier_parameters=None, n_jobs=-1):
         """
         Original funnelling architecture proposed by Moreo, Esuli and Sebastiani in DOI: https://doi.org/10.1145/3326065
         :param base_learner: naive monolingual learners to be deployed as first-tier learners. Should be able to
         return posterior probabilities.
+        :param first_tier_parameters: passed as `parameters` to NaivePolylingualClassifier (default None)
         :param n_jobs: integer, number of concurrent workers
         """
         super().__init__()
         self.learners = base_learner
+        self.first_tier_parameters = first_tier_parameters
         self.n_jobs = n_jobs
-        self.doc_projector = NaivePolylingualClassifier(self.learners)
+        self.doc_projector = NaivePolylingualClassifier(base_learner=self.learners,
+                                                        parameters=self.first_tier_parameters, n_jobs=self.n_jobs)
         self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
 
     def fit(self, lX, lY):
@@ -61,8 +64,16 @@ class VanillaFunGen(ViewGen):
         return self
 
     def transform(self, lX):
+        """
+        (1) Vectorize documents
+        (2) Project them according to the learners SVMs
+        (3) Apply L2 normalization to the projection
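+        :param lX: documents to vectorize, in the form accepted by self.vectorizer.transform
+        :return: lZ, the output of self.doc_projector.predict_proba after L2 normalization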
+        """
         lX = self.vectorizer.transform(lX)
         lZ = self.doc_projector.predict_proba(lX)
+        lZ = _normalize(lZ, l2=True)
         return lZ
 
     def fit_transform(self, lX, ly):