from gfun.vgfs.viewGen import ViewGen
from gfun.vgfs.learners.svms import NaivePolylingualClassifier
from gfun.vgfs.commons import _normalize


class VanillaFunGen(ViewGen):
    """
    View Generator (x): original funnelling architecture proposed by Moreo,
    Esuli and Sebastiani in DOI: https://doi.org/10.1145/3326065
    """

    def __init__(self, base_learner, n_jobs=-1):
        """
        Init Posterior Probabilities embedder (i.e., VanillaFunGen)

        :param base_learner: naive monolingual learners to be deployed as
            first-tier learners. Should be able to return posterior
            probabilities.
        :param n_jobs: integer, number of concurrent workers
        """
        print("- init VanillaFun View Generating Function")
        self.learners = base_learner
        self.n_jobs = n_jobs
        self.doc_projector = NaivePolylingualClassifier(
            base_learner=self.learners,
            n_jobs=self.n_jobs,
        )
        # The vectorizer is not built here: it must be assigned to this
        # attribute by the caller before fit() / transform() are invoked.
        self.vectorizer = None
        # Falsy -> fit() trains the first-tier learners as usual.
        # Truthy -> fit() skips training (see fit() for details).
        self.load_trained = False

    def fit(self, lX, lY):
        """
        Fit the first-tier learners on the vectorized documents.

        If ``self.load_trained`` is truthy, no training is performed:
        a callable value is invoked and its result returned (what the
        original code attempted unconditionally); any other truthy value
        (e.g. ``True``) is taken to mean the model is already trained and
        ``self`` is returned. Previously a plain ``True`` flag crashed with
        ``TypeError: 'bool' object is not callable``.

        :param lX: dict {lang: indexed documents}
        :param lY: labels, passed unchanged to the document projector
        :return: self (or the result of the ``load_trained`` callable)
        """
        trained = self.load_trained
        if trained:
            if callable(trained):
                return trained()
            # NOTE(review): a non-callable truthy flag is interpreted as
            # "already trained" -- confirm against the intended loader.
            return self

        print("- fitting VanillaFun View Generating Function")
        lX = self.vectorizer.transform(lX)
        self.doc_projector.fit(lX, lY)
        return self

    def transform(self, lX):
        """
        (1) Vectorize documents; (2) Project them according to the learners
        SVMs; (3) Apply L2 normalization to the projection and returns it.

        :param lX: dict {lang: indexed documents}
        :return: document projection to the common latent space.
        """
        lX = self.vectorizer.transform(lX)
        lZ = self.doc_projector.predict_proba(lX)
        lZ = _normalize(lZ, l2=True)
        return lZ

    def fit_transform(self, lX, lY):
        """
        Fit on (lX, lY) and return the projection of lX.

        :param lX: dict {lang: indexed documents}
        :param lY: labels, passed unchanged to fit()
        :return: the output of transform(lX) on the fitted generator
        """
        return self.fit(lX, lY).transform(lX)

    def save_vgf(self, model_id):
        """
        Pickle this view generator to
        ``models/vgfs/posterior/vanillaFunGen_<model_id>.pkl``, creating the
        directory if it does not exist.

        :param model_id: identifier interpolated into the file name
        :return: self
        """
        import pickle
        from os.path import join
        from os import makedirs

        vgf_name = "vanillaFunGen"
        _basedir = join("models", "vgfs", "posterior")
        makedirs(_basedir, exist_ok=True)
        _path = join(_basedir, f"{vgf_name}_{model_id}.pkl")
        with open(_path, "wb") as f:
            pickle.dump(self, f)
        return self

    def __str__(self):
        """Return a short multi-line summary (base learner and n_jobs)."""
        _str = f"[VanillaFunGen (-p)]\n- base learner: {self.learners}\n- n_jobs: {self.n_jobs}\n"
        return _str