diff --git a/refactor/data/datamodule.py b/refactor/data/datamodule.py
index 1121a58..da6ec92 100644
--- a/refactor/data/datamodule.py
+++ b/refactor/data/datamodule.py
@@ -88,14 +88,21 @@ class RecurrentDataset(Dataset):
 
 
 class RecurrentDataModule(pl.LightningDataModule):
-    def __init__(self, multilingualIndex, batchsize=64):
+    """
+    PyTorch Lightning DataModule to be deployed with RecurrentGen.
+    https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
+    """
+    def __init__(self, multilingualIndex, batchsize=64, n_jobs=-1):
         """
-        Pytorch-lightning DataModule: https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
-        :param multilingualIndex:
-        :param batchsize:
+        Init RecurrentDataModule.
+        :param multilingualIndex: MultilingualIndex, a dictionary of training and test documents
+            indexed by language code.
+        :param batchsize: int, number of samples per batch.
+        :param n_jobs: int, number of concurrent workers to be deployed (i.e., parallelizing data loading).
         """
         self.multilingualIndex = multilingualIndex
         self.batchsize = batchsize
+        self.n_jobs = n_jobs
         super().__init__()
 
     def prepare_data(self, *args, **kwargs):
@@ -128,15 +135,15 @@ class RecurrentDataModule(pl.LightningDataModule):
                                               lPad_index=self.multilingualIndex.l_pad())
 
     def train_dataloader(self):
-        return DataLoader(self.training_dataset, batch_size=self.batchsize, num_workers=N_WORKERS,
+        return DataLoader(self.training_dataset, batch_size=self.batchsize, num_workers=self.n_jobs,
                           collate_fn=self.training_dataset.collate_fn)
 
     def val_dataloader(self):
-        return DataLoader(self.val_dataset, batch_size=self.batchsize, num_workers=N_WORKERS,
+        return DataLoader(self.val_dataset, batch_size=self.batchsize, num_workers=self.n_jobs,
                           collate_fn=self.val_dataset.collate_fn)
 
     def test_dataloader(self):
-        return DataLoader(self.test_dataset, batch_size=self.batchsize, num_workers=N_WORKERS,
+        return DataLoader(self.test_dataset, batch_size=self.batchsize, num_workers=self.n_jobs,
                           collate_fn=self.test_dataset.collate_fn)
 
 
@@ -156,7 +163,18 @@ def tokenize(l_raw, max_len):
 
 
 class BertDataModule(RecurrentDataModule):
+    """
+    PyTorch Lightning DataModule to be deployed with BertGen.
+    https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
+    """
     def __init__(self, multilingualIndex, batchsize=64, max_len=512):
+        """
+        Init BertDataModule.
+        :param multilingualIndex: MultilingualIndex, a dictionary of training and test documents
+            indexed by language code.
+        :param batchsize: int, number of samples per batch.
+        :param max_len: int, maximum number of tokens per document. Absolute cap is 512.
+        """
         super().__init__(multilingualIndex, batchsize)
         self.max_len = max_len
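For context, a minimal usage sketch of the new n_jobs plumbing; it is not part of the patch. Here `multilingualIndex` stands in for an already-built MultilingualIndex instance, and the hook calls assume the standard LightningDataModule API. Note also that torch's DataLoader requires num_workers >= 0, so the joblib-style -1 default would need to be overridden (or clamped) before the loaders are built.

    # Hedged usage sketch: the n_jobs argument now reaches the DataLoaders.
    datamodule = RecurrentDataModule(multilingualIndex, batchsize=64, n_jobs=4)
    datamodule.prepare_data()                      # standard LightningDataModule hooks
    datamodule.setup()
    train_loader = datamodule.train_dataloader()   # DataLoader built with num_workers=4

diff --git a/refactor/funnelling.py b/refactor/funnelling.py
index 4d19e1a..812a937 100644
--- a/refactor/funnelling.py
+++ b/refactor/funnelling.py
@@ -4,9 +4,13 @@ from view_generators import VanillaFunGen
 
 
 class DocEmbedderList:
+    """
+    Class that takes care of calling the fit and transform methods of every initialized embedder. Every ViewGenerator
+    should be wrapped by this class in order to seamlessly train the overall architecture.
+    """
     def __init__(self, embedder_list, probabilistic=True):
         """
-        Class that takes care of calling fit and transform function for every init embedder.
+        Init the DocEmbedderList.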
         :param embedder_list: list of embedders to be deployed
         :param probabilistic: whether to recast view generators output to vectors of posterior probabilities or not
         """
@@ -23,11 +27,22 @@ class DocEmbedderList:
         self.embedders = _tmp
 
     def fit(self, lX, ly):
+        """
+        Fit all the ViewGenerators contained by the DocEmbedderList.
+        :param lX: dict {lang: documents}
+        :param ly: dict {lang: target vectors}
+        :return: self
+        """
         for embedder in self.embedders:
             embedder.fit(lX, ly)
         return self
 
     def transform(self, lX):
+        """
+        Project documents by means of every ViewGenerator. Projections are then averaged together and returned.
+        :param lX: dict {lang: documents}
+        :return: common latent space (averaged).
+        """
         langs = sorted(lX.keys())
         lZparts = {lang: None for lang in langs}
 
@@ -40,14 +55,24 @@ class DocEmbedderList:
             else:
                 lZparts[lang] += Z
         n_embedders = len(self.embedders)
-        return {lang: lZparts[lang]/n_embedders for lang in langs}
+        return {lang: lZparts[lang]/n_embedders for lang in langs}  # Averaging feature spaces
 
     def fit_transform(self, lX, ly):
         return self.fit(lX, ly).transform(lX)
 
 
 class FeatureSet2Posteriors:
+    """
+    Takes care of recasting features output by the embedders into vectors of posterior probabilities by means of
+    a multiclass SVM.
+    """
     def __init__(self, embedder, l2=True, n_jobs=-1):
+        """
+        Init the FeatureSet2Posteriors.
+        :param embedder: ViewGen, a view generator that does not natively output posterior probabilities.
+        :param l2: bool, whether to apply L2 normalization to the projection.
+        :param n_jobs: int, number of concurrent workers.
+        """
         self.embedder = embedder
         self.l2 = l2
         self.n_jobs = n_jobs
@@ -77,6 +102,11 @@ class FeatureSet2Posteriors:
 
 
 class Funnelling:
+    """
+    Funnelling Architecture. It is composed of two tiers. The first tier is a set of heterogeneous document embedders.
+    The second tier (i.e., the meta-classifier) performs the classification of the common latent space computed by
+    the first-tier learners.
+    """
     def __init__(self, first_tier: DocEmbedderList, meta_classifier: MetaClassifier, n_jobs=-1):
         self.first_tier = first_tier
         self.meta = meta_classifier
diff --git a/refactor/main.py b/refactor/main.py
index 48936d0..ebc43a3 100644
--- a/refactor/main.py
+++ b/refactor/main.py
@@ -26,6 +26,7 @@ def main(args):
     lMuse = MuseLoader(langs=sorted(lX.keys()), cache=args.muse_dir)
     multilingualIndex.index(lX, ly, lXte, lyte, l_pretrained_vocabulary=lMuse.vocabulary())
 
+    # Init ViewGenerators and append them to embedder_list
     embedder_list = []
     if args.post_embedder:
         posteriorEmbedder = VanillaFunGen(base_learner=get_learner(calibrate=True), n_jobs=args.n_jobs)
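To make the two-tier flow concrete, an illustrative composition sketch follows; it is not part of the patch. The `meta_classifier` object, the `gfun.fit`/`gfun.predict` entry points, and the data dicts `lX`, `ly`, `lXte` are assumptions based on the surrounding code, not confirmed API; `get_learner` is the helper used in main.py.

    # Hedged sketch: composing first-tier view generators with the meta-classifier.
    embedders = DocEmbedderList([
        VanillaFunGen(base_learner=get_learner(calibrate=True), n_jobs=-1),
        MuseGen(muse_dir='../embeddings', n_jobs=-1),
    ], probabilistic=True)
    gfun = Funnelling(first_tier=embedders,
                      meta_classifier=meta_classifier,  # assumed: an already-built MetaClassifier
                      n_jobs=-1)
    gfun.fit(lX, ly)              # lX: {lang: documents}, ly: {lang: target vectors}
    ly_pred = gfun.predict(lXte)  # assumed prediction entry point

diff --git a/refactor/view_generators.py b/refactor/view_generators.py
index 6cdd4a9..384ec76 100644
--- a/refactor/view_generators.py
+++ b/refactor/view_generators.py
@@ -30,6 +30,10 @@ from util.embeddings_manager import MuseLoader, XdotM, wce_matrix
 
 
 class ViewGen(ABC):
+    """
+    Abstract class for ViewGenerator implementations. Every ViewGen should implement these three methods in order to
+    be seamlessly integrated into the overall architecture.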
+ """ @abstractmethod def fit(self, lX, ly): pass @@ -44,9 +48,13 @@ class ViewGen(ABC): class VanillaFunGen(ViewGen): + """ + View Generator (x): original funnelling architecture proposed by Moreo, Esuli and + Sebastiani in DOI: https://doi.org/10.1145/3326065 + """ def __init__(self, base_learner, first_tier_parameters=None, n_jobs=-1): """ - Original funnelling architecture proposed by Moreo, Esuli and Sebastiani in DOI: https://doi.org/10.1145/3326065 + Init Posterior Probabilities embedder (i.e., VanillaFunGen) :param base_learner: naive monolingual learners to be deployed as first-tier learners. Should be able to return posterior probabilities. :param base_learner: @@ -68,11 +76,10 @@ class VanillaFunGen(ViewGen): def transform(self, lX): """ - (1) Vectorize documents - (2) Project them according to the learners SVMs - (3) Apply L2 normalization to the projection - :param lX: - :return: + (1) Vectorize documents; (2) Project them according to the learners SVMs, finally (3) Apply L2 normalization + to the projection and returns it. + :param lX: dict {lang: indexed documents} + :return: document projection to the common latent space. """ lX = self.vectorizer.transform(lX) lZ = self.doc_projector.predict_proba(lX) @@ -84,10 +91,13 @@ class VanillaFunGen(ViewGen): class MuseGen(ViewGen): + """ + View Generator (m): generates document representation via MUSE embeddings (Fasttext multilingual word + embeddings). Document embeddings are obtained via weighted sum of document's constituent embeddings. + """ def __init__(self, muse_dir='../embeddings', n_jobs=-1): """ - generates document representation via MUSE embeddings (Fasttext multilingual word - embeddings). Document embeddings are obtained via weighted sum of document's constituent embeddings. + Init the MuseGen. :param muse_dir: string, path to folder containing muse embeddings :param n_jobs: int, number of concurrent workers """ @@ -99,6 +109,12 @@ class MuseGen(ViewGen): self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True) def fit(self, lX, ly): + """ + (1) Vectorize documents; (2) Load muse embeddings for words encountered while vectorizing. + :param lX: dict {lang: indexed documents} + :param ly: dict {lang: target vectors} + :return: self. + """ print('# Fitting MuseGen (M)...') self.vectorizer.fit(lX) self.langs = sorted(lX.keys()) @@ -109,6 +125,12 @@ class MuseGen(ViewGen): return self def transform(self, lX): + """ + (1) Vectorize documents; (2) computes the weighted sum of MUSE embeddings found at document level, + finally (3) Apply L2 normalization embedding and returns it. + :param lX: dict {lang: indexed documents} + :return: document projection to the common latent space. + """ lX = self.vectorizer.transform(lX) XdotMUSE = Parallel(n_jobs=self.n_jobs)( delayed(XdotM)(lX[lang], self.lMuse[lang], sif=True) for lang in self.langs) @@ -121,10 +143,13 @@ class MuseGen(ViewGen): class WordClassGen(ViewGen): + """ + View Generator (w): generates document representation via Word-Class-Embeddings. + Document embeddings are obtained via weighted sum of document's constituent embeddings. + """ def __init__(self, n_jobs=-1): """ - generates document representation via Word-Class-Embeddings. - Document embeddings are obtained via weighted sum of document's constituent embeddings. + Init WordClassGen. 
@@ -121,10 +143,13 @@
 
 
 class WordClassGen(ViewGen):
+    """
+    View Generator (w): generates document representations via Word-Class Embeddings.
+    Document embeddings are obtained via a weighted sum of the documents' constituent embeddings.
+    """
     def __init__(self, n_jobs=-1):
         """
-        generates document representation via Word-Class-Embeddings.
-        Document embeddings are obtained via weighted sum of document's constituent embeddings.
+        Init WordClassGen.
         :param n_jobs: int, number of concurrent workers
         """
         super().__init__()
@@ -134,6 +159,12 @@
         self.vectorizer = TfidfVectorizerMultilingual(sublinear_tf=True, use_idf=True)
 
     def fit(self, lX, ly):
+        """
+        (1) Vectorize documents; (2) compute Word-Class Embeddings for the words encountered while vectorizing.
+        :param lX: dict {lang: indexed documents}
+        :param ly: dict {lang: target vectors}
+        :return: self.
+        """
         print('# Fitting WordClassGen (W)...')
         lX = self.vectorizer.fit_transform(lX)
         self.langs = sorted(lX.keys())
@@ -144,6 +175,12 @@
         return self
 
     def transform(self, lX):
+        """
+        (1) Vectorize documents; (2) compute the weighted sum of Word-Class Embeddings at document level; finally,
+        (3) apply L2 normalization to the embedding and return it.
+        :param lX: dict {lang: indexed documents}
+        :return: document projection to the common latent space.
+        """
         lX = self.vectorizer.transform(lX)
         XdotWce = Parallel(n_jobs=self.n_jobs)(
             delayed(XdotM)(lX[lang], self.lWce[lang], sif=True) for lang in self.langs)
@@ -156,17 +193,28 @@
 
 
 class RecurrentGen(ViewGen):
+    """
+    View Generator (G): generates document embeddings by means of a Gated Recurrent Unit (GRU) network. The model can
+    be initialized with different (multilingual/aligned) word representations (e.g., MUSE, WCE, etc.).
+    Output dimension is (n_docs, 512). Training happens end-to-end. At inference time, the model returns
+    the network's internal state at the second feed-forward layer. Training metrics are logged via TensorBoard.
+    """
     def __init__(self, multilingualIndex, pretrained_embeddings, wce, batch_size=512, nepochs=50, gpus=0, n_jobs=-1,
                  stored_path=None):
         """
-        generates document embedding by means of a Gated Recurrent Units. The model can be
-        initialized with different (multilingual/aligned) word representations (e.g., MUSE, WCE, ecc.,).
-        Output dimension is (n_docs, 512).
-        :param multilingualIndex:
-        :param pretrained_embeddings:
-        :param wce:
-        :param gpus:
-        :param n_jobs:
+        Init RecurrentGen.
+        :param multilingualIndex: MultilingualIndex, a dictionary of training and test documents
+            indexed by language code.
+        :param pretrained_embeddings: dict {lang: tensor of embeddings}, the pretrained embeddings to use
+            as the embedding layer.
+        :param wce: bool, whether to deploy Word-Class Embeddings (as proposed by A. Moreo). If True, the supervised
+            embeddings are concatenated to the deployed pretrained embeddings. WCE dimensionality is equal to
+            the number of target classes.
+        :param batch_size: int, number of samples in a batch.
+        :param nepochs: int, maximum number of epochs to train the model.
+        :param gpus: int, specifies how many GPUs to use per node. If 0, computation will take place on the CPU.
+        :param n_jobs: int, number of concurrent workers (i.e., parallelizing data loading).
+        :param stored_path: str, path to a pretrained model. If None, the model will be trained from scratch.
         """
         super().__init__()
         self.multilingualIndex = multilingualIndex
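A hedged instantiation sketch for the recurrent view generator, not part of the patch; `l_pretrained` is a placeholder for a dict {lang: tensor of embeddings} (e.g., loaded MUSE vectors) and is not a variable defined in the repo. Every argument mirrors the documented __init__ parameters.

    # Hypothetical construction of the GRU-based view generator.
    recurrent_gen = RecurrentGen(multilingualIndex,
                                 pretrained_embeddings=l_pretrained,  # {lang: tensor of embeddings}
                                 wce=True,       # concatenate Word-Class Embeddings
                                 batch_size=512,
                                 nepochs=50,
                                 gpus=1,         # 0 -> CPU
                                 n_jobs=4,
                                 stored_path=None)
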
""" print('# Fitting RecurrentGen (G)...') - recurrentDataModule = RecurrentDataModule(self.multilingualIndex, batchsize=self.batch_size) + recurrentDataModule = RecurrentDataModule(self.multilingualIndex, batchsize=self.batch_size, n_jobs=self.n_jobs) trainer = Trainer(gradient_clip_val=1e-1, gpus=self.gpus, logger=self.logger, max_epochs=self.nepochs, checkpoint_callback=False) @@ -236,9 +285,9 @@ class RecurrentGen(ViewGen): def transform(self, lX): """ - Project documents to the common latent space - :param lX: - :return: + Project documents to the common latent space. Output dimensionality is 512. + :param lX: dict {lang: indexed documents} + :return: documents projected to the common latent space. """ l_pad = self.multilingualIndex.l_pad() data = self.multilingualIndex.l_devel_index() @@ -255,7 +304,22 @@ class RecurrentGen(ViewGen): class BertGen(ViewGen): + """ + View Generator (b): generates document embedding via Bert model. The training happens end-to-end. + At inference time, the model returns the network internal state at the last original layer (i.e. 12th). Document + embeddings are the state associated with the "start" token. Training metrics are logged via TensorBoard. + """ def __init__(self, multilingualIndex, batch_size=128, nepochs=50, gpus=0, n_jobs=-1, stored_path=None): + """ + Init Bert model + :param multilingualIndex: MultilingualIndex, it is a dictionary of training and test documents + indexed by language code. + :param batch_size: int, number of samples per batch. + :param nepochs: int, number of max epochs to train the model. + :param gpus: int, specifies how many GPUs to use per node. If False computation will take place on cpu. + :param n_jobs: int, number of concurrent workers. + :param stored_path: str, path to a pretrained model. If None the model will be trained from scratch. + """ super().__init__() self.multilingualIndex = multilingualIndex self.nepochs = nepochs @@ -271,6 +335,14 @@ class BertGen(ViewGen): return BertModel(output_size=output_size, stored_path=self.stored_path, gpus=self.gpus) def fit(self, lX, ly): + """ + Train the Neural Network end-to-end. + lX and ly are not directly used. We rather get them from the multilingual index used in the instantiation + of the Dataset object (RecurrentDataset) in the GfunDataModule class. + :param lX: dict {lang: indexed documents} + :param ly: dict {lang: target vectors} + :return: self. + """ print('# Fitting BertGen (M)...') self.multilingualIndex.train_val_split(val_prop=0.2, max_val=2000, seed=1) bertDataModule = BertDataModule(self.multilingualIndex, batchsize=self.batch_size, max_len=512) @@ -281,7 +353,11 @@ class BertGen(ViewGen): return self def transform(self, lX): - # lX is raw text data. It has to be first indexed via Bert Tokenizer. + """ + Project documents to the common latent space. Output dimensionality is 768. + :param lX: dict {lang: indexed documents} + :return: documents projected to the common latent space. + """ data = self.multilingualIndex.l_devel_raw_index() data = tokenize(data, max_len=512) self.model.to('cuda' if self.gpus else 'cpu')