diff --git a/.gitignore b/.gitignore
index 37dc9c4..949775a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@
 /commentaries/*.xml
 /commentaries/*.xsd
 /commentaries/*.zip
+/entity_linker/knowledge_base/*.pickle
 
 # User-specific stuff
 .idea/**/workspace.xml
diff --git a/entity_linker/KB_builder.py b/entity_linker/KB_builder.py
index 4836f9f..6518f2c 100644
--- a/entity_linker/KB_builder.py
+++ b/entity_linker/KB_builder.py
@@ -39,18 +39,23 @@ def testing_wikidata(entity_q):
     entity = client.get(entity_q, load=True)
     notable_work = client.get('P800')
     present_in_work = client.get('P1441')
-    # date_of_birth = client.get('P569')
-    # birth = entity.get(date_of_birth)  # TODO: debug this
+    date_of_birth = client.get('P569')
+
     aut_names = entity.label.texts
     _works = entity.get(notable_work)
     _present_in_work = entity.get(present_in_work)
+    _birth = entity.get(date_of_birth)
+
     if _works is not None:
         for work in _works:
             dict_works[work.id] = work.label.texts
     if _present_in_work is not None:
         for p_work in _present_in_work:
             dict_present_in_works[p_work.id] = p_work.label.texts
-    return entity, aut_names, dict_works, dict_present_in_works
+    if _birth is not None:
+        _birth = _birth[0]
+
+    return entity, aut_names, dict_works, dict_present_in_works, _birth
 
 
 def print_results(results):
@@ -73,7 +78,7 @@ def extract_wikidata_endpoint(author_names, show_warnings=True):
         return endpoint
     except IndexError:
         if show_warnings:
-            warnings.warn('Entity has not a wikimdata endpoint ')
+            warnings.warn('Entity has not a wikidata endpoint ')
         return None
 
 
@@ -94,12 +99,13 @@ for auth in tqdm.tqdm(full_auth_list):
     wikidata_endp = extract_wikidata_endpoint(entity_q, show_warnings=False)
     dict_res[wikidata_endp] = None
     if wikidata_endp is not None:
-        _, names, works, other_works = testing_wikidata(wikidata_endp)
+        _, names, works, other_works, y_birth = testing_wikidata(wikidata_endp)
         dict_res[wikidata_endp] = {'aut_name': names,
                                    'aut_works': works,
-                                   'aut_present_work': other_works}
+                                   'aut_present_work': other_works,
+                                   'birth': y_birth}
 
-with open('knowledge_base/KB_wikimedia.json', 'w+') as f:
+with open('knowledge_base/KB_wikimedia_with_dates.json', 'w+') as f:
     json.dump(dict_res, f)
 
 print(f'# Process finished in: {round((time.time()-stime), 5)}')
diff --git a/entity_linker/_merge_kbs.py b/entity_linker/_merge_kbs.py
index c7bf435..dd8f63f 100644
--- a/entity_linker/_merge_kbs.py
+++ b/entity_linker/_merge_kbs.py
@@ -30,7 +30,7 @@ with open('./KB_abs_merged.pickle', 'wb') as infile:
 from pprint import pprint
 pprint(merged['Giles_of_Rome'])
 """
-with open('./KB_abs_merged.pickle', 'rb') as infile:
+with open('knowledge_base/KB_abs_merged.pickle', 'rb') as infile:
     kb = pickle.load(infile)
 
 reversed_dict = {}
@@ -45,5 +45,5 @@ for key in kb.keys():
 
 print(len(reversed_dict))
 
-with open('./KB_abs_reversed.pickle', 'wb') as outfile:
+with open('knowledge_base/KB_abs_reversed.pickle', 'wb') as outfile:
     pickle.dump(reversed_dict, outfile)
\ No newline at end of file
diff --git a/entity_linker/kb_fastText b/entity_linker/kb_fastText
deleted file mode 100644
index ae91031..0000000
Binary files a/entity_linker/kb_fastText and /dev/null differ
diff --git a/entity_linker/kb_test b/entity_linker/kb_test
deleted file mode 100644
index 41eeff4..0000000
Binary files a/entity_linker/kb_test and /dev/null differ
diff --git a/entity_linker/knowledge_base.py b/entity_linker/knowledge_base.py
index 6ad6490..cd73afa 100644
--- a/entity_linker/knowledge_base.py
+++ b/entity_linker/knowledge_base.py
@@ -1,82 +1,62 @@
-"""
-Should also evaluate IF and HOW actual spaCy KB could be deoloyed in this scenario
-
-https://github.com/seatgeek/fuzzywuzzy?source=post_page---------------------------
-"""
 # TODO: work on fuzzy matching. See https://github.com/gandersen101/spaczz
-
 from difflib import SequenceMatcher
-from pprint import pprint
-import pickle
+import json
 
 
-class Knowledge_base:
+class KnowledgeBase:
 
     def __init__(self, kb_path):
         with open(kb_path, 'rb') as infile:
-            data = pickle.load(infile)
-
-        self.kb = data
-        #self.utt2ent = self._generate_utter_2_ent()
+            data = json.load(infile)
+        self.id2aut = data
+        self.aut2id = {}
+        self._popolate_aut2id()
 
-    def link_entities(self, preds):
-        PER_preds = [pred[0] for pred in preds if pred[1] == 'PER']
+    def link_entities(self, preds, deepfuzz=False):
+        PER_preds = [pred[0] for pred in preds if pred[1] == 'PER' and pred[0] != 'Dante']
         WORK_preds = [pred[0] for pred in preds if pred[1] == 'WORK_OF_ART']
 
+        print('-'*50)
         print(f'Candidate authors (i.e., entitites matched): {PER_preds}')
         # print(f'Candidates work:\n{WORK_preds}')
 
         COMMEDIA_DATE = 1321
+        print('-'*50 + '\nChecking in KB...')
 
-        """
-        for target in set(PER_preds):
-            if target in self.utt2ent.keys():
-                print(target, self.utt2ent[target])
-        """
-        print('#'*50 + '\nChecking in KB...')
-
-        # TODO: in the author dict I should insert also the single name (e.g., Tommaso --> Tommaso d'aquino)
+        # TODO: in the author dict I should insert also the single name (e.g., Tommaso --> Tommaso d'Aquino)
         for target in set(PER_preds):
             scores = []
-            for auth in self.kb.keys():
+            deepscore = []
+            for auth in self.aut2id.keys():
                 sim = self._similar(target, auth)
                 scores.append((auth, sim))
 
             scores.sort(key=lambda tup: tup[1], reverse=True)
+            success = False
             for i in range(3):
                 if scores[i][1] > .8:
-                    print(f'Prediction: {target} - {scores[i]} - born in {self.kb[scores[i][0]]["birth"]}')
+                    print(f'Prediction (F): {target} - {self.id2aut[self.aut2id[scores[i][0]]]["aut_name"]["it"], scores[i][1]} - born in {self.id2aut[self.aut2id[scores[i][0]]]["birth"]}')
+                    success = True
                     break
-                #elif scores[0][1] == 0:
-                #    print(f'Author {target} not in KB ')
+            if deepfuzz and not success:
+                for aut in self.aut2id.keys():
+                    _splitname = aut.split(' ')
+                    sim = 0
+                    for split in _splitname:
+                        _sim = self._similar(target, split)
+                        if _sim > sim:
+                            sim = _sim
+                    deepscore.append((aut, sim))
+                deepscore.sort(key=lambda tup: tup[1], reverse=True)
+                for j in range(3):
+                    if deepscore[j][1] > .8:
+                        print(
+                            f'Prediction (S): {target} - {self.id2aut[self.aut2id[deepscore[j][0]]]["aut_name"]["it"], deepscore[j][1]} - born in {self.id2aut[self.aut2id[deepscore[j][0]]]["birth"]}')
+                        break
 
         return 0
 
 
-        """
-        for target in set(PER_preds):
-            #print(f'TARGET: {target}')
-
-            scores = []
-            for auth in self.kb.keys():
-                sim = self._similar(target, auth)
-                scores.append((auth, sim))
-
-            scores.sort(key=lambda tup: tup[1], reverse=True)
-            # pprint(scores[:3])
-
-            all_lang_scores = self._check_other_lang(scores[0], target)
-
-            if all_lang_scores[0][1] >= 0.8:  # with this threshold 'Tommaso' is not linked to 'Tommaso d'aquino' ...
-                print(f'TARGET: {target}')
-                print(f'{all_lang_scores[0][0]} was born in year: {self.kb[scores[0][0]]["birth"]}')
-                #print(all_lang_scores)
-            else:
-                continue
-                #print('Author not in KB')
-            print('-'*15)
-
-        """
     def _generate_utter_2_ent(self):
         utt_2_ent = {}
         for ent_en in self.kb.keys():
@@ -84,7 +64,6 @@ class Knowledge_base:
                 utt_2_ent[utt] = ent_en
         return utt_2_ent
 
-
     def _check_other_lang(self, target, original_name):
 
         other_names = self.kb[target[0]]['names']
@@ -97,3 +76,12 @@ class Knowledge_base:
 
     def _similar(self,a, b):
         return SequenceMatcher(None, a, b).ratio()
+
+    def _popolate_aut2id(self):
+        for qid, values in self.id2aut.items():
+            if values is not None:
+                l_names = set(values['aut_name'].values())
+                for name in l_names:
+                    self.aut2id[name] = qid
+        return self
+
diff --git a/entity_linker/knowledge_base_spacy.py b/entity_linker/knowledge_base_spacy.py
index 1e599d9..96876c1 100644
--- a/entity_linker/knowledge_base_spacy.py
+++ b/entity_linker/knowledge_base_spacy.py
@@ -6,12 +6,13 @@ import numpy as np
 from tqdm import tqdm
 
 #with open('./KB_abs_reversed.pickle', 'rb') as infile:
-with open('./KB_abs_merged.pickle', 'rb') as infile:
+with open('knowledge_base/KB_abs_merged.pickle', 'rb') as infile:
     entities_dict = pickle.load(infile)
 
 print(f'Number of entities in original knowledge Base: {len(entities_dict)}')
 #print(entities_dict.keys())
 
+
 def load_word_vectors(model, path_to_vec, max_vec=100000):
     with open(path_to_vec, 'r') as infile:
         header = infile.readline()
@@ -31,6 +32,7 @@ def load_word_vectors(model, path_to_vec, max_vec=100000):
 
     return model
 
+
 def generate_IDs(entities_dict_keys):
     """
     Entities dictionary keys are english spelled names (if such an entities is
diff --git a/main.py b/main.py
index f6efee0..0a16215 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,7 @@ import numpy as np
 from spacy.util import minibatch, compounding
 import warnings
 from preprocessing.ner_dataset_builder import DataSetBuilder
-from entity_linker.knowledge_base import Knowledge_base
+from entity_linker.knowledge_base import KnowledgeBase
 from tqdm import tqdm
 from pathlib import Path
 import pickle
@@ -18,11 +18,10 @@ df_convivio = pd.read_csv(DF_COMMENTARIES_PATH + 'convivio_DF.csv')
 df_ner_unique = pd.read_csv(DF_COMMENTARIES_PATH + 'ner_unique_monarchia.csv')
 
 
-def train_model(model, TRAIN_DATA, clean_commentaries, df_eval, output_dir, SPACY_MODEL_STD='it_core_news_sm'):
+def train_model(TRAIN_DATA, clean_commentaries, df_eval, output_dir, SPACY_MODEL_STD='it_core_news_sm'):
     model = spacy.load(SPACY_MODEL_STD)
     print(f'Enabled pipes: {[pipe for pipe in model.pipe_names]}')
 
-    TRAIN_DATA = TRAIN_DATA
 
     ner = model.get_pipe('ner')
 
@@ -64,7 +63,6 @@ def train_model(TRAIN_DATA, clean_commentaries, df_eval, output_dir, SPAC
             i += 100
         print(comment[i:len(comment)])
 
-
     disabled.restore()
 
     print(f'Enabled pipes: {[pipe for pipe in model.pipe_names]}')
@@ -100,7 +98,7 @@ def train_model(TRAIN_DATA, clean_commentaries, df_eval, output_dir, SPAC
     print(gold)
 
     print(f'Enabled pipes: {[pipe for pipe in model.pipe_names]}')
-    save_model(model, 'it_dante', output_dir)
+    save_model(model, 'it_dante_new', output_dir)
 
     return model
 
@@ -130,11 +128,11 @@ def predict_candidates(model, comment, labels=None):
     if labels is not None:
         query = comment
         gold = labels[labels['comment'] == query][['quot_title', 'quot_author', 'quot_type', 'quot_uri']]
-        print(f'{len(gold)} GOLD TARGETS ' + '#'*50)
+        print(f'{len(gold)} GOLD TARGETS ' + '-'*50)
         for i in range(len(gold)):
             elem = gold.iloc[i]
             print(f'Title: {elem["quot_title"]}\nAuthor:{elem["quot_author"]}\nType: {elem["quot_type"]}')
{elem["quot_title"]}\nAuthor:{elem["quot_author"]}\nType: {elem["quot_type"]}') - print('\n') + # print('\n') return candidates, gold @@ -173,28 +171,28 @@ def load_word_vectors(model, path_to_vec, max_vec=100000): def main(): df_TRAIN = df_monarchia df_eval = df_convivio - # df_eval = df_monarchia dataset = DataSetBuilder(df_TRAIN, df_eval, df_ner_unique) TRAIN_DATA = dataset.import_dataset_doccano('./commentaries/data_parsed/doccano_data/from_doccano_hdn1.json') commentaries_convivio_eva = dataset.clean_commentaries_eva commentaries_monarchia = dataset.clean_commentaries raw_commentaries_convivio = dataset.commentaries_eva - #nlp = spacy.load('it_core_news_sm') - #nlp = load_word_vectors(nlp, './embeddings/cc.it.300.vec', 50000) - #nlp = train_model(nlp, TRAIN_DATA, commentaries_convivio_eva, df_eval, './model_fastText/') - #dataset_convivio = DataSetBuilder(df_eval, df_eval) - #dataset_convivio.export_dataset_doccano('std_convivio') - nlp = spacy.load('./model_fastText/') - #nlp = load_word_vectors(nlp, './embeddings/cc.it.300.vec', 50000) - - #print(len(list(nlp.vocab.strings))) # get whole model vocabulary - + # train_model(TRAIN_DATA, commentaries_convivio_eva, df_eval, './model_fastText/model_spacy_latest') + # nlp = spacy.load('it_core_news_sm') + # nlp = load_word_vectors(nlp, './embeddings/cc.it.300.vec', 50000) + # dataset_convivio = DataSetBuilder(df_eval, df_eval) + # dataset_convivio.export_dataset_doccano('std_convivio') + + nlp = spacy.load('./model_fastText/model_spacy_latest') + # nlp = load_word_vectors(nlp, './embeddings/cc.it.300.vec', 50000) + # print(len(list(nlp.vocab.strings))) # get whole model vocabulary + seed = random.randint(1, len(commentaries_convivio_eva)) preds, df_gold = predict_candidates(nlp, raw_commentaries_convivio[seed], df_eval) - kb = Knowledge_base('./entity_linker/KB_abs_reversed.pickle') - kb.link_entities(preds) + kb = KnowledgeBase('entity_linker/knowledge_base/KB_wikimedia_with_dates.json') + kb.link_entities(preds, deepfuzz=True) + print(f'\nComment Numbert: {seed}') exit() diff --git a/preprocessing/ner_dataset_builder.py b/preprocessing/ner_dataset_builder.py index 4eb4fb4..7b9309a 100644 --- a/preprocessing/ner_dataset_builder.py +++ b/preprocessing/ner_dataset_builder.py @@ -14,6 +14,7 @@ COMMENTARIES_PATH = './commentaries/' DF_COMMENTARIES_PATH = './commentaries/data_parsed/' df_commentary_monarchia = pd.read_csv(DF_COMMENTARIES_PATH + 'monarchia_DF.csv') df_ner_unique = pd.read_csv(DF_COMMENTARIES_PATH + 'ner_unique_monarchia.csv') + """ df_ner_unique ATM contains terms found in "De Monarchia". The .csv file should contain all the occurrences of tagged terms across all of the (tagged) documents!