diff --git a/src/author_identification.py b/src/author_identification.py
index 1cbe4ca..21bd6d3 100755
--- a/src/author_identification.py
+++ b/src/author_identification.py
@@ -2,15 +2,6 @@
 from sklearn.linear_model import LogisticRegression
 from data.dante_loader import load_texts
 from data.features import *
 from model import AuthorshipVerificator, f1_from_counters
-from sklearn.svm import LinearSVC, SVC
-from util.color_visualization import color
-
-# DONE: ngrams should contain punctuation marks according to Sapkota et al. [39] in the PAN 2015 overview
-# (More recently, it was shown that character
-# n-grams corresponding to word affixes and including punctuation marks are the most
-# significant features in cross-topic authorship attribution [57].) #we have cancelled the
-# TODO: inspect the impact of chi-squared correlations against positive-only (or positive and negative) correlations for feature selection
-# TODO: sentence length (Mendenhall-style) ?
 
 for epistola in [1]:
@@ -37,7 +28,9 @@ for epistola in [1]:
         if epistola==2:
             path+='_interaEpistola'
-        positive, negative, pos_files, neg_files, ep_text = load_texts(path, positive_author=author, unknown_target='EpistolaXIII_{}.txt'.format(epistola))
+        positive, negative, pos_files, neg_files, ep_text = load_texts(
+            path, positive_author=author, unknown_target='EpistolaXIII_{}.txt'.format(epistola)
+        )
 
         files = np.asarray(pos_files + neg_files)
         if len(positive) < 2:
             discarded+=1
@@ -45,16 +38,20 @@ for epistola in [1]:
         n_full_docs = len(positive) + len(negative)
 
-        feature_extractor = FeatureExtractor(function_words_freq='latin',
-                                             conjugations_freq='latin',
-                                             features_Mendenhall=True,
-                                             features_sentenceLengths=True,
-                                             tfidf_feat_selection_ratio=0.1,
-                                             wordngrams=True, n_wordngrams=(1, 2),
-                                             charngrams=True, n_charngrams=(3, 4, 5),
-                                             preserve_punctuation=False,
-                                             split_documents=True, split_policy=split_by_sentences, window_size=3,
-                                             normalize_features=True)
+        feature_extractor = FeatureExtractor(
+            function_words_freq='latin',
+            conjugations_freq='latin',
+            features_Mendenhall=True,
+            features_sentenceLengths=True,
+            tfidf_feat_selection_ratio=0.1,
+            wordngrams=True, n_wordngrams=(1, 2),
+            charngrams=True, n_charngrams=(3, 4, 5),
+            preserve_punctuation=False,
+            split_documents=True,
+            split_policy=split_by_sentences,
+            window_size=3,
+            normalize_features=True
+        )
 
         Xtr,ytr,groups = feature_extractor.fit_transform(positive, negative)
 
 
@@ -66,22 +63,23 @@ for epistola in [1]:
         av = AuthorshipVerificator(nfolds=10, estimator=LogisticRegression)
         av.fit(Xtr,ytr,groups)
 
-        score_ave, score_std, tp, fp, fn, tn = av.leave_one_out(Xtr, ytr, files, groups, test_lowest_index_only=True, counters=True)
-        # print('LOO[full-docs]={:.3f} +-{:.5f}'.format(score_ave, score_std))
+        score_ave, score_std, tp, fp, fn, tn = av.leave_one_out(
+            Xtr, ytr, files, groups, test_lowest_index_only=True, counters=True
+        )
 
         f1_scores.append(f1_from_counters(tp, fp, fn, tn))
         counters.append((tp, fp, fn, tn))
 
-        print('F1 for {} = {:.3f}'.format(author,f1_scores[-1]))
+        print(f'F1 for {author} = {f1_scores[-1]:.3f}')
 
-    print('Computing macro- and micro-averages (discarded {}/{})'.format(discarded,len(authors)))
+    print(f'Computing macro- and micro-averages (discarded {discarded}/{len(authors)})')
     f1_scores = np.array(f1_scores)
     counters = np.array(counters)
     macro_f1 = f1_scores.mean()
     micro_f1 = f1_from_counters(*counters.sum(axis=0).tolist())
-    print('Macro-F1 = {:.3f}'.format(macro_f1))
-    print('Micro-F1 = {:.3f}'.format(micro_f1))
+    print(f'Macro-F1 = {macro_f1:.3f}')
+    print(f'Micro-F1 = {micro_f1:.3f}')
     print()
 
 
 
diff --git a/src/data/features.py b/src/data/features.py
index 5b742b0..e36418a 100755
--- a/src/data/features.py
+++ b/src/data/features.py
@@ -229,6 +229,11 @@ def _features_tfidf(documents, tfidf_vectorizer=None, min_df = 1, ngrams=(1,1)):
     return features, tfidf_vectorizer
 
 
+# We have implemented ngrams extraction generically, following Sapkota et al. (ref [39] in the PAN 2015 overview), i.e.,
+# containing punctuation marks. However, this does not apply to this study since punctuation marks are filtered-out in
+# editions of Latin texts.
+# More recently, it was shown that character n-grams corresponding to word affixes and including punctuation
+# marks are the most significant features in cross-topic authorship attribution [57].
 def _features_ngrams(documents, ns=[4, 5], ngrams_vectorizer=None, min_df = 10, preserve_punctuation=True):
     doc_ngrams = ngrams_extractor(documents, ns, preserve_punctuation)
     return _features_tfidf(doc_ngrams, tfidf_vectorizer=ngrams_vectorizer, min_df = min_df)
@@ -507,6 +512,7 @@
 
 
+
 if __name__=='__main__':
     from collections import Counter
 
 
@@ -594,4 +600,3 @@
             dots=False
     print(counter)
     print('rows',rows)
-