From ce5d4ab84322780a49e67559b9e1231db6ef84f5 Mon Sep 17 00:00:00 2001
From: Alex Moreo <alejandro.moreo@isti.cnr.it>
Date: Wed, 1 Apr 2020 19:06:39 +0200
Subject: [PATCH] Fit the verificator unconditionally; remove epistole_split.py

---
 src/author_identification.py | 20 ++++++++++++--------
 src/util/epistole_split.py   | 25 -------------------------
 2 files changed, 12 insertions(+), 33 deletions(-)
 delete mode 100755 src/util/epistole_split.py
diff --git a/src/author_identification.py b/src/author_identification.py
index 5764988..30fb6e4 100755
--- a/src/author_identification.py
+++ b/src/author_identification.py
@@ -12,6 +12,7 @@ AUTHORS_CORPUS_II = ['Dante', 'BeneFlorentinus', 'BenvenutoDaImola', 'Boncompagn
                            'IulianusDeSpira', 'NicolaTrevet', 'PierDellaVigna', 'PietroAlighieri', 'RaimundusLullus',
                            'RyccardusDeSanctoGermano', 'ZonoDeMagnalis']
 
+
 def main():
     discarded = 0
     f1_scores = []
@@ -21,7 +22,7 @@ def main():
         print('='*80)
         print(f'Authorship Identification for {author} (complete {i}/{len(args.authors)})')
         print(f'Corpus {path}')
-        print('='*80)
+        print('-'*80)
 
         positive, negative, pos_files, neg_files, ep_text = load_latin_corpus(
             path, positive_author=author, unknown_target=args.unknown
@@ -50,13 +51,14 @@ def main():
         )
 
         Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
+
+        print('Fitting the Verificator')
         av = AuthorshipVerificator(nfolds=10, estimator=LogisticRegression)
+        av.fit(Xtr, ytr, groups)
+
         if args.unknown:
             print(f'Checking for the hypothesis that {author} was the author of {args.unknown}')
             ep, ep_fragments = feature_extractor.transform(ep_text, return_fragments=True, window_size=3)
-
-            print('Fitting the Verificator')
-            av.fit(Xtr, ytr, groups)
             av.predict_proba(ep, args.unknown)
 
         if args.loo:
@@ -87,12 +89,14 @@ if __name__ == '__main__':
     # Training settings
     parser = argparse.ArgumentParser(description='Authorship verification for Epistola XIII')
     parser.add_argument('corpuspath', type=str, metavar='PATH',
-                        help=f'Path to the directory containing the corpus (documents must be named <author>_<texname>.txt')
+                        help=f'Path to the directory containing the corpus (documents must be named '
+                             f'<author>_<texname>.txt')
     parser.add_argument('positive', type=str, default="Dante",
-                        help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check every author')
+                        help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check '
+                              f'every author')
     parser.add_argument('--loo', default=False, action='store_true',
                         help='submit each binary classifier to leave-one-out validation')
-    parser.add_argument('--unknown', type=str, default=None,
+    parser.add_argument('--unknown', type=str, metavar='PATH', default=None,
                         help='path to the file of unknown paternity (default None)')
 
     args = parser.parse_args()
@@ -108,4 +112,4 @@ if __name__ == '__main__':
     assert args.unknown or args.loo, 'error: nor an unknown document, nor LOO have been requested. Nothing to do.'
     assert args.unknown is None or os.path.exists(args.unknown), 'unknown file does not exist'
 
-    main()
\ No newline at end of file
+    main()
diff --git a/src/util/epistole_split.py b/src/util/epistole_split.py
deleted file mode 100755
index 5f458e5..0000000
--- a/src/util/epistole_split.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import os
-
-dir = '../../testi_1'
-author = 'Misc'
-file=author+'_Epistole.txt'
-
-
-
-order = 0
-epistola=[]
-for line in open(os.path.join(dir,file), 'rt').readlines():
-    line = line.strip()
-    if line:
-        epistola.append(line)
-    else:
-        epistola = '\n'.join(epistola)
-        open(os.path.join(dir,'{}_epistola{}.txt'.format(author,order)), 'wt').write(epistola)
-        order += 1
-        epistola = []
-
-if epistola:
-    epistola = '\n'.join(epistola)
-    open(os.path.join(dir, '{}_epistola{}.txt'.format(author,order)), 'wt').write(epistola)
-
-