bugfix
This commit is contained in:
parent
d36e4ce9da
commit
ce5d4ab843
|
|
@ -12,6 +12,7 @@ AUTHORS_CORPUS_II = ['Dante', 'BeneFlorentinus', 'BenvenutoDaImola', 'Boncompagn
|
||||||
'IulianusDeSpira', 'NicolaTrevet', 'PierDellaVigna', 'PietroAlighieri', 'RaimundusLullus',
|
'IulianusDeSpira', 'NicolaTrevet', 'PierDellaVigna', 'PietroAlighieri', 'RaimundusLullus',
|
||||||
'RyccardusDeSanctoGermano', 'ZonoDeMagnalis']
|
'RyccardusDeSanctoGermano', 'ZonoDeMagnalis']
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
discarded = 0
|
discarded = 0
|
||||||
f1_scores = []
|
f1_scores = []
|
||||||
|
|
@ -21,7 +22,7 @@ def main():
|
||||||
print('='*80)
|
print('='*80)
|
||||||
print(f'Authorship Identification for {author} (complete {i}/{len(args.authors)})')
|
print(f'Authorship Identification for {author} (complete {i}/{len(args.authors)})')
|
||||||
print(f'Corpus {path}')
|
print(f'Corpus {path}')
|
||||||
print('='*80)
|
print('-'*80)
|
||||||
|
|
||||||
positive, negative, pos_files, neg_files, ep_text = load_latin_corpus(
|
positive, negative, pos_files, neg_files, ep_text = load_latin_corpus(
|
||||||
path, positive_author=author, unknown_target=args.unknown
|
path, positive_author=author, unknown_target=args.unknown
|
||||||
|
|
@ -50,13 +51,14 @@ def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
|
Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
|
||||||
|
|
||||||
|
print('Fitting the Verificator')
|
||||||
av = AuthorshipVerificator(nfolds=10, estimator=LogisticRegression)
|
av = AuthorshipVerificator(nfolds=10, estimator=LogisticRegression)
|
||||||
|
av.fit(Xtr, ytr, groups)
|
||||||
|
|
||||||
if args.unknown:
|
if args.unknown:
|
||||||
print(f'Checking for the hypothesis that {author} was the author of {args.unknown}')
|
print(f'Checking for the hypothesis that {author} was the author of {args.unknown}')
|
||||||
ep, ep_fragments = feature_extractor.transform(ep_text, return_fragments=True, window_size=3)
|
ep, ep_fragments = feature_extractor.transform(ep_text, return_fragments=True, window_size=3)
|
||||||
|
|
||||||
print('Fitting the Verificator')
|
|
||||||
av.fit(Xtr, ytr, groups)
|
|
||||||
av.predict_proba(ep, args.unknown)
|
av.predict_proba(ep, args.unknown)
|
||||||
|
|
||||||
if args.loo:
|
if args.loo:
|
||||||
|
|
@ -87,12 +89,14 @@ if __name__ == '__main__':
|
||||||
# Training settings
|
# Training settings
|
||||||
parser = argparse.ArgumentParser(description='Authorship verification for Epistola XIII')
|
parser = argparse.ArgumentParser(description='Authorship verification for Epistola XIII')
|
||||||
parser.add_argument('corpuspath', type=str, metavar='PATH',
|
parser.add_argument('corpuspath', type=str, metavar='PATH',
|
||||||
help=f'Path to the directory containing the corpus (documents must be named <author>_<texname>.txt')
|
help=f'Path to the directory containing the corpus (documents must be named '
|
||||||
|
f'<author>_<texname>.txt')
|
||||||
parser.add_argument('positive', type=str, default="Dante",
|
parser.add_argument('positive', type=str, default="Dante",
|
||||||
help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check every author')
|
help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check '
|
||||||
|
f'every author')
|
||||||
parser.add_argument('--loo', default=False, action='store_true',
|
parser.add_argument('--loo', default=False, action='store_true',
|
||||||
help='submit each binary classifier to leave-one-out validation')
|
help='submit each binary classifier to leave-one-out validation')
|
||||||
parser.add_argument('--unknown', type=str, default=None,
|
parser.add_argument('--unknown', type=str, metavar='PATH', default=None,
|
||||||
help='path to the file of unknown paternity (default None)')
|
help='path to the file of unknown paternity (default None)')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
@ -108,4 +112,4 @@ if __name__ == '__main__':
|
||||||
assert args.unknown or args.loo, 'error: nor an unknown document, nor LOO have been requested. Nothing to do.'
|
assert args.unknown or args.loo, 'error: nor an unknown document, nor LOO have been requested. Nothing to do.'
|
||||||
assert args.unknown is None or os.path.exists(args.unknown), 'unknown file does not exist'
|
assert args.unknown is None or os.path.exists(args.unknown), 'unknown file does not exist'
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
|
|
@ -1,25 +0,0 @@
|
||||||
import os
|
|
||||||
|
|
||||||
def _split_epistles(lines):
    """Group consecutive non-blank lines into epistles.

    Each line is stripped of surrounding whitespace. A blank line ends the
    current epistle; runs of blank lines (or blank lines before any text) do
    not produce empty epistles.

    :param lines: iterable of text lines (e.g. an open text file).
    :return: list of strings, one per epistle, with the epistle's lines
        joined by a newline character.
    """
    epistles = []
    current = []
    for raw_line in lines:
        text = raw_line.strip()
        if text:
            current.append(text)
        elif current:
            # A blank line closes the epistle collected so far. The
            # ``elif current`` guard avoids emitting an empty epistle (and
            # thus an empty output file) for repeated blank lines.
            epistles.append('\n'.join(current))
            current = []
    if current:
        # The input did not end with a blank line: flush the last epistle.
        epistles.append('\n'.join(current))
    return epistles


def main(corpus_dir='../../testi_1', author='Misc'):
    """Split ``<author>_Epistole.txt`` into one file per epistle.

    Reads ``<corpus_dir>/<author>_Epistole.txt``, treats blank lines as
    separators between epistles, and writes each epistle to
    ``<corpus_dir>/<author>_epistola<N>.txt`` where ``N`` counts from 0 in
    order of appearance.

    :param corpus_dir: directory holding the input file; output files are
        written to the same directory.
    :param author: author prefix used for both the input and output names.
    """
    source_path = os.path.join(corpus_dir, author + '_Epistole.txt')
    # Context managers guarantee that the handles are closed (and the written
    # data flushed) even if an error occurs part-way through.
    with open(source_path, 'rt') as source:
        epistles = _split_epistles(source)

    for order, epistola in enumerate(epistles):
        target_path = os.path.join(corpus_dir, '{}_epistola{}.txt'.format(author, order))
        with open(target_path, 'wt') as target:
            target.write(epistola)


if __name__ == '__main__':
    main()
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue