gitea first commit
This commit is contained in:
parent
4c6c92f6d4
commit
c25af5e603
|
|
@ -14,7 +14,7 @@ AUTHORS_CORPUS_II = ['Dante', 'BeneFlorentinus', 'BenvenutoDaImola', 'Boncompagn
|
||||||
'RyccardusDeSanctoGermano', 'ZonoDeMagnalis']
|
'RyccardusDeSanctoGermano', 'ZonoDeMagnalis']
|
||||||
|
|
||||||
|
|
||||||
DEBUG_MODE = False
|
DEBUG_MODE = True
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -57,7 +57,8 @@ def main():
|
||||||
Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
|
Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
|
||||||
|
|
||||||
print('Fitting the Verificator')
|
print('Fitting the Verificator')
|
||||||
params = {'C': np.logspace(0, 1, 2)} if DEBUG_MODE else {'C': np.logspace(-3, +3, 7)}
|
#params = {'C': np.logspace(0, 1, 2)} if DEBUG_MODE else {'C': np.logspace(-3, +3, 7)}
|
||||||
|
params = {'C': np.logspace(0, 1, 2)} if DEBUG_MODE else {'C': [1,10,100,1000,0.1,0.01,0.001]}
|
||||||
|
|
||||||
slice_charngrams = feature_extractor.feature_range['_cngrams_task']
|
slice_charngrams = feature_extractor.feature_range['_cngrams_task']
|
||||||
slice_wordngrams = feature_extractor.feature_range['_wngrams_task']
|
slice_wordngrams = feature_extractor.feature_range['_wngrams_task']
|
||||||
|
|
@ -66,8 +67,8 @@ def main():
|
||||||
else:
|
else:
|
||||||
slice_first, slice_second = slice_wordngrams, slice_charngrams
|
slice_first, slice_second = slice_wordngrams, slice_charngrams
|
||||||
av = Pipeline([
|
av = Pipeline([
|
||||||
('featsel_cngrams', RangeFeatureSelector(slice_second, 0.1)),
|
('featsel_cngrams', RangeFeatureSelector(slice_second, 0.05)),
|
||||||
('featsel_wngrams', RangeFeatureSelector(slice_first, 0.1)),
|
('featsel_wngrams', RangeFeatureSelector(slice_first, 0.05)),
|
||||||
('av', AuthorshipVerificator(C=1, param_grid=params))
|
('av', AuthorshipVerificator(C=1, param_grid=params))
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
@ -115,8 +116,9 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('positive', type=str, default="Dante", metavar='AUTHOR',
|
parser.add_argument('positive', type=str, default="Dante", metavar='AUTHOR',
|
||||||
help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check '
|
help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check '
|
||||||
f'every author')
|
f'every author')
|
||||||
parser.add_argument('--log', type=str, metavar='PATH', default='./results.txt',
|
parser.add_argument('--log', type=str, metavar='PATH', default=None,
|
||||||
help='path to the log file where to write the results (default ./results.txt)')
|
help='path to the log file where to write the results '
|
||||||
|
'(if not specified, then ./results_{corpuspath.name})')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,11 @@ if [ ! -d $corpus ]; then
|
||||||
rm ../MedLatin.zip
|
rm ../MedLatin.zip
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PY="python3 author_identification.py"
|
PY="python3 author_identification_loo.py"
|
||||||
MedLatin1="../MedLatin/Corpora/MedLatin1"
|
MedLatin1="../MedLatin/Corpora/MedLatin1"
|
||||||
MedLatin2="../MedLatin/Corpora/MedLatin2"
|
MedLatin2="../MedLatin/Corpora/MedLatin2"
|
||||||
EP1="../MedLatin/Epistle/EpistolaXIII_1.txt"
|
EP1="../MedLatin/Epistle/EpistolaXIII_1.txt"
|
||||||
EP2="../MedLatin/Epistle/EpistolaXIII_2.txt"
|
EP2="../MedLatin/Epistle/EpistolaXIII_2.txt"
|
||||||
|
|
||||||
$PY $MedLatin1 ALL --unknown $EP1 --loo --log ./results_EP1.txt
|
$PY $MedLatin1 ALL --log ./resultsLoo_EP1.txt
|
||||||
$PY $MedLatin2 ALL --unknown $EP2 --loo --log ./results_EP2.txt
|
$PY $MedLatin2 ALL --log ./resultsLoo_EP2.txt
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ class AuthorshipVerificator(BaseEstimator):
|
||||||
self.author_name = author_name
|
self.author_name = author_name
|
||||||
|
|
||||||
def fit(self, X, y):
|
def fit(self, X, y):
|
||||||
self.classifier = LogisticRegression(C=self.C, class_weight='balanced')
|
self.classifier = LogisticRegression(C=self.C, class_weight='balanced', solver='lbfgs', max_iter=1000)
|
||||||
y = np.asarray(y)
|
y = np.asarray(y)
|
||||||
positive_examples = y.sum()
|
positive_examples = y.sum()
|
||||||
if positive_examples >= self.nfolds and self.param_grid is not None:
|
if positive_examples >= self.nfolds and self.param_grid is not None:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
joblib==0.11
|
||||||
|
nltk==3.4.5
|
||||||
|
numpy==1.18.2
|
||||||
|
scikit-learn==0.22.2.post1
|
||||||
|
scipy==1.4.1
|
||||||
Loading…
Reference in New Issue