gitea first commit
This commit is contained in:
parent
4c6c92f6d4
commit
c25af5e603
|
|
@ -14,7 +14,7 @@ AUTHORS_CORPUS_II = ['Dante', 'BeneFlorentinus', 'BenvenutoDaImola', 'Boncompagn
|
|||
'RyccardusDeSanctoGermano', 'ZonoDeMagnalis']
|
||||
|
||||
|
||||
DEBUG_MODE = False
|
||||
DEBUG_MODE = True
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -57,7 +57,8 @@ def main():
|
|||
Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative)
|
||||
|
||||
print('Fitting the Verificator')
|
||||
params = {'C': np.logspace(0, 1, 2)} if DEBUG_MODE else {'C': np.logspace(-3, +3, 7)}
|
||||
#params = {'C': np.logspace(0, 1, 2)} if DEBUG_MODE else {'C': np.logspace(-3, +3, 7)}
|
||||
params = {'C': np.logspace(0, 1, 2)} if DEBUG_MODE else {'C': [1,10,100,1000,0.1,0.01,0.001]}
|
||||
|
||||
slice_charngrams = feature_extractor.feature_range['_cngrams_task']
|
||||
slice_wordngrams = feature_extractor.feature_range['_wngrams_task']
|
||||
|
|
@ -66,8 +67,8 @@ def main():
|
|||
else:
|
||||
slice_first, slice_second = slice_wordngrams, slice_charngrams
|
||||
av = Pipeline([
|
||||
('featsel_cngrams', RangeFeatureSelector(slice_second, 0.1)),
|
||||
('featsel_wngrams', RangeFeatureSelector(slice_first, 0.1)),
|
||||
('featsel_cngrams', RangeFeatureSelector(slice_second, 0.05)),
|
||||
('featsel_wngrams', RangeFeatureSelector(slice_first, 0.05)),
|
||||
('av', AuthorshipVerificator(C=1, param_grid=params))
|
||||
])
|
||||
|
||||
|
|
@ -115,8 +116,9 @@ if __name__ == '__main__':
|
|||
parser.add_argument('positive', type=str, default="Dante", metavar='AUTHOR',
|
||||
help= f'Positive author for the hypothesis (default "Dante"); set to "ALL" to check '
|
||||
f'every author')
|
||||
parser.add_argument('--log', type=str, metavar='PATH', default='./results.txt',
|
||||
help='path to the log file where to write the results (default ./results.txt)')
|
||||
parser.add_argument('--log', type=str, metavar='PATH', default=None,
|
||||
help='path to the log file where to write the results '
|
||||
'(if not specified, then ./results_{corpuspath.name})')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
|
|
@ -9,11 +9,11 @@ if [ ! -d $corpus ]; then
|
|||
rm ../MedLatin.zip
|
||||
fi
|
||||
|
||||
PY="python3 author_identification.py"
|
||||
PY="python3 author_identification_loo.py"
|
||||
MedLatin1="../MedLatin/Corpora/MedLatin1"
|
||||
MedLatin2="../MedLatin/Corpora/MedLatin2"
|
||||
EP1="../MedLatin/Epistle/EpistolaXIII_1.txt"
|
||||
EP2="../MedLatin/Epistle/EpistolaXIII_2.txt"
|
||||
|
||||
$PY $MedLatin1 ALL --unknown $EP1 --loo --log ./results_EP1.txt
|
||||
$PY $MedLatin2 ALL --unknown $EP2 --loo --log ./results_EP2.txt
|
||||
$PY $MedLatin1 ALL --log ./resultsLoo_EP1.txt
|
||||
$PY $MedLatin2 ALL --log ./resultsLoo_EP2.txt
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class AuthorshipVerificator(BaseEstimator):
|
|||
self.author_name = author_name
|
||||
|
||||
def fit(self, X, y):
|
||||
self.classifier = LogisticRegression(C=self.C, class_weight='balanced')
|
||||
self.classifier = LogisticRegression(C=self.C, class_weight='balanced', solver='lbfgs', max_iter=1000)
|
||||
y = np.asarray(y)
|
||||
positive_examples = y.sum()
|
||||
if positive_examples >= self.nfolds and self.param_grid is not None:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
joblib==0.11
|
||||
nltk==3.4.5
|
||||
numpy==1.18.2
|
||||
scikit-learn==0.22.2.post1
|
||||
scipy==1.4.1
|
||||
Loading…
Reference in New Issue