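"""
LeQua 2022 experiments (tasks T1A and T1B): an EMQ (SLD) quantifier built on top of
logistic regression, with model selection over the classifier's hyperparameters and
over EMQ's recalibration options, evaluated with quapy's sample-based protocols.
"""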
import numpy as np
from abstention.calibration import NoBiasVectorScaling, VectorScaling, TempScaling
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
import quapy as qp
import quapy.functional as F
from quapy.classification.calibration import RecalibratedProbabilisticClassifierBase, NBVSCalibration, \
    BCTSCalibration
from quapy.data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022
from quapy.evaluation import evaluation_report
from quapy.method.aggregative import EMQ
from quapy.model_selection import GridSearchQ
import pandas as pd
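# run the full pipeline for both vector-based tasks of LeQua 2022:
# T1A (binary) and T1B (multiclass)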
for task in ['T1A', 'T1B']:
    # calibration = TempScaling(verbose=False, bias_positions='all')
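    # set quapy's sample size to the official sample size of the task, and fetch the
    # training set together with the validation and test sample generators (protocols)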
    qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]
    training, val_generator, test_generator = fetch_lequa2022(task=task)
    # define the quantifier
    # learner = BCTSCalibration(LogisticRegression(), n_jobs=-1)
    # learner = CalibratedClassifierCV(LogisticRegression())
    learner = LogisticRegression()
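    # EMQ (the SLD method of Saerens, Latinne, and Decaestecker) iteratively rescales the
    # classifier's posterior probabilities and the estimated class priors via expectation
    # maximization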
    quantifier = EMQ(classifier=learner)

    # model selection
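    # 'classifier__*' entries are routed to the underlying LogisticRegression, while
    # 'recalib' (the posterior recalibration method, or None) and 'exact_train_prev'
    # (use the exact training prevalence vs. an estimate of it as starting point)
    # are hyperparameters of EMQ itself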
    param_grid = {
        'classifier__C': np.logspace(-3, 3, 7),
        'classifier__class_weight': ['balanced', None],
        'recalib': ['platt', 'ts', 'vs', 'nbvs', 'bcts', None],
        'exact_train_prev': [False, True]
    }
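    # explore the grid by quantifying the validation samples, keeping the configuration
    # that minimizes mean relative absolute error (mrae)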
    model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', n_jobs=-1, refit=False, verbose=True)
    quantifier = model_selection.fit(training)
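    # fit() returns the fitted search object, which quantifies with the best configuration
    # found; refit=False means the winning model is not retrained after the search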
    # evaluation
    report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
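    # quantify every test sample produced by the protocol; the report is a DataFrame with
    # the true and estimated prevalences and the requested error metrics for each sample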
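    # (optional) dump per-sample prevalence estimates and errors to CSV files; left
    # commented out, and note that it relies on a `calib` variable not defined above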
    # import os
    # os.makedirs(f'./out', exist_ok=True)
    # with open(f'./out/EMQ_{calib}_{task}.txt', 'wt') as foo:
    #     estim_prev = report['estim-prev'].values
    #     nclasses = len(estim_prev[0])
    #     foo.write(f'id,'+','.join([str(x) for x in range(nclasses)])+'\n')
    #     for id, prev in enumerate(estim_prev):
    #         foo.write(f'{id},'+','.join([f'{p:.5f}' for p in prev])+'\n')
    #
    # #os.makedirs(f'./errors/{task}', exist_ok=True)
    # with open(f'./out/EMQ_{calib}_{task}_errors.txt', 'wt') as foo:
    #     maes, mraes = report['mae'].values, report['mrae'].values
    #     foo.write(f'id,AE,RAE\n')
    #     for id, (ae_i, rae_i) in enumerate(zip(maes, mraes)):
    #         foo.write(f'{id},{ae_i:.5f},{rae_i:.5f}\n')
    # printing results
    pd.set_option('display.expand_frame_repr', False)
    report['estim-prev'] = report['estim-prev'].map(F.strprev)
    print(report)
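    # average the numeric columns (the per-sample errors) to summarize performance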
    print('Averaged values:')
    print(report.mean(numeric_only=True))