# QuaPy/examples/4.lequa2022_experiments.py

import numpy as np
from sklearn.linear_model import LogisticRegression
import quapy as qp
import quapy.functional as F
from quapy.data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022
from quapy.evaluation import evaluation_report
from quapy.method.aggregative import EMQ
from quapy.model_selection import GridSearchQ
import pandas as pd
"""
This example shows how to use the LeQua 2022 datasets (new in v0.1.7). For more information about the datasets,
and about the LeQua competition itself, see:
https://lequa2022.github.io/index (the site of the competition)
https://ceur-ws.org/Vol-3180/paper-146.pdf (the overview paper)
"""
# there are 4 tasks (T1A, T1B, T2A, T2B)
task = 'T1A'
# set the sample size in the environment. The sample size is task-dependent and can be looked up as follows:
qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]
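# optional sanity check (not part of the original example): print the sample size in use;
# the binary tasks (T1A, T2A) use smaller samples than the multiclass ones (T1B, T2B)
print(f'using sample size {qp.environ["SAMPLE_SIZE"]} for task {task}')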
qp.environ['N_JOBS'] = -1
# the fetch method returns a training set (an instance of LabelledCollection) and two generators: one yielding
# the validation samples and another yielding the test samples. Both generators are instances of classes that extend
# AbstractProtocol (i.e., classes that implement sample-generation procedures) and, in particular, are instances
# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition)
# stored in a directory.
training, val_generator, test_generator = fetch_lequa2022(task=task)
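# a quick, illustrative inspection of the data (optional): since 'training' is a LabelledCollection,
# its size, classes, and class prevalence can be queried directly
print(f'training: {len(training)} documents, classes: {training.classes_}')
print(f'training prevalence: {F.strprev(training.prevalence())}')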
# define the quantifier
quantifier = EMQ(classifier=LogisticRegression())
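# EMQ (also known as SLD) iteratively rescales the classifier's posterior probabilities via
# expectation maximization; any other aggregative quantifier could be plugged in here instead,
# e.g. (hypothetical alternative, not used below):
#   from quapy.method.aggregative import PACC
#   quantifier = PACC(classifier=LogisticRegression())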
# model selection
param_grid = {
    'classifier__C': np.logspace(-3, 3, 7),          # classifier-dependent: inverse of regularization strength
    'classifier__class_weight': ['balanced', None],  # classifier-dependent: weights of each class
    'recalib': ['bcts', 'platt', None]               # quantifier-dependent: recalibration method (new in v0.1.7)
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(training)
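# optional check: report the configuration chosen by model selection (assuming the scikit-learn-style
# attributes best_params_ and best_score_ that GridSearchQ sets after fitting)
print(f'best hyperparameters: {model_selection.best_params_}')
print(f'best validation score (mrae): {model_selection.best_score_:.4f}')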
# evaluation
report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
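# the report is a pandas DataFrame with one row per test sample; it contains the true and the estimated
# prevalence vectors ('true-prev', 'estim-prev') and one column per requested error metric (mae, mrae, mkld)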
# printing results
pd.set_option('display.expand_frame_repr', False)
report['estim-prev'] = report['estim-prev'].map(F.strprev)
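# the true prevalence values can be formatted in the same way (optional, for readability)
report['true-prev'] = report['true-prev'].map(F.strprev)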
print(report)
print('Averaged values:')
print(report.mean(numeric_only=True))
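# optionally, the per-sample results can be saved for later analysis; the file name below is
# just an illustrative choice
report.to_csv(f'lequa2022_{task}_results.csv', index=False)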