"""
In this example, we show how to perform model selection on a
DistributionMatching quantifier.
"""

import quapy as qp
from quapy.protocol import APP
from quapy.method.aggregative import DistributionMatching
from sklearn.linear_model import LogisticRegression
import numpy as np

# The quantifier whose hyper-parameters are going to be tuned.
quantifier = DistributionMatching(LogisticRegression())

# Global QuaPy settings used by the rest of the script.
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1

dataset = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5)
training, test = dataset.train_test

# Model selection is driven by GridSearchQ, whose fit method returns the
# fitted search object. Each combination of hyper-parameters is assessed by
# confronting the quantifier thus configured against a series of samples
# produced by a sample generation protocol. Here we rely on the
# artificial-prevalence protocol (APP), which generates samples whose
# prevalence values cover the entire range of a grid
# (e.g., [0, 0.1, 0.2, ..., 1]).
# 30% of the training data is set aside for this exploration.
training, validation = training.split_stratified(train_prop=0.7)
protocol = APP(validation)

# Two kinds of hyper-parameters are explored: a classifier-dependent one
# (the 'C' hyper-parameter of LogisticRegression) and a
# quantification-dependent one (the number of bins of the
# DistributionMatching quantifier).
# Classifier-dependent hyper-parameters must carry the prefix "classifier__"
# so that the quantifier knows they belong to its underlying classifier.
param_grid = {
    'classifier__C': np.logspace(-3, 3, 7),
    'nbins': [8, 16, 32, 64],
}

grid_search = qp.model_selection.GridSearchQ(
    model=quantifier,
    param_grid=param_grid,
    protocol=protocol,
    error='mae',  # the error to optimize is the MAE (a quantification-oriented loss)
    refit=True,  # retrain on the whole labelled set once done
    verbose=True,  # show information as the process goes on
)
fitted_search = grid_search.fit(training)

print(f'model selection ended: best hyper-parameters={fitted_search.best_params_}')

# From here on, only the best configuration found is used.
model = fitted_search.best_model_

# Evaluation in terms of MAE; the same kind of protocol (APP) is applied,
# this time on the test set.
test_protocol = APP(test)
mae_score = qp.evaluation.evaluate(model, protocol=test_protocol, error_metric='mae')

print(f'MAE={mae_score:.5f}')