1
0
Fork 0

model seletion in two levels, classifier oriented and quantifier oriented

This commit is contained in:
Alejandro Moreo Fernandez 2023-11-16 14:29:34 +01:00
parent e870d798b7
commit 513c78f1f3
4 changed files with 296 additions and 147 deletions

View File

@ -2,7 +2,9 @@ import quapy as qp
from quapy.protocol import APP from quapy.protocol import APP
from quapy.method.aggregative import DMy from quapy.method.aggregative import DMy
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
from examples.comparing_gridsearch import OLD_GridSearchQ
import numpy as np import numpy as np
from time import time
""" """
In this example, we show how to perform model selection on a DistributionMatching quantifier. In this example, we show how to perform model selection on a DistributionMatching quantifier.
@ -15,35 +17,44 @@ qp.environ['N_JOBS'] = -1
training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
# The model will be returned by the fit method of GridSearchQ. with qp.util.temp_seed(0):
# Every combination of hyper-parameters will be evaluated by confronting the
# quantifier thus configured against a series of samples generated by means
# of a sample generation protocol. For this example, we will use the
# artificial-prevalence protocol (APP), that generates samples with prevalence
# values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]).
# We devote 30% of the dataset for this exploration.
training, validation = training.split_stratified(train_prop=0.7)
protocol = APP(validation)
# We will explore a classification-dependent hyper-parameter (e.g., the 'C' # The model will be returned by the fit method of GridSearchQ.
# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter # Every combination of hyper-parameters will be evaluated by confronting the
# (e.g., the number of bins in a DistributionMatching quantifier. # quantifier thus configured against a series of samples generated by means
# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__" # of a sample generation protocol. For this example, we will use the
# in order to let the quantifier know this hyper-parameter belongs to its underlying # artificial-prevalence protocol (APP), that generates samples with prevalence
# classifier. # values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]).
param_grid = { # We devote 30% of the dataset for this exploration.
training, validation = training.split_stratified(train_prop=0.7)
protocol = APP(validation)
# We will explore a classification-dependent hyper-parameter (e.g., the 'C'
# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter
# (e.g., the number of bins in a DistributionMatching quantifier.
# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__"
# in order to let the quantifier know this hyper-parameter belongs to its underlying
# classifier.
param_grid = {
'classifier__C': np.logspace(-3,3,7), 'classifier__C': np.logspace(-3,3,7),
'classifier__class_weight': ['balanced', None],
'nbins': [8, 16, 32, 64], 'nbins': [8, 16, 32, 64],
} }
model = qp.model_selection.GridSearchQ( tinit = time()
# model = OLD_GridSearchQ(
model = qp.model_selection.GridSearchQ(
model=model, model=model,
param_grid=param_grid, param_grid=param_grid,
protocol=protocol, protocol=protocol,
error='mae', # the error to optimize is the MAE (a quantification-oriented loss) error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
refit=True, # retrain on the whole labelled set once done refit=False, # retrain on the whole labelled set once done
verbose=True # show information as the process goes on verbose=True # show information as the process goes on
).fit(training) ).fit(training)
tend = time()
print(f'model selection ended: best hyper-parameters={model.best_params_}') print(f'model selection ended: best hyper-parameters={model.best_params_}')
model = model.best_model_ model = model.best_model_
@ -53,5 +64,5 @@ model = model.best_model_
mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae') mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae')
print(f'MAE={mae_score:.5f}') print(f'MAE={mae_score:.5f}')
print(f'model selection took {tend-tinit}s')

View File

@ -37,7 +37,20 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
and :meth:`aggregate`. and :meth:`aggregate`.
""" """
def fit(self, data: LabelledCollection, fit_classifier=True): val_split_ = None
@property
def val_split(self):
return self.val_split_
@val_split.setter
def val_split(self, val_split):
if isinstance(val_split, LabelledCollection):
print('warning: setting val_split with a LabelledCollection will be inefficient in'
'model selection. Rather pass the LabelledCollection at fit time')
self.val_split_ = val_split
def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
""" """
Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function. Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
@ -46,7 +59,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
learner has been trained outside the quantifier. learner has been trained outside the quantifier.
:return: self :return: self
""" """
classif_predictions = self.classifier_fit_predict(data, fit_classifier) classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
self.aggregation_fit(classif_predictions, data) self.aggregation_fit(classif_predictions, data)
return self return self
@ -69,6 +82,9 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba')) self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
if predict_on is None:
predict_on = self.val_split
if predict_on is None: if predict_on is None:
if fit_classifier: if fit_classifier:
self.classifier.fit(*data.Xy) self.classifier.fit(*data.Xy)
@ -228,7 +244,6 @@ class AggregativeCrispQuantifier(AggregativeQuantifier, ABC):
:return: the string "predict", i.e., the standard method name for scikit-learn hard predictions :return: the string "predict", i.e., the standard method name for scikit-learn hard predictions
""" """
print('using predict')
return 'predict' return 'predict'
def _check_classifier(self, adapt_if_necessary=False): def _check_classifier(self, adapt_if_necessary=False):
@ -264,7 +279,6 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC):
:return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions :return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions
""" """
print('using predict_proba')
return 'predict_proba' return 'predict_proba'
def _check_classifier(self, adapt_if_necessary=False): def _check_classifier(self, adapt_if_necessary=False):
@ -289,35 +303,35 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC):
class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier): # class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier):
""" # """
Abstract class for quantification methods that carry out an adjustment (or correction) that requires, # Abstract class for quantification methods that carry out an adjustment (or correction) that requires,
at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that # at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that
is not the training set. There are three ways in which this distinction can be made, depending on how # is not the training set. There are three ways in which this distinction can be made, depending on how
the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion # the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion
of training instances that should be devoted to validate, or (ii) an integer indicating the # of training instances that should be devoted to validate, or (ii) an integer indicating the
number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to # number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to
use for validation. # use for validation.
""" # """
#
@property # @property
def val_split(self): # def val_split(self):
return self.val_split_ # return self.val_split_
#
@val_split.setter # @val_split.setter
def val_split(self, val_split): # def val_split(self, val_split):
if isinstance(val_split, LabelledCollection): # if isinstance(val_split, LabelledCollection):
print('warning: setting val_split with a LabelledCollection will be inefficient in' # print('warning: setting val_split with a LabelledCollection will be inefficient in'
'model selection. Rather pass the LabelledCollection at fit time') # 'model selection. Rather pass the LabelledCollection at fit time')
self.val_split_ = val_split # self.val_split_ = val_split
#
def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None): # def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
print('method from CorrectionbasedAggregativeQuantifier') # print('method from CorrectionbasedAggregativeQuantifier')
if predict_on is None: # if predict_on is None:
predict_on = self.val_split # predict_on = self.val_split
classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on) # classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on)
self.aggregation_fit(classif_predictions, data) # self.aggregation_fit(classif_predictions, data)
return self # return self
@ -352,7 +366,7 @@ class CC(AggregativeCrispQuantifier):
return F.prevalence_from_labels(classif_predictions, self.classes_) return F.prevalence_from_labels(classif_predictions, self.classes_)
class ACC(AggregativeCrispQuantifier, CorrectionbasedAggregativeQuantifier): class ACC(AggregativeCrispQuantifier):
""" """
`Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_, `Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_,
the "adjusted" variant of :class:`CC`, that corrects the predictions of CC the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
@ -447,7 +461,7 @@ class PCC(AggregativeSoftQuantifier):
return F.prevalence_from_probabilities(classif_posteriors, binarize=False) return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
class PACC(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): class PACC(AggregativeSoftQuantifier):
""" """
`Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_, `Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_,
the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier. the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.
@ -570,7 +584,7 @@ class EMQ(AggregativeSoftQuantifier):
return qs, ps return qs, ps
class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): class EMQrecalib(AggregativeSoftQuantifier):
""" """
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ), `Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by
@ -657,7 +671,7 @@ class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier
return posteriors return posteriors
class HDy(AggregativeSoftQuantifier, BinaryQuantifier, CorrectionbasedAggregativeQuantifier): class HDy(AggregativeSoftQuantifier, BinaryQuantifier):
""" """
`Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy). `Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of
@ -844,7 +858,7 @@ class SMM(AggregativeSoftQuantifier, BinaryQuantifier):
return np.asarray([1 - class1_prev, class1_prev]) return np.asarray([1 - class1_prev, class1_prev])
class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier): class DMy(AggregativeSoftQuantifier):
""" """
Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior
probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF
@ -865,7 +879,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
:param n_jobs: number of parallel workers (default None) :param n_jobs: number of parallel workers (default None)
""" """
def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD', def __init__(self, classifier, val_split=5, nbins=8, divergence: Union[str, Callable]='HD',
cdf=False, search='optim_minimize', n_jobs=None): cdf=False, search='optim_minimize', n_jobs=None):
self.classifier = classifier self.classifier = classifier
self.val_split = val_split self.val_split = val_split
@ -875,15 +889,15 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
self.search = search self.search = search
self.n_jobs = n_jobs self.n_jobs = n_jobs
@classmethod # @classmethod
def HDy(cls, classifier, val_split=0.4, n_jobs=None): # def HDy(cls, classifier, val_split=0.4, n_jobs=None):
from quapy.method.meta import MedianEstimator # from quapy.method.meta import MedianEstimator
#
# hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD')
# hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)
# return hdy
hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD') def _get_distributions(self, posteriors):
hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)
return hdy
def __get_distributions(self, posteriors):
histograms = [] histograms = []
post_dims = posteriors.shape[1] post_dims = posteriors.shape[1]
if post_dims == 2: if post_dims == 2:
@ -919,9 +933,10 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
n_classes = len(self.classifier.classes_) n_classes = len(self.classifier.classes_)
self.validation_distribution = qp.util.parallel( self.validation_distribution = qp.util.parallel(
func=self.__get_distributions, func=self._get_distributions,
args=[posteriors[true_labels==cat] for cat in range(n_classes)], args=[posteriors[true_labels==cat] for cat in range(n_classes)],
n_jobs=self.n_jobs n_jobs=self.n_jobs,
backend='threading'
) )
def aggregate(self, posteriors: np.ndarray): def aggregate(self, posteriors: np.ndarray):
@ -935,7 +950,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
:param posteriors: posterior probabilities of the instances in the sample :param posteriors: posterior probabilities of the instances in the sample
:return: a vector of class prevalence estimates :return: a vector of class prevalence estimates
""" """
test_distribution = self.__get_distributions(posteriors) test_distribution = self._get_distributions(posteriors)
divergence = get_divergence(self.divergence) divergence = get_divergence(self.divergence)
n_classes, n_channels, nbins = self.validation_distribution.shape n_classes, n_channels, nbins = self.validation_distribution.shape
def loss(prev): def loss(prev):
@ -1449,13 +1464,10 @@ class AggregativeMedianEstimator(BinaryQuantifier):
def _delayed_fit_aggregation(self, args): def _delayed_fit_aggregation(self, args):
with qp.util.temp_seed(self.random_state): with qp.util.temp_seed(self.random_state):
print('\tenter job')
((model, predictions), q_params), training = args ((model, predictions), q_params), training = args
model = deepcopy(model) model = deepcopy(model)
print('fitaggr', model, predictions, len(predictions), print(self.training))
model.set_params(**q_params) model.set_params(**q_params)
model.aggregation_fit(predictions, training) model.aggregation_fit(predictions, training)
print('\texit job')
return model return model
@ -1473,7 +1485,8 @@ class AggregativeMedianEstimator(BinaryQuantifier):
((params, training, kwargs) for params in cls_configs), ((params, training, kwargs) for params in cls_configs),
seed=qp.environ.get('_R_SEED', None), seed=qp.environ.get('_R_SEED', None),
n_jobs=self.n_jobs, n_jobs=self.n_jobs,
asarray=False asarray=False,
backend='threading'
) )
else: else:
print('only 1') print('only 1')
@ -1482,27 +1495,13 @@ class AggregativeMedianEstimator(BinaryQuantifier):
predictions = model.classifier_fit_predict(training, **kwargs) predictions = model.classifier_fit_predict(training, **kwargs)
models_preds = [(model, predictions)] models_preds = [(model, predictions)]
self.training = training self.models = qp.util.parallel(
self._delayed_fit_aggregation,
self.models = [] ((setup, training) for setup in itertools.product(models_preds, q_configs)),
print('WITHOUT PARALLEL JOBS') seed=qp.environ.get('_R_SEED', None),
for ((model, predictions), q_params) in itertools.product(models_preds, q_configs): n_jobs=self.n_jobs,
print('\tenter job') backend='threading'
model = deepcopy(model) )
print('fitaggr', model, predictions, len(predictions), print(self.training))
model.set_params(**q_params)
model.aggregation_fit(predictions, training)
self.models.append(model)
print('\texit job')
# self.models = qp.util.parallel(
# self._delayed_fit_aggregation,
# ((setup, training) for setup in itertools.product(models_preds, q_configs)),
# seed=qp.environ.get('_R_SEED', None),
# n_jobs=self.n_jobs,
# asarray=False
# )
else: else:
configs = qp.model_selection.expand_grid(self.param_grid) configs = qp.model_selection.expand_grid(self.param_grid)
self.models = qp.util.parallel( self.models = qp.util.parallel(
@ -1510,7 +1509,7 @@ class AggregativeMedianEstimator(BinaryQuantifier):
((params, training) for params in configs), ((params, training) for params in configs),
seed=qp.environ.get('_R_SEED', None), seed=qp.environ.get('_R_SEED', None),
n_jobs=self.n_jobs, n_jobs=self.n_jobs,
asarray=False backend='threading'
) )
return self return self
@ -1524,9 +1523,8 @@ class AggregativeMedianEstimator(BinaryQuantifier):
((model, instances) for model in self.models), ((model, instances) for model in self.models),
seed=qp.environ.get('_R_SEED', None), seed=qp.environ.get('_R_SEED', None),
n_jobs=self.n_jobs, n_jobs=self.n_jobs,
asarray=False backend='threading'
) )
prev_preds = np.asarray(prev_preds)
return np.median(prev_preds, axis=0) return np.median(prev_preds, axis=0)
#--------------------------------------------------------------- #---------------------------------------------------------------

View File

@ -1,6 +1,7 @@
import itertools import itertools
import signal import signal
from copy import deepcopy from copy import deepcopy
from enum import Enum
from typing import Union, Callable from typing import Union, Callable
import numpy as np import numpy as np
@ -10,10 +11,16 @@ import quapy as qp
from quapy import evaluation from quapy import evaluation
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol
from quapy.data.base import LabelledCollection from quapy.data.base import LabelledCollection
from quapy.method.aggregative import BaseQuantifier from quapy.method.aggregative import BaseQuantifier, AggregativeQuantifier
from time import time from time import time
class Status(Enum):
SUCCESS = 1
TIMEOUT = 2
INVALID = 3
ERROR = 4
class GridSearchQ(BaseQuantifier): class GridSearchQ(BaseQuantifier):
"""Grid Search optimization targeting a quantification-oriented metric. """Grid Search optimization targeting a quantification-oriented metric.
@ -69,21 +76,7 @@ class GridSearchQ(BaseQuantifier):
raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
def fit(self, training: LabelledCollection): def _fit_nonaggregative(self, training):
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
the error metric.
:param training: the training set on which to optimize the hyperparameters
:return: self
"""
protocol = self.protocol
self.param_scores_ = {}
self.best_score_ = None
tinit = time()
configs = expand_grid(self.param_grid) configs = expand_grid(self.param_grid)
self._sout(f'starting model selection with {self.n_jobs =}') self._sout(f'starting model selection with {self.n_jobs =}')
@ -94,34 +87,106 @@ class GridSearchQ(BaseQuantifier):
seed=qp.environ.get('_R_SEED', None), seed=qp.environ.get('_R_SEED', None),
n_jobs=self.n_jobs n_jobs=self.n_jobs
) )
return scores
for params, score, model in scores: def _delayed_fit_classifier(self, args):
if score is not None: cls_params, training = args
if self.best_score_ is None or score < self.best_score_: model = deepcopy(self.model)
self.best_score_ = score model.set_params(**cls_params)
self.best_params_ = params predictions = model.classifier_fit_predict(training)
self.best_model_ = model return (model, predictions, cls_params)
self.param_scores_[str(params)] = score
else:
self.param_scores_[str(params)] = 'timeout'
tend = time()-tinit def _eval_aggregative(self, args):
((model, predictions, cls_params), q_params), training = args
model = deepcopy(model)
# overrides default parameters with the parameters being explored at this iteration
model.set_params(**q_params)
model.aggregation_fit(predictions, training)
params = {**cls_params, **q_params}
return model, params
if self.best_score_ is None: def _delayed_evaluation__(self, args):
raise TimeoutError('no combination of hyperparameters seem to work')
self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) ' exit_status = Status.SUCCESS
f'[took {tend:.4f}s]')
if self.refit: tinit = time()
if isinstance(protocol, OnLabelledCollectionProtocol): if self.timeout > 0:
self._sout(f'refitting on the whole development set') def handler(signum, frame):
self.best_model_.fit(training + protocol.get_labelled_collection()) raise TimeoutError()
else:
raise RuntimeWarning(f'"refit" was requested, but the protocol does not '
f'implement the {OnLabelledCollectionProtocol.__name__} interface')
return self signal.signal(signal.SIGALRM, handler)
signal.alarm(self.timeout)
try:
model, params = self._eval_aggregative(args)
score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
ttime = time() - tinit
self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]')
if self.timeout > 0:
signal.alarm(0)
except TimeoutError:
self._sout(f'timeout ({self.timeout}s) reached for config {params}')
score = None
exit_status = Status.TIMEOUT
except ValueError as e:
self._sout(f'the combination of hyperparameters {params} is invalid')
score = None
exit_status = Status.INVALID
except Exception as e:
self._sout(f'something went wrong for config {params}; skipping:')
self._sout(f'\tException: {e}')
score = None
exit_status = Status.ERROR
return params, score, model, exit_status
# def _delayed_fit_aggregation_and_eval(self, args):
#
# ((model, predictions, cls_params), q_params), training = args
# exit_status = Status.SUCCESS
#
# tinit = time()
# if self.timeout > 0:
# def handler(signum, frame):
# raise TimeoutError()
# signal.signal(signal.SIGALRM, handler)
# signal.alarm(self.timeout)
#
# try:
# model = deepcopy(model)
# # overrides default parameters with the parameters being explored at this iteration
# model.set_params(**q_params)
# model.aggregation_fit(predictions, training)
# score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
#
# ttime = time() - tinit
# self._sout(f'hyperparams=[cls:{cls_params}, q:{q_params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]')
#
# if self.timeout > 0:
# signal.alarm(0)
# except TimeoutError:
# self._sout(f'timeout ({self.timeout}s) reached for config {q_params}')
# score = None
# exit_status = Status.TIMEOUT
# except ValueError as e:
# self._sout(f'the combination of hyperparameters {q_params} is invalid')
# score = None
# exit_status = Status.INVALID
# except Exception as e:
# self._sout(f'something went wrong for config {q_params}; skipping:')
# self._sout(f'\tException: {e}')
# score = None
# exit_status = Status.ERROR
#
# params = {**cls_params, **q_params}
# return params, score, model, exit_status
def _delayed_eval(self, args): def _delayed_eval(self, args):
params, training = args params, training = args
@ -163,8 +228,83 @@ class GridSearchQ(BaseQuantifier):
self._sout(f'\tException: {e}') self._sout(f'\tException: {e}')
score = None score = None
return params, score, model return params, score, model, status
def _fit_aggregative(self, training):
# break down the set of hyperparameters into two: classifier-specific, quantifier-specific
cls_configs, q_configs = group_params(self.param_grid)
# train all classifiers and get the predictions
models_preds_clsconfigs = qp.util.parallel(
self._delayed_fit_classifier,
((params, training) for params in cls_configs),
seed=qp.environ.get('_R_SEED', None),
n_jobs=self.n_jobs,
asarray=False,
)
# explore the quantifier-specific hyperparameters for each training configuration
scores = qp.util.parallel(
self._delayed_fit_aggregation_and_eval,
((setup, training) for setup in itertools.product(models_preds_clsconfigs, q_configs)),
seed=qp.environ.get('_R_SEED', None),
n_jobs=self.n_jobs
)
return scores
def fit(self, training: LabelledCollection):
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
the error metric.
:param training: the training set on which to optimize the hyperparameters
:return: self
"""
if self.refit and not isinstance(self.protocol, OnLabelledCollectionProtocol):
raise RuntimeWarning(f'"refit" was requested, but the protocol does not '
f'implement the {OnLabelledCollectionProtocol.__name__} interface')
tinit = time()
if isinstance(self.model, AggregativeQuantifier):
self.results = self._fit_aggregative(training)
else:
self.results = self._fit_nonaggregative(training)
self.param_scores_ = {}
self.best_score_ = None
for params, score, model in self.results:
if score is not None:
if self.best_score_ is None or score < self.best_score_:
self.best_score_ = score
self.best_params_ = params
self.best_model_ = model
self.param_scores_[str(params)] = score
else:
self.param_scores_[str(params)] = 'timeout'
tend = time()-tinit
if self.best_score_ is None:
raise TimeoutError('no combination of hyperparameters seem to work')
self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
f'[took {tend:.4f}s]')
if self.refit:
if isinstance(self.protocol, OnLabelledCollectionProtocol):
tinit = time()
self._sout(f'refitting on the whole development set')
self.best_model_.fit(training + self.protocol.get_labelled_collection())
tend = time() - tinit
self.refit_time_ = tend
else:
raise RuntimeWarning(f'the model cannot be refit on the whole dataset')
return self
def quantify(self, instances): def quantify(self, instances):
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method. """Estimate class prevalence values using the best model found after calling the :meth:`fit` method.

View File

@ -38,7 +38,7 @@ def map_parallel(func, args, n_jobs):
return list(itertools.chain.from_iterable(results)) return list(itertools.chain.from_iterable(results))
def parallel(func, args, n_jobs, seed=None, asarray=True): def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
""" """
A wrapper of multiprocessing: A wrapper of multiprocessing:
@ -58,7 +58,7 @@ def parallel(func, args, n_jobs, seed=None, asarray=True):
stack.enter_context(qp.util.temp_seed(seed)) stack.enter_context(qp.util.temp_seed(seed))
return func(*args) return func(*args)
out = Parallel(n_jobs=n_jobs)( out = Parallel(n_jobs=n_jobs, backend=backend)(
delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args) delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args)
) )
if asarray: if asarray: