model selection in two levels, classifier-oriented and quantifier-oriented
This commit is contained in:
parent
e870d798b7
commit
513c78f1f3
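
The change below splits hyper-parameter optimization into two levels: classifier-oriented hyper-parameters (marked with the "classifier__" prefix), which require retraining the underlying classifier, and quantifier-oriented hyper-parameters, for which the classifier predictions can be reused and only the aggregation function refitted. A minimal illustrative sketch of that decomposition (the helpers split_grid and expand are hypothetical, not QuaPy functions):

import itertools

def split_grid(param_grid: dict):
    # separate classifier-oriented from quantifier-oriented hyper-parameters
    cls_grid = {k: v for k, v in param_grid.items() if k.startswith('classifier__')}
    q_grid = {k: v for k, v in param_grid.items() if not k.startswith('classifier__')}
    return cls_grid, q_grid

def expand(grid: dict):
    # yield every combination of values in the grid as a dict
    keys = list(grid)
    for values in itertools.product(*(grid[k] for k in keys)):
        yield dict(zip(keys, values))

param_grid = {'classifier__C': [0.1, 1, 10], 'nbins': [8, 16, 32]}
cls_grid, q_grid = split_grid(param_grid)
for cls_params in expand(cls_grid):      # expensive level: one classifier fit per config
    # classifier_fit_predict(...) would run here, once per classifier configuration
    for q_params in expand(q_grid):      # cheap level: refit only the aggregation
        # aggregation_fit(...) reuses the stored classifier predictions
        print({**cls_params, **q_params})
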
|
@ -2,7 +2,9 @@ import quapy as qp
|
|||
from quapy.protocol import APP
|
||||
from quapy.method.aggregative import DMy
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from examples.comparing_gridsearch import OLD_GridSearchQ
|
||||
import numpy as np
|
||||
from time import time
|
||||
|
||||
"""
|
||||
In this example, we show how to perform model selection on a DistributionMatching quantifier.
|
||||
|
@ -15,35 +17,44 @@ qp.environ['N_JOBS'] = -1
|
|||
|
||||
training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
|
||||
|
||||
# The model will be returned by the fit method of GridSearchQ.
|
||||
# Every combination of hyper-parameters will be evaluated by confronting the
|
||||
# quantifier thus configured against a series of samples generated by means
|
||||
# of a sample generation protocol. For this example, we will use the
|
||||
# artificial-prevalence protocol (APP), that generates samples with prevalence
|
||||
# values spanning the entire range, taken from a grid (e.g., [0, 0.1, 0.2, ..., 1]).
|
||||
# We devote 30% of the dataset to this exploration.
|
||||
training, validation = training.split_stratified(train_prop=0.7)
|
||||
protocol = APP(validation)
|
||||
with qp.util.temp_seed(0):
|
||||
|
||||
# We will explore a classification-dependent hyper-parameter (e.g., the 'C'
|
||||
# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter
|
||||
# (e.g., the number of bins in a DistributionMatching quantifier).
|
||||
# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__"
|
||||
# in order to let the quantifier know this hyper-parameter belongs to its underlying
|
||||
# classifier.
|
||||
param_grid = {
|
||||
'classifier__C': np.logspace(-3,3,7),
|
||||
'nbins': [8, 16, 32, 64],
|
||||
}
|
||||
# The model will be returned by the fit method of GridSearchQ.
|
||||
# Every combination of hyper-parameters will be evaluated by confronting the
|
||||
# quantifier thus configured against a series of samples generated by means
|
||||
# of a sample generation protocol. For this example, we will use the
|
||||
# artificial-prevalence protocol (APP), that generates samples with prevalence
|
||||
# values spanning the entire range, taken from a grid (e.g., [0, 0.1, 0.2, ..., 1]).
|
||||
# We devote 30% of the dataset to this exploration.
|
||||
training, validation = training.split_stratified(train_prop=0.7)
|
||||
protocol = APP(validation)
|
||||
|
||||
model = qp.model_selection.GridSearchQ(
|
||||
model=model,
|
||||
param_grid=param_grid,
|
||||
protocol=protocol,
|
||||
error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
|
||||
refit=True, # retrain on the whole labelled set once done
|
||||
verbose=True # show information as the process goes on
|
||||
).fit(training)
|
||||
# We will explore a classification-dependent hyper-parameter (e.g., the 'C'
|
||||
# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter
|
||||
# (e.g., the number of bins in a DistributionMatching quantifier).
|
||||
# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__"
|
||||
# in order to let the quantifier know this hyper-parameter belongs to its underlying
|
||||
# classifier.
|
||||
param_grid = {
|
||||
'classifier__C': np.logspace(-3,3,7),
|
||||
'classifier__class_weight': ['balanced', None],
|
||||
'nbins': [8, 16, 32, 64],
|
||||
}
|
||||
|
||||
tinit = time()
|
||||
|
||||
|
||||
# model = OLD_GridSearchQ(
|
||||
model = qp.model_selection.GridSearchQ(
|
||||
model=model,
|
||||
param_grid=param_grid,
|
||||
protocol=protocol,
|
||||
error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
|
||||
refit=False, # do not retrain on the whole labelled set once done
|
||||
verbose=True # show information as the process goes on
|
||||
).fit(training)
|
||||
|
||||
tend = time()
|
||||
|
||||
print(f'model selection ended: best hyper-parameters={model.best_params_}')
|
||||
model = model.best_model_
|
||||
|
@ -53,5 +64,5 @@ model = model.best_model_
|
|||
mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae')
|
||||
|
||||
print(f'MAE={mae_score:.5f}')
|
||||
|
||||
print(f'model selection took {tend-tinit}s')
|
||||
|
||||
|
|
|
@ -37,7 +37,20 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
|||
and :meth:`aggregate`.
|
||||
"""
|
||||
|
||||
def fit(self, data: LabelledCollection, fit_classifier=True):
|
||||
val_split_ = None
|
||||
|
||||
@property
|
||||
def val_split(self):
|
||||
return self.val_split_
|
||||
|
||||
@val_split.setter
|
||||
def val_split(self, val_split):
|
||||
if isinstance(val_split, LabelledCollection):
|
||||
print('warning: setting val_split with a LabelledCollection will be inefficient in '
|
||||
'model selection. Rather pass the LabelledCollection at fit time')
|
||||
self.val_split_ = val_split
|
||||
|
||||
def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
|
||||
"""
|
||||
Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
|
||||
|
||||
|
@ -46,7 +59,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
|||
learner has been trained outside the quantifier.
|
||||
:return: self
|
||||
"""
|
||||
classif_predictions = self.classifier_fit_predict(data, fit_classifier)
|
||||
classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
|
||||
self.aggregation_fit(classif_predictions, data)
|
||||
return self
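
The decomposition above (classifier_fit_predict followed by aggregation_fit) is what the two-level grid search relies on. A hedged usage sketch of the API as it appears in this commit, with a toy dataset; PACC is used only as an example of an aggregative quantifier:

import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.data import LabelledCollection
from quapy.method.aggregative import PACC

# toy labelled collection (illustrative data only)
X = np.random.rand(200, 3)
y = (X[:, 0] > 0.5).astype(int)
data = LabelledCollection(X, y)

quantifier = PACC(LogisticRegression())

# stage 1: train the classifier and obtain validation predictions
# (held-out split or k-fold, depending on val_split / predict_on)
classif_predictions = quantifier.classifier_fit_predict(data, fit_classifier=True)

# stage 2: fit only the aggregation function on those predictions; this is the part
# affected by quantifier-oriented hyper-parameters, so it can be repeated cheaply
quantifier.aggregation_fit(classif_predictions, data)

# the single call quantifier.fit(data) performs both stages at once
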
|
||||
|
||||
|
@ -69,6 +82,9 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
|||
|
||||
self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
|
||||
|
||||
if predict_on is None:
|
||||
predict_on = self.val_split
|
||||
|
||||
if predict_on is None:
|
||||
if fit_classifier:
|
||||
self.classifier.fit(*data.Xy)
|
||||
|
@ -228,7 +244,6 @@ class AggregativeCrispQuantifier(AggregativeQuantifier, ABC):
|
|||
|
||||
:return: the string "predict", i.e., the standard method name for scikit-learn hard predictions
|
||||
"""
|
||||
print('using predict')
|
||||
return 'predict'
|
||||
|
||||
def _check_classifier(self, adapt_if_necessary=False):
|
||||
|
@ -264,7 +279,6 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC):
|
|||
|
||||
:return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions
|
||||
"""
|
||||
print('using predict_proba')
|
||||
return 'predict_proba'
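
For reference, a small illustrative example of the two scikit-learn prediction modes that crisp and soft aggregative quantifiers consume:

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
clf = LogisticRegression().fit(X, y)

print(clf.predict(X))        # hard labels, e.g. [0 0 1 1]      -> used by crisp quantifiers
print(clf.predict_proba(X))  # one row of posteriors per instance -> used by soft quantifiers
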
|
||||
|
||||
def _check_classifier(self, adapt_if_necessary=False):
|
||||
|
@ -289,35 +303,35 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC):
|
|||
|
||||
|
||||
|
||||
class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier):
|
||||
"""
|
||||
Abstract class for quantification methods that carry out an adjustment (or correction) that requires,
|
||||
at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that
|
||||
is not the training set. There are three ways in which this distinction can be made, depending on how
|
||||
the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion
|
||||
of training instances that should be devoted to validation, or (ii) an integer indicating the
|
||||
number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to
|
||||
use for validation.
|
||||
"""
|
||||
|
||||
@property
|
||||
def val_split(self):
|
||||
return self.val_split_
|
||||
|
||||
@val_split.setter
|
||||
def val_split(self, val_split):
|
||||
if isinstance(val_split, LabelledCollection):
|
||||
print('warning: setting val_split with a LabelledCollection will be inefficient in '
|
||||
'model selection. Rather pass the LabelledCollection at fit time')
|
||||
self.val_split_ = val_split
|
||||
|
||||
def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
|
||||
print('method from CorrectionbasedAggregativeQuantifier')
|
||||
if predict_on is None:
|
||||
predict_on = self.val_split
|
||||
classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on)
|
||||
self.aggregation_fit(classif_predictions, data)
|
||||
return self
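
An illustrative sketch of the three ways val_split can be specified, as described in the docstring above (ACC is just one example of a correction-based aggregative quantifier):

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import ACC

# (i) a float in (0, 1): hold out that fraction of the training set for validation
acc_holdout = ACC(LogisticRegression(), val_split=0.4)

# (ii) an integer k: obtain the validation predictions via k-fold cross-validation
acc_kfold = ACC(LogisticRegression(), val_split=5)

# (iii) an explicit LabelledCollection to use for validation
# acc_explicit = ACC(LogisticRegression(), val_split=my_validation_collection)
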
|
||||
# class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier):
|
||||
# """
|
||||
# Abstract class for quantification methods that carry out an adjustment (or correction) that requires,
|
||||
# at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that
|
||||
# is not the training set. There are three ways in which this distinction can be made, depending on how
|
||||
# the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion
|
||||
# of training instances that should be devoted to validate, or (ii) an integer indicating the
|
||||
# number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to
|
||||
# use for validation.
|
||||
# """
|
||||
#
|
||||
# @property
|
||||
# def val_split(self):
|
||||
# return self.val_split_
|
||||
#
|
||||
# @val_split.setter
|
||||
# def val_split(self, val_split):
|
||||
# if isinstance(val_split, LabelledCollection):
|
||||
# print('warning: setting val_split with a LabelledCollection will be inefficient in'
|
||||
# 'model selection. Rather pass the LabelledCollection at fit time')
|
||||
# self.val_split_ = val_split
|
||||
#
|
||||
# def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
|
||||
# print('method from CorrectionbasedAggregativeQuantifier')
|
||||
# if predict_on is None:
|
||||
# predict_on = self.val_split
|
||||
# classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on)
|
||||
# self.aggregation_fit(classif_predictions, data)
|
||||
# return self
|
||||
|
||||
|
||||
|
||||
|
@ -352,7 +366,7 @@ class CC(AggregativeCrispQuantifier):
|
|||
return F.prevalence_from_labels(classif_predictions, self.classes_)
|
||||
|
||||
|
||||
class ACC(AggregativeCrispQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||
class ACC(AggregativeCrispQuantifier):
|
||||
"""
|
||||
`Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_,
|
||||
the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
|
||||
|
@ -447,7 +461,7 @@ class PCC(AggregativeSoftQuantifier):
|
|||
return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
|
||||
|
||||
|
||||
class PACC(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||
class PACC(AggregativeSoftQuantifier):
|
||||
"""
|
||||
`Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_,
|
||||
the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.
|
||||
|
@ -570,7 +584,7 @@ class EMQ(AggregativeSoftQuantifier):
|
|||
return qs, ps
|
||||
|
||||
|
||||
class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||
class EMQrecalib(AggregativeSoftQuantifier):
|
||||
"""
|
||||
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
|
||||
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by
|
||||
|
@ -657,7 +671,7 @@ class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier
|
|||
return posteriors
|
||||
|
||||
|
||||
class HDy(AggregativeSoftQuantifier, BinaryQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||
class HDy(AggregativeSoftQuantifier, BinaryQuantifier):
|
||||
"""
|
||||
`Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
|
||||
HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of
|
||||
|
@ -844,7 +858,7 @@ class SMM(AggregativeSoftQuantifier, BinaryQuantifier):
|
|||
return np.asarray([1 - class1_prev, class1_prev])
|
||||
|
||||
|
||||
class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||
class DMy(AggregativeSoftQuantifier):
|
||||
"""
|
||||
Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior
|
||||
probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF
|
||||
|
@ -865,7 +879,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
|||
:param n_jobs: number of parallel workers (default None)
|
||||
"""
|
||||
|
||||
def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD',
|
||||
def __init__(self, classifier, val_split=5, nbins=8, divergence: Union[str, Callable]='HD',
|
||||
cdf=False, search='optim_minimize', n_jobs=None):
|
||||
self.classifier = classifier
|
||||
self.val_split = val_split
|
||||
|
@ -875,15 +889,15 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
|||
self.search = search
|
||||
self.n_jobs = n_jobs
|
||||
|
||||
@classmethod
|
||||
def HDy(cls, classifier, val_split=0.4, n_jobs=None):
|
||||
from quapy.method.meta import MedianEstimator
|
||||
# @classmethod
|
||||
# def HDy(cls, classifier, val_split=0.4, n_jobs=None):
|
||||
# from quapy.method.meta import MedianEstimator
|
||||
#
|
||||
# hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD')
|
||||
# hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)
|
||||
# return hdy
|
||||
|
||||
hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD')
|
||||
hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)
|
||||
return hdy
|
||||
|
||||
def __get_distributions(self, posteriors):
|
||||
def _get_distributions(self, posteriors):
|
||||
histograms = []
|
||||
post_dims = posteriors.shape[1]
|
||||
if post_dims == 2:
|
||||
|
@ -919,9 +933,10 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
|||
n_classes = len(self.classifier.classes_)
|
||||
|
||||
self.validation_distribution = qp.util.parallel(
|
||||
func=self.__get_distributions,
|
||||
func=self._get_distributions,
|
||||
args=[posteriors[true_labels==cat] for cat in range(n_classes)],
|
||||
n_jobs=self.n_jobs
|
||||
n_jobs=self.n_jobs,
|
||||
backend='threading'
|
||||
)
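
As a rough illustration of what the per-class validation distributions contain in the binary case, the following standalone sketch (plain NumPy, not the library code) builds a normalized histogram of positive-class posteriors with a configurable number of bins:

import numpy as np

def class_histogram(posteriors: np.ndarray, nbins: int = 8) -> np.ndarray:
    # histogram of P(y=1|x) over [0, 1], normalized to sum to 1
    hist, _ = np.histogram(posteriors[:, 1], bins=nbins, range=(0, 1))
    return hist / max(hist.sum(), 1)

# toy usage: 5 instances with their [P(y=0|x), P(y=1|x)] posteriors
post = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6], [0.7, 0.3], [0.1, 0.9]])
print(class_histogram(post, nbins=4))  # [0.2 0.2 0.2 0.4]
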
|
||||
|
||||
def aggregate(self, posteriors: np.ndarray):
|
||||
|
@ -935,7 +950,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
|||
:param posteriors: posterior probabilities of the instances in the sample
|
||||
:return: a vector of class prevalence estimates
|
||||
"""
|
||||
test_distribution = self.__get_distributions(posteriors)
|
||||
test_distribution = self._get_distributions(posteriors)
|
||||
divergence = get_divergence(self.divergence)
|
||||
n_classes, n_channels, nbins = self.validation_distribution.shape
|
||||
def loss(prev):
|
||||
|
@ -1449,13 +1464,10 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
|||
|
||||
def _delayed_fit_aggregation(self, args):
|
||||
with qp.util.temp_seed(self.random_state):
|
||||
print('\tenter job')
|
||||
((model, predictions), q_params), training = args
|
||||
model = deepcopy(model)
|
||||
print('fitaggr', model, predictions, len(predictions), print(self.training))
|
||||
model.set_params(**q_params)
|
||||
model.aggregation_fit(predictions, training)
|
||||
print('\texit job')
|
||||
return model
|
||||
|
||||
|
||||
|
@ -1473,7 +1485,8 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
|||
((params, training, kwargs) for params in cls_configs),
|
||||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs,
|
||||
asarray=False
|
||||
asarray=False,
|
||||
backend='threading'
|
||||
)
|
||||
else:
|
||||
print('only 1')
|
||||
|
@ -1482,27 +1495,13 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
|||
predictions = model.classifier_fit_predict(training, **kwargs)
|
||||
models_preds = [(model, predictions)]
|
||||
|
||||
self.training = training
|
||||
|
||||
self.models = []
|
||||
print('WITHOUT PARALLEL JOBS')
|
||||
for ((model, predictions), q_params) in itertools.product(models_preds, q_configs):
|
||||
print('\tenter job')
|
||||
model = deepcopy(model)
|
||||
print('fitaggr', model, predictions, len(predictions), print(self.training))
|
||||
model.set_params(**q_params)
|
||||
model.aggregation_fit(predictions, training)
|
||||
self.models.append(model)
|
||||
print('\texit job')
|
||||
|
||||
|
||||
# self.models = qp.util.parallel(
|
||||
# self._delayed_fit_aggregation,
|
||||
# ((setup, training) for setup in itertools.product(models_preds, q_configs)),
|
||||
# seed=qp.environ.get('_R_SEED', None),
|
||||
# n_jobs=self.n_jobs,
|
||||
# asarray=False
|
||||
# )
|
||||
self.models = qp.util.parallel(
|
||||
self._delayed_fit_aggregation,
|
||||
((setup, training) for setup in itertools.product(models_preds, q_configs)),
|
||||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs,
|
||||
backend='threading'
|
||||
)
|
||||
else:
|
||||
configs = qp.model_selection.expand_grid(self.param_grid)
|
||||
self.models = qp.util.parallel(
|
||||
|
@ -1510,7 +1509,7 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
|||
((params, training) for params in configs),
|
||||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs,
|
||||
asarray=False
|
||||
backend='threading'
|
||||
)
|
||||
return self
|
||||
|
||||
|
@ -1524,9 +1523,8 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
|||
((model, instances) for model in self.models),
|
||||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs,
|
||||
asarray=False
|
||||
backend='threading'
|
||||
)
|
||||
prev_preds = np.asarray(prev_preds)
|
||||
return np.median(prev_preds, axis=0)
|
||||
|
||||
#---------------------------------------------------------------
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import itertools
|
||||
import signal
|
||||
from copy import deepcopy
|
||||
from enum import Enum
|
||||
from typing import Union, Callable
|
||||
|
||||
import numpy as np
|
||||
|
@ -10,10 +11,16 @@ import quapy as qp
|
|||
from quapy import evaluation
|
||||
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol
|
||||
from quapy.data.base import LabelledCollection
|
||||
from quapy.method.aggregative import BaseQuantifier
|
||||
from quapy.method.aggregative import BaseQuantifier, AggregativeQuantifier
|
||||
from time import time
|
||||
|
||||
|
||||
class Status(Enum):
|
||||
SUCCESS = 1
|
||||
TIMEOUT = 2
|
||||
INVALID = 3
|
||||
ERROR = 4
|
||||
|
||||
class GridSearchQ(BaseQuantifier):
|
||||
"""Grid Search optimization targeting a quantification-oriented metric.
|
||||
|
||||
|
@ -69,21 +76,7 @@ class GridSearchQ(BaseQuantifier):
|
|||
raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
|
||||
f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
|
||||
|
||||
def fit(self, training: LabelledCollection):
|
||||
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||
the error metric.
|
||||
|
||||
:param training: the training set on which to optimize the hyperparameters
|
||||
:return: self
|
||||
"""
|
||||
|
||||
protocol = self.protocol
|
||||
|
||||
self.param_scores_ = {}
|
||||
self.best_score_ = None
|
||||
|
||||
tinit = time()
|
||||
|
||||
def _fit_nonaggregative(self, training):
|
||||
configs = expand_grid(self.param_grid)
|
||||
|
||||
self._sout(f'starting model selection with {self.n_jobs =}')
|
||||
|
@ -94,34 +87,106 @@ class GridSearchQ(BaseQuantifier):
|
|||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs
|
||||
)
|
||||
return scores
|
||||
|
||||
for params, score, model in scores:
|
||||
if score is not None:
|
||||
if self.best_score_ is None or score < self.best_score_:
|
||||
self.best_score_ = score
|
||||
self.best_params_ = params
|
||||
self.best_model_ = model
|
||||
self.param_scores_[str(params)] = score
|
||||
else:
|
||||
self.param_scores_[str(params)] = 'timeout'
|
||||
def _delayed_fit_classifier(self, args):
|
||||
cls_params, training = args
|
||||
model = deepcopy(self.model)
|
||||
model.set_params(**cls_params)
|
||||
predictions = model.classifier_fit_predict(training)
|
||||
return (model, predictions, cls_params)
|
||||
|
||||
tend = time()-tinit
|
||||
def _eval_aggregative(self, args):
|
||||
((model, predictions, cls_params), q_params), training = args
|
||||
model = deepcopy(model)
|
||||
# overrides default parameters with the parameters being explored at this iteration
|
||||
model.set_params(**q_params)
|
||||
model.aggregation_fit(predictions, training)
|
||||
params = {**cls_params, **q_params}
|
||||
return model, params
|
||||
|
||||
if self.best_score_ is None:
|
||||
raise TimeoutError('no combination of hyperparameters seems to work')
|
||||
def _delayed_evaluation__(self, args):
|
||||
|
||||
self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
|
||||
f'[took {tend:.4f}s]')
|
||||
exit_status = Status.SUCCESS
|
||||
|
||||
if self.refit:
|
||||
if isinstance(protocol, OnLabelledCollectionProtocol):
|
||||
self._sout(f'refitting on the whole development set')
|
||||
self.best_model_.fit(training + protocol.get_labelled_collection())
|
||||
else:
|
||||
raise RuntimeWarning(f'"refit" was requested, but the protocol does not '
|
||||
f'implement the {OnLabelledCollectionProtocol.__name__} interface')
|
||||
tinit = time()
|
||||
if self.timeout > 0:
|
||||
def handler(signum, frame):
|
||||
raise TimeoutError()
|
||||
|
||||
return self
|
||||
signal.signal(signal.SIGALRM, handler)
|
||||
signal.alarm(self.timeout)
|
||||
|
||||
try:
|
||||
model, params = self._eval_aggregative(args)
|
||||
|
||||
score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
|
||||
|
||||
ttime = time() - tinit
|
||||
self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]')
|
||||
|
||||
if self.timeout > 0:
|
||||
signal.alarm(0)
|
||||
|
||||
except TimeoutError:
|
||||
self._sout(f'timeout ({self.timeout}s) reached for config {params}')
|
||||
score = None
|
||||
exit_status = Status.TIMEOUT
|
||||
|
||||
except ValueError as e:
|
||||
self._sout(f'the combination of hyperparameters {params} is invalid')
|
||||
score = None
|
||||
exit_status = Status.INVALID
|
||||
|
||||
except Exception as e:
|
||||
self._sout(f'something went wrong for config {params}; skipping:')
|
||||
self._sout(f'\tException: {e}')
|
||||
score = None
|
||||
exit_status = Status.ERROR
|
||||
|
||||
|
||||
return params, score, model, exit_status
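
The timeout handling above follows the usual signal.alarm pattern; a standalone sketch of that mechanism (POSIX-only, since SIGALRM is unavailable on Windows):

import signal
import time

def evaluate_with_timeout(seconds: int, slow_job):
    def handler(signum, frame):
        raise TimeoutError()
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(seconds)          # arm the alarm for this configuration
    try:
        result = slow_job()
        signal.alarm(0)            # disarm on success
        return result, 'SUCCESS'
    except TimeoutError:
        return None, 'TIMEOUT'     # report the timeout instead of aborting the search

print(evaluate_with_timeout(1, lambda: time.sleep(2)))  # (None, 'TIMEOUT')
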
|
||||
|
||||
# def _delayed_fit_aggregation_and_eval(self, args):
|
||||
#
|
||||
# ((model, predictions, cls_params), q_params), training = args
|
||||
# exit_status = Status.SUCCESS
|
||||
#
|
||||
# tinit = time()
|
||||
# if self.timeout > 0:
|
||||
# def handler(signum, frame):
|
||||
# raise TimeoutError()
|
||||
# signal.signal(signal.SIGALRM, handler)
|
||||
# signal.alarm(self.timeout)
|
||||
#
|
||||
# try:
|
||||
# model = deepcopy(model)
|
||||
# # overrides default parameters with the parameters being explored at this iteration
|
||||
# model.set_params(**q_params)
|
||||
# model.aggregation_fit(predictions, training)
|
||||
# score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
|
||||
#
|
||||
# ttime = time() - tinit
|
||||
# self._sout(f'hyperparams=[cls:{cls_params}, q:{q_params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]')
|
||||
#
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(0)
|
||||
# except TimeoutError:
|
||||
# self._sout(f'timeout ({self.timeout}s) reached for config {q_params}')
|
||||
# score = None
|
||||
# exit_status = Status.TIMEOUT
|
||||
# except ValueError as e:
|
||||
# self._sout(f'the combination of hyperparameters {q_params} is invalid')
|
||||
# score = None
|
||||
# exit_status = Status.INVALID
|
||||
# except Exception as e:
|
||||
# self._sout(f'something went wrong for config {q_params}; skipping:')
|
||||
# self._sout(f'\tException: {e}')
|
||||
# score = None
|
||||
# exit_status = Status.ERROR
|
||||
#
|
||||
# params = {**cls_params, **q_params}
|
||||
# return params, score, model, exit_status
|
||||
|
||||
def _delayed_eval(self, args):
|
||||
params, training = args
|
||||
|
@ -163,8 +228,83 @@ class GridSearchQ(BaseQuantifier):
|
|||
self._sout(f'\tException: {e}')
|
||||
score = None
|
||||
|
||||
return params, score, model
|
||||
return params, score, model, status
|
||||
|
||||
def _fit_aggregative(self, training):
|
||||
|
||||
# break down the set of hyperparameters into two: classifier-specific, quantifier-specific
|
||||
cls_configs, q_configs = group_params(self.param_grid)
|
||||
|
||||
# train all classifiers and get the predictions
|
||||
models_preds_clsconfigs = qp.util.parallel(
|
||||
self._delayed_fit_classifier,
|
||||
((params, training) for params in cls_configs),
|
||||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs,
|
||||
asarray=False,
|
||||
)
|
||||
|
||||
# explore the quantifier-specific hyperparameters for each training configuration
|
||||
scores = qp.util.parallel(
|
||||
self._delayed_fit_aggregation_and_eval,
|
||||
((setup, training) for setup in itertools.product(models_preds_clsconfigs, q_configs)),
|
||||
seed=qp.environ.get('_R_SEED', None),
|
||||
n_jobs=self.n_jobs
|
||||
)
|
||||
|
||||
return scores
|
||||
|
||||
|
||||
def fit(self, training: LabelledCollection):
|
||||
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||
the error metric.
|
||||
|
||||
:param training: the training set on which to optimize the hyperparameters
|
||||
:return: self
|
||||
"""
|
||||
|
||||
if self.refit and not isinstance(self.protocol, OnLabelledCollectionProtocol):
|
||||
raise RuntimeWarning(f'"refit" was requested, but the protocol does not '
|
||||
f'implement the {OnLabelledCollectionProtocol.__name__} interface')
|
||||
|
||||
tinit = time()
|
||||
|
||||
if isinstance(self.model, AggregativeQuantifier):
|
||||
self.results = self._fit_aggregative(training)
|
||||
else:
|
||||
self.results = self._fit_nonaggregative(training)
|
||||
|
||||
self.param_scores_ = {}
|
||||
self.best_score_ = None
|
||||
for params, score, model, status in self.results:
|
||||
if score is not None:
|
||||
if self.best_score_ is None or score < self.best_score_:
|
||||
self.best_score_ = score
|
||||
self.best_params_ = params
|
||||
self.best_model_ = model
|
||||
self.param_scores_[str(params)] = score
|
||||
else:
|
||||
self.param_scores_[str(params)] = 'timeout'
|
||||
|
||||
tend = time()-tinit
|
||||
|
||||
if self.best_score_ is None:
|
||||
raise TimeoutError('no combination of hyperparameters seems to work')
|
||||
|
||||
self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
|
||||
f'[took {tend:.4f}s]')
|
||||
|
||||
if self.refit:
|
||||
if isinstance(self.protocol, OnLabelledCollectionProtocol):
|
||||
tinit = time()
|
||||
self._sout(f'refitting on the whole development set')
|
||||
self.best_model_.fit(training + self.protocol.get_labelled_collection())
|
||||
tend = time() - tinit
|
||||
self.refit_time_ = tend
|
||||
else:
|
||||
raise RuntimeWarning(f'the model cannot be refit on the whole dataset')
|
||||
|
||||
return self
|
||||
|
||||
def quantify(self, instances):
|
||||
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
||||
|
|
|
@ -38,7 +38,7 @@ def map_parallel(func, args, n_jobs):
|
|||
return list(itertools.chain.from_iterable(results))
|
||||
|
||||
|
||||
def parallel(func, args, n_jobs, seed=None, asarray=True):
|
||||
def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
|
||||
"""
|
||||
A wrapper of multiprocessing:
|
||||
|
||||
|
@ -58,7 +58,7 @@ def parallel(func, args, n_jobs, seed=None, asarray=True):
|
|||
stack.enter_context(qp.util.temp_seed(seed))
|
||||
return func(*args)
|
||||
|
||||
out = Parallel(n_jobs=n_jobs)(
|
||||
out = Parallel(n_jobs=n_jobs, backend=backend)(
|
||||
delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args)
|
||||
)
|
||||
if asarray:
|
||||
|
|
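
Regarding the new backend argument of qp.util.parallel: joblib's default "loky" backend runs jobs in separate processes and pickles their arguments, whereas "threading" runs them as threads of the same process, so large shared objects such as precomputed classifier predictions are not copied. A minimal joblib example of the threading backend:

from joblib import Parallel, delayed

squares = Parallel(n_jobs=2, backend='threading')(
    delayed(lambda x: x * x)(i) for i in range(5)   # lambdas are fine: no pickling with threads
)
print(squares)  # [0, 1, 4, 9, 16]
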