From a2ec72496a988e132b785cf6c61b5e87c1b94b8e Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Tue, 9 Feb 2021 11:48:16 +0100
Subject: [PATCH] adding eval_budget to evaluation functions

---
 quapy/evaluation.py      | 46 ++++++++++++++++++++++++++++++++++++++++++----
 quapy/model_selection.py | 35 ++++++------------------------
 test.py                  |  2 +-
 3 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index 5f5205c..f4f0411 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -11,12 +11,14 @@
 from quapy.util import temp_seed
 import quapy.functional as F
 import pandas as pd
+
 def artificial_sampling_prediction(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
         n_prevpoints=210,
         n_repetitions=1,
+        eval_budget: int = None,
         n_jobs=1,
         random_seed=42,
         verbose=True
@@ -26,8 +28,12 @@ def artificial_sampling_prediction(
     :param model: the model in charge of generating the class prevalence estimations
     :param test: the test set on which to perform arificial sampling
     :param sample_size: the size of the samples
-    :param n_prevpoints: the number of different prevalences to sample
+    :param n_prevpoints: the number of different prevalences to sample (or set to None if eval_budget is specified)
     :param n_repetitions: the number of repetitions for each prevalence
+    :param eval_budget: if specified, sets a ceiling on the number of evaluations to perform. For example, if there
+    are 3 classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be set to 5, since this generates 15
+    different prevalences ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]), whereas setting n_prevpoints
+    to 6 would produce more than 20 evaluations.
     :param n_jobs: number of jobs to be run in parallel
     :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect any
     other random process.
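
The docstring's arithmetic can be checked with a few lines of Python. A minimal sketch, assuming F.num_prevalence_combinations implements the standard stars-and-bars count C(n_prevpoints + n_classes - 2, n_classes - 1) per repetition; the helper n_evaluations below is hypothetical and not part of the patch:

from math import comb

def n_evaluations(n_prevpoints, n_classes, n_repetitions=1):
    # number of prevalence vectors on a grid of n_prevpoints values per class
    # that sum to 1, times the number of repetitions per prevalence
    return comb(n_prevpoints + n_classes - 2, n_classes - 1) * n_repetitions

print(n_evaluations(5, 3))  # 15 -> fits within eval_budget=20
print(n_evaluations(6, 3))  # 21 -> exceeds eval_budget=20
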
@@ -37,6 +43,8 @@ def artificial_sampling_prediction(
     contains the the prevalence estimations
     """
 
+    n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, n_repetitions, verbose)
+
     with temp_seed(random_seed):
         indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
 
@@ -60,7 +68,7 @@
             estim_prevalence = quantification_func(sample.instances)
         return true_prevalence, estim_prevalence
 
-    pbar = tqdm(indexes, desc='[artificial sampling protocol] predicting') if verbose else indexes
+    pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes
     results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
 
     true_prevalences, estim_prevalences = zip(*results)
@@ -76,6 +84,7 @@
         sample_size,
         n_prevpoints=210,
         n_repetitions=1,
+        eval_budget: int = None,
         n_jobs=1,
         random_seed=42,
         error_metrics:Iterable[Union[str,Callable]]='mae',
@@ -90,7 +99,7 @@
     df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names)
     true_prevs, estim_prevs = artificial_sampling_prediction(
-        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
+        model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
     )
     for true_prev, estim_prev in zip(true_prevs, estim_prevs):
         series = {'true-prev': true_prev, 'estim-prev': estim_prev}
 
@@ -108,6 +117,7 @@
         sample_size,
         n_prevpoints=210,
         n_repetitions=1,
+        eval_budget: int = None,
         n_jobs=1,
         random_seed=42,
         error_metric:Union[str,Callable]='mae',
@@ -119,7 +129,7 @@
         assert hasattr(error_metric, '__call__'), 'invalid error function'
 
     true_prevs, estim_prevs = artificial_sampling_prediction(
-        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
+        model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
     )
 
     return error_metric(true_prevs, estim_prevs)
@@ -138,3 +148,31 @@ def _delayed_eval(args):
         prev_true = test.prevalence()
 
     return error(prev_true, prev_estim)
+
+def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, n_repetitions=1, verbose=True):
+    if n_prevpoints is None and eval_budget is None:
+        raise ValueError('either n_prevpoints or eval_budget has to be specified')
+    elif n_prevpoints is None:
+        assert eval_budget > 0, 'eval_budget must be a positive integer'
+        n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
+        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
+        if verbose:
+            print(f'setting n_prevpoints={n_prevpoints} so that the number of '
+                  f'evaluations ({eval_computations}) does not exceed the evaluation '
+                  f'budget ({eval_budget})')
+    elif eval_budget is None:
+        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
+        if verbose:
+            print(f'{eval_computations} evaluations will be performed for each '
+                  f'combination of hyper-parameters')
+    else:
+        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
+        if eval_computations > eval_budget:
+            new_n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
+            new_eval_computations = F.num_prevalence_combinations(new_n_prevpoints, n_classes, n_repetitions)
+            if verbose:
+                print(f'the budget of evaluations would be exceeded with n_prevpoints={n_prevpoints}; changing to '
+                      f'n_prevpoints={new_n_prevpoints} ({new_eval_computations} evaluations per hyper-parameter combination)')
+            n_prevpoints, eval_computations = new_n_prevpoints, new_eval_computations
+
+    return n_prevpoints, eval_computations
+
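
With the evaluation.py changes in place, the new parameter can be exercised as follows. A minimal usage sketch; the dataset, learner, and budget value are illustrative choices, not part of the patch:

import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

# train a quantifier on an illustrative binary dataset
dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
model = PACC(LogisticRegression()).fit(dataset.training)

# cap the artificial-sampling protocol at 101 evaluations; for a binary problem
# this reduces the default 210-point prevalence grid to 101 points
mae = qp.evaluation.artificial_sampling_eval(
    model, dataset.test, sample_size=500, eval_budget=101, error_metric='mae')
print(f'MAE={mae:.4f}')
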
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 6d8664a..f3e9b18 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -18,7 +18,7 @@ class GridSearchQ(BaseQuantifier):
                  sample_size: int,
                  n_prevpoints: int = None,
                  n_repetitions: int = 1,
-                 eval_budget : int = None,
+                 eval_budget: int = None,
                  error: Union[Callable, str] = qp.error.mae,
                  refit=False,
                  val_split=0.4,
@@ -86,29 +86,6 @@ class GridSearchQ(BaseQuantifier):
             raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
                              f'proportion of training documents to extract (found) {type(validation)}')
 
-    def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
-        if n_prevpoints is None and eval_budget is None:
-            raise ValueError('either n_prevpoints or eval_budget has to be specified')
-        elif n_prevpoints is None:
-            assert eval_budget > 0, 'eval_budget must be a positive integer'
-            self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
-            eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
-            self.sout(f'setting n_prevpoints={self.n_prevpoints} so that the number of \n'
-                      f'evaluations ({eval_computations}) does not exceed the evaluation budget ({eval_budget})')
-        elif eval_budget is None:
-            self.n_prevpoints = n_prevpoints
-            eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
-            self.sout(f'{eval_computations} evaluations will be performed for each '
-                      f'combination of hyper-parameters')
-        else:
-            eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
-            if eval_computations > eval_budget:
-                self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
-                new_eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
-                self.sout(f'the budget of evaluations would be exceeded with\n'
-                          f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={self.n_prevpoints}. This will produce\n'
-                          f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
-
     def __check_error(self, error):
         if error in qp.error.QUANTIFICATION_ERROR:
             self.error = error
@@ -130,10 +107,7 @@ class GridSearchQ(BaseQuantifier):
             val_split = self.val_split
         training, val_split = self.__check_training_validation(training, val_split)
         assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer'
-        self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes)
 
-        # print(f'training size={len(training)}')
-        # print(f'validation size={len(val_split)}')
         params_keys = list(self.param_grid.keys())
         params_values = list(self.param_grid.values())
@@ -161,7 +135,12 @@ class GridSearchQ(BaseQuantifier):
             model.set_params(**params)
             model.fit(training)
             true_prevalences, estim_prevalences = artificial_sampling_prediction(
-                model, val_split, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
+                model, val_split, self.sample_size,
+                n_prevpoints=self.n_prevpoints,
+                n_repetitions=self.n_repetitions,
+                eval_budget=self.eval_budget,
+                n_jobs=n_jobs,
+                random_seed=self.random_seed,
                 verbose=False
             )
 
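
The same budget can drive model selection, since GridSearchQ now forwards eval_budget to artificial_sampling_prediction. Again a sketch with illustrative choices (the dataset, learner, and grid are assumptions, not part of the patch):

import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)

# leave n_prevpoints unset (None) and let the budget determine the grid density
grid = qp.model_selection.GridSearchQ(
    PACC(LogisticRegression()),
    param_grid={'C': [0.1, 1, 10, 100]},  # forwarded to the underlying learner
    sample_size=500,
    eval_budget=1000,
    error='mae',
    refit=True,
    val_split=0.4
).fit(dataset.training)
print(grid.best_params_)
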
diff --git a/test.py b/test.py
index b7b75f3..3d664b3 100644
--- a/test.py
+++ b/test.py
@@ -23,7 +23,7 @@ nfolds=5
 nrepeats=1
 
 df = pd.DataFrame(columns=['dataset', 'method', 'mse'])
-for datasetname in qp.datasets.UCI_DATASETS[2:]:
+for datasetname in qp.datasets.UCI_DATASETS:
     collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False)
     scores = []
     pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats)
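
Finally, for readers wondering how the largest admissible grid is found: a plausible implementation of the search that _check_num_evals delegates to F.get_nprevpoints_approximation is a simple linear scan over grid densities (a sketch consistent with the calling code, not necessarily QuaPy's exact implementation):

from math import comb

def get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions=1):
    # increase the grid density until the number of evaluations would
    # exceed the budget, then step back one
    n_prevpoints = 1
    while True:
        n_evals = comb(n_prevpoints + n_classes - 2, n_classes - 1) * n_repetitions
        if n_evals > eval_budget:
            return n_prevpoints - 1
        n_prevpoints += 1

print(get_nprevpoints_approximation(20, n_classes=3))  # 5, as in the docstring example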