From a2ec72496a988e132b785cf6c61b5e87c1b94b8e Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Tue, 9 Feb 2021 11:48:16 +0100
Subject: [PATCH] adding eval_budget to evaluation functions

---
 quapy/evaluation.py      | 46 ++++++++++++++++++++++++++++++++++++++++++----
 quapy/model_selection.py | 35 ++++++------------------------
 test.py                  |  2 +-
 3 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index 5f5205c..f4f0411 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -11,12 +11,14 @@
 from quapy.util import temp_seed
 import quapy.functional as F
 import pandas as pd
+
 def artificial_sampling_prediction(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
         n_prevpoints=210,
         n_repetitions=1,
+        eval_budget: int = None,
         n_jobs=1,
         random_seed=42,
         verbose=True
@@ -26,8 +28,12 @@ def artificial_sampling_prediction(
     :param model: the model in charge of generating the class prevalence estimations
     :param test: the test set on which to perform arificial sampling
     :param sample_size: the size of the samples
-    :param n_prevpoints: the number of different prevalences to sample
+    :param n_prevpoints: the number of different prevalences to sample (or set to None if eval_budget is specified)
     :param n_repetitions: the number of repetitions for each prevalence
+    :param eval_budget: if specified, sets a ceiling on the number of evaluations to perform. For example, if there
+    are 3 classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be set to 5, since this generates 15
+    different prevalences ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]), whereas setting n_prevpoints
+    to 6 would produce more than 20 evaluations.
     :param n_jobs: number of jobs to be run in parallel
     :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect any
     other random process.
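
The docstring's arithmetic can be checked with a few lines of Python. A minimal sketch, assuming F.num_prevalence_combinations implements the standard stars-and-bars count C(n_prevpoints + n_classes - 2, n_classes - 1) per repetition; the helper n_evaluations below is hypothetical and not part of the patch:

from math import comb

def n_evaluations(n_prevpoints, n_classes, n_repetitions=1):
    # number of prevalence vectors on a grid of n_prevpoints values per class
    # that sum to 1, times the number of repetitions per prevalence
    return comb(n_prevpoints + n_classes - 2, n_classes - 1) * n_repetitions

print(n_evaluations(5, 3))  # 15 -> fits within eval_budget=20
print(n_evaluations(6, 3))  # 21 -> exceeds eval_budget=20
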
@@ -37,6 +43,8 @@ def artificial_sampling_prediction(
     contains the the prevalence estimations
     """
 
+    n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, n_repetitions, verbose)
+
     with temp_seed(random_seed):
         indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
 
@@ -60,7 +68,7 @@
             estim_prevalence = quantification_func(sample.instances)
         return true_prevalence, estim_prevalence
 
-    pbar = tqdm(indexes, desc='[artificial sampling protocol] predicting') if verbose else indexes
+    pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes
     results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
 
     true_prevalences, estim_prevalences = zip(*results)
@@ -76,6 +84,7 @@
         sample_size,
         n_prevpoints=210,
         n_repetitions=1,
+        eval_budget: int = None,
         n_jobs=1,
         random_seed=42,
         error_metrics:Iterable[Union[str,Callable]]='mae',
@@ -90,7 +99,7 @@
     df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names)
     true_prevs, estim_prevs = artificial_sampling_prediction(
-        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
+        model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
     )
     for true_prev, estim_prev in zip(true_prevs, estim_prevs):
         series = {'true-prev': true_prev, 'estim-prev': estim_prev}
 
@@ -108,6 +117,7 @@
         sample_size,
         n_prevpoints=210,
         n_repetitions=1,
+        eval_budget: int = None,
         n_jobs=1,
         random_seed=42,
         error_metric:Union[str,Callable]='mae',
@@ -119,7 +129,7 @@
         assert hasattr(error_metric, '__call__'), 'invalid error function'
 
     true_prevs, estim_prevs = artificial_sampling_prediction(
-        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
+        model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
     )
 
     return error_metric(true_prevs, estim_prevs)
@@ -138,3 +148,31 @@ def _delayed_eval(args):
         prev_true = test.prevalence()
 
     return error(prev_true, prev_estim)
+
+def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, n_repetitions=1, verbose=True):
+    if n_prevpoints is None and eval_budget is None:
+        raise ValueError('either n_prevpoints or eval_budget has to be specified')
+    elif n_prevpoints is None:
+        assert eval_budget > 0, 'eval_budget must be a positive integer'
+        n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
+        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
+        if verbose:
+            print(f'setting n_prevpoints={n_prevpoints} so that the number of '
+                  f'evaluations ({eval_computations}) does not exceed the evaluation '
+                  f'budget ({eval_budget})')
+    elif eval_budget is None:
+        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
+        if verbose:
+            print(f'{eval_computations} evaluations will be performed for each '
+                  f'combination of hyper-parameters')
+    else:
+        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
+        if eval_computations > eval_budget:
+            new_n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
+            new_eval_computations = F.num_prevalence_combinations(new_n_prevpoints, n_classes, n_repetitions)
+            if verbose:
+                print(f'the budget of evaluations would be exceeded with n_prevpoints={n_prevpoints}; changing to '
+                      f'n_prevpoints={new_n_prevpoints} ({new_eval_computations} evaluations per hyper-parameter combination)')
+            n_prevpoints, eval_computations = new_n_prevpoints, new_eval_computations
+
+    return n_prevpoints, eval_computations
+
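
With the evaluation.py changes in place, the new parameter can be exercised as follows. A minimal usage sketch; the dataset, learner, and budget value are illustrative choices, not part of the patch:

import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

# train a quantifier on an illustrative binary dataset
dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
model = PACC(LogisticRegression()).fit(dataset.training)

# cap the artificial-sampling protocol at 101 evaluations; for a binary problem
# this reduces the default 210-point prevalence grid to 101 points
mae = qp.evaluation.artificial_sampling_eval(
    model, dataset.test, sample_size=500, eval_budget=101, error_metric='mae')
print(f'MAE={mae:.4f}')
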
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 6d8664a..f3e9b18 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -18,7 +18,7 @@ class GridSearchQ(BaseQuantifier):
                  sample_size: int,
                  n_prevpoints: int = None,
                  n_repetitions: int = 1,
-                 eval_budget : int = None,
+                 eval_budget: int = None,
                  error: Union[Callable, str] = qp.error.mae,
                  refit=False,
                  val_split=0.4,
@@ -86,29 +86,6 @@ class GridSearchQ(BaseQuantifier):
             raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
                              f'proportion of training documents to extract (found) {type(validation)}')
 
-    def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
-        if n_prevpoints is None and eval_budget is None:
-            raise ValueError('either n_prevpoints or eval_budget has to be specified')
-        elif n_prevpoints is None:
-            assert eval_budget > 0, 'eval_budget must be a positive integer'
-            self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
-            eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
-            self.sout(f'setting n_prevpoints={self.n_prevpoints} so that the number of \n'
-                      f'evaluations ({eval_computations}) does not exceed the evaluation budget ({eval_budget})')
-        elif eval_budget is None:
-            self.n_prevpoints = n_prevpoints
-            eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
-            self.sout(f'{eval_computations} evaluations will be performed for each '
-                      f'combination of hyper-parameters')
-        else:
-            eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
-            if eval_computations > eval_budget:
-                self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
-                new_eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
-                self.sout(f'the budget of evaluations would be exceeded with\n'
-                          f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={self.n_prevpoints}. This will produce\n'
-                          f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
-
     def __check_error(self, error):
         if error in qp.error.QUANTIFICATION_ERROR:
             self.error = error
@@ -130,10 +107,7 @@ class GridSearchQ(BaseQuantifier):
             val_split = self.val_split
         training, val_split = self.__check_training_validation(training, val_split)
         assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer'
-        self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes)
 
-        # print(f'training size={len(training)}')
-        # print(f'validation size={len(val_split)}')
         params_keys = list(self.param_grid.keys())
         params_values = list(self.param_grid.values())
@@ -161,7 +135,12 @@ class GridSearchQ(BaseQuantifier):
             model.set_params(**params)
             model.fit(training)
             true_prevalences, estim_prevalences = artificial_sampling_prediction(
-                model, val_split, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
+                model, val_split, self.sample_size,
+                n_prevpoints=self.n_prevpoints,
+                n_repetitions=self.n_repetitions,
+                eval_budget=self.eval_budget,
+                n_jobs=n_jobs,
+                random_seed=self.random_seed,
                 verbose=False
             )
 
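
The same budget can drive model selection, since GridSearchQ now forwards eval_budget to artificial_sampling_prediction. Again a sketch with illustrative choices (the dataset, learner, and grid are assumptions, not part of the patch):

import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.linear_model import LogisticRegression

dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)

# leave n_prevpoints unset (None) and let the budget determine the grid density
grid = qp.model_selection.GridSearchQ(
    PACC(LogisticRegression()),
    param_grid={'C': [0.1, 1, 10, 100]},  # forwarded to the underlying learner
    sample_size=500,
    eval_budget=1000,
    error='mae',
    refit=True,
    val_split=0.4
).fit(dataset.training)
print(grid.best_params_)
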
diff --git a/test.py b/test.py
index b7b75f3..3d664b3 100644
--- a/test.py
+++ b/test.py
@@ -23,7 +23,7 @@ nfolds=5
 nrepeats=1
 
 df = pd.DataFrame(columns=['dataset', 'method', 'mse'])
-for datasetname in qp.datasets.UCI_DATASETS[2:]:
+for datasetname in qp.datasets.UCI_DATASETS:
     collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False)
     scores = []
     pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats)
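
Finally, for readers wondering how the largest admissible grid is found: a plausible implementation of the search that _check_num_evals delegates to F.get_nprevpoints_approximation is a simple linear scan over grid densities (a sketch consistent with the calling code, not necessarily QuaPy's exact implementation):

from math import comb

def get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions=1):
    # increase the grid density until the number of evaluations would
    # exceed the budget, then step back one
    n_prevpoints = 1
    while True:
        n_evals = comb(n_prevpoints + n_classes - 2, n_classes - 1) * n_repetitions
        if n_evals > eval_budget:
            return n_prevpoints - 1
        n_prevpoints += 1

print(get_nprevpoints_approximation(20, n_classes=3))  # 5, as in the docstring example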