Set a timeout for model_selection combinations to prevent a slow combination from stalling the model selection

2021-01-15 17:42:19 +01:00 · 2021-01-15 17:42:19 +01:00 · 865dafaefc
parent 43ed808945
commit 865dafaefc
6 changed files with 52 additions and 34 deletions
--- a/TweetSentQuant/experiments.py
+++ b/TweetSentQuant/experiments.py
@ -7,7 +7,6 @@ import os
 import pickle
 import itertools
 from joblib import Parallel, delayed
-import multiprocessing
 import settings


@ -78,6 +77,7 @@ def run(experiment):
        return
    else:
        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
+    return

    benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
    benchmark_devel.stats()
@ -91,6 +91,7 @@ def run(experiment):
        n_repetitions=5,
        error=optim_loss,
        refit=False,
+        timeout=60*60,
        verbose=True
    )
    model_selection.fit(benchmark_devel.training, benchmark_devel.test)
--- a/TweetSentQuant/gen_tables.py
+++ b/TweetSentQuant/gen_tables.py
@ -144,7 +144,6 @@ for i, eval_func in enumerate(evaluation_measures):

    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)

-
    # Tables ranks for AE and RAE (two tables)
    # ----------------------------------------------------
    methods = gao_seb_methods
--- a/TweetSentQuant/tabular.py
+++ b/TweetSentQuant/tabular.py
@ -95,11 +95,6 @@ class Table:
                    normval = 1 - normval
                self.map['color'][i, col_idx] = color_red2green_01(normval)

-    def _addlatex(self):
-        return
-        for i,j in self._getfilled():
-            self.map['latex'][i,j] = self.latex(self.rows[i], self.cols[j])
-

    def _run_ttest(self, row, col1, col2):
        mean1 = self.map['mean'][row, col1]
@ -153,7 +148,6 @@ class Table:
        self._addrank()
        self._addcolor()
        self._addttest()
-        self._addlatex()
        if self.add_average:
            self._addave()
        self.modif = False
--- a/quapy/init.py
+++ b/quapy/init.py
@ -20,4 +20,4 @@ environ = {


 def isbinary(x):
-    return data.isbinary(x) or method.isbinary(x)
+    return x.binary
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@ -6,6 +6,7 @@ from method.aggregative import BaseQuantifier
 from typing import Union, Callable
 import functional as F
 from copy import deepcopy
+import signal


 class GridSearchQ(BaseQuantifier):
@ -21,6 +22,7 @@ class GridSearchQ(BaseQuantifier):
                 refit=False,
                 n_jobs=-1,
                 random_seed=42,
+                 timeout=-1,
                 verbose=False):
        """
        Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation
@ -48,6 +50,9 @@ class GridSearchQ(BaseQuantifier):
        the best chosen hyperparameter combination
        :param n_jobs: number of parallel jobs
        :param random_seed: set the seed of the random generator to replicate experiments
+        :param timeout: sets a time limit (in seconds) for each hyperparameter configuration being tested.
+        Whenever a run takes longer than this limit, that configuration is ignored. If all configurations end up
+        being ignored, a TimeoutError exception is raised. If -1 (default), no time limit is set.
        :param verbose: set to True to get information through the stdout
        """
        self.model = model
@ -59,8 +64,8 @@ class GridSearchQ(BaseQuantifier):
        self.refit = refit
        self.n_jobs = n_jobs
        self.random_seed = random_seed
+        self.timeout = timeout
        self.verbose = verbose
-
        self.__check_error(error)

    def sout(self, msg):
@ -129,28 +134,48 @@ class GridSearchQ(BaseQuantifier):
        model = self.model
        n_jobs = self.n_jobs

+        if self.timeout > 0:
+            def handler(signum, frame):
+                self.sout('timeout reached')
+                raise TimeoutError()
+            signal.signal(signal.SIGALRM, handler)
+
        self.sout(f'starting optimization with n_jobs={n_jobs}')
        self.param_scores_ = {}
        self.best_score_ = None
+        some_timeouts = False
        for values in itertools.product(*params_values):
            params = {k: values[i] for i, k in enumerate(params_keys)}

-            # overrides default parameters with the parameters being explored at this iteration
-            model.set_params(**params)
-            model.fit(training)
-            true_prevalences, estim_prevalences = artificial_sampling_prediction(
-                model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
-                verbose=False
-            )
+            if self.timeout > 0:
+                signal.alarm(self.timeout)

-            score = self.error(true_prevalences, estim_prevalences)
-            self.sout(f'checking hyperparams={params} got {self.error.__name__} score {score:.5f}')
-            if self.best_score_ is None or score < self.best_score_:
-                self.best_score_ = score
-                self.best_params_ = params
-                if not self.refit:
-                    self.best_model_ = deepcopy(model)
-            self.param_scores_[str(params)] = score
+            try:
+                # overrides default parameters with the parameters being explored at this iteration
+                model.set_params(**params)
+                model.fit(training)
+                true_prevalences, estim_prevalences = artificial_sampling_prediction(
+                    model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
+                    verbose=False
+                )
+
+                score = self.error(true_prevalences, estim_prevalences)
+                self.sout(f'checking hyperparams={params} got {self.error.__name__} score {score:.5f}')
+                if self.best_score_ is None or score < self.best_score_:
+                    self.best_score_ = score
+                    self.best_params_ = params
+                    if not self.refit:
+                        self.best_model_ = deepcopy(model)
+                self.param_scores_[str(params)] = score
+
+                if self.timeout > 0:
+                    signal.alarm(0)
+            except TimeoutError:
+                print(f'timeout reached for config {params}')
+                some_timeouts = True
+
+        if self.best_score_ is None and some_timeouts:
+            raise TimeoutError('all jobs took more than the timeout time to end')

        self.sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f})')
        model.set_params(**self.best_params_)
--- a/test.py
+++ b/test.py
@ -20,7 +20,7 @@ param_grid = {'C': np.logspace(0,3,4), 'class_weight': ['balanced']}
 max_evaluations = 5000

 sample_size = qp.environ['SAMPLE_SIZE']
-binary = True
+binary = False
 svmperf_home = './svm_perf_quantification'

 if binary:
@ -29,7 +29,7 @@ if binary:

 else:
    dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
-    dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
+    #dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)

 print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}')

@ -52,14 +52,15 @@ print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.tes

 #learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1)
 learner = LogisticRegression(max_iter=1000)
-model = qp.method.meta.ECC(learner, size=20, red_size=10, param_grid=None, optim=None, policy='ds')
+model = qp.method.aggregative.ClassifyAndCount(learner)
+#model = qp.method.meta.ECC(learner, size=20, red_size=10, param_grid=None, optim=None, policy='ds')
 #model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
 #                           sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1)
 #model = qp.method.aggregative.ClassifyAndCount(learner)


-#if qp.isbinary(model) and not qp.isbinary(dataset):
-#    model = qp.method.aggregative.OneVsAll(model)
+if qp.isbinary(model) and not qp.isbinary(dataset):
+    model = qp.method.aggregative.OneVsAll(model)


 # Model fit and Evaluation on the test data
@ -91,7 +92,6 @@ print(f'mae={error:.3f}')
 # Model fit and Evaluation according to the artificial sampling protocol
 # ----------------------------------------------------------------------------

-
 n_prevpoints = F.get_nprevpoints_approximation(combinations_budget=max_evaluations, n_classes=dataset.n_classes)
 n_evaluations = F.num_prevalence_combinations(n_prevpoints, dataset.n_classes)
 print(f'the prevalence interval [0,1] will be split in {n_prevpoints} prevalence points for each class, so that\n'
@ -109,8 +109,6 @@ for error in qp.error.QUANTIFICATION_ERROR:

 # Model selection and Evaluation according to the artificial sampling protocol
 # ----------------------------------------------------------------------------
-sys.exit(0)
-

 model_selection = GridSearchQ(model,
                              param_grid=param_grid,
@ -118,7 +116,8 @@ model_selection = GridSearchQ(model,
                              eval_budget=max_evaluations//10,
                              error='mae',
                              refit=True,
-                              verbose=True)
+                              verbose=True,
+                              timeout=4)

 model = model_selection.fit(dataset.training, validation=0.3)
 #model = model_selection.fit(train, validation=val)