From 865dafaefc35132e396f9eecb1cc928a30df4a87 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Fri, 15 Jan 2021 17:42:19 +0100
Subject: [PATCH] setting a timeout for model_selection combinations, in order
 to prevent some combinations from stalling the model selection

---
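Reviewer note (kept below the --- line, which git am discards): the timeout
added in quapy/model_selection.py relies on POSIX alarm signals. A minimal,
self-contained sketch of the same SIGALRM pattern, assuming only the standard
library (the handler and the loop here are illustrative, not code from this
patch); note that SIGALRM exists only on Unix and must be installed from the
main thread:

    import signal
    import time

    def handler(signum, frame):
        # the alarm fired: abort whatever the current configuration is doing
        raise TimeoutError()

    signal.signal(signal.SIGALRM, handler)  # Unix-only; main thread only

    for seconds_needed in (1, 5):
        signal.alarm(3)  # arm a 3-second timer for this "configuration"
        try:
            time.sleep(seconds_needed)  # stands in for model.fit + evaluation
            signal.alarm(0)             # disarm the timer on success
            print(f'work needing {seconds_needed}s: finished')
        except TimeoutError:
            print(f'work needing {seconds_needed}s: timed out, skipped')

A single alarm is re-armed before each configuration and disarmed on success,
which is how GridSearchQ.fit uses it in the hunk below.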
 TweetSentQuant/experiments.py |  3 +-
 TweetSentQuant/gen_tables.py  |  1 -
 TweetSentQuant/tabular.py     |  6 ----
 quapy/__init__.py             |  2 +-
 quapy/model_selection.py      | 57 +++++++++++++++++++++++++----------
 test.py                       | 17 +++++------
 6 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/TweetSentQuant/experiments.py b/TweetSentQuant/experiments.py
index c8783e1..136ddea 100644
--- a/TweetSentQuant/experiments.py
+++ b/TweetSentQuant/experiments.py
@@ -7,7 +7,6 @@ import os
 import pickle
 import itertools
 from joblib import Parallel, delayed
-import multiprocessing
 
 import settings
 
@@ -78,6 +77,7 @@ def run(experiment):
         return
     else:
         print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
+        return
 
     benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
     benchmark_devel.stats()
@@ -91,6 +91,7 @@ def run(experiment):
             n_repetitions=5,
             error=optim_loss,
             refit=False,
+            timeout=60*60,
             verbose=True
         )
         model_selection.fit(benchmark_devel.training, benchmark_devel.test)
diff --git a/TweetSentQuant/gen_tables.py b/TweetSentQuant/gen_tables.py
index dcf5b91..01d2e84 100644
--- a/TweetSentQuant/gen_tables.py
+++ b/TweetSentQuant/gen_tables.py
@@ -144,7 +144,6 @@ for i, eval_func in enumerate(evaluation_measures):
 
     save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
 
-
 # Tables ranks for AE and RAE (two tables)
 # ----------------------------------------------------
 methods = gao_seb_methods
diff --git a/TweetSentQuant/tabular.py b/TweetSentQuant/tabular.py
index 0add9a6..fe58d27 100644
--- a/TweetSentQuant/tabular.py
+++ b/TweetSentQuant/tabular.py
@@ -95,11 +95,6 @@ class Table:
                 normval = 1 - normval
             self.map['color'][i, col_idx] = color_red2green_01(normval)
 
-    def _addlatex(self):
-        return
-        for i,j in self._getfilled():
-            self.map['latex'][i,j] = self.latex(self.rows[i], self.cols[j])
-
     def _run_ttest(self, row, col1, col2):
         mean1 = self.map['mean'][row, col1]
@@ -153,7 +148,6 @@ class Table:
         self._addrank()
         self._addcolor()
         self._addttest()
-        self._addlatex()
         if self.add_average:
             self._addave()
         self.modif = False
diff --git a/quapy/__init__.py b/quapy/__init__.py
index 9d368bf..011412e 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -20,4 +20,4 @@ environ = {
 
 
 def isbinary(x):
-    return data.isbinary(x) or method.isbinary(x)
\ No newline at end of file
+    return x.binary
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index be330b5..8f782c1 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -6,6 +6,7 @@ from method.aggregative import BaseQuantifier
 from typing import Union, Callable
 import functional as F
 from copy import deepcopy
+import signal
 
 
 class GridSearchQ(BaseQuantifier):
@@ -21,6 +22,7 @@ class GridSearchQ(BaseQuantifier):
                  refit=False,
                  n_jobs=-1,
                  random_seed=42,
+                 timeout=-1,
                  verbose=False):
         """
         Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation
@@ -48,6 +50,9 @@ class GridSearchQ(BaseQuantifier):
         the best chosen hyperparameter combination
         :param n_jobs: number of parallel jobs
         :param random_seed: set the seed of the random generator to replicate experiments
+        :param timeout: establishes a timer (in seconds) for each of the hyperparameter configurations being tested.
+        Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up
+        being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set.
         :param verbose: set to True to get information through the stdout
         """
         self.model = model
@@ -59,8 +64,8 @@ class GridSearchQ(BaseQuantifier):
         self.refit = refit
         self.n_jobs = n_jobs
         self.random_seed = random_seed
+        self.timeout = timeout
         self.verbose = verbose
-
         self.__check_error(error)
 
     def sout(self, msg):
@@ -129,28 +134,48 @@ class GridSearchQ(BaseQuantifier):
         model = self.model
         n_jobs = self.n_jobs
 
+        if self.timeout > 0:
+            def handler(signum, frame):
+                self.sout('timeout reached')
+                raise TimeoutError()
+            signal.signal(signal.SIGALRM, handler)
+
         self.sout(f'starting optimization with n_jobs={n_jobs}')
         self.param_scores_ = {}
         self.best_score_ = None
+        some_timeouts = False
         for values in itertools.product(*params_values):
             params = {k: values[i] for i, k in enumerate(params_keys)}
 
-            # overrides default parameters with the parameters being explored at this iteration
-            model.set_params(**params)
-            model.fit(training)
-            true_prevalences, estim_prevalences = artificial_sampling_prediction(
-                model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
-                verbose=False
-            )
+            if self.timeout > 0:
+                signal.alarm(self.timeout)
 
-            score = self.error(true_prevalences, estim_prevalences)
-            self.sout(f'checking hyperparams={params} got {self.error.__name__} score {score:.5f}')
-            if self.best_score_ is None or score < self.best_score_:
-                self.best_score_ = score
-                self.best_params_ = params
-                if not self.refit:
-                    self.best_model_ = deepcopy(model)
-            self.param_scores_[str(params)] = score
+            try:
+                # overrides default parameters with the parameters being explored at this iteration
+                model.set_params(**params)
+                model.fit(training)
+                true_prevalences, estim_prevalences = artificial_sampling_prediction(
+                    model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
+                    verbose=False
+                )
+
+                score = self.error(true_prevalences, estim_prevalences)
+                self.sout(f'checking hyperparams={params} got {self.error.__name__} score {score:.5f}')
+                if self.best_score_ is None or score < self.best_score_:
+                    self.best_score_ = score
+                    self.best_params_ = params
+                    if not self.refit:
+                        self.best_model_ = deepcopy(model)
+                self.param_scores_[str(params)] = score
+
+                if self.timeout > 0:
+                    signal.alarm(0)
+            except TimeoutError:
+                print(f'timeout reached for config {params}')
+                some_timeouts = True
+
+        if self.best_score_ is None and some_timeouts:
+            raise TimeoutError('all hyperparameter configurations timed out')
 
         self.sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f})')
         model.set_params(**self.best_params_)
diff --git a/test.py b/test.py
index a9cff1d..1d0dbb7 100644
--- a/test.py
+++ b/test.py
@@ -20,7 +20,7 @@ param_grid = {'C': np.logspace(0,3,4), 'class_weight': ['balanced']}
 max_evaluations = 5000
 sample_size = qp.environ['SAMPLE_SIZE']
 
-binary = True
+binary = False
 svmperf_home = './svm_perf_quantification'
 
 if binary:
@@ -29,7 +29,7 @@ if binary:
 
 else:
     dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
-    dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
+    #dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
 
 print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}')
 
@@ -52,14 +52,15 @@ print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.tes
 
 #learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1)
 learner = LogisticRegression(max_iter=1000)
-model = qp.method.meta.ECC(learner, size=20, red_size=10, param_grid=None, optim=None, policy='ds')
+model = qp.method.aggregative.ClassifyAndCount(learner)
+#model = qp.method.meta.ECC(learner, size=20, red_size=10, param_grid=None, optim=None, policy='ds')
 #model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
 #                           sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1)
 #model = qp.method.aggregative.ClassifyAndCount(learner)
 
-#if qp.isbinary(model) and not qp.isbinary(dataset):
-#    model = qp.method.aggregative.OneVsAll(model)
+if qp.isbinary(model) and not qp.isbinary(dataset):
+    model = qp.method.aggregative.OneVsAll(model)
 
 
 # Model fit and Evaluation on the test data
@@ -91,7 +92,6 @@ print(f'mae={error:.3f}')
 
 # Model fit and Evaluation according to the artificial sampling protocol
 # ----------------------------------------------------------------------------
-
 n_prevpoints = F.get_nprevpoints_approximation(combinations_budget=max_evaluations, n_classes=dataset.n_classes)
 n_evaluations = F.num_prevalence_combinations(n_prevpoints, dataset.n_classes)
 print(f'the prevalence interval [0,1] will be split in {n_prevpoints} prevalence points for each class, so that\n'
@@ -109,8 +109,6 @@ for error in qp.error.QUANTIFICATION_ERROR:
 
 # Model selection and Evaluation according to the artificial sampling protocol
 # ----------------------------------------------------------------------------
 
-sys.exit(0)
-
 model_selection = GridSearchQ(model,
                               param_grid=param_grid,
@@ -118,7 +116,8 @@ model_selection = GridSearchQ(model,
                               sample_size=sample_size,
                               eval_budget=max_evaluations//10,
                               error='mae',
                               refit=True,
-                              verbose=True)
+                              verbose=True,
+                              timeout=4)
 
 model = model_selection.fit(dataset.training, validation=0.3)
 #model = model_selection.fit(train, validation=val)
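
Usage sketch (illustrative, not part of the diff): with the new parameter, the
grid search becomes time-bounded per hyperparameter configuration. The
following mirrors test.py above (same dataset, learner, budget, and timeout=4;
GridSearchQ is the class modified in quapy/model_selection.py):

    import numpy as np
    import quapy as qp
    from quapy.model_selection import GridSearchQ
    from sklearn.linear_model import LogisticRegression

    # dataset and quantifier as in test.py
    dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
    model = qp.method.aggregative.ClassifyAndCount(LogisticRegression(max_iter=1000))

    model_selection = GridSearchQ(model,
                                  param_grid={'C': np.logspace(0, 3, 4), 'class_weight': ['balanced']},
                                  sample_size=qp.environ['SAMPLE_SIZE'],
                                  eval_budget=500,
                                  error='mae',
                                  refit=True,
                                  verbose=True,
                                  timeout=4)  # seconds allowed per configuration

    # any configuration exceeding 4 seconds is skipped; TimeoutError is raised
    # only if every configuration times out
    model = model_selection.fit(dataset.training, validation=0.3)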