diff --git a/TweetSentQuant/experiments.py b/TweetSentQuant/experiments.py index 84269f5..5867255 100644 --- a/TweetSentQuant/experiments.py +++ b/TweetSentQuant/experiments.py @@ -63,7 +63,8 @@ def quantification_ensembles(): 'n_jobs': settings.ENSEMBLE_N_JOBS, 'param_grid': lr_params, 'param_mod_sel': param_mod_sel, - 'val_split': 0.4 + 'val_split': 0.4, + 'min_pos': 10 } # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection @@ -71,13 +72,13 @@ def quantification_ensembles(): hyper_none = None yield 'epaccmaeptr', EPACC(newLR(), optim='mae', policy='ptr', **common), hyper_none yield 'epaccmaemae', EPACC(newLR(), optim='mae', policy='mae', **common), hyper_none - yield 'esldmaeptr', EEMQ(newLR(), optim='mae', policy='ptr', **common), hyper_none - yield 'esldmaemae', EEMQ(newLR(), optim='mae', policy='mae', **common), hyper_none + #yield 'esldmaeptr', EEMQ(newLR(), optim='mae', policy='ptr', **common), hyper_none + #yield 'esldmaemae', EEMQ(newLR(), optim='mae', policy='mae', **common), hyper_none yield 'epaccmraeptr', EPACC(newLR(), optim='mrae', policy='ptr', **common), hyper_none yield 'epaccmraemrae', EPACC(newLR(), optim='mrae', policy='mrae', **common), hyper_none - yield 'esldmraeptr', EEMQ(newLR(), optim='mrae', policy='ptr', **common), hyper_none - yield 'esldmraemrae', EEMQ(newLR(), optim='mrae', policy='mrae', **common), hyper_none + #yield 'esldmraeptr', EEMQ(newLR(), optim='mrae', policy='ptr', **common), hyper_none + #yield 'esldmraemrae', EEMQ(newLR(), optim='mrae', policy='mrae', **common), hyper_none def evaluate_experiment(true_prevalences, estim_prevalences): @@ -178,8 +179,8 @@ def run(experiment): benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence, best_params) - if isinstance(model, QuaNet): - model.clean_checkpoint_dir() + #if isinstance(model, QuaNet): + #model.clean_checkpoint_dir() if __name__ == '__main__': @@ -195,24 +196,24 @@ if __name__ == '__main__': print(f'Result folder: {args.results}') np.random.seed(0) - optim_losses = ['mae'] # ['mae', 'mrae'] + optim_losses = ['mae', 'mrae'] datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN - #models = quantification_models() - #Parallel(n_jobs=settings.N_JOBS)( - # delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models) - #) + models = quantification_models() + Parallel(n_jobs=settings.N_JOBS)( + delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models) + ) - #models = quantification_cuda_models() - #Parallel(n_jobs=settings.CUDA_N_JOBS)( - # delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models) - #) + models = quantification_cuda_models() + Parallel(n_jobs=settings.CUDA_N_JOBS)( + delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models) + ) models = quantification_ensembles() Parallel(n_jobs=1)( delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models) ) - shutil.rmtree(args.checkpointdir, ignore_errors=True) + #shutil.rmtree(args.checkpointdir, ignore_errors=True) diff --git a/TweetSentQuant/settings.py b/TweetSentQuant/settings.py index 418385d..8064fa8 100644 --- a/TweetSentQuant/settings.py +++ b/TweetSentQuant/settings.py @@ -1,7 +1,7 @@ import multiprocessing N_JOBS = -2 #multiprocessing.cpu_count() -CUDA_N_JOBS = 1 +CUDA_N_JOBS = 2 ENSEMBLE_N_JOBS = -2 SAMPLE_SIZE = 100 diff --git a/quapy/method/meta.py b/quapy/method/meta.py index 6251242..94ec99d 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -72,7 +72,8 @@ class Ensemble(BaseQuantifier): # randomly chooses the prevalences for each member of the ensemble (preventing classes with less than # min_pos positive examples) - prevs = [_draw_simplex(ndim=data.n_classes, min_val=self.min_pos / len(data)) for _ in range(self.size)] + sample_size = len(data) if self.max_sample_size is None else min(self.max_sample_size, len(data)) + prevs = [_draw_simplex(ndim=data.n_classes, min_val=self.min_pos / sample_size) for _ in range(self.size)] posteriors = None if self.policy == 'ds': @@ -80,7 +81,7 @@ class Ensemble(BaseQuantifier): posteriors, self.post_proba_fn = self.ds_policy_get_posteriors(data) is_static_policy = (self.policy in qp.error.QUANTIFICATION_ERROR_NAMES) - sample_size = len(data) if self.max_sample_size is None else min(self.max_sample_size, len(data)) + self.ensemble = Parallel(n_jobs=self.n_jobs)( delayed(_delayed_new_instance)( self.base_quantifier, data, val_split, prev, posteriors, keep_samples=is_static_policy,