forked from moreo/QuaPy

refactoring

This commit is contained in:
parent a776816063
commit 59500a5a42
@@ -1,23 +1,31 @@
 import numpy as np
 import pandas as pd
-from distribution_matching.method_kdey import KDEy
-from distribution_matching.method_kdey_closed import KDEyclosed
-from distribution_matching.method_kdey_closed_efficient_correct import KDEyclosed_efficient_corr
-from distribution_matching.methods_kdey import KDEyCS, KDEyHD, KDEyML
+from distribution_matching.method.kdex import KDExML
+from distribution_matching.method.method_kdey import KDEy
+from distribution_matching.method.method_kdey_closed_efficient_correct import KDEyclosed_efficient_corr
+from distribution_matching.method.kdey import KDEyCS, KDEyHD, KDEyML
 from quapy.method.aggregative import EMQ, CC, PCC, DistributionMatching, PACC, HDy, OneVsAllAggregative, ACC
-from distribution_matching.method_dirichlety import DIRy
+from distribution_matching.method.dirichlety import DIRy
 from sklearn.linear_model import LogisticRegression
-from distribution_matching.method_kdey_closed_efficient import KDEyclosed_efficient

-# the full list of methods tested in the paper (reported in the appendix)
-METHODS = ['ACC', 'PACC', 'HDy-OvA', 'DM-T', 'DM-HD', 'KDEy-HD', 'KDEy-HD2', 'DM-CS', 'KDEy-CS','KDEy-CS2', 'DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML', 'KDEy-ML2']
+# set to True to get the full list of methods tested in the paper (reported in the appendix)
+# set to False to get the reduced list (shown in the body of the paper)
+FULL_METHOD_LIST = True

-# uncomment this other list for the methods shown in the body of the paper (the other methods are not comparable in performance)
-#METHODS = ['PACC', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS', 'EMQ', 'KDEy-ML']
+if FULL_METHOD_LIST:
+    ADJUSTMENT_METHODS = ['ACC', 'PACC']
+    DISTR_MATCH_METHODS = ['HDy-OvA', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS']
+    MAX_LIKE_METHODS = ['DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML', 'KDEx-ML']
+else:
+    ADJUSTMENT_METHODS = ['PACC']
+    DISTR_MATCH_METHODS = ['DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS']
+    MAX_LIKE_METHODS = ['EMQ', 'KDEy-ML', 'KDEx-ML']

+# list of methods to consider
+METHODS = ADJUSTMENT_METHODS + DISTR_MATCH_METHODS + MAX_LIKE_METHODS
 BIN_METHODS = [x.replace('-OvA', '') for x in METHODS]


 # common hyperparameters
 hyper_LR = {
     'classifier__C': np.logspace(-3,3,7),
     'classifier__class_weight': ['balanced', None]
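Note: the `classifier__` prefix in `hyper_LR` follows the scikit-learn nested-parameter convention, so these grid entries are routed to the `LogisticRegression` wrapped inside each quantifier. A minimal sketch of the mechanism (using `PACC` and the same `set_params` call that appears in the experiment scripts of this commit):

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

# 'classifier__C' reaches the inner LogisticRegression via set_params,
# following the scikit-learn nested-parameter convention
quantifier = PACC(LogisticRegression())
quantifier.set_params(classifier__C=10, classifier__class_weight='balanced')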
@@ -29,8 +37,9 @@ hyper_kde = {

 nbins_range = [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 64]

-def new_method(method, **lr_kwargs):
+# instantiates a new quantifier based on a string name
+def new_method(method, **lr_kwargs):
     lr = LogisticRegression(**lr_kwargs)

     if method == 'CC':
@@ -46,23 +55,19 @@ def new_method(method, **lr_kwargs):
         param_grid = hyper_LR
         quantifier = PACC(lr)
     elif method in ['KDEy-HD']:
         param_grid = {**hyper_kde, **hyper_LR}
-        quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=10000, val_split=10)
-    elif method in ['KDEy-HD2']:
-        param_grid = {**hyper_kde, **hyper_LR}
         quantifier = KDEyHD(lr)
     elif method == 'KDEy-CS':
         param_grid = {**hyper_kde, **hyper_LR}
-        quantifier = KDEyclosed_efficient_corr(lr, val_split=10)
-    elif method == 'KDEy-CS2':
-        param_grid = {**hyper_kde, **hyper_LR}
         quantifier = KDEyCS(lr)
     elif method == 'KDEy-ML':
         param_grid = {**hyper_kde, **hyper_LR}
-        quantifier = KDEy(lr, target='max_likelihood', val_split=10)
-    elif method == 'KDEy-ML2':
-        param_grid = {**hyper_kde, **hyper_LR}
         quantifier = KDEyML(lr)
+    elif method == 'KDEx-ML':
+        param_grid = {
+            'bandwidth': np.linspace(0.001, 2, 501)
+        }
+        quantifier = KDExML()
     elif method == 'DIR':
         param_grid = hyper_LR
         quantifier = DIRy(lr)
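Note: `new_method` returns a `(param_grid, quantifier)` pair rather than a fitted model. A minimal usage sketch (assuming the refactored `distribution_matching.commons` above; the dataset and protocol mirror the experiment scripts in this commit, and the GridSearchQ keywords follow quapy's standard model-selection API):

import quapy as qp
from quapy.protocol import UPP
from distribution_matching.commons import new_method

qp.environ['SAMPLE_SIZE'] = 100
data = qp.datasets.fetch_twitter('gasp', min_df=3, pickle=True)
param_grid, quantifier = new_method('KDEy-ML')  # hyperparameter grid + untrained quantifier
train, val = data.training.split_stratified(random_state=0)
quantifier = qp.model_selection.GridSearchQ(
    quantifier, param_grid, protocol=UPP(val), error='mae').fit(train)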
@@ -1,74 +0,0 @@
-import pickle
-import numpy as np
-import os
-from os.path import join
-import pandas as pd
-from quapy.protocol import UPP
-from quapy.data import LabelledCollection
-from distribution_matching.commons import METHODS, new_method, show_results
-import quapy as qp
-
-
-SEED=1
-
-
-def extract_classes(data:LabelledCollection, classes):
-    X, y = data.Xy
-    counts = data.counts()
-    Xs, ys = [], []
-    for class_i in classes:
-        Xs.append(X[y==class_i])
-        ys.append([class_i]*counts[class_i])
-    Xs = np.concatenate(Xs)
-    ys = np.concatenate(ys)
-    return LabelledCollection(Xs, ys, classes=classes)
-
-def task(nclasses):
-    in_classes = np.arange(0, nclasses)
-    train = extract_classes(train_pool, classes=in_classes)
-    test = extract_classes(test_pool, classes=in_classes)
-    with qp.util.temp_seed(SEED):
-        hyper, quantifier = new_method(method)
-        quantifier.set_params(classifier__C=1, classifier__class_weight='balanced')
-        hyper = {h:v for h,v in hyper.items() if not h.startswith('classifier__')}
-        tr, va = train.split_stratified(random_state=SEED)
-        quantifier = qp.model_selection.GridSearchQ(quantifier, hyper, UPP(va), optim).fit(tr)
-        report = qp.evaluation.evaluation_report(quantifier, protocol=UPP(test), error_metrics=['mae', 'mrae', 'kld'], verbose=True)
-    return report
-
-
-# only the quantifier-dependent hyperparameters are explored; the classifier is a LR with default parameters
-if __name__ == '__main__':
-
-    qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
-    qp.environ['N_JOBS'] = -1
-
-
-    for optim in ['mae']: #, 'mrae']:
-
-        result_dir = f'results/lequa/nclasses/{optim}'
-        os.makedirs(result_dir, exist_ok=True)
-
-        for method in ['DM', 'EMQ', 'KDEy-ML']: # 'KDEy-ML', 'KDEy-DMhd3']:
-
-            result_path = join(result_dir, f'{method}.csv')
-            if os.path.exists(result_path): continue
-
-            train_orig, _, _ = qp.datasets.fetch_lequa2022('T1B')
-
-            train_pool, test_pool = train_orig.split_stratified(0.5, random_state=SEED)
-            arange_classes = np.arange(2, train_orig.n_classes + 1)
-            reports = qp.util.parallel(task, arange_classes, n_jobs=-1)
-            with open(result_path, 'at') as csv:
-                csv.write(f'Method\tDataset\tnClasses\tMAE\tMRAE\tKLD\n')
-                for num_classes, report in zip(arange_classes, reports):
-                    means = report.mean()
-                    report_result_path = join(result_dir, f'{method}_{num_classes}')+'.dataframe'
-                    report.to_csv(report_result_path)
-                    csv.write(f'{method}\tLeQua-T1B\t{num_classes}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
-                    csv.flush()
-
-            means = report.mean()
-            print(means)
@@ -3,7 +3,7 @@ from sklearn.linear_model import LogisticRegression
 import os
 import quapy as qp
 from distribution_matching.commons import show_results
-from method_kdey import KDEy
+from distribution_matching.method.method_kdey import KDEy
 from quapy.method.aggregative import DistributionMatching

@@ -5,36 +5,35 @@ from sklearn.neighbors import KernelDensity

 import quapy as qp
 from quapy.data import LabelledCollection
-from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions
+from quapy.method.aggregative import AggregativeProbabilisticQuantifier, cross_generate_predictions
 import quapy.functional as F

 from scipy.stats import multivariate_normal
 from scipy import optimize
 from sklearn.metrics.pairwise import rbf_kernel


-class KDEyBase:
+class KDEBase:

     BANDWIDTH_METHOD = ['scott', 'silverman']

-    def _check_bandwidth(self, bandwidth):
-        assert bandwidth in KDEyBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
-            f'invalid bandwidth, valid ones are {KDEyBase.BANDWIDTH_METHOD} or float values'
+    @classmethod
+    def _check_bandwidth(cls, bandwidth):
+        assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
+            f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
             assert 0 < bandwidth < 1, "the bandwidth for KDEy should be in (0,1), since this method models the unit simplex"

-    def get_kde_function(self, posteriors, bandwidth):
-        return KernelDensity(bandwidth=bandwidth).fit(posteriors)
+    def get_kde_function(self, X, bandwidth):
+        return KernelDensity(bandwidth=bandwidth).fit(X)

-    def pdf(self, kde, posteriors):
-        return np.exp(kde.score_samples(posteriors))
+    def pdf(self, kde, X):
+        return np.exp(kde.score_samples(X))

-    def get_mixture_components(self, posteriors, y, n_classes, bandwidth):
-        return [self.get_kde_function(posteriors[y == cat], bandwidth) for cat in range(n_classes)]
+    def get_mixture_components(self, X, y, n_classes, bandwidth):
+        return [self.get_kde_function(X[y == cat], bandwidth) for cat in range(n_classes)]


-class KDEyML(AggregativeProbabilisticQuantifier, KDEyBase):
+class KDEyML(AggregativeProbabilisticQuantifier, KDEBase):

     def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=0):
         self._check_bandwidth(bandwidth)

@@ -77,7 +76,7 @@ class KDEyML(AggregativeProbabilisticQuantifier, KDEyBase):
         return F.optim_minimize(neg_loglikelihood, n_classes)


-class KDEyHD(AggregativeProbabilisticQuantifier, KDEyBase):
+class KDEyHD(AggregativeProbabilisticQuantifier, KDEBase):

     def __init__(self, classifier: BaseEstimator, val_split=10, divergence: str='HD',
                  bandwidth=0.1, n_jobs=None, random_state=0, montecarlo_trials=10000):

@@ -145,7 +144,7 @@ class KDEyHD(AggregativeProbabilisticQuantifier, KDEyBase):
 class KDEyCS(AggregativeProbabilisticQuantifier):

     def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=0):
-        self._check_bandwidth(bandwidth)
+        KDEBase._check_bandwidth(bandwidth)
         self.classifier = classifier
         self.val_split = val_split
         self.bandwidth = bandwidth
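Note: after the rename, `KDEBase` is the piece shared by all KDEy variants: `get_mixture_components` fits one `KernelDensity` per class on that class's validation posteriors, so a prevalence vector alpha induces the mixture density p_alpha(x) = sum_i alpha_i p_i(x) against which the different divergences are computed. An illustrative sketch of that mixture (not the repository's code):

import numpy as np
from sklearn.neighbors import KernelDensity

def fit_class_kdes(posteriors, y, n_classes, bandwidth=0.1):
    # one KDE per class, as get_mixture_components does
    return [KernelDensity(bandwidth=bandwidth).fit(posteriors[y == c]) for c in range(n_classes)]

def mixture_pdf(kdes, alpha, X):
    # density of the alpha-weighted mixture evaluated at each row of X
    class_densities = np.vstack([np.exp(kde.score_samples(X)) for kde in kdes])  # (n_classes, n)
    return alpha @ class_densities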
@@ -1,41 +0,0 @@
-import sys
-from pathlib import Path
-import pandas as pd
-
-result_dir = 'results/results_tweet_mae_redohyper'
-#result_dir = 'results_lequa_mrae'
-
-dfs = []
-
-pathlist = Path(result_dir).rglob('*.csv')
-for path in pathlist:
-    path_in_str = str(path)
-
-    try:
-        df = pd.read_csv(path_in_str, sep='\t')
-        df = df[df.iloc[:, 0] != df.columns[0]]
-        if not df.empty:
-            dfs.append(df)
-    except Exception:
-        print('empty')
-
-df = pd.concat(dfs)
-
-for err in ['MAE', 'MRAE', 'KLD']:
-    print('-'*100)
-    print(err)
-    print('-'*100)
-    piv = df.pivot_table(index='Dataset', columns='Method', values=err)
-    piv.loc['mean'] = piv.mean()
-
-    pd.set_option('display.max_columns', None)
-    pd.set_option('display.max_rows', None)
-    pd.set_option('expand_frame_repr', False)
-    print(piv)
-    print()
-
-
-
-
-
-
@@ -1,4 +1,5 @@
-from distribution_matching.commons import BIN_METHODS, METHODS
+from distribution_matching.commons import (ADJUSTMENT_METHODS, BIN_METHODS, DISTR_MATCH_METHODS, MAX_LIKE_METHODS,
+                                           METHODS, FULL_METHOD_LIST)
 import quapy as qp
 from os import makedirs
 import os
@@ -12,10 +13,9 @@ tables_path = '.'
 MAXTONE = 35  # sets the intensity of the maximum color reached by the worst (red) and best (green) results
 SHOW_STD = False

-NUM_ADJUSTMENT_METHODS = 2 if 'ACC' in METHODS else 1
-NUM_MAXIMUM_LIKELIHOOD_METHODS = 4 if 'DIR' in METHODS else 3
-NUM_DISTRIBUTION_MATCHING_PAIRS = 2
-NUM_DISTRIBUTION_MATCHING_METHODS = NUM_DISTRIBUTION_MATCHING_PAIRS*2 + (2 if 'HDy-OvA' in METHODS else 1)
+NUM_ADJUSTMENT_METHODS = len(ADJUSTMENT_METHODS)
+NUM_MAXIMUM_LIKELIHOOD_METHODS = len(MAX_LIKE_METHODS)
+NUM_DISTRIBUTION_MATCHING_METHODS = len(DISTR_MATCH_METHODS)

 qp.environ['SAMPLE_SIZE'] = 100

@@ -27,21 +27,24 @@ nice_bench = {
     'semeval16': 'SemEval16',
 }

-nice_method={
-    'KDEy-MLE': 'KDEy-ML',
-    'KDEy-DMhd4': 'KDEy-HD',
-    'KDEy-closed++': 'KDEy-CS',
-    'EMQ-C': 'EMQ-BCTS'
-}
-
 def save_table(path, table):
     print(f'saving results in {path}')
     with open(path, 'wt') as foo:
         foo.write(table)


-def nicerm(key):
-    return '\mathrm{'+nice[key]+'}'
+def new_table(datasets, methods):
+    return Table(
+        benchmarks=datasets,
+        methods=methods,
+        ttest='wilcoxon',
+        prec_mean=5,
+        show_std=SHOW_STD,
+        prec_std=4,
+        clean_zero=(eval=='mae'),
+        average=True,
+        maxtone=MAXTONE
+    )


 def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):
@@ -54,7 +57,7 @@ def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):

     # write the latex table
     tabular = """
-    \\begin{tabular}{|c|""" + ('c|' * NUM_ADJUSTMENT_METHODS) + 'c|c' + ('|c|c' * (NUM_DISTRIBUTION_MATCHING_PAIRS)) + ('|c' * NUM_MAXIMUM_LIKELIHOOD_METHODS) + """|} """ + cline + """
+    \\begin{tabular}{|c|""" + ('c|' * NUM_ADJUSTMENT_METHODS) + ('c|' * NUM_DISTRIBUTION_MATCHING_METHODS) + ('c|' * NUM_MAXIMUM_LIKELIHOOD_METHODS) + """} """ + cline + """
     \multicolumn{1}{c}{} &
     \multicolumn{"""+str(NUM_ADJUSTMENT_METHODS)+"""}{|c}{Adjustment} &
     \multicolumn{"""+str(NUM_DISTRIBUTION_MATCHING_METHODS)+"""}{|c|}{Distribution Matching} &
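Note: with the full method list (2 adjustment, 6 distribution-matching, and 5 maximum-likelihood methods), the new one-liner yields a uniform column specification; a quick check of the string arithmetic:

# counts implied by FULL_METHOD_LIST = True
NUM_ADJ, NUM_DM, NUM_ML = 2, 6, 5
spec = '|c|' + ('c|' * NUM_ADJ) + ('c|' * NUM_DM) + ('c|' * NUM_ML)
print('\\begin{tabular}{' + spec + '}')  # \begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|c|c|c|}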
@@ -62,8 +65,7 @@ def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):
     \hline
     """
     for i, (tab, group, name) in enumerate(zip(tabs, benchmark_groups, benchmark_names)):
-        tablines = tab.latexTabular(benchmark_replace=nice_bench, method_replace=nice_method, endl='\\\\'+ cline, aslines=True)
-        print(tablines)
+        tablines = tab.latexTabular(benchmark_replace=nice_bench, endl='\\\\'+ cline, aslines=True)
         tablines[0] = tablines[0].replace('\multicolumn{1}{c|}{}', '\\textbf{'+name+'}')
         if not compact:
             tabular += '\n'.join(tablines)
@@ -87,17 +89,7 @@ def gen_tables_uci_multiclass(eval):

     datasets = qp.datasets.UCI_MULTICLASS_DATASETS

-    tab = Table(
-        benchmarks=datasets,
-        methods=METHODS,
-        ttest='wilcoxon',
-        prec_mean=4,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=(eval=='mae'),
-        average=True,
-        maxtone=MAXTONE
-    )
+    tab = new_table(datasets, METHODS)

     for dataset in datasets:
         print(f'\t Dataset: {dataset}: ', end='')
@@ -122,17 +114,7 @@ def gen_tables_uci_bin(eval):
     exclude = ['acute.a', 'acute.b', 'iris.1', 'balance.2']
     datasets = [x for x in qp.datasets.UCI_DATASETS if x not in exclude]

-    tab = Table(
-        benchmarks=datasets,
-        methods=BIN_METHODS,
-        ttest='wilcoxon',
-        prec_mean=4,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=(eval=='mae'),
-        average=True,
-        maxtone=MAXTONE
-    )
+    tab = new_table(datasets, BIN_METHODS)

     for dataset in datasets:
         print(f'\t Dataset: {dataset}: ', end='')
@@ -156,17 +138,7 @@ def gen_tables_tweet(eval):

     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST

-    tab = Table(
-        benchmarks=datasets,
-        methods=METHODS,
-        ttest='wilcoxon',
-        prec_mean=4,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=(eval=='mae'),
-        average=True,
-        maxtone=MAXTONE
-    )
+    tab = new_table(datasets, METHODS)

     for dataset in datasets:
         print(f'\t Dataset: {dataset}: ', end='')
@@ -185,19 +157,8 @@ def gen_tables_tweet(eval):

 def gen_tables_lequa(Methods, task, eval):
     # generating table for LeQua-T1A or LeQua-T1B; only one table with two rows, one for MAE, another for MRAE
     dataset_name = 'LeQua-'+task

-    tab = Table(
-        benchmarks=[f'Average'],
-        methods=Methods,
-        ttest='wilcoxon',
-        prec_mean=5,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=False,
-        average=False,
-        maxtone=MAXTONE
-    )
+    tab = new_table([f'Average'], Methods)

     print('Generating table for T1A@Lequa', eval, end='')
     dir_results = f'../results/lequa/{task}/{eval}'
@@ -65,7 +65,7 @@
 \centering
 \caption{Multiclass RAE}
 \resizebox{\textwidth}{!}{%
-    \input{multiclass_mae}
+    \input{multiclass_mrae}
 }%
 \end{table}

@@ -0,0 +1,57 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import os
+
+import quapy as qp
+from distribution_matching.commons import show_results
+from quapy.method.aggregative import DMy
+from distribution_matching.method.method_kdey import KDEy
+from quapy.protocol import UPP
+
+SEED=1
+
+if __name__ == '__main__':
+
+    qp.environ['SAMPLE_SIZE'] = 100
+    qp.environ['N_JOBS'] = -1
+    n_bags_val = 250
+    n_bags_test = 1000
+    result_dir = f'results/tweet/sensibility'
+
+    os.makedirs(result_dir, exist_ok=True)
+
+    for method, param, grid in [
+            ('KDEy-ML', 'Bandwidth', np.linspace(0.01, 0.2, 20)),
+            ('DM-HD', 'nbins', list(range(2,10)) + list(range(10,34,2)))
+    ]:
+
+        global_result_path = f'{result_dir}/{method}'
+
+        if not os.path.exists(global_result_path+'.csv'):
+            with open(global_result_path+'.csv', 'wt') as csv:
+                csv.write(f'Method\tDataset\t{param}\tMAE\tMRAE\tKLD\n')
+
+        with open(global_result_path+'.csv', 'at') as csv:
+            for val in grid:
+                for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
+                    print('init', dataset)
+
+                    local_result_path = global_result_path + '_' + dataset + (f'_{val:.3f}' if isinstance(val, float) else f'{val}')
+
+                    with qp.util.temp_seed(SEED):
+
+                        data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=False)
+
+                        if method == 'KDEy-ML':
+                            quantifier = KDEy(LogisticRegression(n_jobs=-1), target='max_likelihood', val_split=10, bandwidth=val)
+                        elif method == 'DM-HD':
+                            quantifier = DMy(LogisticRegression(n_jobs=-1), val_split=10, nbins=val, divergence='HD', n_jobs=-1)
+
+                        quantifier.fit(data.training)
+                        protocol = UPP(data.test, repeats=n_bags_test)
+                        report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True, n_jobs=-1)
+                        report.to_csv(f'{local_result_path}.dataframe')
+                        means = report.mean()
+                        csv.write(f'{method}\t{data.name}\t{val}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
+                        csv.flush()
+
+    show_results(global_result_path)
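Note: the script writes one tab-separated row per (dataset, value) pair, so the sensitivity curves can be read straight off the CSV. A minimal sketch for inspecting the bandwidth sensitivity of KDEy-ML on one dataset ('gasp' is one of the Twitter benchmarks; the column names match the header written above):

import pandas as pd

df = pd.read_csv('results/tweet/sensibility/KDEy-ML.csv', sep='\t')
print(df[df.Dataset == 'gasp'].pivot_table(index='Bandwidth', values=['MAE', 'MRAE']))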
@@ -1,5 +1,6 @@
 import pickle
+import os
 from data.base import LabelledCollection

 from sklearn.linear_model import LogisticRegression

@@ -0,0 +1,63 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import os
+import quapy as qp
+from distribution_matching.commons import show_results
+from distribution_matching.method.method_kdey import KDEy
+from quapy.method.aggregative import DMy
+from quapy.protocol import UPP
+
+
+SEED=1
+
+def task(val):
+    print('job-init', dataset, val)
+
+    with qp.util.temp_seed(SEED):
+        if method=='KDEy-ML':
+            quantifier = KDEy(LogisticRegression(), target='max_likelihood', val_split=10, bandwidth=val)
+        elif method == 'DM-HD':
+            quantifier = DMy(LogisticRegression(), val_split=10, nbins=val, divergence='HD')
+
+        quantifier.fit(data.data)
+        protocol = UPP(data.test, repeats=n_bags_test)
+        report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'],
+                                                 verbose=True, n_jobs=-1)
+    return report
+
+
+if __name__ == '__main__':
+
+    qp.environ['SAMPLE_SIZE'] = 500
+    qp.environ['N_JOBS'] = -1
+    n_bags_val = 250
+    n_bags_test = 1000
+    result_dir = f'results/ucimulti/sensibility'
+
+    os.makedirs(result_dir, exist_ok=True)
+
+    for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
+
+        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
+
+        for method, param, grid in [
+                ('KDEy-ML', 'Bandwidth', np.linspace(0.01, 0.2, 20)),
+                ('DM-HD', 'nbins', list(range(2, 10)) + list(range(10, 34, 2)))
+        ]:
+
+            global_result_path = f'{result_dir}/{method}'
+
+            if not os.path.exists(global_result_path+'.csv'):
+                with open(global_result_path+'.csv', 'wt') as csv:
+                    csv.write(f'Method\tDataset\t{param}\tMAE\tMRAE\tKLD\n')
+
+            reports = qp.util.parallel(task, grid, n_jobs=-1)
+            with open(global_result_path + '.csv', 'at') as csv:
+                for val, report in zip(grid, reports):
+                    means = report.mean()
+                    local_result_path = global_result_path + '_' + dataset + (f'_{val:.3f}' if isinstance(val, float) else f'{val}')
+                    report.to_csv(f'{local_result_path}.dataframe')
+                    csv.write(f'{method}\t{dataset}\t{val}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
+                    csv.flush()
+
+    show_results(global_result_path)
@@ -5,7 +5,7 @@ import pandas as pd

 import quapy as qp
 from method.aggregative import DistributionMatching
-from distribution_matching.method_kdey import KDEy
+from distribution_matching.method.method_kdey import KDEy
 from protocol import UPP

