Compare commits

...

33 Commits
master ... lab

Author SHA1 Message Date
Alejandro Moreo Fernandez 29e23b5bf6 cleaning gitginore 2023-12-06 17:05:12 +01:00
Alejandro Moreo Fernandez b823745dd1 cleaning project 2023-12-06 16:55:06 +01:00
Alejandro Moreo Fernandez b7b052ef7d checking stat significance 2023-11-28 10:20:54 +01:00
Alejandro Moreo Fernandez 01ef81bf25 sampling better the KDEy-HD approach 2023-11-23 11:30:45 +01:00
Alejandro Moreo Fernandez dcfd61ae5b launching new KDEy-HD with importance sampling 2023-11-22 12:27:15 +01:00
Alejandro Moreo Fernandez ac88dc8961 understanding importance sampling 2023-11-21 10:26:13 +01:00
Alejandro Moreo Fernandez 61399d66f6 scripts for understanding the divergence approximations 2023-11-20 15:28:56 +01:00
Alejandro Moreo Fernandez d060ffa591 Merge branch 'lab' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy into lab 2023-11-09 18:15:33 +01:00
Alejandro Moreo Fernandez 949bd3cca7 gitignore update 2023-11-07 17:33:54 +01:00
Alejandro Moreo Fernandez c0f9a50a14 plotting simplex and 3d-histograms 2023-11-07 17:28:32 +01:00
Alejandro Moreo Fernandez 50d0ed2e84 switching 2023-11-03 09:54:36 +01:00
Alejandro Moreo Fernandez 0f4008e18d switching to devel 2023-10-30 09:41:52 +01:00
Alejandro Moreo Fernandez 3243fd90f8 running final experiments, one dedicated DM for each divergence with fine-grained exploration of nbins 2023-10-23 11:32:35 +02:00
Alejandro Moreo Fernandez f08885dca3 renaming dataset labels as numeric values w/o gaps 2023-10-18 11:42:52 +02:00
Alejandro Moreo Fernandez cfd5b00b97 renaming binary -> ucibinary 2023-10-18 10:55:56 +02:00
Alejandro Moreo Fernandez 53b052edc7 add uci multiclass datasets 2023-10-18 10:52:04 +02:00
Alejandro Moreo Fernandez 1c258a2000 adding multiclass experiments 2023-10-18 10:48:50 +02:00
Alejandro Moreo Fernandez dca955819c everything working; need to clean prints though 2023-10-17 18:11:25 +02:00
Alejandro Moreo Fernandez 7593fde2e0 KDE with closed form working 2023-10-13 17:34:26 +02:00
Alejandro Moreo Fernandez 32d6aa58f6 understanding montecarlo sampling 2023-10-02 17:50:12 +02:00
Alejandro Moreo Fernandez 56dbe744df cleaning and refactoring, also trying to repair the montecarlo approximation 2023-09-28 17:47:39 +02:00
Alejandro Moreo Fernandez 9221b3f775 binary experiments 2023-09-11 12:48:32 +02:00
Alejandro Moreo Fernandez 7e2237abe8 stability 2023-09-05 16:57:43 +02:00
Alejandro Moreo Fernandez 2d6ac4af0d testing the sensibility of KDEy to the bandwidth 2023-09-04 12:05:25 +02:00
Alejandro Moreo Fernandez b1715b685c Merge branch 'lab' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy into lab 2023-08-01 19:04:36 +02:00
Alejandro Moreo Fernandez f72a612011 everything working, last results 2023-08-01 19:04:23 +02:00
Alejandro Moreo Fernandez fe819a7a3c plots 2023-07-31 09:03:41 +02:00
Alejandro Moreo Fernandez 96758834f3 kde working with kfcv 2023-07-24 16:28:21 +02:00
Alejandro Moreo Fernandez 0ce6106ee1 simplex plot 2023-07-21 12:56:01 +02:00
Alejandro Moreo Fernandez d995990fba initial experiments and DIR method 2023-07-21 11:41:16 +02:00
Alejandro Moreo Fernandez 26de9d92eb distribution matching multiclass 2023-07-20 09:03:22 +02:00
Alejandro Moreo Fernandez 4a7ffe5b50 testing kde-y 2023-05-22 17:34:00 +02:00
Alejandro Moreo Fernandez ca25f1d601 lab + some experimental methods based on distribution matching 2023-05-04 17:26:17 +02:00
47 changed files with 4005 additions and 44 deletions

.gitignore

@@ -27,6 +27,40 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
.idea
.vscode
LeQua2022
MultiLabel/results_generales
MultiLabel/mlqtables
NewMethods/plots*
NewMethods/results*
NewMethods/tables*
NewMethods/latex*
Ordinal/data*
Ordinal/roberta*
Ordinal/tables*
Ordinal/results*
eDiscovery/plots*
eDiscovery/results*
examples/results*
poster-cikm*
slides-cikm*
slides-short-cikm*
quick_experiment/figures*
svm_perf_quantification/*
TweetSentQuant/plots*
TweetSentQuant/results*
TweetSentQuant/tables*
TweetSentQuant/Tweet Sentiment Quantification_NPP
TweetSentQuant/checkpoint
TweetSentQuant/*.tex
checkpoint
*.png
*.zip
*.pkl
*.pickle
*.pdf
# PyInstaller
# Usually these files are written by a python script from a template
@@ -130,3 +164,31 @@ dmypy.json
.pyre/
*__pycache__*
*.dataframe
*.svg
cross_lingual.py
# results
*.csv
# other projects
*.tex
*.md
*.doctree
*.rst
MultiLabel/
NewMethods/
Ordinal/
Retrieval/
quick_experiment/
Transduction/
Transduction_office/
TweetSentQuant/
laboratory/
docs/
dataset_drifts.py
notes.txt
eDiscovery/
examples/experiment_Dirichlet_calibration.py
examples/experiments_dirichlet_cal.txt


@@ -0,0 +1,109 @@
import numpy as np
import pandas as pd
from distribution_matching.method_kdey import KDEy
from distribution_matching.method_kdey_closed import KDEyclosed
from distribution_matching.method_kdey_closed_efficient_correct import KDEyclosed_efficient_corr
from quapy.method.aggregative import EMQ, CC, PCC, DistributionMatching, PACC, HDy, OneVsAllAggregative, ACC
from distribution_matching.method_dirichlety import DIRy
from sklearn.linear_model import LogisticRegression
from distribution_matching.method_kdey_closed_efficient import KDEyclosed_efficient
# the full list of methods tested in the paper (reported in the appendix)
METHODS = ['ACC', 'PACC', 'HDy-OvA', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS', 'DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML']
# uncomment this other list for the methods shown in the body of the paper (the other methods are not comparable in performance)
#METHODS = ['PACC', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS', 'EMQ', 'KDEy-ML']
BIN_METHODS = [x.replace('-OvA', '') for x in METHODS]
hyper_LR = {
'classifier__C': np.logspace(-3,3,7),
'classifier__class_weight': ['balanced', None]
}
hyper_kde = {
'bandwidth': np.linspace(0.01, 0.2, 20)
}
nbins_range = [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 64]
def new_method(method, **lr_kwargs):
lr = LogisticRegression(**lr_kwargs)
if method == 'CC':
param_grid = hyper_LR
quantifier = CC(lr)
elif method == 'PCC':
param_grid = hyper_LR
quantifier = PCC(lr)
elif method == 'ACC':
param_grid = hyper_LR
quantifier = ACC(lr)
elif method == 'PACC':
param_grid = hyper_LR
quantifier = PACC(lr)
elif method in ['KDEy-HD']:
param_grid = {**hyper_kde, **hyper_LR}
quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=10000, val_split=10)
elif method == 'KDEy-CS':
param_grid = {**hyper_kde, **hyper_LR}
quantifier = KDEyclosed_efficient_corr(lr, val_split=10)
elif method == 'KDEy-ML':
param_grid = {**hyper_kde, **hyper_LR}
quantifier = KDEy(lr, target='max_likelihood', val_split=10)
elif method == 'DIR':
param_grid = hyper_LR
quantifier = DIRy(lr)
elif method == 'EMQ':
param_grid = hyper_LR
quantifier = EMQ(lr)
elif method == 'EMQ-BCTS':
method_params = {'exact_train_prev': [False], 'recalib': ['bcts']}
param_grid = {**method_params, **hyper_LR}
quantifier = EMQ(lr)
elif method == 'HDy':
param_grid = hyper_LR
quantifier = HDy(lr)
elif method == 'HDy-OvA':
param_grid = {'binary_quantifier__' + key: val for key, val in hyper_LR.items()}
quantifier = OneVsAllAggregative(HDy(lr))
elif method == 'DM-T':
method_params = {
'nbins': nbins_range,
'val_split': [10],
'divergence': ['topsoe']
}
param_grid = {**method_params, **hyper_LR}
quantifier = DistributionMatching(lr)
elif method == 'DM-HD':
method_params = {
'nbins': nbins_range,
'val_split': [10],
'divergence': ['HD']
}
param_grid = {**method_params, **hyper_LR}
quantifier = DistributionMatching(lr)
elif method == 'DM-CS':
method_params = {
'nbins': nbins_range,
'val_split': [10],
'divergence': ['CS']
}
param_grid = {**method_params, **hyper_LR}
quantifier = DistributionMatching(lr)
else:
raise NotImplementedError('unknown method', method)
return param_grid, quantifier
def show_results(result_path):
df = pd.read_csv(result_path+'.csv', sep='\t')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
print(pv)
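
A minimal usage sketch (not part of the committed files) of how the (param_grid, quantifier) pair returned by new_method is meant to be fed to quapy's model selection, following the same GridSearchQ/UPP calls used in the experiment scripts further below; my_training_data is a hypothetical LabelledCollection standing in for any real training set.

import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from distribution_matching.commons import new_method

qp.environ['SAMPLE_SIZE'] = 100
param_grid, quantifier = new_method('KDEy-ML')
# my_training_data is hypothetical; split it into a proper training part and a validation part
train, val = my_training_data.split_stratified(random_state=0)
modsel = GridSearchQ(quantifier, param_grid, protocol=UPP(val), refit=False, error='mae', n_jobs=-1)
quantifier = modsel.fit(train).best_model()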


@@ -0,0 +1,73 @@
import math
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
from data import LabelledCollection
scale = 100
import quapy as qp
data = qp.datasets.fetch_twitter('wb', min_df=3, pickle=True, for_model_selection=False)
X, y = data.training.Xy
cls = LogisticRegression(C=0.0001, random_state=0)
posteriors = cross_val_predict(cls, X=X, y=y, method='predict_proba', n_jobs=-1, cv=3)
cls.fit(X, y)
Xte, yte = data.test.Xy
post_c1 = posteriors[y==0]
post_c2 = posteriors[y==1]
post_c3 = posteriors[y==2]
print(len(post_c1))
print(len(post_c2))
print(len(post_c3))
post_test = cls.predict_proba(Xte)
alpha = qp.functional.prevalence_from_labels(yte, classes=[0, 1, 2])
nbins = 20
plt.rcParams.update({'font.size': 7})
fig = plt.figure()
positions = np.asarray([2,1,0])
colors = ['r', 'g', 'b']
for i, post_set in enumerate([post_c1, post_c2, post_c3, post_test]):
ax = fig.add_subplot(141+i, projection='3d')
for post, c, z in zip(post_set.T, colors, positions):
hist, bins = np.histogram(post, bins=nbins, density=True, range=[0,1])
xs = (bins[:-1] + bins[1:])/2
ax.bar(xs, hist, width=1/nbins, zs=z, zdir='y', color=c, ec=c, alpha=0.6)
ax.yaxis.set_ticks(positions)
ax.yaxis.set_ticklabels(['$y=1$', '$y=2$', '$y=3$'])
ax.xaxis.set_ticks([])
ax.xaxis.set_ticklabels([], minor=True)
ax.zaxis.set_ticks([])
ax.zaxis.set_ticklabels([], minor=True)
#plt.figure(figsize=(10,6))
#plt.show()
plt.savefig('./histograms.pdf')


@@ -0,0 +1,29 @@
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
"""
This script generates plots of sensitivity to the number of classes.
It plots results for MAE, MRAE, and KLD.
The hyperparameters were set as:
quantifier.set_params(classifier__C=0.01, classifier__class_weight='balanced', bandwidth=0.2)
"""
methods = ['DM', 'KDEy-ML', 'EMQ']
optim = 'mae'
dfs = [pd.read_csv(f'../results/lequa/nclasses/{optim}/{method}.csv', sep='\t') for method in methods]
df = pd.concat(dfs)
for err in ['MAE', 'MRAE']:
piv = df.pivot_table(index='nClasses', columns='Method', values=err)
g = sns.lineplot(data=piv, markers=True, dashes=False)
g.set(xlim=(1, 28))
g.legend(loc="center left", bbox_to_anchor=(1, 0.5))
g.set_ylabel(err)
g.set_xticks(np.linspace(1, 28, 28))
plt.xticks(rotation=90)
plt.grid()
plt.savefig(f'./nclasses_{err}.pdf', bbox_inches='tight')
plt.clf()


@@ -0,0 +1,50 @@
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
"""
This script generates plots of sensitivity to the hyperparameter "bandwidth".
It plots results for MAE, MRAE, and KLD.
The remaining hyperparameters were set to their default values.
"""
log_mrae = True
for method, param, xlim, xticks in [
('KDEy-ML', 'Bandwidth', (0.01, 0.2), np.linspace(0.01, 0.2, 20)),
('DM-HD', 'nbins', (2,32), list(range(2,10)) + list(range(10,34,2)))
]:
for dataset in ['tweet', 'lequa', 'uciml']:
if dataset == 'tweet':
df = pd.read_csv(f'../results/tweet/sensibility/{method}.csv', sep='\t')
ylim = (0.03, 0.21)
elif dataset == 'lequa':
df = pd.read_csv(f'../results/lequa/T1B/sensibility/{method}.csv', sep='\t')
ylim = (0.0125, 0.03)
elif dataset == 'uciml':
ylim = (0, 0.23)
df = pd.read_csv(f'../results/ucimulti/sensibility/{method}.csv', sep='\t')
for err in ['MAE']: #, 'MRAE']:
piv = df.pivot_table(index=param, columns='Dataset', values=err)
g = sns.lineplot(data=piv, markers=True, dashes=False)
g.set(xlim=xlim)
g.legend(loc="center left", bbox_to_anchor=(1, 0.5))
if log_mrae and err=='MRAE':
plt.yscale('log')
g.set_ylabel('log('+err+')')
else:
g.set_ylabel(err)
g.set_ylim(ylim)
g.set_xticks(xticks)
plt.xticks(rotation=90)
plt.grid()
plt.savefig(f'./sensibility_{method}_{dataset}_{err}.pdf', bbox_inches='tight')
plt.clf()


@@ -0,0 +1,154 @@
import math
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KernelDensity
from data import LabelledCollection
scale = 10
# with ternary (a matplotlib-based library) the plots come out fine, but I cannot create contours (or I don't know how)
# with plotly the contours come out fine, but it is a bit of a hassle because it opens in the browser...
def plot_simplex_(ax, density, title='', fontsize=9, points=None):
import ternary
tax = ternary.TernaryAxesSubplot(ax=ax, scale=scale)
tax.heatmapf(density, boundary=True, style="triangular", colorbar=False, cmap='viridis') #cmap='magma')
tax.boundary(linewidth=1.0)
corner_fontsize = 5*fontsize//6
tax.right_corner_label("$y=3$", fontsize=corner_fontsize)
tax.top_corner_label("$y=2$", fontsize=corner_fontsize)
tax.left_corner_label("$y=1$", fontsize=corner_fontsize)
if title:
tax.set_title(title, loc='center', y=-0.11, fontsize=fontsize)
if points is not None:
tax.scatter(points*scale, marker='o', color='w', alpha=0.25, zorder=10)
tax.get_axes().axis('off')
tax.clear_matplotlib_ticks()
return tax
def plot_simplex(ax, coord, kde_scores, title='', fontsize=11, points=None, savepath=None):
import plotly.figure_factory as ff
tax = ff.create_ternary_contour(coord.T, kde_scores, pole_labels=['y=1', 'y=2', 'y=3'],
interp_mode='cartesian',
ncontours=20,
colorscale='Viridis',
showscale=True,
title=title)
if savepath is None:
tax.show()
else:
tax.write_image(savepath)
return tax
from mpl_toolkits.axes_grid1 import make_axes_locatable
def plot_3class_problem(post_c1, post_c2, post_c3, post_test, alpha, bandwidth):
import ternary
post_c1 = np.flip(post_c1, axis=1)
post_c2 = np.flip(post_c2, axis=1)
post_c3 = np.flip(post_c3, axis=1)
post_test = np.flip(post_test, axis=1)
size_=10
fig = ternary.plt.figure(figsize=(4*size_, 1*size_))
fig.tight_layout()
ax1 = fig.add_subplot(1, 4, 1)
divider = make_axes_locatable(ax1)
ax2 = fig.add_subplot(1, 4, 2)
divider = make_axes_locatable(ax2)
ax3 = fig.add_subplot(1, 4, 3)
divider = make_axes_locatable(ax3)
ax4 = fig.add_subplot(1, 4, 4)
divider = make_axes_locatable(ax4)
kde1 = KernelDensity(bandwidth=bandwidth).fit(post_c1)
kde2 = KernelDensity(bandwidth=bandwidth).fit(post_c2)
kde3 = KernelDensity(bandwidth=bandwidth).fit(post_c3)
#post_c1 = np.concatenate([post_c1, np.eye(3, dtype=float)])
#post_c2 = np.concatenate([post_c2, np.eye(3, dtype=float)])
#post_c3 = np.concatenate([post_c3, np.eye(3, dtype=float)])
#plot_simplex_(ax1, lambda x:0, title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$')
#plot_simplex_(ax2, lambda x:0, title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$')
#plot_simplex_(ax3, lambda x:0, title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$')
def density(kde):
def d(p):
return np.exp(kde([p])).item()
return d
plot_simplex_(ax1, density(kde1.score_samples), title='$p_1$')
plot_simplex_(ax2, density(kde2.score_samples), title='$p_2$')
plot_simplex_(ax3, density(kde3.score_samples), title='$p_3$')
#plot_simplex(ax1, post_c1, np.exp(kde1.score_samples(post_c1)), title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$') #, savepath='figure/y1.png')
#plot_simplex(ax2, post_c2, np.exp(kde2.score_samples(post_c2)), title='$f_2(\mathbf{x})=p(s(\mathbf{x})|y=2)$') #, savepath='figure/y2.png')
#plot_simplex(ax3, post_c3, np.exp(kde3.score_samples(post_c3)), title='$f_3(\mathbf{x})=p(s(\mathbf{x})|y=3)$') #, savepath='figure/y3.png')
def mixture_(prevs, kdes):
def m(p):
total_density = 0
for prev, kde in zip(prevs, kdes):
log_density = kde.score_samples([p]).item()
density = np.exp(log_density)
density *= prev
total_density += density
#print(total_density)
return total_density
return m
title = '$p_{\mathbf{\\alpha}} = \sum_{i \in n} \\alpha_i p_i$'
plot_simplex_(ax4, mixture_(alpha, [kde1, kde2, kde3]), title=title, points=post_test)
#mixture(alpha, [kde1, kde2, kde3])
#post_test = np.concatenate([post_test, np.eye(3, dtype=float)])
#test_scores = sum(alphai*np.exp(kdei.score_samples(post_test)) for alphai, kdei in zip(alpha, [kde1,kde2,kde3]))
#plot_simplex(ax4, post_test, test_scores, title=title, points=post_test)
#ternary.plt.show()
ternary.plt.savefig('./simplex.png')
import quapy as qp
data = qp.datasets.fetch_twitter('wb', min_df=3, pickle=True, for_model_selection=False)
X, y = data.training.Xy
cls = LogisticRegression(C=0.0001, random_state=0)
Xtr, Xte, ytr, yte = train_test_split(X, y, train_size=0.7, stratify=y, random_state=0)
cls.fit(Xtr, ytr)
test = LabelledCollection(Xte, yte)
test = test.sampling(100, *[0.2, 0.1, 0.7])
Xte, yte = test.Xy
post_c1 = cls.predict_proba(Xte[yte==0])
post_c2 = cls.predict_proba(Xte[yte==1])
post_c3 = cls.predict_proba(Xte[yte==2])
post_test = cls.predict_proba(Xte)
print(post_test)
alpha = qp.functional.prevalence_from_labels(yte, classes=[0, 1, 2])
#post_c1 = np.random.dirichlet([10,3,1], 30)
#post_c2 = np.random.dirichlet([1,11,6], 30)
#post_c3 = np.random.dirichlet([1,5,20], 30)
#post_test = np.random.dirichlet([5,1,6], 100)
#alpha = [0.5, 0.3, 0.2]
print(f'test alpha {alpha}')
plot_3class_problem(post_c1, post_c2, post_c3, post_test, alpha, bandwidth=0.1)


@@ -0,0 +1,129 @@
import ternary
import math
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neighbors import KernelDensity
import plotly.figure_factory as ff
from data import LabelledCollection
scale = 100
# with ternary (a matplotlib-based library) the plots come out fine, but I cannot create contours (or I don't know how)
# with plotly the contours come out fine, but it is a bit of a hassle because it opens in the browser...
def plot_simplex_(ax, density, title='', fontsize=30, points=None):
tax = ternary.TernaryAxesSubplot(ax=ax, scale=scale)
tax.heatmapf(density, boundary=True, style="triangular", colorbar=False, cmap='viridis') #cmap='magma')
tax.boundary(linewidth=1.0)
corner_fontsize = int(5*fontsize//6)
tax.right_corner_label("$y=3$", fontsize=corner_fontsize)
tax.top_corner_label("$y=2$", fontsize=corner_fontsize)
tax.left_corner_label("$y=1$", fontsize=corner_fontsize)
if title:
tax.set_title(title, loc='center', y=-0.11, fontsize=fontsize)
if points is not None:
tax.scatter(points*scale, marker='o', color='w', alpha=0.25, zorder=10, s=5*scale)
tax.get_axes().axis('off')
tax.clear_matplotlib_ticks()
return tax
from mpl_toolkits.axes_grid1 import make_axes_locatable
def plot_3class_problem(post_c1, post_c2, post_c3, post_test, alpha, bandwidth):
post_c1 = np.flip(post_c1, axis=1)
post_c2 = np.flip(post_c2, axis=1)
post_c3 = np.flip(post_c3, axis=1)
post_test = np.flip(post_test, axis=1)
size_=10
fig = ternary.plt.figure(figsize=(5*size_, 1*size_))
fig.tight_layout()
ax1 = fig.add_subplot(1, 4, 1)
divider = make_axes_locatable(ax1)
ax2 = fig.add_subplot(1, 4, 2)
divider = make_axes_locatable(ax2)
ax3 = fig.add_subplot(1, 4, 3)
divider = make_axes_locatable(ax3)
ax4 = fig.add_subplot(1, 4, 4)
divider = make_axes_locatable(ax4)
kde1 = KernelDensity(bandwidth=bandwidth).fit(post_c1)
kde2 = KernelDensity(bandwidth=bandwidth).fit(post_c2)
kde3 = KernelDensity(bandwidth=bandwidth).fit(post_c3)
#post_c1 = np.concatenate([post_c1, np.eye(3, dtype=float)])
#post_c2 = np.concatenate([post_c2, np.eye(3, dtype=float)])
#post_c3 = np.concatenate([post_c3, np.eye(3, dtype=float)])
#plot_simplex_(ax1, lambda x:0, title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$')
#plot_simplex_(ax2, lambda x:0, title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$')
#plot_simplex_(ax3, lambda x:0, title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$')
def density(kde):
def d(p):
return np.exp(kde([p])).item()
return d
plot_simplex_(ax1, density(kde1.score_samples), title='$p_1$')
plot_simplex_(ax2, density(kde2.score_samples), title='$p_2$')
plot_simplex_(ax3, density(kde3.score_samples), title='$p_3$')
#plot_simplex(ax1, post_c1, np.exp(kde1.score_samples(post_c1)), title='$f_1(\mathbf{x})=p(s(\mathbf{x})|y=1)$') #, savepath='figure/y1.png')
#plot_simplex(ax2, post_c2, np.exp(kde2.score_samples(post_c2)), title='$f_2(\mathbf{x})=p(s(\mathbf{x})|y=2)$') #, savepath='figure/y2.png')
#plot_simplex(ax3, post_c3, np.exp(kde3.score_samples(post_c3)), title='$f_3(\mathbf{x})=p(s(\mathbf{x})|y=3)$') #, savepath='figure/y3.png')
def mixture_(prevs, kdes):
def m(p):
total_density = 0
for prev, kde in zip(prevs, kdes):
log_density = kde.score_samples([p]).item()
density = np.exp(log_density)
density *= prev
total_density += density
#print(total_density)
return total_density
return m
title = '$\mathbf{p}_{\mathbf{\\alpha}} = \sum_{i \in n} \\alpha_i p_i$'
plot_simplex_(ax4, mixture_(alpha, [kde1, kde2, kde3]), title=title, points=post_test)
#ternary.plt.show()
ternary.plt.savefig('./simplex.pdf')
import quapy as qp
data = qp.datasets.fetch_twitter('wb', min_df=3, pickle=True, for_model_selection=False)
Xtr, ytr = data.training.Xy
Xte, yte = data.test.sampling(150, *[0.5, 0.1, 0.4]).Xy
cls = LogisticRegression(C=0.0001, random_state=0)
draw_from_training = False
if draw_from_training:
post_tr = cross_val_predict(cls, Xtr, ytr, n_jobs=-1, method='predict_proba')
post_c1 = post_tr[ytr==0]
post_c2 = post_tr[ytr==1]
post_c3 = post_tr[ytr==2]
cls.fit(Xtr, ytr)
else:
cls.fit(Xtr, ytr)
post_te = cls.predict_proba(Xte)
post_c1 = post_te[yte == 0]
post_c2 = post_te[yte == 1]
post_c3 = post_te[yte == 2]
post_test = cls.predict_proba(Xte)
alpha = qp.functional.prevalence_from_labels(yte, classes=[0, 1, 2])
print(f'test alpha {alpha}')
plot_3class_problem(post_c1, post_c2, post_c3, post_test, alpha, bandwidth=0.1)


@@ -0,0 +1,65 @@
import pickle
import numpy as np
import os
import pandas as pd
from distribution_matching.commons import METHODS, BIN_METHODS, new_method, show_results
import quapy as qp
from quapy.model_selection import GridSearchQ
if __name__ == '__main__':
for task in ['T1A', 'T1B']:
qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE[task]
qp.environ['N_JOBS'] = -1
for optim in ['mae', 'mrae']:
result_dir = f'results/lequa/{task}/{optim}'
os.makedirs(result_dir, exist_ok=True)
for method in (METHODS if task=='T1B' else BIN_METHODS):
print('Init method', method)
result_path = f'{result_dir}/{method}'
if os.path.exists(result_path+'.csv'):
print(f'file {result_path}.csv already exist; skipping')
continue
with open(result_path+'.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
dataset = task
train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
print(f'init {dataset} #instances: {len(train)}')
param_grid, quantifier = new_method(method)
if param_grid is not None:
modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim)
modsel.fit(train)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
pickle.dump(
(modsel.best_params_, modsel.best_score_,),
open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
quantifier = modsel.best_model()
else:
print('debug mode... skipping model selection')
quantifier.fit(train)
report = qp.evaluation.evaluation_report(
quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'],
verbose=True, verbose_error=optim[1:], n_jobs=-1
)
means = report.mean()
report.to_csv(result_path+'.dataframe')
csv.write(f'{method}\tLeQua-{task}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()
print(means)
show_results(result_path)


@@ -0,0 +1,74 @@
import pickle
import numpy as np
import os
from os.path import join
import pandas as pd
from quapy.protocol import UPP
from quapy.data import LabelledCollection
from distribution_matching.commons import METHODS, new_method, show_results
import quapy as qp
SEED=1
def extract_classes(data:LabelledCollection, classes):
X, y = data.Xy
counts = data.counts()
Xs, ys = [], []
for class_i in classes:
Xs.append(X[y==class_i])
ys.append([class_i]*counts[class_i])
Xs = np.concatenate(Xs)
ys = np.concatenate(ys)
return LabelledCollection(Xs, ys, classes=classes)
def task(nclasses):
in_classes = np.arange(0, nclasses)
train = extract_classes(train_pool, classes=in_classes)
test = extract_classes(test_pool, classes=in_classes)
with qp.util.temp_seed(SEED):
hyper, quantifier = new_method(method)
quantifier.set_params(classifier__C=1, classifier__class_weight='balanced')
hyper = {h:v for h,v in hyper.items() if not h.startswith('classifier__')}
tr, va = train.split_stratified(random_state=SEED)
quantifier = qp.model_selection.GridSearchQ(quantifier, hyper, UPP(va), optim).fit(tr)
report = qp.evaluation.evaluation_report(quantifier, protocol=UPP(test), error_metrics=['mae', 'mrae', 'kld'], verbose=True)
return report
# only the quantifier-dependent hyperparameters are explored; the classifier is a LR with default parameters
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
qp.environ['N_JOBS'] = -1
for optim in ['mae']: #, 'mrae']:
result_dir = f'results/lequa/nclasses/{optim}'
os.makedirs(result_dir, exist_ok=True)
for method in ['DM', 'EMQ', 'KDEy-ML']: # 'KDEy-ML', 'KDEy-DMhd3']:
result_path = join(result_dir, f'{method}.csv')
if os.path.exists(result_path): continue
train_orig, _, _ = qp.datasets.fetch_lequa2022('T1B')
train_pool, test_pool = train_orig.split_stratified(0.5, random_state=SEED)
arange_classes = np.arange(2, train_orig.n_classes + 1)
reports = qp.util.parallel(task, arange_classes, n_jobs=-1)
with open(result_path, 'at') as csv:
csv.write(f'Method\tDataset\tnClasses\tMAE\tMRAE\tKLD\n')
for num_classes, report in zip(arange_classes, reports):
means = report.mean()
report_result_path = join(result_dir, f'{method}_{num_classes}')+'.dataframe'
report.to_csv(report_result_path)
csv.write(f'{method}\tLeQua-T1B\t{num_classes}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()
means = report.mean()
print(means)


@@ -0,0 +1,56 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
import os
import quapy as qp
from distribution_matching.commons import show_results
from method_kdey import KDEy
from quapy.method.aggregative import DistributionMatching
SEED=1
def task(val):
print('job-init', val)
train, val_gen, test_gen = qp.datasets.fetch_lequa2022('T1B')
with qp.util.temp_seed(SEED):
if method=='KDEy-ML':
quantifier = KDEy(LogisticRegression(), target='max_likelihood', val_split=10, bandwidth=val)
elif method == 'DM-HD':
quantifier = DistributionMatching(LogisticRegression(), val_split=10, nbins=val, divergence='HD')
quantifier.fit(train)
report = qp.evaluation.evaluation_report(
quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'], verbose=True)
return report
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
qp.environ['N_JOBS'] = -1
result_dir = f'results/lequa/T1B/sensibility'
os.makedirs(result_dir, exist_ok=True)
for method, param, grid in [
('KDEy-ML', 'Bandwidth', np.linspace(0.01, 0.2, 20)),
('DM-HD', 'nbins', list(range(2, 10)) + list(range(10, 34, 2)))
]:
global_result_path = f'{result_dir}/{method}'
if not os.path.exists(global_result_path+'.csv'):
with open(global_result_path+'.csv', 'wt') as csv:
csv.write(f'Method\tDataset\t{param}\tMAE\tMRAE\tKLD\n')
reports = qp.util.parallel(task, grid, n_jobs=-1)
with open(global_result_path + '.csv', 'at') as csv:
for val, report in zip(grid, reports):
means = report.mean()
local_result_path = global_result_path + '_T1B' + (f'_{val:.3f}' if isinstance(val, float) else f'{val}')
report.to_csv(f'{local_result_path}.dataframe')
csv.write(f'{method}\tLeQua-T1B\t{val}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()
show_results(global_result_path)


@@ -0,0 +1,105 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
import dirichlet
class DIRy(AggregativeProbabilisticQuantifier):
MAXITER = 100000
def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None, target='max_likelihood'):
self.classifier = classifier
self.val_split = val_split
self.n_jobs = n_jobs
self.target = target
def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
if val_split is None:
val_split = self.val_split
self.classifier, y, posteriors, _, _ = cross_generate_predictions(
data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
)
self.val_parameters = []
for cat in range(data.n_classes):
dir_i = dirichlet.mle(posteriors[y == cat], maxiter=DIRy.MAXITER)
self.val_parameters.append(dir_i)
# print(cat)
# self.val_parameters = [dirichlet.mle(posteriors[y == cat], maxiter=DIRy.MAXITER) for cat in range(data.n_classes)]
return self
def val_pdf(self, prev):
"""
Returns a function that computes the mixture model with the given prev as mixture factor
:param prev: a prevalence vector, ndarray
:return: a function implementing the validation distribution with fixed mixture factor
"""
return lambda posteriors: sum(prev_i * dirichlet.pdf(parameters_i)(posteriors) for parameters_i, prev_i in zip(self.val_parameters, prev))
def aggregate(self, posteriors: np.ndarray):
if self.target == 'min_divergence':
raise NotImplementedError('not yet')
return self._target_divergence(posteriors)
elif self.target == 'max_likelihood':
return self._target_likelihood(posteriors)
else:
raise ValueError('unknown target')
def _target_divergence(self, posteriors):
test_density = self.get_kde(posteriors)
# val_test_posteriors = np.concatenate([self.val_posteriors, posteriors])
test_likelihood = self.pdf(test_density, posteriors)
divergence = _get_divergence(self.divergence)
n_classes = len(self.val_densities)
def match(prev):
val_pdf = self.val_pdf(prev)
val_likelihood = val_pdf(posteriors)
return divergence(val_likelihood, test_likelihood)
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
def _target_likelihood(self, posteriors, eps=0.000001):
n_classes = len(self.val_parameters)
def neg_loglikelihood(prev):
val_pdf = self.val_pdf(prev)
test_likelihood = val_pdf(posteriors)
test_loglikelihood = np.log(test_likelihood + eps)
return -np.sum(test_loglikelihood)
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(neg_loglikelihood, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x


@@ -0,0 +1,321 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
DistributionMatching, _get_divergence
import quapy.functional as F
import scipy
from scipy import optimize
#from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
# TODO: optimize the bandwidth automatically
# TODO: think of a MMD-y variant, i.e., a MMD variant that uses the points in the simplex and possibly any non-linear kernel
class KDEy(AggregativeProbabilisticQuantifier):
BANDWIDTH_METHOD = ['auto', 'scott', 'silverman']
ENGINE = ['scipy', 'sklearn', 'statsmodels']
TARGET = ['min_divergence', 'min_divergence_uniform', 'max_likelihood']
def __init__(self, classifier: BaseEstimator, val_split=0.4, divergence: Union[str, Callable]='L2',
bandwidth='scott', engine='sklearn', target='min_divergence', n_jobs=None, random_state=0, montecarlo_trials=1000):
assert bandwidth in KDEy.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
f'unknown bandwidth_method, valid ones are {KDEy.BANDWIDTH_METHOD}'
assert engine in KDEy.ENGINE, f'unknown engine, valid ones are {KDEy.ENGINE}'
assert target in KDEy.TARGET, f'unknown target, valid ones are {KDEy.TARGET}'
assert target=='max_likelihood' or divergence in ['KLD', 'HD', 'JS'], \
'in this version only the KLD, (squared) HD, and JS divergences are allowed'
self.classifier = classifier
self.val_split = val_split
self.divergence = divergence
self.bandwidth = bandwidth
self.engine = engine
self.target = target
self.n_jobs = n_jobs
self.random_state=random_state
self.montecarlo_trials = montecarlo_trials
def search_bandwidth_maxlikelihood(self, posteriors, labels):
grid = {'bandwidth': np.linspace(0.001, 0.2, 100)}
search = GridSearchCV(
KernelDensity(), param_grid=grid, n_jobs=-1, cv=50, verbose=1, refit=True
)
search.fit(posteriors, labels)
bandwidth = search.best_params_['bandwidth']
print(f'auto: bandwidth={bandwidth:.5f}')
return bandwidth
def get_kde_function(self, posteriors):
# if self.bandwidth == 'auto':
# print('adjusting bandwidth')
#
# if self.engine == 'sklearn':
# grid = {'bandwidth': np.linspace(0.001,0.2,41)}
# search = GridSearchCV(
# KernelDensity(), param_grid=grid, n_jobs=-1, cv=10, verbose=1, refit=True
# )
# search.fit(posteriors)
# print(search.best_score_)
# print(search.best_params_)
#
# import pandas as pd
# df = pd.DataFrame(search.cv_results_)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# pd.set_option('expand_frame_repr', False)
# print(df)
# sys.exit(0)
#
# kde = search
#else:
if self.engine == 'scipy':
# scipy treats columns as datapoints, and requires the datapoints not to lie in a lower-dimensional subspace;
# this means removing the last dimension, which is constrained (the posteriors sum to 1)
posteriors = posteriors[:,:-1].T
kde = scipy.stats.gaussian_kde(posteriors)
kde.set_bandwidth(self.bandwidth)
elif self.engine == 'sklearn':
#print('fitting kde')
kde = KernelDensity(bandwidth=self.bandwidth).fit(posteriors)
#print('[fitting done]')
return kde
def pdf(self, kde, posteriors):
if self.engine == 'scipy':
return kde(posteriors[:, :-1].T)
elif self.engine == 'sklearn':
return np.exp(kde.score_samples(posteriors))
def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
"""
:param data: the training set
:param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
:param val_split: either a float in (0,1) indicating the proportion of training instances to use for
validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
to estimate the parameters
"""
if val_split is None:
val_split = self.val_split
self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
)
if self.bandwidth == 'auto':
self.bandwidth = self.search_bandwidth_maxlikelihood(posteriors, y)
self.val_densities = [self.get_kde_function(posteriors[y == cat]) for cat in range(data.n_classes)]
self.val_posteriors = posteriors
if self.target == 'min_divergence_uniform':
self.samples = qp.functional.uniform_prevalence_sampling(n_classes=data.n_classes, size=self.montecarlo_trials)
self.sample_densities = [self.pdf(kde_i, self.samples) for kde_i in self.val_densities]
elif self.target == 'min_divergence':
N = self.montecarlo_trials
rs = self.random_state
n = data.n_classes
self.reference_samples = np.vstack([kde_i.sample(N//n, random_state=rs) for kde_i in self.val_densities])
self.reference_classwise_densities = np.asarray([self.pdf(kde_j, self.reference_samples) for kde_j in self.val_densities])
self.reference_density = np.mean(self.reference_classwise_densities, axis=0) # equiv. to (uniform @ self.reference_classwise_densities)
elif self.target == 'min_divergence_deprecated': # the version of the first draft, with n*N presampled, then alpha*N chosen for class
self.class_samples = [kde_i.sample(self.montecarlo_trials, random_state=self.random_state) for kde_i in self.val_densities]
self.class_sample_densities = {}
for ci, samples_i in enumerate(self.class_samples):
self.class_sample_densities[ci] = np.asarray([self.pdf(kde_j, samples_i) for kde_j in self.val_densities]).T
return self
def aggregate(self, posteriors: np.ndarray):
if self.target == 'min_divergence':
return self._target_divergence(posteriors)
elif self.target == 'min_divergence_uniform':
return self._target_divergence_uniform(posteriors)
elif self.target == 'max_likelihood':
return self._target_likelihood(posteriors)
else:
raise ValueError('unknown target')
# new version in which we retain all n*N examples (sampled from a mixture with uniform parameter), and then
# apply importance sampling (IS). In this version we compute D(p_alpha||q) with IS, and not D(q||p_alpha) as
# in the first draft
def _target_divergence(self, posteriors):
# in this variant we evaluate the divergence using a Montecarlo approach
n_classes = len(self.val_densities)
test_kde = self.get_kde_function(posteriors)
test_densities = self.pdf(test_kde, self.reference_samples)
def f_squared_hellinger(u):
return (np.sqrt(u)-1)**2
# todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway
if self.divergence.lower() == 'hd':
f = f_squared_hellinger
else:
raise ValueError('only squared HD is currently implemented')
epsilon = 1e-10
qs = test_densities + epsilon
rs = self.reference_density + epsilon
iw = qs/rs #importance weights
p_class = self.reference_classwise_densities + epsilon
fracs = p_class/qs
def divergence(prev):
# ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs
ps_div_qs = prev @ fracs
return np.mean( f(ps_div_qs) * iw )
return F.optim_minimize(divergence, n_classes)
# new version in which we retain all n*N examples (sampled from a mixture with uniform parameter), and then
# apply importance sampling (IS). In this version we compute D(q||p_alpha) with IS, and not D(p_alpha||q) as
# in the reformulation proposed above
def _target_divergence_q__p(self, posteriors):
# in this variant we evaluate the divergence using a Montecarlo approach
n_classes = len(self.val_densities)
test_kde = self.get_kde_function(posteriors)
test_densities = self.pdf(test_kde, self.reference_samples)
def f_squared_hellinger(u):
return (np.sqrt(u)-1)**2
# todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway
if self.divergence.lower() == 'hd':
f = f_squared_hellinger
else:
raise ValueError('only squared HD is currently implemented')
epsilon = 1e-10
qs = test_densities + epsilon
rs = self.reference_density + epsilon
p_class = self.reference_classwise_densities + epsilon
# D(q||p_a) = 1/N sum f(q/p_a) * (p_a / p_u)
def divergence(prev):
p_a = prev @ p_class
return np.mean( f(qs/p_a) * (p_a/rs) )
return F.optim_minimize(divergence, n_classes)
# the first version we explain in the draft, choosing alpha*N from a pool of N for each class and w/o importance sampling
def _target_divergence_deprecated(self, posteriors):
# in this variant we evaluate the divergence using a Montecarlo approach
n_classes = len(self.val_densities)
test_kde = self.get_kde_function(posteriors)
test_densities_per_class = [self.pdf(test_kde, samples_i) for samples_i in self.class_samples]
# divergence = _get_divergence(self.divergence)
def kld_monte(pi, qi, eps=1e-8):
# there is no pi in front of the log because the samples are already drawn according to pi
smooth_pi = pi+eps
smooth_qi = qi+eps
return np.mean(np.log(smooth_pi / smooth_qi))
def squared_hellinger(pi, qi, eps=1e-8):
smooth_pi = pi + eps
smooth_qi = qi + eps
return np.mean((np.sqrt(smooth_pi/smooth_qi)-1)**2)
# todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway
if self.divergence.lower() == 'kld':
fdivergence = kld_monte
elif self.divergence.lower() == 'hd':
fdivergence = squared_hellinger
def dissimilarity(prev):
# choose the samples according to the prevalence vector
# e.g., prev = [0.5, 0.3, 0.2] will draw 50% from KDE_0, 30% from KDE_1, and 20% from KDE_2
# the points are already pre-sampled and the densities are pre-computed, so that now all that remains
# is to pick a proportional number of each from each class (same for test)
num_variates_per_class = np.round(prev * self.montecarlo_trials).astype(int)
sample_densities = np.vstack(
[self.class_sample_densities[ci][:num_i] for ci, num_i in enumerate(num_variates_per_class)]
)
#val_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, sample_densities.T))
val_likelihood = prev @ sample_densities.T
#test_likelihood = []
#for samples_i, num_i in zip(test_densities_per_class, num_variates_per_class):
# test_likelihood.append(samples_i[:num_i])
#test_likelihood = np.concatenate[test]
test_likelihood = np.concatenate(
[samples_i[:num_i] for samples_i, num_i in zip(test_densities_per_class, num_variates_per_class)]
)
# return fdivergence(val_likelihood, test_likelihood)  # this is wrong: if I sample from the val distribution
# then I am computing D(Test||Val), so it should be E_{x ~ Val}[f(Test(x)/Val(x))]
return fdivergence(test_likelihood, val_likelihood)
return F.optim_minimize(dissimilarity, n_classes)
# this version explores the entire simplex and then applies importance sampling. We have not really tested it in depth,
# but it does not seem promising
def _target_divergence_uniform(self, posteriors):
# in this variant we evaluate the divergence using a Montecarlo approach
n_classes = len(self.val_densities)
test_kde = self.get_kde_function(posteriors)
test_likelihood = self.pdf(test_kde, self.samples)
def f_squared_hellinger(t):
return (np.sqrt(t) - 1)**2
def f_jensen_shannon(t):
return -(t+1)*np.log((t+1)/2) + t*np.log(t)
def fdivergence(pi, qi, f, eps=1e-10):
spi = pi+eps
sqi = qi+eps
return np.mean(f(spi/sqi)*sqi)
if self.divergence.lower() == 'hd':
f = f_squared_hellinger
elif self.divergence.lower() == 'js':
f = f_jensen_shannon
def dissimilarity(prev):
val_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, self.sample_densities))
return fdivergence(val_likelihood, test_likelihood, f)
return F.optim_minimize(dissimilarity, n_classes)
def _target_likelihood(self, posteriors, eps=0.000001):
"""
Searches for the mixture model parameter (the sought prevalence values) that yields a mixture of the
validation (class-conditional) densities under which the test posteriors are maximally likely.
:param posteriors: posterior probabilities of the instances in the sample
:return: a vector of class prevalence estimates
"""
np.random.RandomState(self.random_state)
n_classes = len(self.val_densities)
test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.val_densities]
#return lambda posteriors: sum(prev_i * self.pdf(kde_i, posteriors) for kde_i, prev_i in zip(self.val_densities, prev))
def neg_loglikelihood(prev):
test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
test_loglikelihood = np.log(test_mixture_likelihood + eps)
return -np.sum(test_loglikelihood)
return F.optim_minimize(neg_loglikelihood, n_classes)
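
A small self-contained sketch (not part of the committed file) of the importance-sampling estimate used by _target_divergence above: reference points are drawn from the uniform mixture r of the class-conditional densities, and the expectation E_{x~q}[f(p_alpha(x)/q(x))] defining the squared Hellinger divergence is estimated with importance weights q/r. One-dimensional Gaussians stand in for the per-class and test KDEs purely for illustration.

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
class_densities = [norm(-1, 0.5), norm(0, 0.5), norm(1, 0.5)]  # stand-ins for the class-conditional KDEs
test_density = norm(0.3, 0.6)                                  # stand-in for the test KDE
N, n = 9999, 3

# reference sample drawn from the uniform mixture r(x) = (1/n) * sum_i p_i(x)
reference_samples = np.concatenate([d.rvs(N // n, random_state=rng) for d in class_densities])
p_class = np.asarray([d.pdf(reference_samples) for d in class_densities])  # shape (n, N)
r = p_class.mean(axis=0)                      # proposal (reference) density at the sampled points
q = test_density.pdf(reference_samples)       # test density at the sampled points

f = lambda u: (np.sqrt(u) - 1) ** 2           # generator of the squared Hellinger divergence
iw = q / r                                    # importance weights q(x)/r(x)

def divergence(prev):
    p_alpha = prev @ p_class                  # mixture density sum_i prev_i * p_i at the sampled points
    return np.mean(f(p_alpha / q) * iw)       # Monte Carlo estimate of E_{x~q}[f(p_alpha / q)]

print(divergence(np.array([1/3, 1/3, 1/3])))  # estimate for the uniform prevalence vector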


@@ -0,0 +1,174 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from scipy.stats import multivariate_normal
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
from time import time
from sklearn.metrics.pairwise import rbf_kernel
def gram_matrix_mix(bandwidth, X, Y=None):
# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
# to contain pairwise evaluations of N(x|mu, Sigma1+Sigma2), where mu=y and Sigma1 and Sigma2 are both
# the "scalar matrix" (h^2)I, so that Sigma1+Sigma2 is the scalar matrix 2(h^2)I (h is the bandwidth)
variance = 2 * (bandwidth**2)
nD = X.shape[1]
gamma = 1/(2*variance)
gram = rbf_kernel(X, Y, gamma=gamma)
norm_factor = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD)))
gram *= norm_factor
print('GRAM SUM:', gram.sum())
return gram
def weighted_prod(pi, tau, G):
return pi[:,np.newaxis] * G * tau
def tril_weighted_prod(pi, G):
M = weighted_prod(pi, pi, G)
return np.triu(M,1)
class KDEyclosed(AggregativeProbabilisticQuantifier):
def __init__(self, classifier: BaseEstimator, val_split=0.4, bandwidth=0.1, n_jobs=None, random_state=0):
self.classifier = classifier
self.val_split = val_split
self.bandwidth = bandwidth
self.n_jobs = n_jobs
self.random_state=random_state
def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
"""
:param data: the training set
:param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
:param val_split: either a float in (0,1) indicating the proportion of training instances to use for
validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
to estimate the parameters
"""
# print('[fit] enter')
if val_split is None:
val_split = self.val_split
self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
)
# from distribution_matching.binary_debug import HACK
# posteriors, y = HACK(posteriors, y)
# print('[fit] precomputing stuff')
n = data.n_classes
#L = [posteriors[y==i] for i in range(n)]
#l = [len(Li) for Li in L]
D = n
h = self.bandwidth
#cov_mix_scalar = 2 * h * h # corresponds to a bandwidth of sqrt(2)*h
#Kernel = multivariate_normal(mean=np.zeros(D), cov=cov_mix_scalar)
# print('[fit] classifier ready; precomputing gram')
self.gram_tr_tr = gram_matrix_mix(h, posteriors)
# li_inv keeps track of the relative weight of each datapoint within its class
# (i.e., the weight in its KDE model)
counts_inv = 1/(data.counts())
self.li_inv = counts_inv[y]
# Khash = {}
# for a in range(n):
# for b in range(l[a]):
# for i in range(n):
# Khash[(a,b,i)] = sum(Kernel.pdf(L[i][j] - L[a][b]) for j in range(l[i]))
# for j in range(l[i]):  # this loop, and index j, can be suppressed by storing the sum across j
# Khash[(a, b, i, j)] = Kernel.pdf(L[i][j] - L[a][b])
self.n = n
#self.L = L
#self.l = l
#self.Kernel = Kernel
#self.Khash = Khash
self.C = ((2 * np.pi) ** (-D / 2)) * h ** (-D)
print('C:', self.C)
self.Ptr = posteriors
self.ytr = y
assert all(sorted(np.unique(y)) == np.arange(data.n_classes)), 'label name gaps not allowed in current implementation'
# print('[fit] exit')
return self
def aggregate(self, posteriors: np.ndarray):
# print('[aggregate] enter')
Ptr = self.Ptr
Pte = posteriors
gram_te_te = gram_matrix_mix(self.bandwidth, Pte, Pte)
gram_tr_te = gram_matrix_mix(self.bandwidth, Ptr, Pte)
K = Pte.shape[0]
tau = np.full(shape=K, fill_value=1/K, dtype=float)
h = self.bandwidth
D = Ptr.shape[1]
C = self.C
partC = 0.5 * np.log( C/K + 2 * tril_weighted_prod(tau, gram_te_te).sum())
def match(alpha):
pi = alpha[self.ytr] * self.li_inv
partA = -np.log(weighted_prod(pi, tau, gram_tr_te).sum())
# print('gram_Tr_Tr sum', self.gram_tr_tr.sum())
# print('pretril', (np.triu(self.gram_tr_tr,1).sum()))
# print('tril', (2 * tril_weighted_prod(pi, self.gram_tr_tr).sum()))
# print('pi', pi.sum(), pi[:10])
# print('Cs', C*(pi**2).sum())
partB = 0.5 * np.log(C*(pi**2).sum() + 2*tril_weighted_prod(pi, self.gram_tr_tr).sum())
Dcs = partA + partB + partC
# print(f'{alpha=}\t{partA=}\t{partB=}\t{partC}')
# print()
return Dcs
# print('[aggregate] starts search')
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / self.n, shape=(self.n,))
# uniform_distribution = [0.2, 0.8]
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(self.n)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
# print('[aggregate] end')
return r.x
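
A quick numerical check (not part of the committed file) of what gram_matrix_mix returns: under the reasoning in its comment, entry (i, j) should equal the Gaussian density N(X[i] | mean=Y[j], cov=2*h^2*I), i.e., the closed-form value of the integral of the product of two Gaussian kernels of bandwidth h centred at X[i] and Y[j].

import numpy as np
from scipy.stats import multivariate_normal
from sklearn.metrics.pairwise import rbf_kernel

h, D = 0.1, 3
rng = np.random.default_rng(0)
X, Y = rng.random((5, D)), rng.random((4, D))

variance = 2 * (h ** 2)   # scalar of the summed covariance Sigma1 + Sigma2 = 2(h^2)I
norm_factor = 1 / np.sqrt(((2 * np.pi) ** D) * (variance ** D))
gram = norm_factor * rbf_kernel(X, Y, gamma=1 / (2 * variance))  # same computation as gram_matrix_mix

# entry (i, j) should coincide with N(X[i] | mean=Y[j], cov=2*h^2*I)
direct = np.array([[multivariate_normal(mean=y, cov=variance * np.eye(D)).pdf(x) for y in Y] for x in X])
assert np.allclose(gram, direct)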


@@ -0,0 +1,145 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from scipy.stats import multivariate_normal
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
from time import time
from sklearn.metrics.pairwise import rbf_kernel
def gram_matrix_mix_sum(bandwidth, X, Y=None, reduce=True):
# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
# to contain pairwise evaluations of N(x|mu, Sigma1+Sigma2), where mu=y and Sigma1 and Sigma2 are both
# the "scalar matrix" (h^2)I, so that Sigma1+Sigma2 is the scalar matrix 2(h^2)I (h is the bandwidth)
variance = 2 * (bandwidth**2)
nRows,nD = X.shape
gamma = 1/(2*variance)
gram = rbf_kernel(X, Y, gamma=gamma)
norm_factor = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD)))
gram *= norm_factor
if Y is None:
# ignores the diagonal
aggr = (2 * np.triu(gram, 1)).sum()
else:
aggr = gram.sum()
return aggr
class KDEyclosed_efficient(AggregativeProbabilisticQuantifier):
def __init__(self, classifier: BaseEstimator, val_split=0.4, bandwidth=0.1, n_jobs=None, random_state=0):
self.classifier = classifier
self.val_split = val_split
self.bandwidth = bandwidth
self.n_jobs = n_jobs
self.random_state=random_state
def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
"""
:param data: the training set
:param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
:param val_split: either a float in (0,1) indicating the proportion of training instances to use for
validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
to estimate the parameters
"""
# print('[fit] enter')
if val_split is None:
val_split = self.val_split
self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
)
assert all(sorted(np.unique(y)) == np.arange(data.n_classes)), \
'label name gaps not allowed in current implementation'
n = data.n_classes
h = self.bandwidth
P = posteriors
counts_inv = 1 / (data.counts())
nD = P.shape[1]
C = ((2 * np.pi) ** (-nD / 2)) * (self.bandwidth ** (-nD))
tr_tr_sums = np.zeros(shape=(n,n), dtype=float)
self.tr_C = []
for i in range(n):
for j in range(n):
if i > j:
tr_tr_sums[i,j] = tr_tr_sums[j,i]
else:
if i == j:
tr_tr_sums[i, j] = gram_matrix_mix_sum(h, P[y == i])
self.tr_C.append(C * sum(y == i))
else:
block = gram_matrix_mix_sum(h, P[y == i], P[y == j])
tr_tr_sums[i, j] = block
self.tr_C = np.asarray(self.tr_C)
self.Ptr = posteriors
self.ytr = y
self.tr_tr_sums = tr_tr_sums
self.counts_inv = counts_inv
return self
def aggregate(self, posteriors: np.ndarray):
# print('[aggregate] enter')
Ptr = self.Ptr
Pte = posteriors
K,nD = Pte.shape
Kinv = (1/K)
h = self.bandwidth
n = Ptr.shape[1]
y = self.ytr
tr_tr_sums = self.tr_tr_sums
C = ((2 * np.pi) ** (-nD / 2)) * (self.bandwidth ** (-nD))
partC = 0.5*np.log(gram_matrix_mix_sum(h, Pte) * Kinv * Kinv + C*Kinv)
tr_te_sums = np.zeros(shape=n, dtype=float)
for i in range(n):
tr_te_sums[i] = gram_matrix_mix_sum(h, Ptr[y==i], Pte) * self.counts_inv[i] * Kinv
def match(alpha):
partA = -np.log((alpha * tr_te_sums).sum())
alpha_l = alpha * self.counts_inv
partB = 0.5 * np.log((alpha_l[:,np.newaxis] * tr_tr_sums * alpha_l).sum() + (self.tr_C*(alpha_l**2)).sum())
return partA + partB + partC
# print('[aggregate] starts search')
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n, shape=(n,))
# uniform_distribution = [0.2, 0.8]
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
# print('[aggregate] end')
return r.x


@@ -0,0 +1,140 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from scipy.stats import multivariate_normal
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
from time import time
from sklearn.metrics.pairwise import rbf_kernel
def gram_matrix_mix_sum(bandwidth, X, Y=None, reduce=True):
# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
# to contain pairwise evaluations of N(x|mu, Sigma1+Sigma2), where mu=y and Sigma1 and Sigma2 are both
# the "scalar matrix" (h^2)I, so that Sigma1+Sigma2 is the scalar matrix 2(h^2)I (h is the bandwidth)
variance = 2 * (bandwidth**2)
nRows,nD = X.shape
gamma = 1/(2*variance)
norm_factor = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD)))
gram = norm_factor * rbf_kernel(X, Y, gamma=gamma)
return gram.sum()
class KDEyclosed_efficient_corr(AggregativeProbabilisticQuantifier):
def __init__(self, classifier: BaseEstimator, val_split=0.4, bandwidth=0.1, n_jobs=None, random_state=0):
self.classifier = classifier
self.val_split = val_split
self.bandwidth = bandwidth
self.n_jobs = n_jobs
self.random_state=random_state
def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
"""
:param data: the training set
:param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
:param val_split: either a float in (0,1) indicating the proportion of training instances to use for
validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
to estimate the parameters
"""
# print('[fit] enter')
if val_split is None:
val_split = self.val_split
self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
)
print('training over')
assert all(sorted(np.unique(y)) == np.arange(data.n_classes)), \
'label name gaps not allowed in current implementation'
# print('[fit] precomputing stuff')
# from distribution_matching.binary_debug import HACK
# posteriors, y = HACK(posteriors, y)
n = data.n_classes
h = self.bandwidth
# print('[fit] classifier ready; precomputing gram')
P = posteriors
# counts_inv keeps track of the relative weight of each datapoint within its class
# (i.e., its weight in the class-conditional KDE model)
counts_inv = 1 / (data.counts())
tr_tr_sums = np.zeros(shape=(n,n), dtype=float)
for i in range(n):
for j in range(n):
if i > j:
tr_tr_sums[i,j] = tr_tr_sums[j,i]
else:
block = gram_matrix_mix_sum(h, P[y == i], P[y == j] if i!=j else None)
tr_tr_sums[i, j] = block
# compute class-class block-sums
self.Ptr = posteriors
self.ytr = y
self.tr_tr_sums = tr_tr_sums
self.counts_inv = counts_inv
print('fit over')
return self
def aggregate(self, posteriors: np.ndarray):
# print('aggregating')
Ptr = self.Ptr
Pte = posteriors
K,nD = Pte.shape
Kinv = (1/K)
h = self.bandwidth
n = Ptr.shape[1]
y = self.ytr
tr_tr_sums = self.tr_tr_sums
partC = 0.5*np.log(gram_matrix_mix_sum(h, Pte) * Kinv * Kinv)
tr_te_sums = np.zeros(shape=n, dtype=float)
for i in range(n):
tr_te_sums[i] = gram_matrix_mix_sum(h, Ptr[y==i], Pte) * self.counts_inv[i] * Kinv
def match(alpha):
partA = -np.log((alpha * tr_te_sums).sum())
alpha_l = alpha * self.counts_inv
partB = 0.5 * np.log((alpha_l[:,np.newaxis] * tr_tr_sums * alpha_l).sum())
return partA + partB + partC
# print('starting search')
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n, shape=(n,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
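# Hypothetical usage sketch (names are illustrative; `train` would be a quapy LabelledCollection and
# `X_test` an array of test instances):
#
#   quantifier = KDEyclosed_efficient_corr(LogisticRegression(), bandwidth=0.1)
#   quantifier.fit(train)
#   estimated_prevalences = quantifier.quantify(X_test)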

View File

@ -0,0 +1,41 @@
import sys
from pathlib import Path
import pandas as pd
result_dir = 'results/results_tweet_mae_redohyper'
#result_dir = 'results_lequa_mrae'
dfs = []
pathlist = Path(result_dir).rglob('*.csv')
for path in pathlist:
path_in_str = str(path)
try:
df = pd.read_csv(path_in_str, sep='\t')
df = df[df.iloc[:, 0] != df.columns[0]]
if not df.empty:
dfs.append(df)
except Exception:
print(f'could not read {path_in_str}; skipping')
df = pd.concat(dfs)
for err in ['MAE', 'MRAE', 'KLD']:
print('-'*100)
print(err)
print('-'*100)
piv = df.pivot_table(index='Dataset', columns='Method', values=err)
piv.loc['mean'] = piv.mean()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('expand_frame_repr', False)
print(piv)
print()

View File

@ -0,0 +1,249 @@
from distribution_matching.commons import BIN_METHODS, METHODS
import quapy as qp
from os import makedirs
import os
from tabular import Table
import pandas as pd
tables_path = '.'
# makedirs(tables_path, exist_ok=True)
MAXTONE = 35 # sets the intensity of the maximum color reached by the worst (red) and best (green) results
SHOW_STD = False
NUM_ADJUSTMENT_METHODS = 2 if 'ACC' in METHODS else 1
NUM_MAXIMUM_LIKELIHOOD_METHODS = 4 if 'DIR' in METHODS else 3
NUM_DISTRIBUTION_MATCHING_PAIRS = 2
NUM_DISTRIBUTION_MATCHING_METHODS = NUM_DISTRIBUTION_MATCHING_PAIRS*2 + (2 if 'HDy-OvA' in METHODS else 1)
qp.environ['SAMPLE_SIZE'] = 100
nice_bench = {
'sanders': 'Sanders',
'semeval13': 'SemEval13',
'semeval14': 'SemEval14',
'semeval15': 'SemEval15',
'semeval16': 'SemEval16',
}
nice_method={
'KDEy-MLE': 'KDEy-ML',
'KDEy-DMhd4': 'KDEy-HD',
'KDEy-closed++': 'KDEy-CS',
'EMQ-C': 'EMQ-BCTS'
}
def save_table(path, table):
print(f'saving results in {path}')
with open(path, 'wt') as foo:
foo.write(table)
def nicerm(key):
return '\mathrm{'+nice[key]+'}'
def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):
n_methods = len(METHODS)
assert n_methods == (NUM_ADJUSTMENT_METHODS+NUM_DISTRIBUTION_MATCHING_METHODS+NUM_MAXIMUM_LIKELIHOOD_METHODS), \
"Unexpected number of methods"
cline = "\cline{2-" + str(n_methods+ 1) + "}"
# write the latex table
tabular = """
\\begin{tabular}{|c|""" + ('c|' * NUM_ADJUSTMENT_METHODS) + 'c|c' + ('|c|c' * (NUM_DISTRIBUTION_MATCHING_PAIRS)) + ('|c' * NUM_MAXIMUM_LIKELIHOOD_METHODS) + """|} """ + cline + """
\multicolumn{1}{c}{} &
\multicolumn{"""+str(NUM_ADJUSTMENT_METHODS)+"""}{|c}{Adjustment} &
\multicolumn{"""+str(NUM_DISTRIBUTION_MATCHING_METHODS)+"""}{|c|}{Distribution Matching} &
\multicolumn{"""+str(NUM_MAXIMUM_LIKELIHOOD_METHODS)+"""}{c|}{Maximum Likelihood} \\\\
\hline
"""
for i, (tab, group, name) in enumerate(zip(tabs, benchmark_groups, benchmark_names)):
tablines = tab.latexTabular(benchmark_replace=nice_bench, method_replace=nice_method, endl='\\\\'+ cline, aslines=True)
print(tablines)
tablines[0] = tablines[0].replace('\multicolumn{1}{c|}{}', '\\textbf{'+name+'}')
if not compact:
tabular += '\n'.join(tablines)
else:
# if compact, keep the method names and the average; discard the rest
tabular += tablines[0] + '\n' + tablines[-1] + '\n'
tabular += "\n" + "\\textit{Rank} & " + tab.getRankTable(prec_mean=0 if name.startswith('LeQua') else 1).latexAverage()
if i < (len(tabs) - 1):
tabular += "\\hline\n"
else:
tabular += "\n"
tabular += "\end{tabular}"
return tabular
def gen_tables_uci_multiclass(eval):
print('Generating table for UCI Multiclass Datasets', eval)
dir_results = f'../results/ucimulti/{eval}'
datasets = qp.datasets.UCI_MULTICLASS_DATASETS
tab = Table(
benchmarks=datasets,
methods=METHODS,
ttest='wilcoxon',
prec_mean=4,
show_std=SHOW_STD,
prec_std=4,
clean_zero=(eval=='mae'),
average=True,
maxtone=MAXTONE
)
for dataset in datasets:
print(f'\t Dataset: {dataset}: ', end='')
for method in METHODS:
result_path = f'{dir_results}/{method}_{dataset}.dataframe'
if os.path.exists(result_path):
df = pd.read_csv(result_path)
print(f'{method}', end=' ')
tab.add(dataset, method, df[eval].values)
else:
print(f'MISSING-{method}', end=' ')
print()
return tab
def gen_tables_uci_bin(eval):
print('Generating table for UCI Datasets', eval)
dir_results = f'../results/binary/{eval}'
exclude = ['acute.a', 'acute.b', 'iris.1', 'balance.2']
datasets = [x for x in qp.datasets.UCI_DATASETS if x not in exclude]
tab = Table(
benchmarks=datasets,
methods=BIN_METHODS,
ttest='wilcoxon',
prec_mean=4,
show_std=SHOW_STD,
prec_std=4,
clean_zero=(eval=='mae'),
average=True,
maxtone=MAXTONE
)
for dataset in datasets:
print(f'\t Dataset: {dataset}: ', end='')
for method in BIN_METHODS:
result_path = f'{dir_results}/{method}_{dataset}.dataframe'
if os.path.exists(result_path):
df = pd.read_csv(result_path)
print(f'{method}', end=' ')
tab.add(dataset, method, df[eval].values)
else:
print(f'MISSING-{method}', end=' ')
return tab
def gen_tables_tweet(eval):
print('Generating table for Twitter', eval)
dir_results = f'../results/tweet/{eval}'
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
tab = Table(
benchmarks=datasets,
methods=METHODS,
ttest='wilcoxon',
prec_mean=4,
show_std=SHOW_STD,
prec_std=4,
clean_zero=(eval=='mae'),
average=True,
maxtone=MAXTONE
)
for dataset in datasets:
print(f'\t Dataset: {dataset}: ', end='')
for method in METHODS:
result_path = f'{dir_results}/{method}_{dataset}.dataframe'
if os.path.exists(result_path):
df = pd.read_csv(result_path)
print(f'{method}', end=' ')
tab.add(dataset, method, df[eval].values)
else:
print(f'MISSING-{method}', end=' ')
print()
return tab
def gen_tables_lequa(Methods, task, eval):
# generating table for LeQua-T1A or LeQua-T1B; a single table with two rows, one for MAE, another for MRAE
dataset_name = 'LeQua-'+task
tab = Table(
benchmarks=[f'Average'],
methods=Methods,
ttest='wilcoxon',
prec_mean=5,
show_std=SHOW_STD,
prec_std=4,
clean_zero=False,
average=False,
maxtone=MAXTONE
)
print(f'Generating table for LeQua-{task}', eval, end='')
dir_results = f'../results/lequa/{task}/{eval}'
for method in Methods:
result_path = f'{dir_results}/{method}.dataframe'
if os.path.exists(result_path):
df = pd.read_csv(result_path)
print(f'{method}', end=' ')
tab.add('Average', method, df[eval].values)
else:
print(f'MISSING-{method}', end=' ')
print()
return tab
if __name__ == '__main__':
os.makedirs('./latex', exist_ok=True)
for eval in ['mae', 'mrae']:
tabs = []
tabs.append(gen_tables_tweet(eval))
tabs.append(gen_tables_uci_multiclass(eval))
tabs.append(gen_tables_lequa(METHODS, 'T1B', eval))
names = ['Tweets', 'UCI-multi', 'LeQua-T1B']
table = make_table(tabs, eval, benchmark_groups=tabs, benchmark_names=names)
save_table(f'./latex/multiclass_{eval}.tex', table)
for eval in ['mae', 'mrae']:
tabs = []
tabs.append(gen_tables_uci_bin(eval))
# print uci-binary with all datasets for the appendix
table = make_table(tabs, eval, benchmark_groups=tabs, benchmark_names=['UCI-binary'])
save_table(f'./latex/ucibinary_{eval}.tex', table)
# print uci-bin compacted plus lequa-T1A for the main body
tabs.append(gen_tables_lequa(BIN_METHODS, 'T1A', eval))
table = make_table(tabs, eval, benchmark_groups=tabs, benchmark_names=['UCI-binary', 'LeQua-T1A'], compact=True)
save_table(f'./latex/binary_{eval}.tex', table)
print("[Tables Done] runing latex")
os.chdir('./latex/')
os.system('pdflatex tables_compact.tex')
os.system('rm tables_compact.aux tables_compact.bbl tables_compact.blg tables_compact.log tables_compact.out tables_compact.dvi')

View File

@ -0,0 +1,107 @@
\documentclass{article}
\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc} % use 8-bit T1 fonts
\usepackage{hyperref} % hyperlinks
\usepackage{url} % simple URL typesetting
\usepackage{booktabs} % professional-quality tables
\usepackage{amsfonts} % blackboard math symbols
\usepackage{nicefrac} % compact symbols for 1/2, etc.
\usepackage{microtype} % microtypography
\usepackage{lipsum}
\usepackage{fancyhdr} % header
\usepackage{graphicx} % graphics
\graphicspath{{media/}} % organize your images and other figures under media/ folder
\usepackage{amsmath}
\usepackage{bm}
\usepackage{tabularx}
\usepackage{color}
\usepackage{colortbl}
\usepackage{xcolor}
\usepackage{lmodern}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\newif\ifdraft
\drafttrue
\newcommand{\juanjo}[1]{\ifdraft{\leavevmode\color{purple}{[JJ]:
{#1}}}\else{\vspace{0ex}}\fi}
\newcommand{\alex}[1]{\ifdraft{\leavevmode\color{violet}{[AM]:
{#1}}}\else{\vspace{0ex}}\fi}
\newcommand{\pablo}[1]{\ifdraft{\leavevmode\color{red}{[PG]:
{#1}}}\else{\vspace{0ex}}\fi}
\title{Tables}
\author{
Alejandro Moreo
}
\begin{document}
\maketitle
\begin{table}[h]
\centering
\caption{Multiclass AE}
\resizebox{\textwidth}{!}{%
\input{multiclass_mae}
}%
\end{table}
\begin{table}[h]
\centering
\caption{Multiclass RAE}
\resizebox{\textwidth}{!}{%
\input{multiclass_mrae}
}%
\end{table}
\begin{table}[h]
\centering
\caption{Binary MAE}
\resizebox{\textwidth}{!}{%
\input{binary_mae}
}%
\end{table}
\begin{table}[h]
\centering
\caption{Binary MRAE}
\resizebox{\textwidth}{!}{%
\input{binary_mrae}
}%
\end{table}
\begin{table}[h]
\centering
\caption{UCI binary full AE}
\resizebox{\textwidth}{!}{%
\input{ucibinary_mae}
}%
\end{table}
\begin{table}[h]
\centering
\caption{UCI binary full RAE}
\resizebox{\textwidth}{!}{%
\input{ucibinary_mrae}
}%
\end{table}
\end{document}

View File

@ -0,0 +1,348 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon
class Table:
VALID_TESTS = [None, "wilcoxon", "ttest"]
def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--', color=True, maxtone=50):
assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
self.benchmarks = np.asarray(benchmarks)
self.benchmark_index = {row:i for i, row in enumerate(benchmarks)}
self.methods = np.asarray(methods)
self.method_index = {col:j for j, col in enumerate(methods)}
self.map = {}
# keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
self._addmap('values', dtype=object)
self.lower_is_better = lower_is_better
self.ttest = ttest
self.prec_mean = prec_mean
self.clean_zero = clean_zero
self.show_std = show_std
self.prec_std = prec_std
self.add_average = average
self.missing = missing
self.missing_str = missing_str
self.color = color
self.maxtone = maxtone
self.touch()
@property
def nbenchmarks(self):
return len(self.benchmarks)
@property
def nmethods(self):
return len(self.methods)
def touch(self):
self._modif = True
def update(self):
if self._modif:
self.compute()
def _getfilled(self):
return np.argwhere(self.map['fill'])
@property
def values(self):
return self.map['values']
def _indexes(self):
return itertools.product(range(self.nbenchmarks), range(self.nmethods))
def _addmap(self, map, dtype, func=None):
self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
if func is None:
return
m = self.map[map]
f = func
indexes = self._indexes() if map == 'fill' else self._getfilled()
for i, j in indexes:
m[i, j] = f(self.values[i, j])
def _addrank(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
col_means = [self.map['mean'][i,j] for j in filled_cols_idx]
ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
if not self.lower_is_better:
ranked_cols_idx = ranked_cols_idx[::-1]
self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx)+1)
def _addcolor(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if filled_cols_idx.size==0:
continue
col_means = [self.map['mean'][i,j] for j in filled_cols_idx]
#col_means = [self.map['rank'][i, j] for j in filled_cols_idx]
minval = min(col_means)
maxval = max(col_means)
for col_idx in filled_cols_idx:
val = self.map['mean'][i,col_idx]
norm = (maxval - minval)
if norm > 0:
normval = (val - minval) / norm
else:
normval = 0.5
if self.lower_is_better:
normval = 1 - normval
normval = np.clip(normval, 0,1)
self.map['color'][i, col_idx] = color_red2green_01(normval, self.maxtone)
def _run_ttest(self, row, col1, col2):
mean1 = self.map['mean'][row, col1]
std1 = self.map['std'][row, col1]
nobs1 = self.map['nobs'][row, col1]
mean2 = self.map['mean'][row, col2]
std2 = self.map['std'][row, col2]
nobs2 = self.map['nobs'][row, col2]
_, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
return p_val
def _run_wilcoxon(self, row, col1, col2):
values1 = self.map['values'][row, col1]
values2 = self.map['values'][row, col2]
try:
_, p_val = wilcoxon(values1, values2)
except ValueError:
p_val = 0
return p_val
def _add_statistical_test(self):
if self.ttest is None:
return
self.some_similar = [False]*self.nmethods
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if len(filled_cols_idx) <= 1:
continue
col_means = [self.map['mean'][i,j] for j in filled_cols_idx]
best_pos = filled_cols_idx[np.argmin(col_means)]
for j in filled_cols_idx:
if j==best_pos:
continue
if self.ttest == 'ttest':
p_val = self._run_ttest(i, best_pos, j)
else:
p_val = self._run_wilcoxon(i, best_pos, j)
pval_outcome = pval_interpretation(p_val)
self.map['ttest'][i, j] = pval_outcome
if pval_outcome != 'Diff':
self.some_similar[j] = True
def compute(self):
self._addmap('fill', dtype=bool, func=lambda x: x is not None)
self._addmap('mean', dtype=float, func=np.mean)
self._addmap('std', dtype=float, func=np.std)
self._addmap('nobs', dtype=float, func=len)
self._addmap('rank', dtype=int, func=None)
self._addmap('color', dtype=object, func=None)
self._addmap('ttest', dtype=object, func=None)
self._addmap('latex', dtype=object, func=None)
self._addrank()
self._addcolor()
self._add_statistical_test()
if self.add_average:
self._addave()
self._modif = False
def _is_column_full(self, col):
return all(self.map['fill'][:, self.method_index[col]])
def _addave(self):
ave = Table(['ave'], self.methods,
lower_is_better=self.lower_is_better,
ttest=self.ttest,
average=False,
missing=self.missing,
missing_str=self.missing_str,
prec_mean=self.prec_mean,
prec_std=self.prec_std,
clean_zero=self.clean_zero,
show_std=self.show_std,
color=self.color,
maxtone=self.maxtone)
for col in self.methods:
values = None
if self._is_column_full(col):
if self.ttest == 'ttest':
# values = np.asarray(self.map['mean'][:, self.method_index[col]])
values = np.concatenate(self.values[:, self.method_index[col]])
else: # wilcoxon
# values = np.asarray(self.map['mean'][:, self.method_index[col]])
values = np.concatenate(self.values[:, self.method_index[col]])
ave.add('ave', col, values)
self.average = ave
def add(self, benchmark, method, values):
if values is not None:
values = np.asarray(values)
if values.ndim==0:
values = values.flatten()
rid, cid = self._coordinates(benchmark, method)
self.map['values'][rid, cid] = values
self.touch()
def get(self, benchmark, method, attr='mean'):
self.update()
assert attr in self.map, f'unknown attribute {attr}'
rid, cid = self._coordinates(benchmark, method)
if self.map['fill'][rid, cid]:
v = self.map[attr][rid, cid]
if v is None or (isinstance(v,float) and np.isnan(v)):
return self.missing
return v
else:
return self.missing
def _coordinates(self, benchmark, method):
assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
assert method in self.method_index, f'method {method} out of range'
rid = self.benchmark_index[benchmark]
cid = self.method_index[method]
return rid, cid
def get_average(self, method, attr='mean'):
self.update()
if self.add_average:
return self.average.get('ave', method, attr=attr)
return None
def get_color(self, benchmark, method):
color = self.get(benchmark, method, attr='color')
if color is None:
return ''
return color
def latex(self, benchmark, method):
self.update()
i,j = self._coordinates(benchmark, method)
if self.map['fill'][i,j] == False:
return self.missing_str
mean = self.map['mean'][i,j]
l = f" {mean:.{self.prec_mean}f}"
if self.clean_zero:
l = l.replace(' 0.', '.')
isbest = self.map['rank'][i,j] == 1
if isbest:
l = "\\textbf{"+l.strip()+"}"
stat = '' if self.ttest is None else '^{\phantom{\ddag}}'
if self.ttest is not None and self.some_similar[j]:
test_label = self.map['ttest'][i,j]
if test_label == 'Sim':
stat = '^{\dag}'
elif test_label == 'Same':
stat = '^{\ddag}'
elif isbest or test_label == 'Diff':
stat = '^{\phantom{\ddag}}'
std = ''
if self.show_std:
std = self.map['std'][i,j]
std = f" {std:.{self.prec_std}f}"
if self.clean_zero:
std = std.replace(' 0.', '.')
std = f"\pm {std:{self.prec_std}}"
if stat!='' or std!='':
l = f'{l}${stat}{std}$'
if self.color:
l += ' ' + self.map['color'][i,j]
return l
def latexTabular(self, benchmark_replace={}, method_replace={}, aslines=False, endl='\\\\\hline'):
lines = []
l = '\multicolumn{1}{c|}{} & '
l += ' & '.join([method_replace.get(col, col) for col in self.methods])
l += ' \\\\\hline'
lines.append(l)
for row in self.benchmarks:
rowname = benchmark_replace.get(row, row)
l = rowname + ' & '
l += self.latexRow(row, endl=endl)
lines.append(l)
if self.add_average:
# l += '\hline\n'
l = '\hline \n \\textit{Average} & '
l += self.latexAverage(endl=endl)
lines.append(l)
if not aslines:
lines='\n'.join(lines)
return lines
def latexRow(self, benchmark, endl='\\\\\hline\n'):
s = [self.latex(benchmark, col) for col in self.methods]
s = ' & '.join(s)
s += ' ' + endl
return s
def latexAverage(self, endl='\\\\\hline\n'):
if self.add_average:
return self.average.latexRow('ave', endl=endl)
def getRankTable(self, prec_mean=0):
t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=prec_mean, average=True, maxtone=self.maxtone, ttest=None)
for rid, cid in self._getfilled():
row = self.benchmarks[rid]
col = self.methods[cid]
t.add(row, col, self.get(row, col, 'rank'))
t.compute()
return t
def dropMethods(self, methods):
drop_index = [self.method_index[m] for m in methods]
new_methods = np.delete(self.methods, drop_index)
new_index = {col:j for j, col in enumerate(new_methods)}
self.map['values'] = self.values[:,np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
self.methods = new_methods
self.method_index = new_index
self.touch()
def pval_interpretation(p_val):
if 0.005 >= p_val:
return 'Diff'
elif 0.05 >= p_val > 0.005:
return 'Sim'
elif p_val > 0.05:
return 'Same'
def color_red2green_01(val, maxtone=50):
if np.isnan(val): return None
assert 0 <= val <= 1, f'val {val} out of range [0,1]'
# rescale to [-1,1]
val = val * 2 - 1
if val < 0:
color = 'red'
tone = maxtone * (-val)
else:
color = 'green'
tone = maxtone * val
return '\cellcolor{' + color + f'!{int(tone)}' + '}'

View File

@ -0,0 +1,82 @@
import numpy as np
from scipy.stats import norm, uniform, expon
from scipy.special import rel_entr
def kld_approx(p, q, resolution=100):
steps = np.linspace(-5, 5, resolution)
integral = 0
for step_i1, step_i2 in zip(steps[:-1], steps[1:]):
base = step_i2-step_i1
middle = (step_i1+step_i2)/2
pi = p.pdf(middle)
qi = q.pdf(middle)
integral += (base * pi*np.log(pi/qi))
return integral
def integrate(f, resolution=10000):
steps = np.linspace(-10, 10, resolution)
integral = 0
for step_i1, step_i2 in zip(steps[:-1], steps[1:]):
base = step_i2 - step_i1
middle = (step_i1 + step_i2) / 2
integral += (base * f(middle))
return integral
def kl_analytic(m1, s1, m2, s2):
return np.log(s2/s1)+ (s1*s1 + (m1-m2)**2) / (2*s2*s2) - 0.5
def montecarlo(p, q, trials=1000000):
xs = p.rvs(trials)
ps = p.pdf(xs)
qs = q.pdf(xs)
return np.mean(np.log(ps/qs))
def montecarlo2(p, q, trials=100000):
#r = norm(-3, scale=5)
r = uniform(-10, 20)
# r = expon(loc=-6)
#r = p
xs = r.rvs(trials)
rs = r.pdf(xs)
print(rs)
ps = p.pdf(xs)+0.0000001
qs = q.pdf(xs)+0.0000001
return np.mean((ps/rs)*np.log(ps/qs))
def wrong(p, q, trials=100000):
xs = np.random.uniform(-10,10, trials)
ps = p.pdf(xs)
qs = q.pdf(xs)
return np.mean(ps*np.log(ps/qs))
p = norm(loc=0, scale=1)
q = norm(loc=1, scale=1)
integral_approx = kld_approx(p, q)
analytic_solution = kl_analytic(p.kwds['loc'], p.kwds['scale'], q.kwds['loc'], q.kwds['scale'])
montecarlo_approx = montecarlo(p, q)
montecarlo_approx2 = montecarlo2(p, q)
wrong_approx = wrong(p, q)
print(f'{analytic_solution=:.10f}')
print()
print(f'{integral_approx=:.10f}')
print(f'integral error = {np.abs(analytic_solution-integral_approx):.10f}')
print()
print(f'{montecarlo_approx=:.10f}')
print(f'montecarlo error = {np.abs(analytic_solution-montecarlo_approx):.10f}')
print()
print(f'{montecarlo_approx2=:.10f}')
print(f'montecarlo2 error = {np.abs(analytic_solution-montecarlo_approx2):.10f}')
print()
print(f'{wrong_approx=:.10f}')
print(f'wrong error = {np.abs(analytic_solution-wrong_approx):.10f}')

View File

@ -0,0 +1,113 @@
import numpy as np
from sklearn.neighbors import KernelDensity
from scipy.stats import norm, multivariate_normal, Covariance
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.neighbors import KDTree
# def CauchySchwarzDivGaussMix(pi_i, mu_i, Lambda_i, tau_i, nu_i, Omega_i):
# Lambda_i_inv =
# def z()
nD=2
mu = [[0.1,0.1], [0.2, 0.3], [1, 1]] # centers
bandwidth = 0.10 # bandwidth and scale (standard deviation)
standard_deviation = bandwidth
variance = standard_deviation**2
mus = np.asarray(mu).reshape(-1,nD)
kde = KernelDensity(bandwidth=bandwidth).fit(mus)
x = [0.5,0.2]
print('with scikit-learn KDE')
prob = np.exp(kde.score_samples(np.asarray(x).reshape(-1,nD)))
print(prob)
# univariate
# N = norm(loc=mu, scale=scale)
# prob = N.pdf(x)
# print(prob)
# multivariate
print('with scipy multivariate normal')
npoints = mus.shape[0]
probs = sum(multivariate_normal(mean=mu_i, cov=variance).pdf(x) for mu_i in mu)
probs /= npoints
print(probs)
print('with scikit learn rbf_kernel')
x = np.asarray(x).reshape(-1,nD)
gamma = 1/(2*variance)
gram = rbf_kernel(mus, x, gamma=gamma)
const = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD)))
gram *= const
print(gram)
print(np.sum(gram)/npoints)
print('with stackoverflow answer')
from scipy.spatial.distance import pdist, cdist, squareform
import scipy
# this is an NxD matrix, where N is the number of items and D its dimensionality
pairwise_sq_dists = cdist(mus, x, 'euclidean')
print(pairwise_sq_dists)
K = np.exp(-pairwise_sq_dists / variance)
print(K)
print(np.sum(K)/npoints)
print("trying with scipy multivariate on more than one instance")
probs = sum(multivariate_normal(mean=x.reshape(-nD), cov=variance).pdf(mus))
probs /= npoints
print(probs)
import sys
sys.exit(0)
# N1 = multivariate_normal(mean=mu[0], cov=variance)
# prob1 = N1.pdf(x)
# N2 = multivariate_normal(mean=mu[1], cov=variance)
# prob2 = N2.pdf(x)
# print(prob1+prob2)
cov = Covariance.from_diagonal([variance, variance])
print(cov.covariance)
precision_matrix = np.asarray([[1/variance, 0],[0, 1/variance]])
print(Covariance.from_precision(precision_matrix).covariance)
print(np.linalg.inv(precision_matrix))
print(np.linalg.inv(cov.covariance))
print('-'*100)
nD=2
mu = np.asarray([[0.1,0.5]]) # centers
bandwidth = 0.10 # bandwidth and scale (standard deviation)
standard_deviation = bandwidth
variance = standard_deviation**2
mus = np.asarray([mu]).reshape(-1,nD)
kde = KernelDensity(bandwidth=bandwidth).fit(mus)
x = np.asarray([0.5,0.2])
prob = np.exp(kde.score_samples(np.asarray(x).reshape(-1,nD)))
print(prob)
probs = sum(multivariate_normal(mean=mu_i, cov=variance).pdf(x) for mu_i in mu) / len(mu)
probs = sum(multivariate_normal(mean=[0,0], cov=1).pdf((x-mu_i)/bandwidth) for mu_i in mu) / len(mu)
probs = sum(multivariate_normal(mean=[0,0], cov=variance).pdf(x-mu_i) for mu_i in mu) / len(mu)
print(probs)
# N1 = multivariate_normal(mean=mu[0], cov=variance)
# prob1 = N1.pdf(x)
# N2 = multivariate_normal(mean=mu[1], cov=variance)
# prob2 = N2.pdf(x)
# print(prob1+prob2)
h=0.1
D=4
print(np.sqrt((4**2) * (5**2)))
print(4*5)

View File

@ -0,0 +1,153 @@
import numpy as np
from scipy.stats import multivariate_normal
from scipy import optimize
def cauchy_schwarz_divergence_kde(L:list, Xte:np.ndarray, bandwidth:float, alpha:np.ndarray):
"""
:param L: a list of np.ndarray (instances x dimensions) with the Li being the instances of class i
:param Xte: an np.ndarray (instances x dimensions)
:param bandwidth: the bandwidth of the kernel
:param alpha: the mixture parameter
:return: the Cauchy-Schwarz divergence between the validation KDE mixture distribution (with mixture parameter
alpha) and the test KDE distribution
"""
n = len(L) # number of classes
K, D = Xte.shape # number of test instances, and number of dimensions
Kinv = 1/K
# the lengths of each block
l = np.asarray([len(Li) for Li in L])
# contains the a_i / l_i
alpha_r = alpha / l
alpha2_r_sum = np.sum(alpha * alpha_r) # contains the sum_i a_i**2 / l_i
h = bandwidth
# we only need kernel evaluations between two Gaussians whose covariance matrices are "scalar matrices" (h**2) I each
cov_mix_scalar = 2*h*h # corresponds to a bandwidth of sqrt(2)*h
# constant
C = ((2*np.pi)**(-D/2))*h**(-D)
Kernel = multivariate_normal(mean=np.zeros(D), cov=cov_mix_scalar)
K0 = Kernel.pdf(np.zeros(D))
def compute_block_E():
kernel_block_E = []
for i,Li in enumerate(L):
acc = 0
for x_ji in Li: #optimize...
for x_k in Xte: #optimize...
acc += Kernel.pdf(x_ji - x_k) #optimize...
kernel_block_E.append(acc)
return np.asarray(kernel_block_E)
def compute_block_F_hash():
# this can be computed entirely at training time
Khash = {}
for a in range(n):
for b in range(l[a]):
for i in range(n):
for j in range(l[i]): # this loop, and index j, can be suppressed by storing the sum across j
Khash[(a,b,i,j)] = Kernel.pdf(L[i][j]-L[a][b])
return Khash
def compute_block_Ktest():
# this can be optimized in several ways, starting by computing only the lower triangular part... and then
# removing the K0 term, which is not needed after that
acc = 0
for x_i in Xte:
for x_j in Xte:
acc += Kernel.pdf(x_i-x_j)
return acc
def compute_block_F():
F = 0
for a in range(n):
tmp_b = 0
for b in range(l[a]):
tmp_i = 0
for i in range(n):
tmp_j = 0
for j in range(l[i]):
tmp_j += Fh[(a, b, i, j)]
tmp_i += (alpha_r[i] * tmp_j)
tmp_b += tmp_i
F += (alpha_r[a] * tmp_b)
return F
E = compute_block_E()
Fh = compute_block_F_hash()
# Ktest = compute_block_Ktest()
F = compute_block_F()
C1 = K*Kinv*Kinv*C
C2 = 2 * np.sum([Kernel.pdf(Xte[k]-Xte[k_p]) for k in range(K) for k_p in range(k)])
partA = -np.log(Kinv * (alpha_r @ E))
partB = 0.5*np.log(C*alpha2_r_sum + F - (K0*alpha2_r_sum))
# partC = 0.5*np.log(Kinv) + 0.5*np.log(C + Kinv*Ktest - K0)
partC = 0.5*np.log(C1+C2)
Dcs = partA + partB + partC
return Dcs
L = [
np.asarray([
[-1,-1,-1]
]),
np.asarray([
[0,0,0],
]),
np.asarray([
[0,0,0.1],
[1,1,1],
[3,3,1],
]),
np.asarray([
[1,0,0]
]),
np.asarray([
[0,1,0]
])
]
Xte = np.asarray(
[[0,0,0],
[0,0,0],
[1,0,0],
[0,1,0]]
)
bandwidth=0.01
alpha=np.asarray([0, 2/4, 0, 1/4, 1/4])
div = cauchy_schwarz_divergence_kde(L, Xte, bandwidth, alpha)
print(div)
def divergence(alpha):
return cauchy_schwarz_divergence_kde(L, Xte, bandwidth, alpha)
# the initial point is set as the uniform distribution
n_classes = len(L)
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
#print('searching for alpha')
r = optimize.minimize(divergence, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
sol = r.x
for x in sol:
print(f'{x:.4f}')
print(cauchy_schwarz_divergence_kde(L, Xte, bandwidth, sol))

View File

@ -0,0 +1,19 @@
import pickle
import os
dataset = 'lequa/T1B'
for metric in ['mae', 'mrae']:
print('metric', metric)
for method in ['KDEy-DMhd4', 'KDEy-DMhd4+', 'KDEy-DMhd4++']:
path = f'/home/moreo/QuaPy/distribution_matching/results/{dataset}/{metric}/{method}.hyper.pkl'
if os.path.exists(path):
obj = pickle.load(open(path, 'rb'))
print(method, obj)
else:
print(f'path {path} does not exist')
print()

View File

@ -0,0 +1,27 @@
from time import time
from sklearn.metrics.pairwise import rbf_kernel
import torch
import numpy as np
def rbf(X, Y):
return X @ Y.T
@torch.compile
def rbf_comp(X, Y):
return X @ Y.T
def measure_time(X, Y, func):
tinit = time()
func(X, Y)
tend = time()
print(f'took {tend-tinit:.3}s')
X = np.random.rand(1000, 100)
Y = np.random.rand(1000, 100)
measure_time(X, Y, rbf)
measure_time(X, Y, rbf_comp)

View File

@ -0,0 +1,20 @@
import pickle
import os
import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
toprint = []
for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
# model selection
train, test = data.train_test
toprint.append(f'{dataset}\t{len(train)}\t{len(test)}\t{data.n_classes}')
print()
for pr in toprint:
print(pr)

View File

@ -0,0 +1,6 @@
import pandas as pd
report = pd.read_csv('../results_lequa_mrae/KDEy-MLE.dataframe')
means = report.mean()
print(f'KDEy-MLE\tLeQua-T1B\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')

View File

@ -0,0 +1,155 @@
import numpy as np
from scipy.stats import norm
EPS=1e-8
LARGE_NUM=1000000
TRIALS=LARGE_NUM
# we compute: D(p||q)
# assumptions:
# q = the reference distribution (in our case: the mixture of KDEs fit on training)
# p = the distribution (in our case: the KDE fit on test)
# N = TRIALS, the number of trials for the Monte Carlo approximation
# n = the number of classes; in this example n=3
# example of f-generator function: the Squared Hellinger Distance
def hd2(u):
v = (np.sqrt(u)-1)
return v*v
# this function estimates the divergence between two (univariate) mixtures by numerical integration
def integrate(p, q, f=hd2, resolution=TRIALS, epsilon=EPS):
xs = np.linspace(-50, 50, resolution)
ps = p.pdf(xs)+epsilon
qs = q.pdf(xs)+epsilon
rs = ps/qs
base = xs[1]-xs[0]
return np.sum(base * f(rs) * qs)
# this is the Monte Carlo approximation as described in the paper,
# i.e., sampling from q and averaging (1/N). In the paper, N points are
# actually "pre-sampled" for each class and then the ones corresponding
# to each class are selected. Here we directly sample only the ones
# that correspond to each class.
def montecarlo(p, q, f=hd2, trials=TRIALS, epsilon=EPS):
xs = q.rvs(trials)
ps = p.pdf(xs)+epsilon
qs = q.pdf(xs)+epsilon
N = trials
return (1/N)*np.sum(f(ps/qs))
# this is the Monte Carlo approximation assuming we use all the N*n points
# (i.e., as if we had sampled from a "q" in which all classes were equally
# likely, 1/3) and then rescale the weights via "importance weighting"
# <-- this is not in the paper, but I like it better than what is there,
# because the step "select the points that correspond to each class" disappears
def montecarlo_importancesampling(p, q, r, f=hd2, trials=TRIALS, epsilon=EPS):
xs = r.rvs(trials) # <- reference distribution (here: uniform class weights)
rs = r.pdf(xs)
ps = p.pdf(xs)+epsilon
qs = q.pdf(xs)+epsilon
N = trials
importance_weight = qs/rs
return (1/N)*np.sum(f(ps/qs)*(importance_weight))
# I have tried to implement the variant that Juanjo proposes, but I think I have
# not understood it well. Perhaps the weight has to be rescaled by 1/3 or something
# like that, but I cannot quite hit on it...
def montecarlo_classweight(p, q, f=hd2, trials=TRIALS, epsilon=EPS):
xs_1 = q.rvs_fromclass(0, trials)
xs_2 = q.rvs_fromclass(1, trials)
xs_3 = q.rvs_fromclass(2, trials)
xs = np.concatenate([xs_1, xs_2, xs_3])
weights = np.asarray([[alpha_i]*trials for alpha_i in q.alphas]).flatten()
ps = p.pdf(xs)+epsilon
qs = q.pdf(xs)+epsilon
N = trials
n = q.n
return (1/(N))*np.sum(weights*f(ps/qs))
class Q:
"""
This class is a mixture of Gaussians.
:param locs: means, one per class
:param scales: standard deviations (stds), one per class
:param alphas: weights, one per class
"""
def __init__(self, locs, scales, alphas):
self.qs = []
for loc, scale in zip(locs, scales):
self.qs.append(norm(loc=loc, scale=scale))
assert np.isclose(np.sum(alphas), 1), 'alphas do not sum up to 1!'
self.alphas = np.asarray(alphas) / np.sum(alphas)
def pdf(self, xs):
q_xs = np.vstack([
q_i.pdf(xs) * alpha_i for q_i, alpha_i in zip(self.qs, self.alphas)
])
v = q_xs.sum(axis=0)
if len(v)==1:
v = v[0]
return v
@property
def n(self):
return len(self.alphas)
def rvs_fromclass(self, inclass, trials):
return self.qs[inclass].rvs(trials)
def rvs(self, trials):
variates = []
added = 0
for i, (q_i, alpha_i) in enumerate(zip(self.qs, self.alphas)):
trials_i = int(trials*alpha_i) if (i < self.n-1) else trials-added
variates_i = q_i.rvs(trials_i)
variates.append(variates_i)
added += len(variates_i)
return np.concatenate(variates)
@property
def locs(self):
return np.asarray([x.kwds['loc'] for x in self.qs])
@property
def scales(self):
return np.asarray([x.kwds['scale'] for x in self.qs])
@classmethod
def change_priors(cls, q, alphas):
assert len(alphas)==len(q.alphas)
return Q(locs=q.locs, scales=q.scales, alphas=alphas)
# test distribution
p = Q(locs=[1, 1.5], scales=[1,1], alphas=[0.8, 0.2])
# training distribution (one mixture component per class)
q = Q(locs=[0, 0.5, 1.2], scales=[1,1,1.5], alphas=[0.1, 0.2, 0.7])
# reference distribution (a copy of the q mixture, with uniform class weights)
r = Q.change_priors(q, alphas=[1/3, 1/3, 1/3])
integral_approx = integrate(p, q)
montecarlo_approx = montecarlo(p, q)
montecarlo_approx2 = montecarlo_importancesampling(p, q, r)
montecarlo_approx3 = montecarlo_classweight(p, q)
print(f'{integral_approx=:.10f}')
print()
print(f'{montecarlo_approx=:.10f}, error={np.abs(integral_approx-montecarlo_approx):.10f}')
print()
print(f'{montecarlo_approx2=:.10f}, error={np.abs(integral_approx-montecarlo_approx2):.10f}')
print()
print(f'{montecarlo_approx3=:.10f}, error={np.abs(integral_approx-montecarlo_approx3):.10f}')

View File

@ -0,0 +1,21 @@
1.- I am not sure whether it would be easier to take r=uniform rather than r=mixture model; it simplifies the sampling a lot and might even produce less error
2.- For now I have KLD and HD:
- for KLD I have not understood whether I have to add the -x + y term
3.- Can the Topsoe be cast as an f-divergence?
The Topsoe seems to be 2 times the Jensen-Shannon divergence, that is,
topsoe(p,q) = kld(p|m) + kld(q|m), with m = (p+q)/2 (see the numeric check below)
4.- Can the Wasserstein be cast as an f-divergence?
5.- In general, what is the relation with the "distances"?
2) implement the auto
- internal optimization of the likelihood [none seems to work well]
- over everything (e.g., the whole training set)?
- independently for each labelled set? (e.g., positives, negatives, neutrals, and test)
- optimization as a GridSearchQ parameter
6) optimize the kernel? optimize the distance?
10) Define a classifier that returns, for each class, a posterior computed as the likelihood under the class-conditional KDE divided
by the likelihood under all the classes (as Juanjo proposes) and plug it into EMD. Do the opposite: re-calibrate with
EMD and plug it into KDEy
11) KDEx?
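Quick numeric check of point 3 (a sketch; p and q are arbitrary discrete distributions):
    import numpy as np
    from scipy.special import rel_entr
    from scipy.spatial.distance import jensenshannon
    p = np.asarray([0.1, 0.6, 0.3])
    q = np.asarray([0.3, 0.3, 0.4])
    m = 0.5 * (p + q)
    topsoe = rel_entr(p, m).sum() + rel_entr(q, m).sum()  # kld(p|m) + kld(q|m)
    js = jensenshannon(p, q) ** 2  # scipy returns the square root of the JS divergence (base e)
    print(np.isclose(topsoe, 2 * js))  # True: topsoe equals 2 times the Jensen-Shannon divergence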

View File

@ -0,0 +1,85 @@
import pickle
import os
import pandas as pd
from distribution_matching.commons import METHODS, new_method, show_results
import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
SEED=1
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
n_bags_val = 250
n_bags_test = 1000
for optim in ['mae', 'mrae']:
result_dir = f'results/tweet/{optim}'
os.makedirs(result_dir, exist_ok=True)
for method in METHODS:
print('Init method', method)
global_result_path = f'{result_dir}/{method}'
if not os.path.exists(global_result_path+'.csv'):
with open(global_result_path+'.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
with open(global_result_path+'.csv', 'at') as csv:
# the four semeval datasets share the same training set, so it is useless to optimize hyperparameters four times;
# this variable indicates whether model selection has already been carried out, so it can be skipped afterwards
semeval_trained = False
for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
print('init', dataset)
local_result_path = global_result_path + '_' + dataset
if os.path.exists(local_result_path+'.dataframe'):
print(f'result file {local_result_path}.dataframe already exists; skipping')
continue
with qp.util.temp_seed(SEED):
is_semeval = dataset.startswith('semeval')
if not is_semeval or not semeval_trained:
param_grid, quantifier = new_method(method)
# model selection
data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=True)
protocol = UPP(data.test, repeats=n_bags_val)
modsel = GridSearchQ(quantifier, param_grid, protocol, refit=False, n_jobs=-1, verbose=1, error=optim)
modsel.fit(data.training)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
pickle.dump(
(modsel.best_params_, modsel.best_score_,),
open(f'{local_result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
quantifier = modsel.best_model()
if is_semeval:
semeval_trained = True
else:
print(f'model selection for {dataset} already done; skipping')
data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=False)
quantifier.fit(data.training)
protocol = UPP(data.test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True)
report.to_csv(f'{local_result_path}.dataframe')
means = report.mean()
csv.write(f'{method}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()
show_results(global_result_path)

View File

@ -0,0 +1,83 @@
import pickle
import os
from distribution_matching.commons import BIN_METHODS, new_method, show_results
import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
SEED = 1
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
n_bags_val = 250
n_bags_test = 1000
for optim in ['mae', 'mrae']:
result_dir = f'results/binary/{optim}'
os.makedirs(result_dir, exist_ok=True)
for method in BIN_METHODS:
print('Init method', method)
global_result_path = f'{result_dir}/{method}'
if not os.path.exists(global_result_path + '.csv'):
with open(global_result_path + '.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
with open(global_result_path + '.csv', 'at') as csv:
for dataset in qp.datasets.UCI_DATASETS:
if dataset in ['acute.a', 'acute.b', 'iris.1']: continue # , 'pageblocks.5', 'spambase', 'wdbc']: continue
print('init', dataset)
local_result_path = global_result_path + '_' + dataset
if os.path.exists(local_result_path + '.dataframe'):
print(f'result file {local_result_path}.dataframe already exists; skipping')
continue
with qp.util.temp_seed(SEED):
param_grid, quantifier = new_method(method, max_iter=3000)
data = qp.datasets.fetch_UCIDataset(dataset)
# model selection
train, test = data.train_test
train, val = train.split_stratified(random_state=SEED)
protocol = UPP(val, repeats=n_bags_val)
modsel = GridSearchQ(
quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error=optim
)
try:
modsel.fit(train)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
pickle.dump(
(modsel.best_params_, modsel.best_score_,),
open(f'{local_result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
quantifier = modsel.best_model()
except Exception:
print('something went wrong... trying to fit the default model')
quantifier.fit(train)
protocol = UPP(test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'],
verbose=True)
report.to_csv(f'{local_result_path}.dataframe')
means = report.mean()
csv.write(f'{method}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()
show_results(global_result_path)

View File

@ -0,0 +1,87 @@
import pickle
import os
from sklearn.linear_model import LogisticRegression
from distribution_matching.commons import METHODS, new_method, show_results
import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
SEED = 1
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 500
qp.environ['N_JOBS'] = -1
n_bags_val = 250
n_bags_test = 1000
for optim in ['mae', 'mrae']:
result_dir = f'results/ucimulti/{optim}'
os.makedirs(result_dir, exist_ok=True)
for method in METHODS:
print('Init method', method)
global_result_path = f'{result_dir}/{method}'
if not os.path.exists(global_result_path + '.csv'):
with open(global_result_path + '.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
with open(global_result_path + '.csv', 'at') as csv:
for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
print('init', dataset)
local_result_path = global_result_path + '_' + dataset
if os.path.exists(local_result_path + '.dataframe'):
print(f'result file {local_result_path}.dataframe already exists; skipping')
continue
with qp.util.temp_seed(SEED):
param_grid, quantifier = new_method(method, max_iter=3000)
data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
# model selection
train, test = data.train_test
train, val = train.split_stratified(random_state=SEED)
protocol = UPP(val, repeats=n_bags_val)
modsel = GridSearchQ(
quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error=optim
)
try:
modsel.fit(train)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
pickle.dump(
(modsel.best_params_, modsel.best_score_,),
open(f'{local_result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
quantifier = modsel.best_model()
except Exception:
print('something went wrong... trying to fit the default model')
quantifier.fit(train)
# quantifier = qp.method.aggregative.CC(LogisticRegression()).fit(train)
protocol = UPP(test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'],
verbose=True, n_jobs=-1)
report.to_csv(f'{local_result_path}.dataframe')
means = report.mean()
csv.write(f'{method}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()
show_results(global_result_path)

View File

@ -0,0 +1,244 @@
from scipy.sparse import csc_matrix, csr_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer, CountVectorizer
import numpy as np
from joblib import Parallel, delayed
import sklearn
import math
from scipy.stats import t
class ContTable:
def __init__(self, tp=0, tn=0, fp=0, fn=0):
self.tp=tp
self.tn=tn
self.fp=fp
self.fn=fn
def get_d(self): return self.tp + self.tn + self.fp + self.fn
def get_c(self): return self.tp + self.fn
def get_not_c(self): return self.tn + self.fp
def get_f(self): return self.tp + self.fp
def get_not_f(self): return self.tn + self.fn
def p_c(self): return (1.0*self.get_c())/self.get_d()
def p_not_c(self): return 1.0-self.p_c()
def p_f(self): return (1.0*self.get_f())/self.get_d()
def p_not_f(self): return 1.0-self.p_f()
def p_tp(self): return (1.0*self.tp) / self.get_d()
def p_tn(self): return (1.0*self.tn) / self.get_d()
def p_fp(self): return (1.0*self.fp) / self.get_d()
def p_fn(self): return (1.0*self.fn) / self.get_d()
def tpr(self):
c = 1.0*self.get_c()
return self.tp / c if c > 0.0 else 0.0
def fpr(self):
_c = 1.0*self.get_not_c()
return self.fp / _c if _c > 0.0 else 0.0
def __ig_factor(p_tc, p_t, p_c):
den = p_t * p_c
if den != 0.0 and p_tc != 0:
return p_tc * math.log(p_tc / den, 2)
else:
return 0.0
def information_gain(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + \
__ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) +\
__ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()) + \
__ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())
def squared_information_gain(cell):
return information_gain(cell)**2
def posneg_information_gain(cell):
ig = information_gain(cell)
if cell.tpr() < cell.fpr():
return -ig
else:
return ig
def pos_information_gain(cell):
if cell.tpr() < cell.fpr():
return 0
else:
return information_gain(cell)
def pointwise_mutual_information(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())
def gss(cell):
return cell.p_tp()*cell.p_tn() - cell.p_fp()*cell.p_fn()
def chi_square(cell):
den = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
if den==0.0: return 0.0
num = gss(cell)**2
return num / den
def conf_interval(xt, n):
if n>30:
z2 = 3.84145882069 # norm.ppf(0.5+0.95/2.0)**2
else:
z2 = t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2
p = (xt + 0.5 * z2) / (n + z2)
amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / (n + z2))
return p, amplitude
def strength(minPosRelFreq, minPos, maxNeg):
if minPos > maxNeg:
return math.log(2.0 * minPosRelFreq, 2.0)
else:
return 0.0
# set cancel_features=True to allow some features to be weighted as 0 (as in the original article);
# however, for some extremely imbalanced datasets this caused all documents to be weighted as 0
def conf_weight(cell, cancel_features=False):
c = cell.get_c()
not_c = cell.get_not_c()
tp = cell.tp
fp = cell.fp
pos_p, pos_amp = conf_interval(tp, c)
neg_p, neg_amp = conf_interval(fp, not_c)
min_pos = pos_p-pos_amp
max_neg = neg_p+neg_amp
den = (min_pos + max_neg)
minpos_relfreq = min_pos / (den if den != 0 else 1)
str_tplus = strength(minpos_relfreq, min_pos, max_neg)
if str_tplus == 0 and not cancel_features:
return 1e-20
return str_tplus
def get_tsr_matrix(cell_matrix, tsr_score_function):
nC = len(cell_matrix)
nF = len(cell_matrix[0])
tsr_matrix = [[tsr_score_function(cell_matrix[c,f]) for f in range(nF)] for c in range(nC)]
return np.array(tsr_matrix)
def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
tp_ = len(positive_document_indexes & feature_document_indexes)
fp_ = len(feature_document_indexes - positive_document_indexes)
fn_ = len(positive_document_indexes - feature_document_indexes)
tn_ = nD - (tp_ + fp_ + fn_)
return ContTable(tp=tp_, tn=tn_, fp=fp_, fn=fn_)
def category_tables(feature_sets, category_sets, c, nD, nF):
return [feature_label_contingency_table(category_sets[c], feature_sets[f], nD) for f in range(nF)]
def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
"""
Computes the nC x nF supervised matrix M where Mcf is the 4-cell contingency table for feature f and class c.
Efficiency O(nF x nC x log(S)) where S is the sparse factor
"""
nD, nF = coocurrence_matrix.shape
nD2, nC = label_matrix.shape
if nD != nD2:
raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
(coocurrence_matrix.shape,label_matrix.shape))
def nonzero_set(matrix, col):
return set(matrix[:, col].nonzero()[0])
if isinstance(coocurrence_matrix, csr_matrix):
coocurrence_matrix = csc_matrix(coocurrence_matrix)
feature_sets = [nonzero_set(coocurrence_matrix, f) for f in range(nF)]
category_sets = [nonzero_set(label_matrix, c) for c in range(nC)]
cell_matrix = Parallel(n_jobs=n_jobs, backend="threading")(delayed(category_tables)(feature_sets, category_sets, c, nD, nF) for c in range(nC))
return np.array(cell_matrix)
class TSRweighting(BaseEstimator,TransformerMixin):
"""
Supervised Term Weighting function based on any Term Selection Reduction (TSR) function (e.g., information gain,
chi-square, etc.) or, more generally, on any function that could be computed on the 4-cell contingency table for
each category-feature pair.
The supervised_4cell_matrix (a CxF matrix containing the 4-cell contingency tables
for each category-feature pair) can be pre-computed (e.g., during the feature selection phase) and passed as an
argument.
When C>1, i.e., in multiclass scenarios, a global_policy is used in order to determine a single feature-score which
informs about its relevance. Accepted policies include "max" (takes the max score across categories), "ave" and "wave"
(take the average, or weighted average, across all categories -- weights correspond to the class prevalence), and "sum"
(which sums all category scores).
"""
def __init__(self, tsr_function, global_policy='max', supervised_4cell_matrix=None, sublinear_tf=True, norm='l2', min_df=3, n_jobs=-1):
if global_policy not in ['max', 'ave', 'wave', 'sum']: raise ValueError('Global policy should be in {"max", "ave", "wave", "sum"}')
self.tsr_function = tsr_function
self.global_policy = global_policy
self.supervised_4cell_matrix = supervised_4cell_matrix
self.sublinear_tf=sublinear_tf
self.norm=norm
self.min_df = min_df
self.n_jobs=n_jobs
def fit(self, X, y):
self.count_vectorizer = CountVectorizer(min_df=self.min_df)
X = self.count_vectorizer.fit_transform(X)
self.tf_vectorizer = TfidfTransformer(
norm=None, use_idf=False, smooth_idf=False, sublinear_tf=self.sublinear_tf).fit(X)
if len(y.shape) == 1:
y = np.expand_dims(y, axis=1)
nD, nC = y.shape
nF = len(self.tf_vectorizer.get_feature_names_out())
if self.supervised_4cell_matrix is None:
self.supervised_4cell_matrix = get_supervised_matrix(X, y, n_jobs=self.n_jobs)
else:
if self.supervised_4cell_matrix.shape != (nC, nF): raise ValueError("Shape of supervised information matrix is inconsistent with X and y")
tsr_matrix = get_tsr_matrix(self.supervised_4cell_matrix, self.tsr_function)
if self.global_policy == 'ave':
self.global_tsr_vector = np.average(tsr_matrix, axis=0)
elif self.global_policy == 'wave':
category_prevalences = [sum(y[:,c])*1.0/nD for c in range(nC)]
self.global_tsr_vector = np.average(tsr_matrix, axis=0, weights=category_prevalences)
elif self.global_policy == 'sum':
self.global_tsr_vector = np.sum(tsr_matrix, axis=0)
elif self.global_policy == 'max':
self.global_tsr_vector = np.amax(tsr_matrix, axis=0)
return self
def fit_transform(self, X, y):
return self.fit(X,y).transform(X)
def transform(self, X):
if not hasattr(self, 'global_tsr_vector'): raise NameError('TSRweighting: transform method called before fit.')
X = self.count_vectorizer.transform(X)
tf_X = self.tf_vectorizer.transform(X).toarray()
weighted_X = np.multiply(tf_X, self.global_tsr_vector)
if self.norm is not None and self.norm!='none':
weighted_X = sklearn.preprocessing.normalize(weighted_X, norm=self.norm, axis=1, copy=False)
return csr_matrix(weighted_X)
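# Hypothetical usage sketch (the tiny corpus and labels are illustrative only):
#
#   tsr = TSRweighting(tsr_function=information_gain, global_policy='max', min_df=1)
#   Xw = tsr.fit_transform(['good film', 'bad film', 'great plot'], np.asarray([1, 0, 1]))
#   # Xw is a csr_matrix of (sublinear) tf scores, each re-weighted by its feature's TSR score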

View File

0
laboratory/main.py Normal file
View File

View File

@ -0,0 +1,62 @@
from sklearn.linear_model import LogisticRegression
import os
import sys
import pandas as pd
import quapy as qp
from method.aggregative import DistributionMatching
from distribution_matching.method_kdey import KDEy
from protocol import UPP
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
method = 'KDE'
param = 0.1
div = 'topsoe'
method_identifier = f'{method}_{param}_{div}'
# generates tuples (dataset, method, method_name)
# (the dataset is needed for methods that process the dataset differently)
def gen_methods():
for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True)
if method == 'KDE':
kdey = KDEy(LogisticRegression(), divergence=div, bandwidth=param, engine='sklearn')
yield data, kdey, method_identifier
elif method == 'DM':
dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=param)
yield data, dm, method_identifier
else:
raise NotImplementedError('unknown method')
os.makedirs('results', exist_ok=True)
result_path = f'results/{method_identifier}.csv'
if os.path.exists(result_path):
print('Result already exists. Nothing to do')
sys.exit(0)
with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for data, quantifier, quant_name in gen_methods():
quantifier.fit(data.training)
protocol = UPP(data.test, repeats=100)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True)
means = report.mean()
csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
csv.flush()
df = pd.read_csv(result_path, sep='\t')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
print(pv)

148
laboratory/method_dxs.py Normal file
View File

@ -0,0 +1,148 @@
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
import quapy as qp
from data import LabelledCollection
import numpy as np
from laboratory.custom_vectorizers import *
from protocol import APP
from quapy.method.aggregative import _get_divergence, HDy, DistributionMatching
from quapy.method.base import BaseQuantifier
from scipy import optimize
import pandas as pd
# TODO: explore the bernoulli (term presence/absence) variant
# TODO: explore the multinomial (term frequency) variant
# TODO: explore the multinomial + length normalization variant
# TODO: consolidate the TSR-variant (e.g., using information gain) variant;
# - works better with the idf?
# - works better with length normalization?
# - etc
class DxS(BaseQuantifier):
def __init__(self, vectorizer=None, divergence='topsoe'):
self.vectorizer = vectorizer
self.divergence = divergence
# def __as_distribution(self, instances):
# return np.asarray(instances.sum(axis=0) / instances.sum()).flatten()
def __as_distribution(self, instances):
dist = instances.sum(axis=0) / instances.sum()
return np.asarray(dist).flatten()
def fit(self, data: LabelledCollection):
text_instances, labels = data.Xy
if self.vectorizer is not None:
text_instances = self.vectorizer.fit_transform(text_instances, y=labels)
distributions = []
for class_i in data.classes_:
distributions.append(self.__as_distribution(text_instances[labels == class_i]))
self.validation_distribution = np.asarray(distributions)
return self
def quantify(self, text_instances):
if self.vectorizer is not None:
text_instances = self.vectorizer.transform(text_instances)
test_distribution = self.__as_distribution(text_instances)
divergence = _get_divergence(self.divergence)
n_classes, n_feats = self.validation_distribution.shape
def match(prev):
prev = np.expand_dims(prev, axis=0)
mixture_distribution = (prev @ self.validation_distribution).flatten()
return divergence(test_distribution, mixture_distribution)
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 250
qp.environ['N_JOBS'] = -1
min_df = 10
# dataset = 'imdb'
repeats = 10
error = 'mae'
div = 'topsoe'
# generates tuples (dataset, method, method_name)
# (the dataset is needed for methods that process the dataset differently)
def gen_methods():
for dataset in qp.datasets.REVIEWS_SENTIMENT_DATASETS:
data = qp.datasets.fetch_reviews(dataset, tfidf=False)
bernoulli_vectorizer = CountVectorizer(min_df=min_df, binary=True)
dxs = DxS(divergence=div, vectorizer=bernoulli_vectorizer)
yield data, dxs, 'DxS-Bernoulli'
multinomial_vectorizer = CountVectorizer(min_df=min_df, binary=False)
dxs = DxS(divergence=div, vectorizer=multinomial_vectorizer)
yield data, dxs, 'DxS-multinomial'
tf_vectorizer = TfidfVectorizer(sublinear_tf=False, use_idf=False, min_df=min_df, norm=None)
dxs = DxS(divergence=div, vectorizer=tf_vectorizer)
yield data, dxs, 'DxS-TF'
logtf_vectorizer = TfidfVectorizer(sublinear_tf=True, use_idf=False, min_df=min_df, norm=None)
dxs = DxS(divergence=div, vectorizer=logtf_vectorizer)
yield data, dxs, 'DxS-logTF'
tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm=None)
dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
yield data, dxs, 'DxS-TFIDF'
tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm='l2')
dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
yield data, dxs, 'DxS-TFIDF-l2'
tsr_vectorizer = TSRweighting(tsr_function=information_gain, min_df=min_df, norm='l2')
dxs = DxS(divergence=div, vectorizer=tsr_vectorizer)
yield data, dxs, 'DxS-TFTSR-l2'
data = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=min_df)
hdy = HDy(LogisticRegression())
yield data, hdy, 'HDy'
dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=5)
yield data, dm, 'DM-5b'
dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=10)
yield data, dm, 'DM-10b'
result_path = 'results.csv'
with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for data, quantifier, quant_name in gen_methods():
quantifier.fit(data.training)
report = qp.evaluation.evaluation_report(quantifier, APP(data.test, repeats=repeats), error_metrics=['mae','mrae'], verbose=True)
means = report.mean()
csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
df = pd.read_csv(result_path, sep='\t')
# print(df)
pv = df.pivot_table(index='Method', columns="Dataset", values=["MAE", "MRAE"])
print(pv)

View File

@ -1,3 +1,15 @@
Change Log 0.1.8
----------------
- A conceptual error in MedianSweep and MedianSweep2 has been fixed. The error consisted in computing all TPRs and
FPRs and reporting their medians; the adjustment then operated on these two single values. Instead, the original
method proposed by G. Forman comes down to generating one prevalence prediction for each (TPR, FPR) pair, and then
computing the median of these predictions (see the sketch after this list).
- qp.evaluation now runs in parallel <improve, remove or fix the ongoing error, put at the qp. level instead of
qp.evaluation because I don't like the qp.evaluation.evaluate thing>
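A minimal numeric sketch of the corrected order of operations, using made-up TPR/FPR values (not taken
from the library):

    import numpy as np

    tprs = np.asarray([0.9, 0.7, 0.8])   # hypothetical true-positive rates at different thresholds
    fprs = np.asarray([0.1, 0.3, 0.4])   # hypothetical false-positive rates at the same thresholds
    observed_pos = 0.5                    # positive prevalence estimated by classify-and-count

    # previous (incorrect) behaviour: take the medians first, then adjust once
    old_estimate = (observed_pos - np.median(fprs)) / (np.median(tprs) - np.median(fprs))  # 0.4

    # corrected behaviour: adjust once per (tpr, fpr) pair, then take the median of the adjustments
    per_pair = (observed_pos - fprs) / (tprs - fprs)
    new_estimate = np.median(np.clip(per_pair, 0, 1))  # 0.5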
Change Log 0.1.7
----------------

View File

@ -59,7 +59,7 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi
elif isinstance(k, float):
if not (0 < k < 1):
raise ValueError('wrong value for val_split: the proportion of validation documents must be in (0,1)')
return self.fit_cv(X, y)
return self.fit_tr_val(X, y)
def fit_cv(self, X, y):
"""

View File

@ -176,7 +176,7 @@ class NeuralClassifierTrainer:
self.classes_ = train.classes_
opt = self.trainer_hyperparams
checkpoint = self.checkpointpath
self.reset_net_params(self.vocab_size, train.n_classes)
self.reset_net_params(self.vocab_size, train.arange_classes)
train_generator = TorchDataset(train.instances, train.labels).asDataloader(
opt['batch_size'], shuffle=True, pad_length=opt['padding_length'], device=opt['device'])

View File

@ -123,7 +123,7 @@ class LabelledCollection:
return self.uniform_sampling_index(size, random_state=random_state)
if len(prevs) == self.n_classes - 1:
prevs = prevs + (1 - sum(prevs),)
assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
assert len(prevs) == self.n_classes, f'unexpected number of prevalences (found {len(prevs)}, expected {self.n_classes})'
assert sum(prevs) == 1, f'prevalences ({prevs}) wrong range (sum={sum(prevs)})'
# Decide how many instances should be taken for each class in order to satisfy the requested prevalence
@ -269,6 +269,7 @@ class LabelledCollection:
test = self.sampling_from_index(right)
return training, test
def __add__(self, other):
"""
Returns a new :class:`LabelledCollection` as the union of this collection with another collection.
@ -319,7 +320,8 @@ class LabelledCollection:
else:
raise NotImplementedError('unsupported operation for collection types')
labels = np.concatenate([lc.labels for lc in args])
classes = np.unique(labels).sort()
classes = np.unique(labels)
classes.sort()
return LabelledCollection(instances, labels, classes=classes)
@property

View File

@ -1,3 +1,6 @@
import numpy as np
def warn(*args, **kwargs):
pass
import warnings
@ -7,7 +10,8 @@ import zipfile
from os.path import join
import pandas as pd
import scipy
import pickle
from ucimlrepo import fetch_ucirepo
from quapy.data.base import Dataset, LabelledCollection
from quapy.data.preprocessing import text2tfidf, reduce_columns
from quapy.data.reader import *
@ -45,6 +49,12 @@ UCI_DATASETS = ['acute.a', 'acute.b',
'wine-q-red', 'wine-q-white',
'yeast']
UCI_MULTICLASS_DATASETS = ['dry-bean',
'wine-quality',
'academic-success',
'digits',
'letter']
LEQUA2022_TASKS = ['T1A', 'T1B', 'T2A', 'T2B']
_TXA_SAMPLE_SIZE = 250
@ -204,7 +214,7 @@ def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False
:return: a :class:`quapy.data.base.Dataset` instance
"""
data = fetch_UCILabelledCollection(dataset_name, data_home, verbose)
return Dataset(*data.split_stratified(1 - test_split, random_state=0))
return Dataset(*data.split_stratified(1 - test_split, random_state=0), name=dataset_name)
def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
@ -548,11 +558,110 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
data.stats()
return data
def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float):
df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False)
def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
"""
Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`, as used in
`Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_
and
`Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
Dynamic ensemble selection for quantification tasks.
Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
The datasets do not come with a predefined train-test split (see :meth:`fetch_UCIMulticlassLabelledCollection` for
further information on how to use these collections), and so a train-test split is generated at the desired proportion.
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`
:param dataset_name: a dataset name
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)
:param test_split: proportion of documents to be included in the test set. The rest makes up the training set
:param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
:return: a :class:`quapy.data.base.Dataset` instance
"""
data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose)
return Dataset(*data.split_stratified(1 - test_split, random_state=0))
def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
"""
Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in
`Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_
and
`Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
Dynamic ensemble selection for quantification tasks.
Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation
protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5-fold cross-validation.
This can be reproduced by using :meth:`quapy.data.base.Dataset.kFCV`, e.g.:
>>> import quapy as qp
>>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection("dry-bean")
>>> for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
>>> ...
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`
:param dataset_name: a dataset name
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)
:param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
:return: a :class:`quapy.data.base.LabelledCollection` instance
"""
assert dataset_name in UCI_MULTICLASS_DATASETS, \
f'Name {dataset_name} does not match any known dataset from the UCI Machine Learning datasets repository (multiclass). ' \
f'Valid ones are {UCI_MULTICLASS_DATASETS}'
if data_home is None:
data_home = get_quapy_home()
identifiers = {"dry-bean": 602,
"wine-quality": 186,
"academic-success": 697,
"digits": 80,
"letter": 59}
full_names = {"dry-bean": "Dry Bean Dataset",
"wine-quality": "Wine Quality",
"academic-success": "Predict students' dropout and academic success",
"digits": "Optical Recognition of Handwritten Digits",
"letter": "Letter Recognition"
}
identifier = identifiers[dataset_name]
fullname = full_names[dataset_name]
print(f'Loading UCI Multiclass {dataset_name} ({fullname})')
file = join(data_home, 'uci_multiclass', dataset_name + '.pkl')
if os.path.exists(file):
with open(file, 'rb') as file:
data = pickle.load(file)
else:
data = fetch_ucirepo(id=identifier)
X, y = data['data']['features'].to_numpy(), data['data']['targets'].to_numpy().squeeze()
classes = np.sort(np.unique(y))
y = np.searchsorted(classes, y)
data = LabelledCollection(X, y)
os.makedirs(os.path.dirname(file), exist_ok=True)
with open(file, 'wb') as file:
pickle.dump(data, file)
data.stats()
return data
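# A hedged usage sketch of the two fetchers above (doctest-style; dataset name taken from UCI_MULTICLASS_DATASETS):
#
# >>> import quapy as qp
# >>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection('dry-bean')
# >>> training, test = collection.split_stratified(0.7, random_state=0)
# >>> dataset = qp.datasets.fetch_UCIMulticlassDataset('dry-bean', test_split=0.3)  # equivalent ready-made split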
def _df_replace(df, col, repl={'yes': 1, 'no': 0}, astype=float):
df[col] = df[col].apply(lambda x: repl[x]).astype(astype, copy=False)
def fetch_lequa2022(task, data_home=None):
"""
Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.

View File

@ -1,3 +1,4 @@
from copy import deepcopy
from typing import Union, Callable, Iterable
import numpy as np
from tqdm import tqdm
@ -11,7 +12,9 @@ def prediction(
model: BaseQuantifier,
protocol: AbstractProtocol,
aggr_speedup: Union[str, bool] = 'auto',
verbose=False):
verbose=False,
verbose_error=None,
n_jobs=1):
"""
Uses a quantification model to generate predictions for the samples generated via a specific protocol.
This function is central to all evaluation processes, and is endowed with an optimization to speed-up the
@ -32,6 +35,11 @@ def prediction(
in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is
convenient or not. Set to False to deactivate.
:param verbose: boolean, show or not information in stdout
:param verbose_error: an evaluation function to be used to display intermediate results if verbose=True (default None)
:param n_jobs: number of parallel workers. Default is 1 so that, unless explicitly requested, the evaluation phase
is carried out on a single core; that is to say, this parameter does not inspect the environment variable
N_JOBS by default. This is often convenient, since parallelizing the evaluation entails an overhead for cloning
the objects in different workers that is frequently not worth the effort.
:return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape
`(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample
"""
@ -61,16 +69,44 @@ def prediction(
if apply_optimization:
pre_classified = model.classify(protocol.get_labelled_collection().instances)
protocol_with_predictions = protocol.on_preclassified_instances(pre_classified)
return __prediction_helper(model.aggregate, protocol_with_predictions, verbose)
return __prediction_helper(model, protocol_with_predictions, True, verbose, verbose_error, n_jobs)
else:
return __prediction_helper(model.quantify, protocol, verbose)
return __prediction_helper(model, protocol, False, verbose, verbose_error, n_jobs)
def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=False):
def __delayed_prediction(args):
quantifier, aggregate, sample_instances, sample_prev = args
quantifier = deepcopy(quantifier)
quant_fn = quantifier.aggregate if aggregate else quantifier.quantify
predicted = quant_fn(sample_instances)
return sample_prev, predicted
def __prediction_helper(quantifier, protocol: AbstractProtocol, aggregate: bool, verbose=False, verbose_error=None, n_jobs=1):
true_prevs, estim_prevs = [], []
for sample_instances, sample_prev in tqdm(protocol(), total=protocol.total(), desc='predicting') if verbose else protocol():
estim_prevs.append(quantification_fn(sample_instances))
true_prevs.append(sample_prev)
ongoing_errors = []
if verbose:
pbar = tqdm(protocol(), total=protocol.total(), desc='predicting')
if n_jobs==1:
quant_fn = quantifier.aggregate if aggregate else quantifier.quantify
for sample_instances, sample_prev in pbar if verbose else protocol():
predicted = quant_fn(sample_instances)
estim_prevs.append(predicted)
true_prevs.append(sample_prev)
if verbose and verbose_error is not None:
err = verbose_error(sample_prev, predicted)
ongoing_errors.append(err)
pbar.set_description(f'predicting: ongoing error={np.mean(ongoing_errors):.5f}')
else:
if verbose:
print('parallelizing prediction')
outputs = qp.util.parallel(
__delayed_prediction,
((quantifier, aggregate, sample_X, sample_p) for (sample_X, sample_p) in (pbar if verbose else protocol())),
seed=qp.environ.get('_R_SEED', None),
n_jobs=n_jobs
)
true_prevs, estim_prevs = list(zip(*outputs))
true_prevs = np.asarray(true_prevs)
estim_prevs = np.asarray(estim_prevs)
@ -82,7 +118,7 @@ def evaluation_report(model: BaseQuantifier,
protocol: AbstractProtocol,
error_metrics: Iterable[Union[str,Callable]] = 'mae',
aggr_speedup: Union[str, bool] = 'auto',
verbose=False):
verbose=False, verbose_error=None, n_jobs=1):
"""
Generates a report (a pandas' DataFrame) containing information of the evaluation of the model as according
to a specific protocol and in terms of one or more evaluation metrics (errors).
@ -100,12 +136,19 @@ def evaluation_report(model: BaseQuantifier,
in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is
convenient or not. Set to False to deactivate.
:param verbose: boolean, show or not information in stdout
:param verbose_error: an evaluation function to be used to display intermediate results if verbose=True (default None)
:param n_jobs: number of parallel workers. Default is 1 so that, unless explicitly requested, the evaluation phase
is carried out on a single core; that is to say, this parameter does not inspect the environment variable
N_JOBS by default. This is often convenient, since parallelizing the evaluation entails an overhead for cloning
the objects in different workers that is frequently not worth the effort.
:return: a pandas' DataFrame containing the columns 'true-prev' (the true prevalence of each sample),
'estim-prev' (the prevalence estimated by the model for each sample), and as many columns as error metrics
have been indicated, each displaying the score in terms of that metric for every sample.
"""
true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, verbose=verbose)
true_prevs, estim_prevs = prediction(
model, protocol, aggr_speedup=aggr_speedup, verbose=verbose, verbose_error=verbose_error, n_jobs=n_jobs
)
return _prevalence_report(true_prevs, estim_prevs, error_metrics)
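# A sketch of how the new parameters might be combined (quantifier and protocol are placeholders;
# qp.error.mae is QuaPy's mean absolute error function):
#
# >>> report = evaluation_report(quantifier, APP(test_collection, repeats=100),
# ...                            error_metrics=['mae', 'mrae'],
# ...                            verbose=True, verbose_error=qp.error.mae, n_jobs=4)
# >>> print(report.mean())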

View File

@ -90,11 +90,32 @@ def TopsoeDistance(P, Q, epsilon=1e-20):
:param P: real-valued array-like of shape `(k,)` representing a discrete distribution
:param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
:param epsilon: small value to smooth the distributions for numerical stability
:return: float
"""
return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + Q*np.log((2*Q+epsilon)/(P+Q+epsilon)))
def CauchySchwarz(P, Q, epsilon=1e-20):
"""
Cauchy-Schwarz divergence between two (discretized) distributions `P` and `Q`.
The Cauchy-Schwarz divergence for two discrete distributions of `k` bins is defined as:
.. math::
CS(P,Q) = -\\log \\frac{ \\sum_{i=1}^k p_i q_i }{
\\sqrt{ \\left( \\sum_{i=1}^k p^2_i \\right) \\left( \\sum_{i=1}^k q^2_i \\right) }
}
:param P: real-valued array-like of shape `(k,)` representing a discrete distribution
:param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
:param epsilon: small value to smooth the distributions for numerical stability
:return: float
"""
# smooth the distributions without modifying the callers' arrays in place
P = P + epsilon
Q = Q + epsilon
return - np.log(sum(P * Q) / np.sqrt(sum(P ** 2) * sum(Q ** 2)))
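# Quick sanity check for the divergence above (illustrative values): identical distributions give a value
# close to 0 (up to the epsilon smoothing), and more dissimilar ones a larger value.
#
# >>> P = np.asarray([0.25, 0.25, 0.25, 0.25])
# >>> CauchySchwarz(P, P)                                   # ~0.0
# >>> CauchySchwarz(P, np.asarray([0.7, 0.1, 0.1, 0.1]))    # ~0.37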
def uniform_prevalence_sampling(n_classes, size=1):
"""
Implements the `Kraemer algorithm <http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf>`_
@ -276,3 +297,24 @@ def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08):
return False
return True
def optim_minimize(loss, n_classes):
"""
Searches for the optimal prevalence values, i.e., an `n_classes`-dimensional vector of the (`n_classes`-1)-simplex
that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy's
SLSQP routine.
:param loss: (callable) the function to minimize
:param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector
:return: (ndarray) the best prevalence vector found
"""
from scipy import optimize
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(loss, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
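# A small illustrative use of optim_minimize: recover a known prevalence vector by minimizing the
# L2 distance to it (the target below is made up for the example).
#
# >>> target = np.asarray([0.2, 0.3, 0.5])
# >>> optim_minimize(lambda prev: np.linalg.norm(prev - target), n_classes=3)
# array([0.2, 0.3, 0.5])  # approximately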

View File

@ -604,10 +604,17 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
def _get_divergence(divergence: Union[str, Callable]):
if isinstance(divergence, str):
if divergence=='HD':
divergence = divergence.lower()
if divergence=='hd':
return F.HellingerDistance
elif divergence=='topsoe':
return F.TopsoeDistance
elif divergence.lower()=='cs':
return F.CauchySchwarz
elif divergence.lower()=='l2':
return lambda a,b: np.linalg.norm(a-b)
elif divergence.lower()=='l1':
return lambda a,b: np.linalg.norm(a-b, ord=1)
else:
raise ValueError(f'unknown divergence {divergence}')
elif callable(divergence):
@ -770,7 +777,9 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
"""
Trains the classifier (if requested) and generates the validation distributions out of the training data.
The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
channels (a channel is a description, in the form of a histogram, of a specific class -- there are as many channels
as classes, although in the binary case a single channel suffices, since the other one is fully determined by it),
and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
is the fraction of instances with a value in the `k`-th bin.
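For instance (illustrative shapes only), with 3 classes, 3 channels, and 8 bins, `V` has shape `(3, 3, 8)`,
and `V[i, j]` is the 8-bin histogram of `P(Y=j|X=x)` computed over the training instances labelled with class `i`.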
@ -819,7 +828,7 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
@ -1085,7 +1094,7 @@ class T50(ThresholdOptimization):
Threshold Optimization variant for :class:`ACC` as proposed by
`Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
`Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
for the threshold that makes `tpr` cosest to 0.5.
for the threshold that makes `tpr` closest to 0.5.
The goal is to bring improved stability to the denominator of the adjustment.
:param classifier: a sklearn's Estimator that generates a classifier
@ -1173,7 +1182,7 @@ class MS(ThresholdOptimization):
super().__init__(classifier, val_split)
def _condition(self, tpr, fpr) -> float:
pass
return True
def _optimize_threshold(self, y, probabilities):
tprs = []
@ -1184,9 +1193,26 @@ class MS(ThresholdOptimization):
TP, FP, FN, TN = self._compute_table(y, y_)
tpr = self._compute_tpr(TP, FP)
fpr = self._compute_fpr(FP, TN)
tprs.append(tpr)
fprs.append(fpr)
return np.median(tprs), np.median(fprs)
if self._condition(tpr, fpr):
tprs.append(tpr)
fprs.append(fpr)
return tprs, fprs
def aggregate(self, classif_predictions):
prevs_estim = self.cc.aggregate(classif_predictions)
positive_prevs = []
for tpr, fpr in zip(self.tpr, self.fpr):
if tpr - fpr > 0:
acc = np.clip((prevs_estim[1] - fpr) / (tpr - fpr), 0, 1)
positive_prevs.append(acc)
if len(positive_prevs) > 0:
adjusted_positive_prev = np.median(positive_prevs)
adjusted_prevs_estim = np.array((1 - adjusted_positive_prev, adjusted_positive_prev))
return adjusted_prevs_estim
else:
return prevs_estim
class MS2(MS):
@ -1209,19 +1235,9 @@ class MS2(MS):
def __init__(self, classifier: BaseEstimator, val_split=0.4):
super().__init__(classifier, val_split)
def _optimize_threshold(self, y, probabilities):
tprs = [0, 1]
fprs = [0, 1]
candidate_thresholds = np.unique(probabilities[:, 1])
for candidate_threshold in candidate_thresholds:
y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]]
TP, FP, FN, TN = self._compute_table(y, y_)
tpr = self._compute_tpr(TP, FP)
fpr = self._compute_fpr(FP, TN)
if (tpr - fpr) > 0.25:
tprs.append(tpr)
fprs.append(fpr)
return np.median(tprs), np.median(fprs)
def _condition(self, tpr, fpr) -> float:
return (tpr - fpr) > 0.25
ClassifyAndCount = CC

View File

@ -32,7 +32,7 @@ class QuaNetTrainer(BaseQuantifier):
>>> qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
>>>
>>> # the text classifier is a CNN trained by NeuralClassifierTrainer
>>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
>>> cnn = CNNnet(dataset.vocabulary_size, dataset.arange_classes)
>>> classifier = NeuralClassifierTrainer(cnn, device='cuda')
>>>
>>> # train QuaNet (QuaNet is an alias to QuaNetTrainer)

View File

@ -54,6 +54,7 @@ class GridSearchQ(BaseQuantifier):
self.__check_error(error)
assert isinstance(protocol, AbstractProtocol), 'unknown protocol'
def _sout(self, msg):
if self.verbose:
print(f'[{self.__class__.__name__}:{self.model.__class__.__name__}]: {msg}')
@ -88,7 +89,7 @@ class GridSearchQ(BaseQuantifier):
hyper = [dict({k: val[i] for i, k in enumerate(params_keys)}) for val in itertools.product(*params_values)]
self._sout(f'starting model selection with {self.n_jobs =}')
#pass a seed to parallel so it is set in clild processes
# pass a seed to parallel so it is set in child processes
scores = qp.util.parallel(
self._delayed_eval,
((params, training) for params in hyper),
@ -223,7 +224,7 @@ def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfol
for train, test in data.kFCV(nfolds=nfolds, random_state=random_state):
quantifier.fit(train)
fold_prev = quantifier.quantify(test.X)
rel_size = len(test.X)/len(data)
rel_size = len(test)/len(data)
total_prev += fold_prev*rel_size
return total_prev

View File

@ -11,7 +11,7 @@ import quapy as qp
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 200
plt.rcParams['font.size'] = 18
plt.rcParams['font.size'] = 12
def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
@ -259,7 +259,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
if method_order is None:
method_order = method_names
method_order = np.unique(method_names)
_set_colors(ax, n_methods=len(method_order))
@ -330,8 +330,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
if show_legend:
fig.legend(loc='lower center',
bbox_to_anchor=(1, 0.5),
ncol=(len(method_names)+1)//2)
bbox_to_anchor=(0.9, 0.2),
ncol=1)
_save_or_show(savepath)