diff --git a/NewMethods/fgsld/__init__.py b/NewMethods/fgsld/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/NewMethods/fgsld/fglsd_test.py b/NewMethods/fgsld/fglsd_test.py index d02f07a..c8a098c 100644 --- a/NewMethods/fgsld/fglsd_test.py +++ b/NewMethods/fgsld/fglsd_test.py @@ -25,13 +25,13 @@ method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], [] for model, model_name in [ (CC(cls), 'CC'), # (FakeFGLSD(cls, nbins=20, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-20'), - (FakeFGLSD(cls, nbins=11, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-11'), + #(FakeFGLSD(cls, nbins=11, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-11'), #(FakeFGLSD(cls, nbins=8, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-8'), #(FakeFGLSD(cls, nbins=6, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-6'), (FakeFGLSD(cls, nbins=5, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-5'), #(FakeFGLSD(cls, nbins=4, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-4'), - (FakeFGLSD(cls, nbins=3, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-3'), -# (FakeFGLSD(cls, nbins=1, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-1'), + #(FakeFGLSD(cls, nbins=3, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-3'), + (FakeFGLSD(cls, nbins=1, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-1'), # (FakeFGLSD(cls, nbins=3, isomerous=False, recompute_bins=False), 'FGSLD-isometric-sta-3'), (EMQ(cls), 'SLD'), ]: diff --git a/NewMethods/fgsld/plot_fglsd.png b/NewMethods/fgsld/plot_fglsd.png index a48f09f..bc0de3f 100644 Binary files a/NewMethods/fgsld/plot_fglsd.png and b/NewMethods/fgsld/plot_fglsd.png differ diff --git a/NewMethods/methods.py b/NewMethods/methods.py deleted file mode 100644 index b47927d..0000000 --- a/NewMethods/methods.py +++ /dev/null @@ -1,174 +0,0 @@ -import numpy as np -from sklearn.base import BaseEstimator -from sklearn.decomposition import PCA -from sklearn.preprocessing import StandardScaler - -import quapy as qp -from typing import Union - -from quapy.data import LabelledCollection -from quapy.method.base import BaseQuantifier, BinaryQuantifier -from quapy.method.aggregative import PACC, EMQ, HDy -import quapy.functional as F -from tqdm import tqdm -from scipy.sparse import issparse, csr_matrix -import scipy - - -class PACCSLD(PACC): - """ - This method combines the EMQ improved posterior probabilities with PACC. - Note: the posterior probabilities are re-calibrated with EMQ only during prediction, and not also during fit since, - for PACC, the validation split is known to have the same prevalence as the training set (this is because the split - is stratified) and thus the posterior probabilities should not be re-calibrated for a different prior (it actually - happens to degrades performance). - """ - - def fit(self, data: qp.data.LabelledCollection, fit_learner=True, val_split:Union[float, int, qp.data.LabelledCollection]=0.4): - self.train_prevalence = F.prevalence_from_labels(data.labels, data.n_classes) - return super(PACCSLD, self).fit(data, fit_learner, val_split) - - def aggregate(self, classif_posteriors): - priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon=1e-4) - return super(PACCSLD, self).aggregate(posteriors) - - -class HDySLD(HDy): - """ - This method combines the EMQ improved posterior probabilities with HDy. 
- Note: [same as PACCSLD] - """ - def fit(self, data: qp.data.LabelledCollection, fit_learner=True, - val_split: Union[float, int, qp.data.LabelledCollection] = 0.4): - self.train_prevalence = F.prevalence_from_labels(data.labels, data.n_classes) - return super(HDySLD, self).fit(data, fit_learner, val_split) - - def aggregate(self, classif_posteriors): - priors, posteriors = EMQ.EM(self.train_prevalence, classif_posteriors, epsilon=1e-4) - return super(HDySLD, self).aggregate(posteriors) - - - -class AveragePoolQuantification(BinaryQuantifier): - def __init__(self, learner, sample_size, trials, n_components=-1, zscore=False): - self.learner = learner - self.sample_size = sample_size - self.trials = trials - - self.do_zscore = zscore - self.zscore = StandardScaler() if self.do_zscore else None - - self.do_pca = n_components>0 - self.pca = PCA(n_components) if self.do_pca else None - - def fit(self, data: LabelledCollection): - training, validation = data.split_stratified(train_prop=0.7) - - X, y = [], [] - - nprevpoints = F.get_nprevpoints_approximation(self.trials, data.n_classes) - for sample in tqdm( - training.artificial_sampling_generator(self.sample_size, n_prevalences=nprevpoints, repeats=1), - desc='generating averages' - ): - X.append(sample.instances.mean(axis=0)) - y.append(sample.prevalence()[1]) - while len(X) < self.trials: - sample = training.sampling(self.sample_size, F.uniform_simplex_sampling(data.n_classes)) - X.append(sample.instances.mean(axis=0)) - y.append(sample.prevalence()) - X = np.asarray(np.vstack(X)) - y = np.asarray(y) - - if self.do_pca: - X = self.pca.fit_transform(X) - print(X.shape) - - if self.do_zscore: - X = self.zscore.fit_transform(X) - - print('training regressor...') - self.regressor = self.learner.fit(X, y) - - # correction at 0: - print('getting corrections...') - X0 = np.asarray(np.vstack([validation.sampling(self.sample_size, 0., shuffle=False).instances.mean(axis=0) for _ in range(100)])) - X1 = np.asarray(np.vstack([validation.sampling(self.sample_size, 1., shuffle=False).instances.mean(axis=0) for _ in range(100)])) - - if self.do_pca: - X0 = self.pca.transform(X0) - X1 = self.pca.transform(X1) - - if self.do_zscore: - X0 = self.zscore.transform(X0) - X1 = self.zscore.transform(X1) - - self.correction_0 = self.regressor.predict(X0).mean() - self.correction_1 = self.regressor.predict(X1).mean() - - print('correction-0', self.correction_0) - print('correction-1', self.correction_1) - print('done') - - def quantify(self, instances): - ave = np.asarray(instances.mean(axis=0)) - - if self.do_pca: - ave = self.pca.transform(ave) - if self.do_zscore: - ave = self.zscore.transform(ave) - phat = self.regressor.predict(ave).item() - phat = np.clip((phat-self.correction_0)/(self.correction_1-self.correction_0), 0, 1) - return np.asarray([1-phat, phat]) - - def set_params(self, **parameters): - self.learner.set_params(**parameters) - - def get_params(self, deep=True): - return self.learner.get_params(deep=deep) - - -class WinnowOrthogonal(BaseEstimator): - - def __init__(self): - pass - - def fit(self, X, y): - self.classes_ = np.asarray(sorted(np.unique(y))) - w1 = np.asarray(X[y == 0].mean(axis=0)).flatten() - w2 = np.asarray(X[y == 1].mean(axis=0)).flatten() - diff = w2 - w1 - orth = np.ones_like(diff) - orth[0] = -diff[1:].sum() / diff[0] - orth /= np.linalg.norm(orth) - self.w = orth - self.b = w1.dot(orth) - return self - - def decision_function(self, X): - if issparse(X): - Z = X.dot(csr_matrix(self.w).T).toarray().flatten() - return Z - self.b - 
else: - return np.matmul(X, self.w) - self.b - - def predict(self, X): - return 1 * (self.decision_function(X) > 0) - - def split(self, X, y): - s = self.predict(X) - X0a = X[np.logical_and(y == 0, s == 0)] - X0b = X[np.logical_and(y == 0, s == 1)] - X1a = X[np.logical_and(y == 1, s == 0)] - X1b = X[np.logical_and(y == 1, s == 1)] - y0a = np.zeros(X0a.shape[0], dtype=np.int) - y0b = np.zeros(X0b.shape[0], dtype=np.int) - y1a = np.ones(X1a.shape[0], dtype=np.int) - y1b = np.ones(X1b.shape[0], dtype=np.int) - return X0a, X0b, X1a, X1b, y0a, y0b, y1a, y1b - - def get_params(self): - return {} - - def set_params(self, **params): - pass diff --git a/TODO.txt b/TODO.txt index 5baa937..6ff9e9c 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,16 +1,55 @@ +Packaging: +========================================== Documentation with sphinx +Document methods with paper references +allow for "pip install" +unit-tests + +New features: +========================================== Add NAE, NRAE Add "measures for evaluating ordinal"? -Document methods with paper references +Add datasets for topic. +Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top? + +Current issues: +========================================== In binary quantification (hp, kindle, imdb) we used F1 in the minority class (which in kindle and hp happens to be the negative class). This is not covered in this new implementation, in which the binary case is not treated as such, but as an instance of single-label with 2 labels. Check -Add classnames to LabelledCollection ? +Add classnames to LabelledCollection? This should improve visualization of reports Add automatic reindex of class labels in LabelledCollection (currently, class indexes should be ordered and with no gaps) -Add datasets for topic. +OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers +Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed + +Improvements: +========================================== Clarify whether QuaNet is an aggregative method or not. Explore the hyperparameter "number of bins" in HDy Rename EMQ to SLD ? +Parallelize the kFCV in ACC and PACC? +Parallelize model selection trainings +We might want to think of (improving and) adding the class Tabular (it is defined and used on branch tweetsent). A more + recent version is in the project ql4facct. This class is meant to generate latex tables from results (highligting + best results, computing statistical tests, colouring cells, producing rankings, producing averages, etc.). Trying + to generate tables is typically a bad idea, but in this specific case we do have pretty good control of what an + experiment looks like. (Do we want to abstract experimental results? this could be useful not only for tables but + also for plots). + +Checks: +========================================== How many times is the system of equations for ACC and PACC not solved? How many times is it clipped? Do they sum up to one always? -Parallelize the kFCV in ACC and PACC +Re-check how hyperparameters from the quantifier and hyperparameters from the classifier (in aggregative quantifiers) + is handled. In scikit-learn the hyperparameters from a wrapper method are indicated directly whereas the hyperparams + from the internal learner are prefixed with "estimator__". 
In QuaPy, combinations having to do with the classifier + can be computed at the begining, and then in an internal loop the hyperparams of the quantifier can be explored, + passing fit_learner=False. +Re-check Ensembles. As for now, they are strongly tied to aggregative quantifiers. +Re-think the environment variables. Maybe add new ones (like, for example, parameters for the plots) +Do we want to wrap prevalences (currently simple np.ndarray) as a class? This might be convenient for some interfaces + (e.g., for specifying artificial prevalences in samplings, for printing them -- currently supported through + F.strprev(), etc.). This might however add some overload, and prevent/difficult post processing with numpy. +Would be nice to get a better integration with sklearn. + + diff --git a/plot_example.py b/plot_example.py deleted file mode 100644 index 346455e..0000000 --- a/plot_example.py +++ /dev/null @@ -1,48 +0,0 @@ -from sklearn.model_selection import GridSearchCV -import numpy as np -import quapy as qp -from sklearn.linear_model import LogisticRegression - -sample_size = 500 -qp.environ['SAMPLE_SIZE'] = sample_size - - - -def gen_data(): - - data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5) - - models = [ - qp.method.aggregative.CC, - qp.method.aggregative.ACC, - qp.method.aggregative.PCC, - qp.method.aggregative.PACC, - qp.method.aggregative.HDy, - qp.method.aggregative.EMQ, - qp.method.meta.ECC, - qp.method.meta.EACC, - qp.method.meta.EHDy, - ] - - method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], [] - for Quantifier in models: - print(f'training {Quantifier.__name__}') - lr = LogisticRegression(max_iter=1000, class_weight='balanced') - # lr = GridSearchCV(lr, param_grid={'C':np.logspace(-3,3,7)}, n_jobs=-1) - model = Quantifier(lr).fit(data.training) - true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction( - model, data.test, sample_size, n_repetitions=20, n_prevpoints=11) - - method_names.append(Quantifier.__name__) - true_prevs.append(true_prev) - estim_prevs.append(estim_prev) - tr_prevs.append(data.training.prevalence()) - - return method_names, true_prevs, estim_prevs, tr_prevs - -method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('./plots/plot_data.pkl', gen_data) - -qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=11, savepath='./plots/err_drift.png') -qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots/bin_diag.png') -qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png') -qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, nbins=11, savepath='./plots/bin_bias_bin.png') diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 15a3921..79d0bbf 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -10,7 +10,7 @@ from sklearn.model_selection import StratifiedKFold import pandas as pd -from data.base import Dataset, LabelledCollection +from quapy.data.base import Dataset, LabelledCollection from quapy.data.preprocessing import text2tfidf, reduce_columns from quapy.data.reader import * from quapy.util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource diff --git a/quapy/model_selection.py b/quapy/model_selection.py index f3e9b18..feeb14d 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -5,7 +5,7 @@ from typing import Union, Callable import quapy as qp import quapy.functional as F -from data.base import LabelledCollection 
+from quapy.data.base import LabelledCollection from quapy.evaluation import artificial_sampling_prediction from quapy.method.aggregative import BaseQuantifier diff --git a/quapy/plot.py b/quapy/plot.py index 0f5a0aa..ff93237 100644 --- a/quapy/plot.py +++ b/quapy/plot.py @@ -83,21 +83,21 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N binwidth = 1/nbins data = {} for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs): - true_prev = true_prev[:,pos_class] - estim_prev = estim_prev[:,pos_class] + true_prev = true_prev[:, pos_class] + estim_prev = estim_prev[:, pos_class] data[method] = [] - inds = np.digitize(true_prev, bins, right=True) + inds = np.digitize(true_prev, bins[1:], right=True) for ind in range(len(bins)): selected = inds==ind data[method].append(estim_prev[selected] - true_prev[selected]) nmethods = len(method_names) boxwidth = binwidth/(nmethods+4) - for i,bin in enumerate(bins[:-1]): + for i,bin in enumerate(bins): boxdata = [data[method][i] for method in method_names] positions = [bin+(i*boxwidth)+2*boxwidth for i,_ in enumerate(method_names)] - box = boxplot(boxdata, showmeans=False, positions=positions, widths = boxwidth, sym='+', patch_artist=True) + box = boxplot(boxdata, showmeans=False, positions=positions, widths=boxwidth, sym='+', patch_artist=True) for boxid in range(len(method_names)): c = colormap.colors[boxid%len(colormap.colors)] setp(box['fliers'][boxid], color=c, marker='+', markersize=3., markeredgecolor=c) @@ -110,7 +110,7 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N major_xticks_positions.append(b) minor_xticks_positions.append(b + binwidth / 2) major_xticks_labels.append('') - minor_xticks_labels.append(f'[{bins[i]:.2f}-{bins[i + 1]:.2f})') + minor_xticks_labels.append(f'[{bins[i]:.2f}-{bins[i + 1]:.2f}' + (')' if i < len(bins)-2 else ']')) ax.set_xticks(major_xticks_positions) ax.set_xticks(minor_xticks_positions, minor=True) ax.set_xticklabels(major_xticks_labels) diff --git a/test.py b/test.py deleted file mode 100644 index 3d664b3..0000000 --- a/test.py +++ /dev/null @@ -1,222 +0,0 @@ -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import GridSearchCV -from sklearn.svm import LinearSVC, LinearSVR -import quapy as qp -import quapy.functional as F -import sys -import numpy as np - -from NewMethods.methods import AveragePoolQuantification -from classification.methods import PCALR -from data import Dataset -from method.meta import EPACC -from quapy.model_selection import GridSearchQ -from tqdm import tqdm -import pandas as pd - -sample_size=100 -qp.environ['SAMPLE_SIZE'] = sample_size - -np.random.seed(0) - -nfolds=5 -nrepeats=1 - -df = pd.DataFrame(columns=['dataset', 'method', 'mse']) -for datasetname in qp.datasets.UCI_DATASETS: - collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False) - scores = [] - pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats) - for data in pbar: - pbar.set_description(f'{data.name}') - # learner = GridSearchCV(LogisticRegression(class_weight='balanced'), param_grid={'C': np.logspace(-3,3,7)}, n_jobs=-1) - learner = LogisticRegression(class_weight='balanced') - # model = qp.method.aggregative.CC(learner) - model = qp.method.meta.EHDy(learner, size=30, red_size=15, verbose=False) - model.fit(data.training) - err = qp.evaluation.artificial_sampling_eval(model, data.test, sample_size, n_prevpoints=101, n_jobs=-1, - 
error_metric='mse', verbose=False) - scores.append(err) - - score = np.mean(scores) - df = df.append({ - 'dataset': datasetname, - 'method': model.__class__.__name__, - 'mse': score - }, ignore_index=True) - print(df) - -sys.exit(0) - - - -#param_grid = {'C': np.logspace(-3,3,7), 'class_weight': ['balanced', None]} -param_grid = {'C': np.logspace(0,3,4), 'class_weight': ['balanced']} -max_evaluations = 500 - -sample_size = qp.environ['SAMPLE_SIZE'] -binary = False -svmperf_home = './svm_perf_quantification' - -if binary: - #dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5) - dataset = qp.datasets.fetch_UCIDataset('german', verbose=True) - #qp.data.preprocessing.index(dataset, inplace=True) - -else: - dataset = qp.datasets.fetch_twitter('gasp', for_model_selection=True, min_df=5, pickle=True) - #dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3) - -print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}') - - -# training a quantifier -# learner = LogisticRegression(max_iter=1000) -#model = qp.method.aggregative.ClassifyAndCount(learner) -# model = qp.method.aggregative.AdjustedClassifyAndCount(learner) -# model = qp.method.aggregative.ProbabilisticClassifyAndCount(learner) -# model = qp.method.aggregative.ProbabilisticAdjustedClassifyAndCount(learner) -# model = qp.method.aggregative.HellingerDistanceY(learner) -# model = qp.method.aggregative.ExpectationMaximizationQuantifier(learner) -# model = qp.method.aggregative.ExplicitLossMinimisationBinary(svmperf_home, loss='q', C=100) -# model = qp.method.aggregative.SVMQ(svmperf_home, C=1) - -#learner = PCALR() -#learner = NeuralClassifierTrainer(CNNnet(dataset.vocabulary_size, dataset.n_classes)) -#print(learner.get_params()) -#model = qp.method.meta.QuaNet(learner, sample_size, device='cpu') - -#learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1) -#learner = LogisticRegression(max_iter=1000) -# model = qp.method.aggregative.ClassifyAndCount(learner) - -param_mod_sel = { - 'sample_size': 100, - 'n_prevpoints': 21, - 'n_repetitions': 5, - 'verbose': False -} -common = { - 'max_sample_size': 50, - 'n_jobs': -1, - 'param_grid': {'C': np.logspace(0,2,2), 'class_weight': ['balanced']}, - 'param_mod_sel': param_mod_sel, - 'val_split': 0.4, - 'min_pos': 10, - 'size':6, - 'red_size':3 -} - -# hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection -# will be skipped (by setting hyperparameters to None) -model = EPACC(LogisticRegression(max_iter=100), optim='mrae', policy='mrae', **common) - -""" -Problemas: -- La interfaz es muy fea, hay que conocer practicamente todos los detalles así que no ahorra nada con respecto a crear - un objeto con otros anidados dentro -- El fit genera las prevalences random, y esto hace que despues de la model selection, un nuevo fit tire todo el trabajo - hecho. -- El fit de un GridSearcQ tiene dentro un best_estimator, pero después de la model selection, hacer fit otra vez sobre - este objeto no se limita a re-entrenar el modelo con los mejores parámetros, sino que inicia una nueva búsqueda - en modo grid search. -- Posible solución (no vale): sería hacer directamente model selection con el benchmark final, aunque esto haría que los hyper- - parámetros se buscasen en un conjunto diferente del resto de models.... -- Posible solución: - - Elegir las prevalences en init - - -- Problema: el parámetro val_split es muy ambiguo en todo el framework. 
Por ejemplo, en EPACC podría ser un float que, - en el caso de un GridSearchQ podría referir al split de validación para los hyperparámetros o al split que usa PACC - para encontrar los parámetros... -""" - -# regressor = LinearSVR(max_iter=10000) -# param_grid = {'C': np.logspace(-1,3,5)} -# model = AveragePoolQuantification(regressor, sample_size, trials=5000, n_components=500, zscore=False) - -# model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae', -# sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1) -#model = qp.method.aggregative.ClassifyAndCount(learner) - -# model = qp.method.meta.QuaNet(PCALR(n_components=100, max_iter=1000), -# sample_size=100, -# patience=10, -# tr_iter_per_poch=500, va_iter_per_poch=100, #lstm_nlayers=2, lstm_hidden_size=64, -# ff_layers=[500, 250, 50], - # checkpointdir='./checkpoint', device='cuda') - -if qp.isbinary(model) and not qp.isbinary(dataset): - model = qp.method.aggregative.OneVsAll(model) - - -# Model fit and Evaluation on the test data -# ---------------------------------------------------------------------------- - -print(f'fitting model {model.__class__.__name__}') -#train, val = dataset.training.split_stratified(0.6) -#model.fit(train, val_split=val) -qp.SAMPLE=1 -qp.environ['SAMPLE_SIZE']=2 -model.fit(dataset.training) - - - - - -# estimating class prevalences -# print('quantifying') -# prevalences_estim = model.quantify(dataset.test.instances) -# prevalences_true = dataset.test.prevalence() -# -# evaluation (one single prediction) -# error = qp.error.mae(prevalences_true, prevalences_estim) -# -# print(f'Evaluation in test (1 eval)') -# print(f'true prevalence {F.strprev(prevalences_true)}') -# print(f'estim prevalence {F.strprev(prevalences_estim)}') -# print(f'mae={error:.3f}') - - -# Model fit and Evaluation according to the artificial sampling protocol -# ---------------------------------------------------------------------------- - -n_prevpoints = F.get_nprevpoints_approximation(combinations_budget=max_evaluations, n_classes=dataset.n_classes) -n_evaluations = F.num_prevalence_combinations(n_prevpoints, dataset.n_classes) -print(f'the prevalence interval [0,1] will be split in {n_prevpoints} prevalence points for each class, so that\n' - f'the requested maximum number of sample evaluations ({max_evaluations}) is not exceeded.\n' - f'For the {dataset.n_classes} classes this dataset has, this will yield a total of {n_evaluations} evaluations.') - -true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(model, dataset.test, sample_size, n_prevpoints) - -#qp.error.SAMPLE_SIZE = sample_size -print(f'Evaluation according to the artificial sampling protocol ({len(true_prev)} evals)') -for error in qp.error.QUANTIFICATION_ERROR: - score = error(true_prev, estim_prev) - print(f'{error.__name__}={score:.5f}') - -sys.exit(0) -# Model selection and Evaluation according to the artificial sampling protocol -# ---------------------------------------------------------------------------- - -model_selection = GridSearchQ(model, - param_grid=param_grid, - sample_size=sample_size, - eval_budget=max_evaluations//10, - error='mae', - refit=True, - verbose=True, - timeout=60*60) - -model = model_selection.fit(dataset.training, val_split=0.3) -#model = model_selection.fit(train, validation=val) -print(f'Model selection: best_params = {model_selection.best_params_}') -print(f'param scores:') -for params, score in model_selection.param_scores_.items(): - print(f'\t{params}: {score:.5f}') - -true_prev, estim_prev 
= qp.evaluation.artificial_sampling_prediction(model, dataset.test, sample_size, n_prevpoints) - -print(f'After model selection: Evaluation according to the artificial sampling protocol ({len(true_prev)} evals)') -for error in qp.error.QUANTIFICATION_ERROR: - score = error(true_prev, estim_prev) - print(f'{error.__name__}={score:.5f}') \ No newline at end of file
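
Note on the EMQ recalibration used by the removed PACCSLD/HDySLD classes: their docstrings describe feeding "EMQ-improved posterior probabilities" into PACC/HDy aggregation, recalibrating only at prediction time. For reference, below is a minimal NumPy sketch of that recalibration step (the Saerens-Latinne-Decaestecker EM procedure behind EMQ/SLD). The function name and signature are illustrative only, not part of QuaPy's API.

```python
import numpy as np

def em_recalibrate(train_priors, posteriors, epsilon=1e-4, max_iter=1000):
    """Saerens-Latinne-Decaestecker EM: re-estimates the test-set priors and
    rescales the classifier's posteriors accordingly (the core of EMQ/SLD)."""
    Ptr = np.asarray(train_priors, dtype=float)     # (n_classes,) training prevalence
    Px = np.asarray(posteriors, dtype=float)        # (n_samples, n_classes) posteriors under Ptr
    qs = Ptr.copy()                                 # current estimate of the test priors
    for _ in range(max_iter):
        ps = Px * (qs / Ptr)                        # E-step: reweight posteriors by the prior ratio
        ps /= ps.sum(axis=1, keepdims=True)
        new_qs = ps.mean(axis=0)                    # M-step: new prior estimate
        converged = np.abs(new_qs - qs).mean() < epsilon
        qs = new_qs
        if converged:
            break
    return qs, ps
```

In the deleted classes this corresponds to `EMQ.EM(self.train_prevalence, classif_posteriors, epsilon=1e-4)`, after which the rescaled posteriors are passed to the parent class's `aggregate`.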
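
Note on the TODO entry about hyperparameter handling: it suggests fixing the classifier's hyperparameters in an outer loop and exploring the quantifier's own hyperparameters in an inner loop with `fit_learner=False`, mirroring sklearn's `estimator__` convention without retraining the learner each time. A self-contained toy sketch of that idea follows; the `ThresholdCC` class, the synthetic data, and the error measure are made up for illustration and this is not QuaPy's `GridSearchQ`.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid, train_test_split

class ThresholdCC:
    """Toy classify-and-count quantifier with one quantifier-level hyperparameter."""
    def __init__(self, learner, threshold=0.5):
        self.learner, self.threshold = learner, threshold

    def fit(self, X, y, fit_learner=True):
        if fit_learner:                              # the expensive step: train the classifier
            self.learner.fit(X, y)
        return self                                  # quantifier-level params need no retraining

    def quantify(self, X):
        pos = (self.learner.predict_proba(X)[:, 1] >= self.threshold).mean()
        return np.array([1 - pos, pos])

# synthetic binary data, only to make the example runnable
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
y = (X[:, 0] + rng.normal(size=1000) > 0).astype(int)
Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=0.4, random_state=0)
true_prev = np.array([1 - yva.mean(), yva.mean()])

best_err, best_params = np.inf, None
for cls_params in ParameterGrid({'C': [0.1, 1.0, 10.0]}):           # outer loop: classifier params
    q = ThresholdCC(LogisticRegression(max_iter=1000, **cls_params)).fit(Xtr, ytr)
    for q_params in ParameterGrid({'threshold': [0.4, 0.5, 0.6]}):  # inner loop: quantifier params
        q.threshold = q_params['threshold']
        q.fit(Xtr, ytr, fit_learner=False)                          # learner is not retrained
        err = np.abs(q.quantify(Xva) - true_prev).mean()            # absolute error on prevalences
        if err < best_err:
            best_err, best_params = err, {**cls_params, **q_params}

print(best_err, best_params)
```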
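
Note on the `binary_bias_bins` hunk in `quapy/plot.py`: it changes `np.digitize(true_prev, bins, right=True)` to `np.digitize(true_prev, bins[1:], right=True)`. A small self-contained check of the difference, assuming five equally spaced edges purely for illustration (the actual edge construction lies outside the hunk):

```python
import numpy as np

edges = np.linspace(0., 1., 6)              # illustrative edges: [0, .2, .4, .6, .8, 1]
x = np.array([0.0, 0.1, 0.2, 0.95, 1.0])    # e.g. true prevalence values

np.digitize(x, edges, right=True)           # -> [0, 1, 1, 5, 5]: 6 groups, group 0 holds only exact 0.0
np.digitize(x, edges[1:], right=True)       # -> [0, 0, 0, 4, 4]: 5 groups, one per interval
```

Dropping the first edge makes the lowest interval include its left endpoint, so every value in [0, 1] falls into one of nbins groups indexed 0..nbins-1 rather than leaving a degenerate extra group at zero.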