diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index ced7dd9..34dbb14 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -1,6 +1,10 @@
 Change Log 0.1.8
 ----------------
 
+- Added Kernel Density Estimation methods (KDEyML, KDEyCS, KDEyHD) as proposed in the paper:
+  Moreo, A., González, P., & del Coz, J. J. Kernel Density Estimation for Multiclass Quantification.
+  arXiv preprint arXiv:2401.00490, 2024
+
 - Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8 these quantifiers try to solve the system
   of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact solution
   does sometimes not exist. In cases like this, quapy < 0.1.8 resorted to CC for providing a plausible solution.
@@ -21,7 +25,7 @@ Change Log 0.1.8
     - classification datasets
     - Python API available
 
-- New IFCB (plankton) dataset added. See fetch_IFCB.
+- New IFCB (plankton) dataset added (thanks to Pablo González). See qp.datasets.fetch_IFCB.
 
 - Added new evaluation measures NAE, NRAE
 
diff --git a/quapy/method/meta.py b/quapy/method/meta.py
index 25248cd..3a98dd0 100644
--- a/quapy/method/meta.py
+++ b/quapy/method/meta.py
@@ -119,22 +119,18 @@ class MedianEstimator(BinaryQuantifier):
 
     def _delayed_fit_classifier(self, args):
         with qp.util.temp_seed(self.random_state):
-            print('enter job')
             cls_params, training = args
             model = deepcopy(self.base_quantifier)
             model.set_params(**cls_params)
             predictions = model.classifier_fit_predict(training, predict_on=model.val_split)
-            print('exit job')
             return (model, predictions)
 
     def _delayed_fit_aggregation(self, args):
         with qp.util.temp_seed(self.random_state):
-            print('\tenter job')
             ((model, predictions), q_params), training = args
             model = deepcopy(model)
             model.set_params(**q_params)
             model.aggregation_fit(predictions, training)
-            print('\texit job')
             return model
 
@@ -153,7 +149,6 @@ class MedianEstimator(BinaryQuantifier):
                 asarray=False
             )
         else:
-            print('only 1')
             model = self.base_quantifier
             model.set_params(**cls_configs[0])
             predictions = model.classifier_fit_predict(training, predict_on=model.val_split)
@@ -263,9 +258,10 @@ class Ensemble(BaseQuantifier):
             print('[Ensemble]' + msg)
 
     def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float] = None):
-        self._sout('Fit')
+
         if self.policy == 'ds' and not data.binary:
             raise ValueError(f'ds policy is only defined for binary quantification, but this dataset is not binary')
+
         if val_split is None:
             val_split = self.val_split
 
@@ -288,6 +284,7 @@ class Ensemble(BaseQuantifier):
         self.ensemble = qp.util.parallel(
             _delayed_new_instance,
             tqdm(args, desc='fitting ensamble', total=self.size) if self.verbose else args,
+            asarray=False,
             n_jobs=self.n_jobs)
 
         # static selection policy (the name of a quantification-oriented error function to minimize)
@@ -369,30 +366,31 @@ class Ensemble(BaseQuantifier):
 
     def _ds_policy_get_posteriors(self, data: LabelledCollection):
         """
-        In the original article, this procedure is not described in a sufficient level of detail. The paper only says
+        In the original article, some aspects of this method are not mentioned. The paper says
         that the distribution of posterior probabilities from training and test examples is compared by means of the
         Hellinger Distance. However, how these posterior probabilities are generated is not specified. In the article,
         a Logistic Regressor (LR) is used as the classifier device and that could be used for this purpose.
         However, in general, a Quantifier is not necessarily an instance of Aggreggative Probabilistic Quantifiers,
         and so, that the quantifier builds on top of a probabilistic classifier cannot be given for granted. Additionally, it would not
-        be correct to generate the posterior probabilities for training documents that have concurred in training the
+        be correct to generate the posterior probabilities for training instances that have taken part in training the
         classifier that generates them.
+
         This function thus generates the posterior probabilities for all training documents in a cross-validation way,
-        using a LR with hyperparameters that have previously been optimized via grid search in 5FCV.
-        :return P,f, where P is a ndarray containing the posterior probabilities of the training data, generated via
-        cross-validation and using an optimized LR, and the function to be used in order to generate posterior
-        probabilities for test instances.
+        using LR with hyperparameters that have previously been optimized via grid search in 5FCV.
+
+        :param data: a LabelledCollection
+        :return: (P, f) where P is an ndarray containing the posterior probabilities of the training data, generated via
+        cross-validation and using an optimized LR, and f is the function to be used in order to generate posterior
+        probabilities for test instances.
         """
+
        X, y = data.Xy
        lr_base = LogisticRegression(class_weight='balanced', max_iter=1000)
-        optim = GridSearchCV(
-            lr_base, param_grid={'C': np.logspace(-4, 4, 9)}, cv=5, n_jobs=self.n_jobs, refit=True
-        ).fit(X, y)
+
+        param_grid = {'C': np.logspace(-4, 4, 9)}
+        optim = GridSearchCV(lr_base, param_grid=param_grid, cv=5, n_jobs=self.n_jobs, refit=True).fit(X, y)
 
-        posteriors = cross_val_predict(
-            optim.best_estimator_, X, y, cv=5, n_jobs=self.n_jobs, method='predict_proba'
-        )
+        posteriors = cross_val_predict(optim.best_estimator_, X, y, cv=5, n_jobs=self.n_jobs, method='predict_proba')
+
         posteriors_generator = optim.best_estimator_.predict_proba
+
         return posteriors, posteriors_generator
 
@@ -463,8 +461,10 @@ def _delayed_new_instance(args):
 
     tr_prevalence = sample.prevalence()
     tr_distribution = get_probability_distribution(posteriors[sample_index]) if (posteriors is not None) else None
+
     if verbose:
         print(f'\t\--fit-ended for prev {F.strprev(prev)}')
+
     return (model, tr_prevalence, tr_distribution, sample if keep_samples else None)
 
@@ -475,8 +475,9 @@ def _delayed_quantify(args):
 
 def _draw_simplex(ndim, min_val, max_trials=100):
     """
-    returns a uniform sampling from the ndim-dimensional simplex but guarantees that all dimensions
+    Returns a uniform sampling from the ndim-dimensional simplex but guarantees that all dimensions
     are >= min_class_prev (for min_val>0, this makes the sampling not truly uniform)
+
     :param ndim: number of dimensions of the simplex
    :param min_val: minimum class prevalence allowed. If less than 1/ndim a ValueError will be throw
    since there is no possible solution.
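Note (illustrative, not part of the patch): the `_draw_simplex` docstring above describes drawing a point uniformly from the probability simplex and accepting it only if every coordinate reaches `min_val`. A minimal standalone sketch of that idea follows; the helper name and the exact error messages are assumptions, not QuaPy's actual implementation.

import numpy as np

def draw_simplex_sketch(ndim, min_val, max_trials=100):
    # a valid draw requires min_val <= 1/ndim, otherwise ndim values >= min_val cannot sum to 1
    if min_val > 1 / ndim:
        raise ValueError(f'no sample with all values >= {min_val} exists in a {ndim}-dimensional simplex')
    for _ in range(max_trials):
        # uniform sampling from the simplex: differences of sorted uniform cut points
        cuts = np.sort(np.random.rand(ndim - 1))
        sample = np.diff(np.concatenate(([0.0], cuts, [1.0])))
        # rejection step: keep only draws in which every class reaches min_val
        # (hence, for min_val > 0, the sampling is no longer uniform over the whole simplex)
        if (sample >= min_val).all():
            return sample
    raise ValueError(f'no valid sample found after {max_trials} trials')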
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
index b0c2f7a..e90a502 100644
--- a/quapy/tests/test_datasets.py
+++ b/quapy/tests/test_datasets.py
@@ -1,8 +1,8 @@
 import pytest
 from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DATASETS_TEST, \
-    TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, LEQUA2022_TASKS, \
-    fetch_reviews, fetch_twitter, fetch_UCIDataset, fetch_lequa2022
+    TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, LEQUA2022_TASKS, UCI_MULTICLASS_DATASETS,\
+    fetch_reviews, fetch_twitter, fetch_UCIDataset, fetch_lequa2022, fetch_UCIMulticlassLabelledCollection
 
 
 @pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS)
@@ -44,6 +44,15 @@ def test_fetch_UCIDataset(dataset_name):
     print('Test set stats')
 
 
+@pytest.mark.parametrize('dataset_name', UCI_MULTICLASS_DATASETS)
+def test_fetch_UCIMultiDataset(dataset_name):
+    dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.stats()
+    print('Test set stats')
+
+
 @pytest.mark.parametrize('dataset_name', LEQUA2022_TASKS)
 def test_fetch_lequa2022(dataset_name):
     train, gen_val, gen_test = fetch_lequa2022(dataset_name)
diff --git a/quapy/tests/test_hierarchy.py b/quapy/tests/test_hierarchy.py
index b0842e5..33af5da 100644
--- a/quapy/tests/test_hierarchy.py
+++ b/quapy/tests/test_hierarchy.py
@@ -1,12 +1,8 @@
 import unittest
-
 from sklearn.linear_model import LogisticRegression
-
-import quapy as qp
 from quapy.method.aggregative import *
-
 
 class HierarchyTestCase(unittest.TestCase):
 
     def test_aggregative(self):
@@ -22,8 +18,10 @@ class HierarchyTestCase(unittest.TestCase):
     def test_probabilistic(self):
         lr = LogisticRegression()
         for m in [CC(lr), ACC(lr)]:
+            self.assertEqual(isinstance(m, AggregativeCrispQuantifier), True)
             self.assertEqual(isinstance(m, AggregativeSoftQuantifier), False)
         for m in [PCC(lr), PACC(lr)]:
+            self.assertEqual(isinstance(m, AggregativeCrispQuantifier), False)
             self.assertEqual(isinstance(m, AggregativeSoftQuantifier), True)
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index 43c5c99..2139e8d 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -67,15 +67,16 @@ def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method):
 @pytest.mark.parametrize('dataset', tinydatasets)
 @pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES)
 def test_ensemble_method(base_method, learner, dataset: Dataset, policy):
+    qp.environ['SAMPLE_SIZE'] = 20
+
     base_quantifier=base_method(learner())
-    if isinstance(base_quantifier, BinaryQuantifier) and not dataset.binary:
-        print(f'skipping the test of binary model {base_quantifier} on non-binary dataset {dataset}')
-        return
+
     if not dataset.binary and policy=='ds':
         print(f'skipping the test of binary policy ds on non-binary dataset {dataset}')
         return
-    model = Ensemble(quantifier=base_quantifier, size=5, policy=policy, n_jobs=-1)
+
+    model = Ensemble(quantifier=base_quantifier, size=3, policy=policy, n_jobs=-1)
 
     model.fit(dataset.training)
@@ -97,9 +98,7 @@ def test_quanet_method():
     qp.environ['SAMPLE_SIZE'] = 100
 
     # load the kindle dataset as text, and convert words to numerical indexes
-    dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
-    dataset = Dataset(dataset.training.sampling(200, *dataset.training.prevalence()),
-                      dataset.test.sampling(200, *dataset.test.prevalence()))
+    dataset = qp.datasets.fetch_reviews('kindle', pickle=True).reduce(200, 200)
     qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
 
     from quapy.classification.neural import CNNnet
diff --git a/quapy/tests/test_replicability.py b/quapy/tests/test_replicability.py
index e89531a..871f519 100644
--- a/quapy/tests/test_replicability.py
+++ b/quapy/tests/test_replicability.py
@@ -3,11 +3,13 @@ import quapy as qp
 from quapy.data import LabelledCollection
 from quapy.functional import strprev
 from sklearn.linear_model import LogisticRegression
-
+import numpy as np
 from quapy.method.aggregative import PACC
+import quapy.functional as F
 
 
 class MyTestCase(unittest.TestCase):
+
     def test_prediction_replicability(self):
 
         dataset = qp.datasets.fetch_UCIDataset('yeast')
@@ -26,8 +28,8 @@ class MyTestCase(unittest.TestCase):
         self.assertEqual(str_prev1, str_prev2)  # add assertion here
 
+
     def test_samping_replicability(self):
-        import numpy as np
 
         def equal_collections(c1, c2, value=True):
             self.assertEqual(np.all(c1.X == c2.X), value)
@@ -74,5 +76,36 @@ class MyTestCase(unittest.TestCase):
         equal_collections(sample1_te, sample2_te, True)
 
 
+    def test_parallel_replicability(self):
+
+        train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test
+
+        test = test.sampling(500, *[0.1, 0.0, 0.1, 0.1, 0.2, 0.5, 0.0])
+
+        with qp.util.temp_seed(10):
+            pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
+            pacc.fit(train, val_split=0.5)
+            prev1 = F.strprev(pacc.quantify(test.instances))
+
+        with qp.util.temp_seed(0):
+            pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
+            pacc.fit(train, val_split=0.5)
+            prev2 = F.strprev(pacc.quantify(test.instances))
+
+        with qp.util.temp_seed(0):
+            pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
+            pacc.fit(train, val_split=0.5)
+            prev3 = F.strprev(pacc.quantify(test.instances))
+
+        print(prev1)
+        print(prev2)
+        print(prev3)
+
+        self.assertNotEqual(prev1, prev2)
+        self.assertEqual(prev2, prev3)
+
+
+
 if __name__ == '__main__':
     unittest.main()
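Note (illustrative, not part of the patch): a minimal usage sketch of the new KDE-based quantifiers announced in the change-log entry above. The import path quapy.method.kdey and the default constructor arguments are assumptions and may differ in the released API; the dry-bean dataset and fetch_UCIMulticlassDataset are taken from the tests in this changeset.

import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.kdey import KDEyML  # assumed module path for the new KDEy methods

# multiclass dataset used elsewhere in this changeset (see test_replicability.py)
train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test

# fit the maximum-likelihood KDE-based quantifier on the training collection
quantifier = KDEyML(LogisticRegression(max_iter=1000))
quantifier.fit(train)

# estimate the class prevalence values of the test instances
estim_prevalence = quantifier.quantify(test.instances)
print(estim_prevalence)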