From f5603135a7e9d8f7c1f0fa4b599401a21a57e03d Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Thu, 11 Apr 2024 20:07:59 +0200
Subject: [PATCH 01/11] Excluded vscode config files

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 8eaff3e..5a3d613 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,6 +69,9 @@ instance/
 # Scrapy stuff:
 .scrapy
 
+# vscode config:
+.vscode/
+
 # Sphinx documentation
 docs/_build/
 

From f69fca32b4e92638373103ae0f858a50a66971cb Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Thu, 11 Apr 2024 20:08:52 +0200
Subject: [PATCH 02/11] Added UCI multiclass datasets; added filter for min
 instances per class to UCI multiclass datasets

---
 quapy/data/datasets.py | 211 +++++++++++++++++++++++++++++++----------
 1 file changed, 162 insertions(+), 49 deletions(-)

diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 5b9806f..72ee924 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -14,41 +14,76 @@ from quapy.util import download_file_if_not_exists, download_file, get_quapy_hom
 
 
 REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
-TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',
-                              'semeval13', 'semeval14', 'semeval15', 'semeval16',
-                              'sst', 'wa', 'wb']
-TWITTER_SENTIMENT_DATASETS_TRAIN = ['gasp', 'hcr', 'omd', 'sanders',
-                                 'semeval', 'semeval16',
-                                 'sst', 'wa', 'wb']
-UCI_BINARY_DATASETS = ['acute.a', 'acute.b',
-                'balance.1', 'balance.2', 'balance.3',
-                'breast-cancer',
-                'cmc.1', 'cmc.2', 'cmc.3',
-                'ctg.1', 'ctg.2', 'ctg.3',
-                       #'diabetes', # <-- I haven't found this one...
-                'german',
-                'haberman',
-                'ionosphere',
-                'iris.1', 'iris.2', 'iris.3',
-                'mammographic',
-                       'pageblocks.5',
-                       #'phoneme', # <-- I haven't found this one...
-                       'semeion',
-                       'sonar',
-                       'spambase',
-                       'spectf',
-                       'tictactoe',
-                       'transfusion',
-                       'wdbc',
-                       'wine.1', 'wine.2', 'wine.3',
-                       'wine-q-red', 'wine-q-white',
-                       'yeast']
+TWITTER_SENTIMENT_DATASETS_TEST = [
+    'gasp', 'hcr', 'omd', 'sanders',
+    'semeval13', 'semeval14', 'semeval15', 'semeval16',
+    'sst', 'wa', 'wb',
+]
+TWITTER_SENTIMENT_DATASETS_TRAIN = [
+    'gasp', 'hcr', 'omd', 'sanders',
+    'semeval', 'semeval16',
+    'sst', 'wa', 'wb',
+]
+UCI_BINARY_DATASETS = [
+    'acute.a', 'acute.b',
+    'balance.1', 'balance.2', 'balance.3',
+    'breast-cancer',
+    'cmc.1', 'cmc.2', 'cmc.3',
+    'ctg.1', 'ctg.2', 'ctg.3',
+    #'diabetes', # <-- I haven't found this one...
+    'german',
+    'haberman',
+    'ionosphere',
+    'iris.1', 'iris.2', 'iris.3',
+    'mammographic',
+    'pageblocks.5',
+    #'phoneme', # <-- I haven't found this one...
+    'semeion',
+    'sonar',
+    'spambase',
+    'spectf',
+    'tictactoe',
+    'transfusion',
+    'wdbc',
+    'wine.1', 'wine.2', 'wine.3',
+    'wine-q-red',
+    'wine-q-white',
+    'yeast',
+]
 
-UCI_MULTICLASS_DATASETS = ['dry-bean',
-                           'wine-quality',
-                           'academic-success',
-                           'digits',
-                           'letter']
+UCI_MULTICLASS_DATASETS = [
+    'dry-bean',
+    'wine-quality',
+    'academic-success',
+    'digits',
+    'letter',
+    'abalone',
+    'obesity',
+    'covertype',
+    'nursery',
+    'diabetes',
+    'yeast',
+    'hand_digits',
+    'satellite',
+    'shuttle',
+    'cmc',
+    'isolet',
+    'waveform.v1',
+    'molecular',
+    'poker_hand',
+    'connect-4',
+    'cardiotocography',
+    'mhr',
+    'chess2',
+    'page_block',
+    'room',
+    'phishing2',
+    'rt-iot22',
+    'support2',
+    'image_seg',
+    'steel_plates',
+    'hcv',
+]
 
 LEQUA2022_TASKS = ['T1A', 'T1B', 'T2A', 'T2B']
 
@@ -556,7 +591,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals
     return data
 
 
-def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
+def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False, min_ipc=100) -> Dataset:
     """
     Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. 
 
@@ -580,13 +615,15 @@ def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, ver
         ~/quay_data/ directory)
     :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
     :param verbose: set to True (default is False) to get information (stats) about the dataset
+    :param min_ipc: minimum number of istances per class. Classes with less instances than min_ipc are discarded 
+        (deafult is 100)
     :return: a :class:`quapy.data.base.Dataset` instance
     """
-    data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose)
+    data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose, min_ipc)
     return Dataset(*data.split_stratified(1 - test_split, random_state=0))
 
 
-def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
+def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False, min_ipc=100) -> LabelledCollection:
     """
     Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`.
 
@@ -610,6 +647,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=
         ~/quay_data/ directory)
     :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
     :param verbose: set to True (default is False) to get information (stats) about the dataset
+    :param min_ipc: minimum number of istances per class. Classes with less instances than min_ipc are discarded 
+        (deafult is 100) 
     :return: a :class:`quapy.data.base.LabelledCollection` instance
     """
     assert dataset_name in UCI_MULTICLASS_DATASETS, \
@@ -621,19 +660,71 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=
         data_home = get_quapy_home()
     
     identifiers = {
-        "dry-bean": 602,
-        "wine-quality": 186,
-        "academic-success": 697,
-        "digits": 80,
-        "letter": 59
+        'dry-bean': 602,
+        'wine-quality': 186,
+        'academic-success': 697,
+        'digits': 80,
+        'letter': 59,
+        'abalone': 1,
+        'obesity': 544,
+        'covertype': 31,
+        'nursery': 76,
+        'diabetes': 296,
+        'yeast': 110,
+        'hand_digits': 81,
+        'satellite': 146,
+        'shuttle': 148,
+        'cmc': 30,
+        'isolet': 54,
+        'waveform.v1': 107,
+        'molecular': 69,
+        'poker_hand': 158,
+        'connect-4': 26,
+        'cardiotocography': 193,
+        'mhr': 863,
+        'chess2': 23,
+        'page_block': 78,
+        'room': 864,
+        'phishing2': 379,
+        'rt-iot22': 942,
+        'support2': 880,
+        'image_seg': 147,
+        'steel_plates': 198,
+        'hcv': 503,
     }
     
     full_names = {
-        "dry-bean": "Dry Bean Dataset",
-        "wine-quality": "Wine Quality",
-        "academic-success": "Predict students' dropout and academic success",
-        "digits": "Optical Recognition of Handwritten Digits",
-        "letter": "Letter Recognition"
+        'dry-bean': 'Dry Bean Dataset',
+        'wine-quality': 'Wine Quality',
+        'academic-success': 'Predict students\' dropout and academic success',
+        'digits': 'Optical Recognition of Handwritten Digits',
+        'letter': 'Letter Recognition',
+        'abalone': 'Abalone',
+        'obesity': 'Estimation of Obesity Levels Based On Eating Habits and Physical Condition',
+        'covertype': 'Covertype',
+        'nursery': 'Nursery',
+        'diabetes': 'Diabetes 130-US Hospitals for Years 1999-2008',
+        'yeast': 'Yeast',
+        'hand_digits': 'Pen-Based Recognition of Handwritten Digits',
+        'satellite': 'Statlog Landsat Satellite',
+        'shuttle': 'Statlog Shuttle',
+        'cmc': 'Contraceptive Method Choice',
+        'isolet': 'ISOLET',
+        'waveform.v1': 'Waveform Database Generator (Version 1)',
+        'molecular': 'Molecular Biology (Splice-junction Gene Sequences)',
+        'poker_hand': 'Poker Hand',
+        'connect-4': 'Connect-4',
+        'cardiotocography': 'Cardiotocography',
+        'mhr': 'Maternal Health Risk',
+        'chess2': 'Chess (King-Rook vs. King)',
+        'page_block': 'Page Blocks Classification',
+        'room': 'Room Occupancy Estimation',
+        'phishing2': 'Website Phishing',
+        'rt-iot22': 'RT-IoT2022',
+        'support2': 'SUPPORT2',
+        'image_seg': 'Statlog (Image Segmentation)',
+        'steel_plates': 'Steel Plates Faults',
+        'hcv': 'Hepatitis C Virus (HCV) for Egyptian patients',
     }
     
     identifier = identifiers[dataset_name]
@@ -644,14 +735,36 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=
 
     file = join(data_home, 'uci_multiclass', dataset_name+'.pkl')
     
-    def download(id):
+    def download(id, name):
         data = fetch_ucirepo(id=id)
         X, y = data['data']['features'].to_numpy(), data['data']['targets'].to_numpy().squeeze()
+        # classes represented as arrays are transformed to tuples to treat them as signle objects
+        if name == 'support2':
+            y[:, 2] = np.fromiter((str(elm) for elm in y[:, 2]), dtype='object')
+        if y.ndim > 1:
+            y = np.fromiter((tuple(elm) for elm in y), dtype='object')
         classes = np.sort(np.unique(y))
         y = np.searchsorted(classes, y)
         return LabelledCollection(X, y)
 
-    data = pickled_resource(file, download, identifier)
+    def filter_classes(data: LabelledCollection, min_ipc):
+        classes = data.classes_
+        # restrict classes to only those with at least min_ipc instances
+        classes = classes[data.counts() >= min_ipc]
+        # filter X and y keeping only datapoints belonging to valid classes
+        filter_idx = np.in1d(data.y, classes)
+        X, y = data.X[filter_idx], data.y[filter_idx]
+        # map classes to range(len(classes))
+        y = np.searchsorted(classes, y)
+        return LabelledCollection(X, y)
+
+    data = pickled_resource(file, download, identifier, dataset_name)
+    data = filter_classes(data, min_ipc)
+    if data.n_classes <= 2:
+        raise ValueError(
+            f'Dataset {dataset_name} has too few valid classes to be multiclass with {min_ipc=}. '
+            'Try a lower value for min_ipc.'
+        )
 
     if verbose:
         data.stats()

From b53d41724091f8b3cc2f5ee6fbb7abd54405a5d0 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Fri, 12 Apr 2024 13:35:13 +0200
Subject: [PATCH 03/11] merged

---
 quapy/data/_ifcb.py         | 26 ++++++++++----------------
 quapy/data/datasets.py      |  7 ++++---
 quapy/method/aggregative.py |  2 +-
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/quapy/data/_ifcb.py b/quapy/data/_ifcb.py
index f862ed0..d5c1bdf 100644
--- a/quapy/data/_ifcb.py
+++ b/quapy/data/_ifcb.py
@@ -1,20 +1,17 @@
 import os
 import pandas as pd
 import math
-
 from quapy.data import LabelledCollection
 from quapy.protocol import AbstractProtocol
 from pathlib import Path
 
 
 def get_sample_list(path_dir):
-    """Gets a sample list finding the csv files in a directory
+    """
+    Gets a sample list finding the csv files in a directory
 
-    Args:
-        path_dir (_type_): directory to look for samples
-
-    Returns:
-        _type_: list of samples
+    :param path_dir: directory to look for samples
+    :return: list of samples
     """
     samples = []
     for filename in sorted(os.listdir(path_dir)):
@@ -23,18 +20,15 @@ def get_sample_list(path_dir):
     return samples
 
 
-def generate_modelselection_split(samples, split=0.3):
-    """This function generates a train/test split for model selection
+def generate_modelselection_split(samples, test_prop=0.3):
+    """This function generates a train/test partition for model selection
     without the use of random numbers so the split is always the same
 
-    Args:
-        samples (_type_): list of samples
-        split (float, optional): percentage saved for test. Defaults to 0.3.
-
-    Returns:
-        _type_: list of samples to use as train and list of samples to use as test
+    :param samples: list of samples
+    :param test_prop: float, proportion of the samples saved for test. Defaults to 0.3.
+    :return: list of samples to use as train and list of samples to use as test
     """
-    num_items_to_pick = math.ceil(len(samples) * split)
+    num_items_to_pick = math.ceil(len(samples) * test_prop)
     step_size = math.floor(len(samples) / num_items_to_pick)
     test_indices = [i * step_size for i in range(num_items_to_pick)]
     test = [samples[i] for i in test_indices]
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 5b9806f..bcbdb0e 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -735,14 +735,15 @@ def fetch_lequa2022(task, data_home=None):
     return train, val_gen, test_gen
 
 
+
 def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None):
     """
     Loads the IFCB dataset for quantification from `Zenodo <https://zenodo.org/records/10036244>`_ (for more
     information on this dataset, please follow the zenodo link).
     This dataset is based on the data available publicly at
     `WHOI-Plankton repo <https://github.com/hsosik/WHOI-Plankton>`_.
-    The scripts for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.
-    Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms.
+    The dataset already comes with processed features.
+    The scripts used for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.
 
     The datasets are downloaded only once, and stored for fast reuse.
 
@@ -798,7 +799,7 @@ def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=No
     if for_model_selection:
         # In this case, return 70% of training data as the training set and 30% as the test set
         samples = get_sample_list(train_samples_path)
-        train, test = generate_modelselection_split(samples, split=0.3)
+        train, test = generate_modelselection_split(samples, test_prop=0.3)
         train_gen = IFCBTrainSamplesFromDir(path_dir=train_samples_path, classes=classes, samples=train)
 
         # Test prevalence is computed from class labels
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 46e56d7..2f3fab5 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier):
             raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
         if self.method not in ACC.METHODS:
             raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
-        if self.clipping not in ACC.NORMALIZATIONS:
+        if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}")
 
     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):

From 4abec6629b3aa5438fe12c585a996c23beb630ed Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Fri, 12 Apr 2024 18:08:00 +0200
Subject: [PATCH 04/11] integrating more uci-multiclass datasets

---
 examples/uci_experiments.py      |  11 ++-
 examples/ucimulti_experiments.py | 113 +++++++++++++++++++++++++++++++
 quapy/data/datasets.py           |  34 ++++++----
 quapy/method/_neural.py          |   2 +-
 quapy/util.py                    |  25 +++++++
 5 files changed, 167 insertions(+), 18 deletions(-)
 create mode 100644 examples/ucimulti_experiments.py

diff --git a/examples/uci_experiments.py b/examples/uci_experiments.py
index 07db7cd..b452feb 100644
--- a/examples/uci_experiments.py
+++ b/examples/uci_experiments.py
@@ -29,12 +29,17 @@ def newLR():
 
 
 def calibratedLR():
-    return CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1))
+    return CalibratedClassifierCV(newLR())
 
 
 __C_range = np.logspace(-3, 3, 7)
-lr_params = {'classifier__C': __C_range, 'classifier__class_weight': [None, 'balanced']}
-svmperf_params = {'classifier__C': __C_range}
+lr_params = {
+    'classifier__C': __C_range,
+    'classifier__class_weight': [None, 'balanced']
+}
+svmperf_params = {
+    'classifier__C': __C_range
+}
 
 
 def quantification_models():
diff --git a/examples/ucimulti_experiments.py b/examples/ucimulti_experiments.py
new file mode 100644
index 0000000..1b48834
--- /dev/null
+++ b/examples/ucimulti_experiments.py
@@ -0,0 +1,113 @@
+import pickle
+import os
+
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+
+import quapy as qp
+from quapy.method.aggregative import PACC, EMQ, KDEyML
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import UPP
+from pathlib import Path
+
+
+SEED = 1
+
+
+def newLR():
+    return LogisticRegression(max_iter=3000)
+
+# typical hyperparameters explored for Logistic Regression
+logreg_grid = {
+    'C': np.logspace(-3, 3, 7),
+    'class_weight': ['balanced', None]
+}
+
+def wrap_hyper(classifier_hyper_grid:dict):
+    return {'classifier__'+k:v for k, v in classifier_hyper_grid.items()}
+
+METHODS = [
+    ('PACC', PACC(newLR()), wrap_hyper(logreg_grid)),
+    ('EMQ',  EMQ(newLR()), wrap_hyper(logreg_grid)),
+    ('KDEy-ML',  KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.linspace(0.01, 0.2, 20)}}),
+]
+
+
+def show_results(result_path):
+    import pandas as pd
+    df = pd.read_csv(result_path+'.csv', sep='\t')
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.max_rows', None)
+    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"], margins=True)
+    print(pv)
+
+
+if __name__ == '__main__':
+
+    qp.environ['SAMPLE_SIZE'] = 500
+    qp.environ['N_JOBS'] = -1
+    n_bags_val = 250
+    n_bags_test = 1000
+    result_dir = f'results/ucimulti'
+
+    os.makedirs(result_dir, exist_ok=True)
+
+    global_result_path = f'{result_dir}/allmethods'
+    with open(global_result_path + '.csv', 'wt') as csv:
+        csv.write(f'Method\tDataset\tMAE\tMRAE\n')
+
+    for method_name, quantifier, param_grid in METHODS:
+
+        print('Init method', method_name)
+
+        with open(global_result_path + '.csv', 'at') as csv:
+
+            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS[:5]:
+
+                if dataset in ['covertype', 'diabetes']:
+                    continue
+
+                print('init', dataset)
+
+                local_result_path = os.path.join(Path(global_result_path).parent, method_name + '_' + dataset + '.dataframe')
+
+                if os.path.exists(local_result_path):
+                    print(f'result file {local_result_path} already exist; skipping')
+                    report = qp.util.load_report(local_result_path)
+
+                else:
+                    with qp.util.temp_seed(SEED):
+
+                        data = qp.datasets.fetch_UCIMulticlassDataset(dataset, verbose=True)
+
+                        # model selection
+                        train, test = data.train_test
+                        train, val = train.split_stratified(random_state=SEED)
+
+                        protocol = UPP(val, repeats=n_bags_val)
+                        modsel = GridSearchQ(
+                            quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error='mae'
+                        )
+
+                        try:
+                            modsel.fit(train)
+
+                            print(f'best params {modsel.best_params_}')
+                            print(f'best score {modsel.best_score_}')
+
+                            quantifier = modsel.best_model()
+                        except:
+                            print('something went wrong... trying to fit the default model')
+                            quantifier.fit(train)
+
+                        protocol = UPP(test, repeats=n_bags_test)
+                        report = qp.evaluation.evaluation_report(
+                            quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True
+                        )
+                        report.to_csv(local_result_path)
+
+                means = report.mean()
+                csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
+                csv.flush()
+
+    show_results(global_result_path)
\ No newline at end of file
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index a5a5677..1e0750e 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -591,7 +591,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals
     return data
 
 
-def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False, min_ipc=100) -> Dataset:
+def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, min_class_support=100, verbose=False) -> Dataset:
     """
     Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. 
 
@@ -614,16 +614,16 @@ def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, ver
     :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
         ~/quay_data/ directory)
     :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
+    :param min_class_support: minimum number of istances per class. Classes with fewer instances
+        are discarded (deafult is 100)
     :param verbose: set to True (default is False) to get information (stats) about the dataset
-    :param min_ipc: minimum number of istances per class. Classes with less instances than min_ipc are discarded 
-        (deafult is 100)
     :return: a :class:`quapy.data.base.Dataset` instance
     """
-    data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose, min_ipc)
+    data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, min_class_support, verbose=verbose)
     return Dataset(*data.split_stratified(1 - test_split, random_state=0))
 
 
-def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False, min_ipc=100) -> LabelledCollection:
+def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_class_support=100, verbose=False) -> LabelledCollection:
     """
     Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`.
 
@@ -646,9 +646,9 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=
     :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
         ~/quay_data/ directory)
     :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
+    :param min_class_support: minimum number of istances per class. Classes with fewer instances
+        are discarded (deafult is 100)
     :param verbose: set to True (default is False) to get information (stats) about the dataset
-    :param min_ipc: minimum number of istances per class. Classes with less instances than min_ipc are discarded 
-        (deafult is 100) 
     :return: a :class:`quapy.data.base.LabelledCollection` instance
     """
     assert dataset_name in UCI_MULTICLASS_DATASETS, \
@@ -736,13 +736,20 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=
     file = join(data_home, 'uci_multiclass', dataset_name+'.pkl')
     
     def download(id, name):
-        data = fetch_ucirepo(id=id)
-        X, y = data['data']['features'].to_numpy(), data['data']['targets'].to_numpy().squeeze()
-        # classes represented as arrays are transformed to tuples to treat them as signle objects
+        df = fetch_ucirepo(id=id)
+
+        df.data.features = pd.get_dummies(df.data.features, drop_first=True)
+
+        X, y = df.data.features.to_numpy(), df.data.targets.to_numpy().squeeze()
+        # classes represented as arrays are transformed to tuples to treat them as single objects
         if name == 'support2':
             y[:, 2] = np.fromiter((str(elm) for elm in y[:, 2]), dtype='object')
+            raise ValueError('this is support 2')
+
         if y.ndim > 1:
             y = np.fromiter((tuple(elm) for elm in y), dtype='object')
+            raise ValueError('more than one y')
+
         classes = np.sort(np.unique(y))
         y = np.searchsorted(classes, y)
         return LabelledCollection(X, y)
@@ -759,11 +766,11 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=
         return LabelledCollection(X, y)
 
     data = pickled_resource(file, download, identifier, dataset_name)
-    data = filter_classes(data, min_ipc)
+    data = filter_classes(data, min_class_support)
     if data.n_classes <= 2:
         raise ValueError(
-            f'Dataset {dataset_name} has too few valid classes to be multiclass with {min_ipc=}. '
-            'Try a lower value for min_ipc.'
+            f'After filtering out classes with less than {min_class_support=} instances, the dataset {dataset_name} '
+            f'is no longer multiclass. Try a reducing this value.'
         )
 
     if verbose:
@@ -848,7 +855,6 @@ def fetch_lequa2022(task, data_home=None):
     return train, val_gen, test_gen
 
 
-
 def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None):
     """
     Loads the IFCB dataset for quantification from `Zenodo <https://zenodo.org/records/10036244>`_ (for more
diff --git a/quapy/method/_neural.py b/quapy/method/_neural.py
index 11c2dc4..28d848a 100644
--- a/quapy/method/_neural.py
+++ b/quapy/method/_neural.py
@@ -21,7 +21,7 @@ class QuaNetTrainer(BaseQuantifier):
     Example:
 
     >>> import quapy as qp
-    >>> from quapy.method.meta import QuaNet
+    >>> from quapy.method.meta import QuaNet
     >>> from quapy.classification.neural import NeuralClassifierTrainer, CNNnet
     >>>
     >>> # use samples of 100 elements
diff --git a/quapy/util.py b/quapy/util.py
index 7f0abc4..9165499 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -6,6 +6,9 @@ import pickle
 import urllib
 from pathlib import Path
 from contextlib import ExitStack
+
+import pandas as pd
+
 import quapy as qp
 
 import numpy as np
@@ -246,6 +249,28 @@ def _check_sample_size(sample_size):
     return sample_size
 
 
+def load_report(path, as_dict=False):
+    def str2prev_arr(strprev):
+        within = strprev.strip('[]').split()
+        float_list = [float(p) for p in within]
+        float_list[-1] = 1. - sum(float_list[:-1])
+        return np.asarray(float_list)
+
+    df = pd.read_csv(path, index_col=0)
+    df['true-prev'] = df['true-prev'].apply(str2prev_arr)
+    df['estim-prev'] = df['estim-prev'].apply(str2prev_arr)
+    if as_dict:
+        d = {}
+        for col in df.columns.values:
+            vals = df[col].values
+            if col in ['true-prev', 'estim-prev']:
+                vals = np.vstack(vals)
+            d[col] = vals
+        return d
+    else:
+        return df
+
+
 class EarlyStop:
     """
     A class implementing the early-stopping condition typically used for training neural networks.

From e0b80167b972d92080ecfd67b368fcb7f82f6583 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Fri, 12 Apr 2024 18:24:12 +0200
Subject: [PATCH 05/11] added max_train_instances to
 fetch_UCIMulticlassLabelledCollection

---
 examples/ucimulti_experiments.py |  2 +-
 quapy/data/datasets.py           | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/examples/ucimulti_experiments.py b/examples/ucimulti_experiments.py
index 1b48834..b01163a 100644
--- a/examples/ucimulti_experiments.py
+++ b/examples/ucimulti_experiments.py
@@ -29,7 +29,7 @@ def wrap_hyper(classifier_hyper_grid:dict):
 METHODS = [
     ('PACC', PACC(newLR()), wrap_hyper(logreg_grid)),
     ('EMQ',  EMQ(newLR()), wrap_hyper(logreg_grid)),
-    ('KDEy-ML',  KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.linspace(0.01, 0.2, 20)}}),
+    # ('KDEy-ML',  KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.linspace(0.01, 0.2, 20)}}),
 ]
 
 
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 1e0750e..ad0ef6a 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -591,7 +591,13 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals
     return data
 
 
-def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, min_class_support=100, verbose=False) -> Dataset:
+def fetch_UCIMulticlassDataset(
+        dataset_name,
+        data_home=None,
+        min_test_split=0.3,
+        max_train_instances=25000,
+        min_class_support=100,
+        verbose=False) -> Dataset:
     """
     Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. 
 
@@ -613,14 +619,24 @@ def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, min
     :param dataset_name: a dataset name
     :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
         ~/quay_data/ directory)
-    :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
+    :param min_test_split: minimum proportion of instances to be included in the test set. This value is interpreted
+        as a minimum proportion, meaning that the real proportion could be higher in case the training portion
+        (a fraction `1-min_test_split` of the instances) exceeds `max_train_instances`. In such a case, only
+        `max_train_instances` are taken for training, and the rest (irrespective of `min_test_split`) is taken for test.
+    :param max_train_instances: maximum number of instances to keep for training (defaults to 25000)
     :param min_class_support: minimum number of istances per class. Classes with fewer instances
         are discarded (deafult is 100)
     :param verbose: set to True (default is False) to get information (stats) about the dataset
     :return: a :class:`quapy.data.base.Dataset` instance
     """
     data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, min_class_support, verbose=verbose)
-    return Dataset(*data.split_stratified(1 - test_split, random_state=0))
+    n = len(data)
+    train_prop = (1.-min_test_split)
+    n_train = int(n*train_prop)
+    if n_train > max_train_instances:
+        train_prop = (max_train_instances / n)
+
+    return Dataset(*data.split_stratified(train_prop, random_state=0))
 
 
 def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_class_support=100, verbose=False) -> LabelledCollection:
@@ -645,7 +661,7 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
     :param dataset_name: a dataset name
     :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
         ~/quay_data/ directory)
-    :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
+    :param test_split: proportion of instances to be included in the test set. The rest conforms the training set
     :param min_class_support: minimum number of istances per class. Classes with fewer instances
         are discarded (deafult is 100)
     :param verbose: set to True (default is False) to get information (stats) about the dataset

From 522d0740875cb3fdbbccc7f0b037f9c21cb5d659 Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Tue, 23 Apr 2024 16:29:19 +0200
Subject: [PATCH 06/11] report mean fixed, datasets included

---
 examples/ucimulti_experiments.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ucimulti_experiments.py b/examples/ucimulti_experiments.py
index b01163a..16144cd 100644
--- a/examples/ucimulti_experiments.py
+++ b/examples/ucimulti_experiments.py
@@ -62,9 +62,9 @@ if __name__ == '__main__':
 
         with open(global_result_path + '.csv', 'at') as csv:
 
-            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS[:5]:
+            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS[:12]:
 
-                if dataset in ['covertype', 'diabetes']:
+                if dataset in []:
                     continue
 
                 print('init', dataset)
@@ -106,7 +106,7 @@ if __name__ == '__main__':
                         )
                         report.to_csv(local_result_path)
 
-                means = report.mean()
+                means = report.mean(numeric_only=True)
                 csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
                 csv.flush()
 

From ecfc175622ba71bd02a78275b99de8fb65f35c4f Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Tue, 23 Apr 2024 16:30:17 +0200
Subject: [PATCH 07/11] datasets removed, debug output added

---
 quapy/data/datasets.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index ad0ef6a..66be54a 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -79,7 +79,6 @@ UCI_MULTICLASS_DATASETS = [
     'room',
     'phishing2',
     'rt-iot22',
-    'support2',
     'image_seg',
     'steel_plates',
     'hcv',
@@ -703,7 +702,6 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
         'room': 864,
         'phishing2': 379,
         'rt-iot22': 942,
-        'support2': 880,
         'image_seg': 147,
         'steel_plates': 198,
         'hcv': 503,
@@ -737,7 +735,6 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
         'room': 'Room Occupancy Estimation',
         'phishing2': 'Website Phishing',
         'rt-iot22': 'RT-IoT2022',
-        'support2': 'SUPPORT2',
         'image_seg': 'Statlog (Image Segmentation)',
         'steel_plates': 'Steel Plates Faults',
         'hcv': 'Hepatitis C Virus (HCV) for Egyptian patients',
@@ -753,17 +750,25 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
     
     def download(id, name):
         df = fetch_ucirepo(id=id)
+        
 
         df.data.features = pd.get_dummies(df.data.features, drop_first=True)
 
         X, y = df.data.features.to_numpy(), df.data.targets.to_numpy().squeeze()
-        # classes represented as arrays are transformed to tuples to treat them as single objects
-        if name == 'support2':
-            y[:, 2] = np.fromiter((str(elm) for elm in y[:, 2]), dtype='object')
-            raise ValueError('this is support 2')
+
+        with open(f"var/{name}_Xy.txt", "w") as f:
+            for row in X:
+                f.write(str(row) + "\n")
+            f.write("\n\n")
+            if y.ndim > 1:
+                unique_y = np.unique(np.fromiter((tuple(elm) for elm in y), dtype='object'))
+            else:
+                unique_y = np.unique(y)
+            f.write(str(unique_y) + "\n\n")
+            for row in y:
+                f.write(str(row) + "\n")
 
         if y.ndim > 1:
-            y = np.fromiter((tuple(elm) for elm in y), dtype='object')
             raise ValueError('more than one y')
 
         classes = np.sort(np.unique(y))

From f74b048e2d0a4bee150712d367b89e18eb301e01 Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Wed, 24 Apr 2024 15:20:14 +0200
Subject: [PATCH 08/11] uci_multi dataset removed

---
 .gitignore             | 5 +++++
 quapy/data/datasets.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5a3d613..418b54f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -88,6 +88,11 @@ ipython_config.py
 # pyenv
 .python-version
 
+# poetry
+poetry.toml
+pyproject.toml 
+poetry.lock
+
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 66be54a..2e56b48 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -61,7 +61,7 @@ UCI_MULTICLASS_DATASETS = [
     'obesity',
     'covertype',
     'nursery',
-    'diabetes',
+    # 'diabetes', --> very slow, skipped
     'yeast',
     'hand_digits',
     'satellite',

From 498fd8b05062e33b297286300d8652eb9fe305cb Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Wed, 24 Apr 2024 17:23:01 +0200
Subject: [PATCH 09/11] datasets removed from ucimulti

---
 examples/ucimulti_experiments.py |  2 +-
 quapy/data/datasets.py           | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/ucimulti_experiments.py b/examples/ucimulti_experiments.py
index 16144cd..aae8c88 100644
--- a/examples/ucimulti_experiments.py
+++ b/examples/ucimulti_experiments.py
@@ -62,7 +62,7 @@ if __name__ == '__main__':
 
         with open(global_result_path + '.csv', 'at') as csv:
 
-            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS[:12]:
+            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
 
                 if dataset in []:
                     continue
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 2e56b48..d197717 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -59,7 +59,7 @@ UCI_MULTICLASS_DATASETS = [
     'letter',
     'abalone',
     'obesity',
-    'covertype',
+    # 'covertype', --> very slow, skipped
     'nursery',
     # 'diabetes', --> very slow, skipped
     'yeast',
@@ -70,17 +70,17 @@ UCI_MULTICLASS_DATASETS = [
     'isolet',
     'waveform.v1',
     'molecular',
-    'poker_hand',
+    # 'poker_hand', --> very slow, skipped
     'connect-4',
-    'cardiotocography',
+    # 'cardiotocography', --> multiple labels, skipped
     'mhr',
-    'chess2',
+    'chess',
     'page_block',
-    'room',
+    # 'room', --> very slow, skipped
     'phishing2',
-    'rt-iot22',
+    # 'rt-iot22', --> very slow, skipped
     'image_seg',
-    'steel_plates',
+    # 'steel_plates', --> multiple labels, skipped
     'hcv',
 ]
 
@@ -697,7 +697,7 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
         'connect-4': 26,
         'cardiotocography': 193,
         'mhr': 863,
-        'chess2': 23,
+        'chess': 23,
         'page_block': 78,
         'room': 864,
         'phishing2': 379,
@@ -730,7 +730,7 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
         'connect-4': 'Connect-4',
         'cardiotocography': 'Cardiotocography',
         'mhr': 'Maternal Health Risk',
-        'chess2': 'Chess (King-Rook vs. King)',
+        'chess': 'Chess (King-Rook vs. King)',
         'page_block': 'Page Blocks Classification',
         'room': 'Room Occupancy Estimation',
         'phishing2': 'Website Phishing',

From 93dd6cb1c15eda8ef3ee6701364c11fc5bfcb5ec Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Mon, 29 Apr 2024 17:35:43 +0200
Subject: [PATCH 10/11] training times added to global report

---
 examples/ucimulti_experiments.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/examples/ucimulti_experiments.py b/examples/ucimulti_experiments.py
index aae8c88..5193376 100644
--- a/examples/ucimulti_experiments.py
+++ b/examples/ucimulti_experiments.py
@@ -1,5 +1,7 @@
 import pickle
 import os
+from time import time
+from collections import defaultdict
 
 import numpy as np
 from sklearn.linear_model import LogisticRegression
@@ -38,9 +40,17 @@ def show_results(result_path):
     df = pd.read_csv(result_path+'.csv', sep='\t')
     pd.set_option('display.max_columns', None)
     pd.set_option('display.max_rows', None)
-    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"], margins=True)
+    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE", "t_train"], margins=True)
     print(pv)
 
+def load_timings(result_path):
+    import pandas as pd
+    timings = defaultdict(lambda: {})
+    if not Path(result_path + '.csv').exists():
+        return timings
+
+    df = pd.read_csv(result_path+'.csv', sep='\t')
+    return timings | df.pivot_table(index='Dataset', columns='Method', values='t_train').to_dict()
 
 if __name__ == '__main__':
 
@@ -53,8 +63,9 @@ if __name__ == '__main__':
     os.makedirs(result_dir, exist_ok=True)
 
     global_result_path = f'{result_dir}/allmethods'
+    timings = load_timings(global_result_path)
     with open(global_result_path + '.csv', 'wt') as csv:
-        csv.write(f'Method\tDataset\tMAE\tMRAE\n')
+        csv.write(f'Method\tDataset\tMAE\tMRAE\tt_train\n')
 
     for method_name, quantifier, param_grid in METHODS:
 
@@ -64,9 +75,6 @@ if __name__ == '__main__':
 
             for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
 
-                if dataset in []:
-                    continue
-
                 print('init', dataset)
 
                 local_result_path = os.path.join(Path(global_result_path).parent, method_name + '_' + dataset + '.dataframe')
@@ -88,7 +96,8 @@ if __name__ == '__main__':
                         modsel = GridSearchQ(
                             quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error='mae'
                         )
-
+                        
+                        t_init = time()
                         try:
                             modsel.fit(train)
 
@@ -99,6 +108,8 @@ if __name__ == '__main__':
                         except:
                             print('something went wrong... trying to fit the default model')
                             quantifier.fit(train)
+                        timings[method_name][dataset] = time() - t_init
+                        
 
                         protocol = UPP(test, repeats=n_bags_test)
                         report = qp.evaluation.evaluation_report(
@@ -107,7 +118,7 @@ if __name__ == '__main__':
                         report.to_csv(local_result_path)
 
                 means = report.mean(numeric_only=True)
-                csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
+                csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{timings[method_name][dataset]:.3f}\n')
                 csv.flush()
 
     show_results(global_result_path)
\ No newline at end of file

From 19524f9aa82a8cb70601009e19d53433cbf8b0e3 Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Mon, 29 Apr 2024 17:36:13 +0200
Subject: [PATCH 11/11] ucimulti datasets removed, cleaning

---
 quapy/data/datasets.py | 49 +++++++-----------------------------------
 1 file changed, 8 insertions(+), 41 deletions(-)

diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index d197717..bfd709d 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -59,28 +59,22 @@ UCI_MULTICLASS_DATASETS = [
     'letter',
     'abalone',
     'obesity',
-    # 'covertype', --> very slow, skipped
     'nursery',
-    # 'diabetes', --> very slow, skipped
     'yeast',
     'hand_digits',
     'satellite',
     'shuttle',
     'cmc',
     'isolet',
-    'waveform.v1',
+    'waveform-v1',
     'molecular',
-    # 'poker_hand', --> very slow, skipped
+    'poker_hand',
     'connect-4',
-    # 'cardiotocography', --> multiple labels, skipped
     'mhr',
     'chess',
     'page_block',
-    # 'room', --> very slow, skipped
-    'phishing2',
-    # 'rt-iot22', --> very slow, skipped
+    'phishing',
     'image_seg',
-    # 'steel_plates', --> multiple labels, skipped
     'hcv',
 ]
 
@@ -682,28 +676,22 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
         'letter': 59,
         'abalone': 1,
         'obesity': 544,
-        'covertype': 31,
         'nursery': 76,
-        'diabetes': 296,
         'yeast': 110,
         'hand_digits': 81,
         'satellite': 146,
         'shuttle': 148,
         'cmc': 30,
         'isolet': 54,
-        'waveform.v1': 107,
+        'waveform-v1': 107,
         'molecular': 69,
         'poker_hand': 158,
         'connect-4': 26,
-        'cardiotocography': 193,
         'mhr': 863,
         'chess': 23,
         'page_block': 78,
-        'room': 864,
-        'phishing2': 379,
-        'rt-iot22': 942,
+        'phishing': 379,
         'image_seg': 147,
-        'steel_plates': 198,
         'hcv': 503,
     }
     
@@ -715,28 +703,22 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
         'letter': 'Letter Recognition',
         'abalone': 'Abalone',
         'obesity': 'Estimation of Obesity Levels Based On Eating Habits and Physical Condition',
-        'covertype': 'Covertype',
         'nursery': 'Nursery',
-        'diabetes': 'Diabetes 130-US Hospitals for Years 1999-2008',
         'yeast': 'Yeast',
         'hand_digits': 'Pen-Based Recognition of Handwritten Digits',
         'satellite': 'Statlog Landsat Satellite',
         'shuttle': 'Statlog Shuttle',
         'cmc': 'Contraceptive Method Choice',
         'isolet': 'ISOLET',
-        'waveform.v1': 'Waveform Database Generator (Version 1)',
+        'waveform-v1': 'Waveform Database Generator (Version 1)',
         'molecular': 'Molecular Biology (Splice-junction Gene Sequences)',
         'poker_hand': 'Poker Hand',
         'connect-4': 'Connect-4',
-        'cardiotocography': 'Cardiotocography',
         'mhr': 'Maternal Health Risk',
         'chess': 'Chess (King-Rook vs. King)',
         'page_block': 'Page Blocks Classification',
-        'room': 'Room Occupancy Estimation',
-        'phishing2': 'Website Phishing',
-        'rt-iot22': 'RT-IoT2022',
+        'phishing': 'Website Phishing',
         'image_seg': 'Statlog (Image Segmentation)',
-        'steel_plates': 'Steel Plates Faults',
         'hcv': 'Hepatitis C Virus (HCV) for Egyptian patients',
     }
     
@@ -750,26 +732,11 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
     
     def download(id, name):
         df = fetch_ucirepo(id=id)
-        
 
         df.data.features = pd.get_dummies(df.data.features, drop_first=True)
-
         X, y = df.data.features.to_numpy(), df.data.targets.to_numpy().squeeze()
 
-        with open(f"var/{name}_Xy.txt", "w") as f:
-            for row in X:
-                f.write(str(row) + "\n")
-            f.write("\n\n")
-            if y.ndim > 1:
-                unique_y = np.unique(np.fromiter((tuple(elm) for elm in y), dtype='object'))
-            else:
-                unique_y = np.unique(y)
-            f.write(str(unique_y) + "\n\n")
-            for row in y:
-                f.write(str(row) + "\n")
-
-        if y.ndim > 1:
-            raise ValueError('more than one y')
+        assert y.ndim == 1, 'more than one y'
 
         classes = np.sort(np.unique(y))
         y = np.searchsorted(classes, y)