diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 3cc8f18..72ee924 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -591,7 +591,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals return data -def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset: +def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False, min_ipc=100) -> Dataset: """ Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. @@ -615,9 +615,11 @@ def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, ver ~/quay_data/ directory) :param test_split: proportion of documents to be included in the test set. The rest conforms the training set :param verbose: set to True (default is False) to get information (stats) about the dataset + :param min_ipc: minimum number of instances per class. Classes with fewer instances than min_ipc are discarded + (default is 100) :return: a :class:`quapy.data.base.Dataset` instance """ - data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose) + data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose, min_ipc) return Dataset(*data.split_stratified(1 - test_split, random_state=0))