fetch_UCIMulticlassDataset updated with min_ipc parameter

This commit is contained in:
Lorenzo Volpi 2024-04-10 20:46:10 +02:00
parent a4e4c3a3f6
commit c04a935c9a
1 changed file with 4 additions and 2 deletions

View File

@ -591,7 +591,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals
return data
def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False, min_ipc=100) -> Dataset:
"""
Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`.
@ -615,9 +615,11 @@ def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, ver
~/quapy_data/ directory)
:param test_split: proportion of documents to be included in the test set. The rest forms the training set
:param verbose: set to True (default is False) to get information (stats) about the dataset
:param min_ipc: minimum number of instances per class. Classes with fewer instances than min_ipc are discarded
(default is 100)
:return: a :class:`quapy.data.base.Dataset` instance
"""
data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose)
data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose, min_ipc)
return Dataset(*data.split_stratified(1 - test_split, random_state=0))