From b53d41724091f8b3cc2f5ee6fbb7abd54405a5d0 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 12 Apr 2024 13:35:13 +0200 Subject: [PATCH] merged --- quapy/data/_ifcb.py | 26 ++++++++++---------------- quapy/data/datasets.py | 7 ++++--- quapy/method/aggregative.py | 2 +- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/quapy/data/_ifcb.py b/quapy/data/_ifcb.py index f862ed0..d5c1bdf 100644 --- a/quapy/data/_ifcb.py +++ b/quapy/data/_ifcb.py @@ -1,20 +1,17 @@ import os import pandas as pd import math - from quapy.data import LabelledCollection from quapy.protocol import AbstractProtocol from pathlib import Path def get_sample_list(path_dir): - """Gets a sample list finding the csv files in a directory + """ + Gets a sample list finding the csv files in a directory - Args: - path_dir (_type_): directory to look for samples - - Returns: - _type_: list of samples + :param path_dir: directory to look for samples + :return: list of samples """ samples = [] for filename in sorted(os.listdir(path_dir)): @@ -23,18 +20,15 @@ def get_sample_list(path_dir): return samples -def generate_modelselection_split(samples, split=0.3): - """This function generates a train/test split for model selection +def generate_modelselection_split(samples, test_prop=0.3): + """This function generates a train/test partition for model selection without the use of random numbers so the split is always the same - Args: - samples (_type_): list of samples - split (float, optional): percentage saved for test. Defaults to 0.3. - - Returns: - _type_: list of samples to use as train and list of samples to use as test + :param samples: list of samples + :param test_prop: float, percentage saved for test. Defaults to 0.3. + :return: list of samples to use as train and list of samples to use as test """ - num_items_to_pick = math.ceil(len(samples) * split) + num_items_to_pick = math.ceil(len(samples) * test_prop) step_size = math.floor(len(samples) / num_items_to_pick) test_indices = [i * step_size for i in range(num_items_to_pick)] test = [samples[i] for i in test_indices] diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 5b9806f..bcbdb0e 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -735,14 +735,15 @@ def fetch_lequa2022(task, data_home=None): return train, val_gen, test_gen + def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None): """ Loads the IFCB dataset for quantification from `Zenodo `_ (for more information on this dataset, please follow the zenodo link). This dataset is based on the data available publicly at `WHOI-Plankton repo `_. - The scripts for the processing are available at `P. González's repo `_. - Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms. + The dataset already comes with processed features. + The scripts used for the processing are available at `P. González's repo `_. The datasets are downloaded only once, and stored for fast reuse. @@ -798,7 +799,7 @@ def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=No if for_model_selection: # In this case, return 70% of training data as the training set and 30% as the test set samples = get_sample_list(train_samples_path) - train, test = generate_modelselection_split(samples, split=0.3) + train, test = generate_modelselection_split(samples, test_prop=0.3) train_gen = IFCBTrainSamplesFromDir(path_dir=train_samples_path, classes=classes, samples=train) # Test prevalence is computed from class labels diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 46e56d7..2f3fab5 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier): raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}") if self.method not in ACC.METHODS: raise ValueError(f"unknown method; valid ones are {ACC.METHODS}") - if self.clipping not in ACC.NORMALIZATIONS: + if self.norm not in ACC.NORMALIZATIONS: raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}") def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):