From b53d41724091f8b3cc2f5ee6fbb7abd54405a5d0 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Fri, 12 Apr 2024 13:35:13 +0200
Subject: [PATCH] merged: reST docstrings in _ifcb, rename split to test_prop, fix PACC norm check

---
 quapy/data/_ifcb.py         | 26 ++++++++++----------------
 quapy/data/datasets.py      |  7 ++++---
 quapy/method/aggregative.py |  2 +-
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/quapy/data/_ifcb.py b/quapy/data/_ifcb.py
index f862ed0..d5c1bdf 100644
--- a/quapy/data/_ifcb.py
+++ b/quapy/data/_ifcb.py
@@ -1,20 +1,17 @@
 import os
 import pandas as pd
 import math
-
 from quapy.data import LabelledCollection
 from quapy.protocol import AbstractProtocol
 from pathlib import Path
 
 
 def get_sample_list(path_dir):
-    """Gets a sample list finding the csv files in a directory
+    """
+    Gets the list of samples by finding the csv files in a directory
 
-    Args:
-        path_dir (_type_): directory to look for samples
-
-    Returns:
-        _type_: list of samples
+    :param path_dir: directory to look for samples
+    :return: list of samples
     """
     samples = []
     for filename in sorted(os.listdir(path_dir)):
@@ -23,18 +20,15 @@ def get_sample_list(path_dir):
     return samples
 
 
-def generate_modelselection_split(samples, split=0.3):
-    """This function generates a train/test split for model selection
+def generate_modelselection_split(samples, test_prop=0.3):
+    """This function generates a train/test partition for model selection
     without the use of random numbers so the split is always the same
 
-    Args:
-        samples (_type_): list of samples
-        split (float, optional): percentage saved for test. Defaults to 0.3.
-
-    Returns:
-        _type_: list of samples to use as train and list of samples to use as test
+    :param samples: list of samples
+    :param test_prop: float, fraction of the samples held out for test (e.g., 0.3 means 30%). Defaults to 0.3.
+    :return: list of samples to use as train and list of samples to use as test
     """
-    num_items_to_pick = math.ceil(len(samples) * split)
+    num_items_to_pick = math.ceil(len(samples) * test_prop)
     step_size = math.floor(len(samples) / num_items_to_pick)
     test_indices = [i * step_size for i in range(num_items_to_pick)]
     test = [samples[i] for i in test_indices]
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 5b9806f..bcbdb0e 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -735,14 +735,15 @@ def fetch_lequa2022(task, data_home=None):
     return train, val_gen, test_gen
 
 
+
 def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None):
     """
     Loads the IFCB dataset for quantification from `Zenodo <https://zenodo.org/records/10036244>`_ (for more
     information on this dataset, please follow the zenodo link).
     This dataset is based on the data available publicly at
     `WHOI-Plankton repo <https://github.com/hsosik/WHOI-Plankton>`_.
-    The scripts for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.
-    Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms.
+    The dataset already comes with processed features.
+    The scripts used for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.
 
     The datasets are downloaded only once, and stored for fast reuse.
 
@@ -798,7 +799,7 @@ def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=No
     if for_model_selection:
         # In this case, return 70% of training data as the training set and 30% as the test set
         samples = get_sample_list(train_samples_path)
-        train, test = generate_modelselection_split(samples, split=0.3)
+        train, test = generate_modelselection_split(samples, test_prop=0.3)
         train_gen = IFCBTrainSamplesFromDir(path_dir=train_samples_path, classes=classes, samples=train)
 
         # Test prevalence is computed from class labels
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 46e56d7..2f3fab5 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier):
             raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
         if self.method not in ACC.METHODS:
             raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
-        if self.clipping not in ACC.NORMALIZATIONS:
+        if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}")
 
     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):