From 7705c92c8c2edf18cca4bfdff23b3d0ab042ee1f Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 12 Feb 2024 12:39:18 +0100
Subject: [PATCH] Fix IFCB train protocol to yield LabelledCollection; tidy change log

---
 CHANGE_LOG.txt         |  2 +-
 quapy/data/_ifcb.py    |  4 +++-
 quapy/data/datasets.py | 11 +----------
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index 03405ce..5bf2643 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -17,7 +17,7 @@ Change Log 0.1.8
     As a result, a method with a param grid of 10 combinations for the classifier and 10 combinations for the
     quantifier, now implies 10 trainings of the classifier + 10*10 trainings of the aggregation function (this is
     typically much faster than the classifier training), whereas in versions <0.1.8 this amounted to training
-    10*10 classifiers+aggregations.       
+    10*10 (classifiers+aggregations).
 
 - Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8 these quantifiers try to solve the system
     of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact solution
diff --git a/quapy/data/_ifcb.py b/quapy/data/_ifcb.py
index 96af189..f862ed0 100644
--- a/quapy/data/_ifcb.py
+++ b/quapy/data/_ifcb.py
@@ -1,6 +1,8 @@
 import os
 import pandas as pd
 import math
+
+from quapy.data import LabelledCollection
 from quapy.protocol import AbstractProtocol
 from pathlib import Path
 
@@ -57,7 +59,7 @@ class IFCBTrainSamplesFromDir(AbstractProtocol):
             # all columns but the first where we get the class
             X = s.iloc[:, 1:].to_numpy()
             y = s.iloc[:, 0].to_numpy()
-            yield X, y
+            yield LabelledCollection(X, y, classes=self.classes)
 
     def total(self):
         """
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 3d426f2..5b9806f 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -810,16 +810,7 @@ def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=No
 
     # In the case the user wants it, join all the train samples in one LabelledCollection
     if single_sample_train:
-        X, y = [], []
-        for X_, y_ in train_gen():
-            X.append(X_)
-            y.append(y_)   
-
-        X = np.vstack(X)
-        y = np.concatenate(y)
-        train = LabelledCollection(X, y, classes = classes)
-        
+        train = LabelledCollection.join(*[lc for lc in train_gen()])
         return train, test_gen
-
     else:
         return train_gen, test_gen