From 7705c92c8c2edf18cca4bfdff23b3d0ab042ee1f Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 12 Feb 2024 12:39:18 +0100 Subject: [PATCH] fixing ifcb and documenting --- CHANGE_LOG.txt | 2 +- quapy/data/_ifcb.py | 4 +++- quapy/data/datasets.py | 11 +---------- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt index 03405ce..5bf2643 100644 --- a/CHANGE_LOG.txt +++ b/CHANGE_LOG.txt @@ -17,7 +17,7 @@ Change Log 0.1.8 As a result, a method with a param grid of 10 combinations for the classifier and 10 combinations for the quantifier, now implies 10 trainings of the classifier + 10*10 trainings of the aggregation function (this is typically much faster than the classifier training), whereas in versions <0.1.8 this amounted to training - 10*10 classifiers+aggregations. + 10*10 (classifiers+aggregations). - Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8 these quantifiers try to solve the system of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact solution diff --git a/quapy/data/_ifcb.py b/quapy/data/_ifcb.py index 96af189..f862ed0 100644 --- a/quapy/data/_ifcb.py +++ b/quapy/data/_ifcb.py @@ -1,6 +1,8 @@ import os import pandas as pd import math + +from quapy.data import LabelledCollection from quapy.protocol import AbstractProtocol from pathlib import Path @@ -57,7 +59,7 @@ class IFCBTrainSamplesFromDir(AbstractProtocol): # all columns but the first where we get the class X = s.iloc[:, 1:].to_numpy() y = s.iloc[:, 0].to_numpy() - yield X, y + yield LabelledCollection(X, y, classes=self.classes) def total(self): """ diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 3d426f2..5b9806f 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -810,16 +810,7 @@ def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=No # In the case the user wants it, join all the train samples in one LabelledCollection if single_sample_train: - X, y = [], [] - for X_, y_ in train_gen(): - X.append(X_) - y.append(y_) - - X = np.vstack(X) - y = np.concatenate(y) - train = LabelledCollection(X, y, classes = classes) - + train = LabelledCollection.join(*[lc for lc in train_gen()]) return train, test_gen - else: return train_gen, test_gen