QuaPy/quapy/data/_ifcb.py

import os
import pandas as pd
from quapy.protocol import AbstractProtocol

class IFCBTrainSamplesFromDir(AbstractProtocol):
    """
    Protocol that iterates over the IFCB training samples stored in a directory.
    Every file in the directory whose name ends with '.csv' is treated as one sample.

    :param path_dir: path to the directory that contains the training samples
        (one CSV file per sample)
    :param classes: list of classes; it is only stored in `self.classes` and is not
        used anywhere else in this class
    """

    def __init__(self, path_dir: str, classes: list):
        self.path_dir = path_dir
        self.classes = classes
        # os.listdir returns the entries in arbitrary order; sorting the file names
        # makes the order in which the samples are generated reproducible
        self.samples = sorted(
            filename for filename in os.listdir(path_dir) if filename.endswith('.csv')
        )

    def __call__(self):
        """
        Reads the samples from disk one at a time, following the sorted order of
        their file names.

        :return: yields one tuple `(X, y)` per sample, where `y` is the first column
            of the CSV file (where we get the class) and `X` contains all the
            remaining columns, both as numpy arrays
        """
        for sample in self.samples:
            s = pd.read_csv(os.path.join(self.path_dir, sample))
            # all columns but the first, where we get the class
            X = s.iloc[:, 1:].to_numpy()
            y = s.iloc[:, 0].to_numpy()
            yield X, y

    def total(self):
        """
        Returns the total number of samples that the protocol generates.

        :return: The number of training samples to generate.
        """
        return len(self.samples)


class IFCBTestSamples(AbstractProtocol):
    """
    Protocol that iterates over the IFCB test samples listed in a CSV file of
    prevalences. Each row of that file refers to one sample: the column 'sample'
    holds the name of the sample file (without the '.csv' extension) and the
    values after the first position of the row are read as the prevalences.

    :param path_dir: path to the directory that contains the test samples
    :param test_prevalences_path: path of the CSV file with the prevalences,
        relative to `path_dir`
    """

    def __init__(self, path_dir: str, test_prevalences_path: str):
        self.path_dir = path_dir
        prevalences_file = os.path.join(path_dir, test_prevalences_path)
        self.test_prevalences = pd.read_csv(prevalences_file)

    def __call__(self):
        """
        Reads the test samples from disk one at a time, in the order in which they
        appear in the prevalences file.

        :return: yields one tuple `(X, prevalences)` per sample, where `X` is the
            whole content of the sample file as a numpy array and `prevalences` is
            a numpy array of floats
        """
        for _, row in self.test_prevalences.iterrows():
            # the file of the sample is named after the value in the 'sample' column
            sample_file = os.path.join(self.path_dir, row['sample'] + '.csv')
            X = pd.read_csv(sample_file).to_numpy()
            # everything after the first position of the row is a prevalence value
            prevalences = row.iloc[1:].to_numpy().astype(float)
            yield X, prevalences

    def total(self):
        """
        Returns the total number of samples that the protocol generates.

        :return: The number of test samples to generate, i.e., the number of rows
            in the prevalences file.
        """
        return self.test_prevalences.shape[0]
added dataset IFCB plankton 2023-11-08 11:07:47 +01:00			`import os`
			`import pandas as pd`
			`from quapy.protocol import AbstractProtocol`

			`class IFCBTrainSamplesFromDir(AbstractProtocol):`

			`def __init__(self, path_dir:str, classes: list):`
			`self.path_dir = path_dir`
			`self.classes = classes`
			`self.samples = []`
			`for filename in os.listdir(path_dir):`
			`if filename.endswith('.csv'):`
			`self.samples.append(filename)`

			`def __call__(self):`
			`for sample in self.samples:`
			`s = pd.read_csv(os.path.join(self.path_dir,sample))`
			`# all columns but the first where we get the class`
			`X = s.iloc[:, 1:].to_numpy()`
			`y = s.iloc[:, 0].to_numpy()`
			`yield X, y`

implementing the 'total' function of IFCB protocols 2023-11-08 11:31:33 +01:00			`def total(self):`
			`"""`
			`Returns the total number of samples that the protocol generates.`

			`:return: The number of training samples to generate.`
			`"""`
			`return len(self.samples)`


added dataset IFCB plankton 2023-11-08 11:07:47 +01:00			`class IFCBTestSamples(AbstractProtocol):`

			`def __init__(self, path_dir:str, test_prevalences_path: str):`
			`self.path_dir = path_dir`
			`self.test_prevalences = pd.read_csv(os.path.join(path_dir, test_prevalences_path))`

			`def __call__(self):`
			`for _, test_sample in self.test_prevalences.iterrows():`
			`#Load the sample from disk`
			`X = pd.read_csv(os.path.join(self.path_dir,test_sample['sample']+'.csv')).to_numpy()`
			`prevalences = test_sample.iloc[1:].to_numpy().astype(float)`
implementing the 'total' function of IFCB protocols 2023-11-08 11:31:33 +01:00			`yield X, prevalences`

			`def total(self):`
			`"""`
			`Returns the total number of samples that the protocol generates.`

			`:return: The number of test samples to generate.`
			`"""`
			`return len(self.test_prevalences.index)`