copying a modification from devel

2024-02-28 08:49:19 +01:00 · 2024-02-28 08:49:19 +01:00 · 29eaa54d82
parent 3932cf22ce
commit 29eaa54d82
1 changed file with 7 additions and 24 deletions
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -108,8 +108,7 @@ class LabelledCollection:
        """
        Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
        prevalence values are not specified, then returns the index of a uniform sampling.
-        For each class, the sampling is drawn with replacement if the requested prevalence is larger than
-        the actual prevalence of the class, or without replacement otherwise.
+        For each class, the sampling is drawn with replacement.

        :param size: integer, the requested size
        :param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since
@@ -153,7 +152,7 @@ class LabelledCollection:
            for class_, n_requested in n_requests.items():
                n_candidates = len(self.index[class_])
                index_sample = self.index[class_][
-                    np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
+                    np.random.choice(n_candidates, size=n_requested, replace=True)
                ] if n_requested > 0 else []

                indexes_sample.append(index_sample)
@@ -168,8 +167,7 @@ class LabelledCollection:
    def uniform_sampling_index(self, size, random_state=None):
        """
        Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
-        with replacement if the requested size is greater than the number of instances, or without replacement
-        otherwise.
+        with replacement.

        :param size: integer, the size of the uniform sample
        :param random_state: if specified, guarantees reproducibility of the split.
@@ -179,13 +177,12 @@ class LabelledCollection:
            ng = RandomState(seed=random_state)
        else:
            ng = np.random
-        return ng.choice(len(self), size, replace=size > len(self))
+        return ng.choice(len(self), size, replace=True)

    def sampling(self, size, *prevs, shuffle=True, random_state=None):
        """
        Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
-        values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
-        the actual prevalence of the class, or with replacement otherwise.
+        values. For each class, the sampling is drawn with replacement.

        :param size: integer, the requested size
        :param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since
@@ -202,8 +199,7 @@ class LabelledCollection:
    def uniform_sampling(self, size, random_state=None):
        """
        Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn
-        with replacement if the requested size is greater than the number of instances, or without replacement
-        otherwise.
+        with replacement.

        :param size: integer, the requested size
        :param random_state: if specified, guarantees reproducibility of the split.
@@ -236,24 +232,11 @@ class LabelledCollection:
        :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the
            second one with `1-train_prop` elements
        """
-        instances = self.instances
-        labels = self.labels
-        remainder = None
-        for idx in np.argwhere(self.counts()==1):
-            class_with_1 = self.classes_[idx.item()]
-            if remainder is None:
-                remainder = LabelledCollection(instances[labels==class_with_1], [class_with_1], classes=self.classes_)
-            else:
-                remainder += LabelledCollection(instances[labels==class_with_1], [class_with_1], classes=self.classes_)
-            instances = instances[labels!=class_with_1]
-            labels = labels[labels!=class_with_1]
        tr_docs, te_docs, tr_labels, te_labels = train_test_split(
-            instances, labels, train_size=train_prop, stratify=labels, random_state=random_state
+            self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state
        )
        training = LabelledCollection(tr_docs, tr_labels, classes=self.classes_)
        test = LabelledCollection(te_docs, te_labels, classes=self.classes_)
-        if remainder is not None:
-            training += remainder
        return training, test

    def split_random(self, train_prop=0.6, random_state=None):