copying a modification from devel
This commit is contained in:
parent
3932cf22ce
commit
29eaa54d82
|
@ -108,8 +108,7 @@ class LabelledCollection:
|
|||
"""
|
||||
Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
|
||||
prevalence values are not specified, then returns the index of a uniform sampling.
|
||||
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
|
||||
the actual prevalence of the class, or without replacement otherwise.
|
||||
For each class, the sampling is drawn with replacement.
|
||||
|
||||
:param size: integer, the requested size
|
||||
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
|
||||
|
@ -153,7 +152,7 @@ class LabelledCollection:
|
|||
for class_, n_requested in n_requests.items():
|
||||
n_candidates = len(self.index[class_])
|
||||
index_sample = self.index[class_][
|
||||
np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
|
||||
np.random.choice(n_candidates, size=n_requested, replace=True)
|
||||
] if n_requested > 0 else []
|
||||
|
||||
indexes_sample.append(index_sample)
|
||||
|
@ -168,8 +167,7 @@ class LabelledCollection:
|
|||
def uniform_sampling_index(self, size, random_state=None):
|
||||
"""
|
||||
Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
|
||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||||
otherwise.
|
||||
with replacement.
|
||||
|
||||
:param size: integer, the size of the uniform sample
|
||||
:param random_state: if specified, guarantees reproducibility of the split.
|
||||
|
@ -179,13 +177,12 @@ class LabelledCollection:
|
|||
ng = RandomState(seed=random_state)
|
||||
else:
|
||||
ng = np.random
|
||||
return ng.choice(len(self), size, replace=size > len(self))
|
||||
return ng.choice(len(self), size, replace=True)
|
||||
|
||||
def sampling(self, size, *prevs, shuffle=True, random_state=None):
|
||||
"""
|
||||
Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
|
||||
values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
|
||||
the actual prevalence of the class, or with replacement otherwise.
|
||||
values. For each class, the sampling is drawn with replacement.
|
||||
|
||||
:param size: integer, the requested size
|
||||
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
|
||||
|
@ -202,8 +199,7 @@ class LabelledCollection:
|
|||
def uniform_sampling(self, size, random_state=None):
|
||||
"""
|
||||
Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn
|
||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||||
otherwise.
|
||||
with replacement.
|
||||
|
||||
:param size: integer, the requested size
|
||||
:param random_state: if specified, guarantees reproducibility of the split.
|
||||
|
@ -236,24 +232,11 @@ class LabelledCollection:
|
|||
:return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the
|
||||
second one with `1-train_prop` elements
|
||||
"""
|
||||
instances = self.instances
|
||||
labels = self.labels
|
||||
remainder = None
|
||||
for idx in np.argwhere(self.counts()==1):
|
||||
class_with_1 = self.classes_[idx.item()]
|
||||
if remainder is None:
|
||||
remainder = LabelledCollection(instances[labels==class_with_1], [class_with_1], classes=self.classes_)
|
||||
else:
|
||||
remainder += LabelledCollection(instances[labels==class_with_1], [class_with_1], classes=self.classes_)
|
||||
instances = instances[labels!=class_with_1]
|
||||
labels = labels[labels!=class_with_1]
|
||||
tr_docs, te_docs, tr_labels, te_labels = train_test_split(
|
||||
instances, labels, train_size=train_prop, stratify=labels, random_state=random_state
|
||||
self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state
|
||||
)
|
||||
training = LabelledCollection(tr_docs, tr_labels, classes=self.classes_)
|
||||
test = LabelledCollection(te_docs, te_labels, classes=self.classes_)
|
||||
if remainder is not None:
|
||||
training += remainder
|
||||
return training, test
|
||||
|
||||
def split_random(self, train_prop=0.6, random_state=None):
|
||||
|
|
Loading…
Reference in New Issue