TODO: better stratified sampling for GLAMI-1M
This commit is contained in:
parent
65407f51fa
commit
f32b9227ae
|
@ -108,6 +108,7 @@ class gFunDataset:
|
|||
return dataset, labels, data_langs
|
||||
|
||||
def _load_glami(self, dataset_dir, nrows):
|
||||
# TODO: find a better way to draw a stratified sample of the dataset (see: pandas groupby + sample)
|
||||
def _balanced_sample(data, n, remainder=0):
|
||||
import pandas as pd
|
||||
|
||||
|
|
Loading…
Reference in New Issue