TODO: better stratified sampling for GLAMI-1M

This commit is contained in:
Andrea Pedrotti 2023-03-15 11:48:03 +01:00
parent 65407f51fa
commit f32b9227ae
1 changed file with 1 addition and 0 deletions

View File

@@ -108,6 +108,7 @@ class gFunDataset:
return dataset, labels, data_langs
def _load_glami(self, dataset_dir, nrows):
# TODO: a better way to get a stratified sampling of the dataset (see: groupby + sample)
def _balanced_sample(data, n, remainder=0):
import pandas as pd