TODO: better stratified sampling for GLAMI-1M

2023-03-15 11:48:03 +01:00 · 2023-03-15 11:48:03 +01:00 · f32b9227ae
parent 65407f51fa
commit f32b9227ae
1 changed file with 1 addition and 0 deletions
--- a/dataManager/gFunDataset.py
+++ b/dataManager/gFunDataset.py
@@ -108,6 +108,7 @@ class gFunDataset:
        return dataset, labels, data_langs

    def _load_glami(self, dataset_dir, nrows):
+        # TODO: a better way to get a stratified sampling of the dataset (see: groupby + sample)
        def _balanced_sample(data, n, remainder=0):
            import pandas as pd