diff --git a/LeQua2022/TODO.txt b/LeQua2022/TODO.txt
index 61c56cd..e51cf0d 100644
--- a/LeQua2022/TODO.txt
+++ b/LeQua2022/TODO.txt
@@ -4,4 +4,5 @@
 4. model selection
 5. plots
 6. estoy leyendo los samples en orden, y no hace falta. Sería mejor una función genérica que lee todos los ejemplos y
-    que de todos modos genera un output con el mismo nombre del file
\ No newline at end of file
+    que de todos modos genera un output con el mismo nombre del file
+7. Make the ResultSubmission class abstract and create 4 concrete subclasses (one per task: T1A, T1B, T2A, T2B), thus forcing the task_name field to be set correctly
\ No newline at end of file
diff --git a/LeQua2022/data.py b/LeQua2022/data.py
index f4be5a6..9a133c4 100644
--- a/LeQua2022/data.py
+++ b/LeQua2022/data.py
@@ -11,17 +11,71 @@ import sklearn
 #     return documents, labels
 
 
-def load_multiclass_raw_document(path):
-    return qp.data.from_text(path, verbose=0, class2int=False)
+# def load_multiclass_raw_document(path):
+#     return qp.data.from_text(path, verbose=0, class2int=False)
 
 
 def load_binary_vectors(path, nF=None):
     return sklearn.datasets.load_svmlight_file(path, n_features=nF)
 
 
-if __name__ == '__main__':
-    X, y = load_binary_vectors('./data/T1A/public/training_vectors.txt')
-    print(X.shape)
-    print(y)
+def gen_load_samples_T1A(path_dir:str, ground_truth_path:str = None):
+    # TODO: not implemented yet; presumably meant to loop over the T1A samples and yield them one at a time
+    pass
+
+
+def gen_load_samples_T1B(path_dir:str, ground_truth_path:str = None):
+    # TODO: not implemented yet; presumably meant to loop over the T1B samples and yield them one at a time
+    pass
+
+
+def gen_load_samples_T2A(path_dir:str, ground_truth_path:str = None):
+    # TODO: not implemented yet; presumably meant to loop over the T2A samples and yield them one at a time
+    pass
+
+
+def gen_load_samples_T2B(path_dir:str, ground_truth_path:str = None):
+    # TODO: not implemented yet; presumably meant to loop over the T2B samples and yield them one at a time
+    pass
+
+
+class ResultSubmission:
+    def __init__(self, team_name, run_name, task_name):
+        assert isinstance(team_name, str) and team_name, \
+            f'invalid value encountered for team_name'
+        assert isinstance(run_name, str) and run_name, \
+            f'invalid value encountered for run_name'
+        assert isinstance(task_name, str) and task_name in {'T1A', 'T1B', 'T2A', 'T2B'}, \
+            f'invalid value encountered for task_name; valid values are T1A, T1B, T2A, and T2B'
+        self.team_name = team_name
+        self.run_name = run_name
+        self.task_name = task_name
+        self.data = {}
+
+    def add(self, sample_name:str, prevalence_values:np.ndarray):
+        # TODO: assert that sample_name is valid (i.e., it has not been added before)
+        pass
+
+    def __len__(self):
+        return len(self.data)
+
+    @classmethod
+    def load(cls, path:str)-> 'ResultSubmission':
+        pass
+
+    def dump(self, path:str):
+        # assert all samples are covered (check for test and dev accordingly)
+        pass
+
+    def get(self, sample_name:str):
+        pass
+
+
+def evaluate_submission(ground_truth_prevs: ResultSubmission, submission_prevs: ResultSubmission):
+
+    pass
+
+
+
 
 
diff --git a/LeQua2022/main_binary_vector.py b/LeQua2022/main_binary_vector.py
index 5a60520..fab1bc2 100644
--- a/LeQua2022/main_binary_vector.py
+++ b/LeQua2022/main_binary_vector.py
@@ -1,7 +1,6 @@
 import pickle
 
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.linear_model import LogisticRegression
 from tqdm import tqdm
 import pandas as pd