diff --git a/Retrieval/commons.py b/Retrieval/commons.py
index b2007ea..289993c 100644
--- a/Retrieval/commons.py
+++ b/Retrieval/commons.py
@@ -6,6 +6,19 @@ from os.path import join
 import quapy.functional as F
 
 
+Ks = [50, 100, 500, 1000]  # values of k shared by the experiment and plotting scripts
+
+CLASS_NAMES = ['continent', 'gender', 'years_category']  # not listed here: 'relative_pageviews_category', 'num_sitelinks_category'
+
+DATA_SIZES = ['10K', '50K', '100K', '500K', '1M', 'FULL']  # size labels; also used as a path component under the data home
+
+protected_group = {  # class name -> one of its label values; presumably the protected group -- TODO confirm against usage
+    'gender': 'Female',
+    'continent': 'Africa',
+    'years_category': 'Pre-1900s',
+}
+
+
 def load_sample(path, class_name):
     """
     Loads a sample json as a dataframe and returns text and labels for
@@ -48,7 +61,9 @@ class RetrievedSamples:
         self.positive_class = positive_class
         self.classes = classes
 
-    def get_text_label_score(self, df):
+    def get_text_label_score(self, df, filter_rank=1000):
+        df = df[df['rank']<filter_rank]
+
         class_name = self.class_name
         vectorizer = self.vectorizer
         filter_classes = self.classes
diff --git a/Retrieval/experiments.py b/Retrieval/experiments.py
index be6a75e..a0ee37f 100644
--- a/Retrieval/experiments.py
+++ b/Retrieval/experiments.py
@@ -1,25 +1,14 @@
-import os.path
-import pickle
-from collections import defaultdict
-from pathlib import Path
-
-import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import confusion_matrix
 from sklearn.model_selection import GridSearchCV, cross_val_predict
 from sklearn.base import clone
-from sklearn.svm import LinearSVC
-from scipy.special import rel_entr as KLD
 
 import quapy as qp
-import quapy.functional as F
-from Retrieval.commons import RetrievedSamples, load_sample, binarize_labels
-from Retrieval.methods import M3rND_ModelB, M3rND_ModelD, AbstractM3rND
+from Retrieval.commons import *
+from Retrieval.methods import *
 from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
 from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
 from quapy.data.base import LabelledCollection
-from scipy.sparse import vstack
 
 from os.path import join
 from tqdm import tqdm
@@ -62,7 +51,8 @@ def methods(classifier, class_name=None, binarize=False):
         'years_category':0.03
     }
 
-    yield ('Naive', Naive())
+    # yield ('Naive', Naive())
+    # yield ('NaiveHalf', Naive())
     yield ('NaiveQuery', Naive())
     yield ('CC', ClassifyAndCount(classifier))
     # yield ('PCC', PCC(classifier))
@@ -159,10 +149,14 @@ def run_experiment():
 
         train_col = LabelledCollection(Xtr, ytr, classes=classifier.classes_)
 
-        if method_name not in ['Naive', 'NaiveQuery', 'M3b', 'M3b+', 'M3d', 'M3d+']:
+        if not method_name.startswith('Naive') and not method_name.startswith('M3'):
             method.fit(train_col, val_split=train_col, fit_classifier=False)
         elif method_name == 'Naive':
             method.fit(train_col)
+        elif method_name == 'NaiveHalf':
+            n = len(ytr)//2
+            train_col = LabelledCollection(Xtr[:n], ytr[:n], classes=classifier.classes_)
+            method.fit(train_col)
 
         test_col = LabelledCollection(Xte, yte, classes=classifier.classes_)
         rKL_estim, rKL_true = [], []
@@ -231,17 +225,7 @@ def run_experiment():
     return results
 
 
-
-# Ks = [5, 10, 25, 50, 75, 100, 250, 500, 750, 1000]
-Ks = [50, 100, 500, 1000]
-CLASS_NAMES = ['years_category', 'continent', 'gender'] # ['relative_pageviews_category', 'num_sitelinks_category']:
-DATA_SIZES = ['10K', '50K', '100K', '500K', '1M', 'FULL']
 data_home = 'data'
-protected_group = {
-    'gender': 'Female',
-    'continent': 'Africa',
-    'years_category': 'Pre-1900s',
-}
 
 if __name__ == '__main__':
 
@@ -249,7 +233,7 @@ if __name__ == '__main__':
     # the corresponding rND (for binary) or rKL (for multiclass) score
     tables_RND, tables_DKL = [], []
     tables_final = []
-    for class_mode in ['binary', 'multiclass']:
+    for class_mode in ['multiclass', 'binary']:
         BINARIZE = (class_mode=='binary')
         method_names = [name for name, *other in methods(None, binarize=BINARIZE)]
 
diff --git a/Retrieval/plot_mrae_xaxis_k.py b/Retrieval/plot_mrae_xaxis_k.py
index 95e134c..7b6007d 100644
--- a/Retrieval/plot_mrae_xaxis_k.py
+++ b/Retrieval/plot_mrae_xaxis_k.py
@@ -1,25 +1,9 @@
 import os.path
 import pickle
-from collections import defaultdict
-from pathlib import Path
-
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV
-from sklearn.svm import LinearSVC
-
-import quapy as qp
-from Retrieval.commons import RetrievedSamples, load_sample
-from Retrieval.experiments import methods, benchmark_name
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
-from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
-from quapy.data.base import LabelledCollection
-
+from Retrieval.experiments import methods
+from Retrieval.commons import CLASS_NAMES, Ks, DATA_SIZES
 from os.path import join
-from tqdm import tqdm
-
-from result_table.src.table import Table
 import matplotlib.pyplot as plt
 
 
@@ -29,10 +13,6 @@ class_mode = 'multiclass'
 
 method_names = [name for name, *other in methods(None, 'continent')]
 
-# Ks = [5, 10, 25, 50, 75, 100, 250, 500, 750, 1000]
-Ks = [50, 100, 500, 1000]
-DATA_SIZE = ['10K', '50K', '100K', '500K', '1M', 'FULL']
-CLASS_NAME = ['gender', 'continent', 'years_category']
 all_results = {}
 
 
@@ -40,11 +20,11 @@ all_results = {}
 # class_name -> data_size -> method_name -> k -> stat -> float
 # where stat is "mean", "std", "max"
 def load_all_results():
-    for class_name in CLASS_NAME:
+    for class_name in CLASS_NAMES:
 
         all_results[class_name] = {}
 
-        for data_size in DATA_SIZE:
+        for data_size in DATA_SIZES:
 
             all_results[class_name][data_size] = {}
 
@@ -75,8 +55,8 @@ results = load_all_results()
 # generates the class-independent, size-independent plots for y-axis=MRAE in which:
 # - the x-axis displays the Ks
 
-for class_name in CLASS_NAME:
-    for data_size in DATA_SIZE:
+for class_name in CLASS_NAMES:
+    for data_size in DATA_SIZES:
 
         log = True
 
diff --git a/Retrieval/plot_mrae_xaxis_size.py b/Retrieval/plot_mrae_xaxis_size.py
index fca7710..55797cf 100644
--- a/Retrieval/plot_mrae_xaxis_size.py
+++ b/Retrieval/plot_mrae_xaxis_size.py
@@ -1,39 +1,15 @@
 import os.path
-import pickle
-from collections import defaultdict
-from pathlib import Path
-
-import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV
-from sklearn.svm import LinearSVC
-
-import quapy as qp
-from Retrieval.commons import RetrievedSamples, load_sample
-from Retrieval.experiments import methods, benchmark_name
-from Retrieval.plot_mrae_xaxis_k import load_all_results
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
-from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
-from quapy.data.base import LabelledCollection
-
-from os.path import join
-from tqdm import tqdm
-
-from result_table.src.table import Table
+from Retrieval.experiments import methods
+from Retrieval.commons import CLASS_NAMES, Ks, DATA_SIZES
 import matplotlib.pyplot as plt
 
-
+from Retrieval.plot_mrae_xaxis_k import load_all_results
 
 data_home = 'data'
 class_mode = 'multiclass'
 
 method_names = [name for name, *other in methods(None)]
 
-# Ks = [5, 10, 25, 50, 75, 100, 250, 500, 750, 1000]
-Ks = [50, 100, 500, 1000]
-DATA_SIZE = ['10K', '50K', '100K', '500K', '1M', 'FULL']
-CLASS_NAME = ['gender', 'continent', 'years_category']
 all_results = {}
 
 
@@ -44,7 +20,7 @@ results = load_all_results()
 # generates the class-independent, size-independent plots for y-axis=MRAE in which:
 # - the x-axis displays the Ks
 
-for class_name in CLASS_NAME:
+for class_name in CLASS_NAMES:
     for k in Ks:
 
         log = True
@@ -55,10 +31,10 @@ for class_name in CLASS_NAME:
         for method_name in method_names:
             # class_name -> data_size -> method_name -> k -> stat -> float
             means = [
-                results[class_name][data_size][method_name][k]['mean'] for data_size in DATA_SIZE
+                results[class_name][data_size][method_name][k]['mean'] for data_size in DATA_SIZES
             ]
             stds = [
-                results[class_name][data_size][method_name][k]['std'] for data_size in DATA_SIZE
+                results[class_name][data_size][method_name][k]['std'] for data_size in DATA_SIZES
             ]
             # max_mean = np.max([
             #         results[class_name][data_size][method_name][k]['max'] for data_size in DATA_SIZE
@@ -67,7 +43,7 @@ for class_name in CLASS_NAME:
             max_means.append(max(means))
 
             style = 'o-' if method_name != 'CC' else '--'
-            line = ax.plot(DATA_SIZE, means, style, label=method_name, color=None)
+            line = ax.plot(DATA_SIZES, means, style, label=method_name, color=None)
             color = line[-1].get_color()
             if log:
                 ax.set_yscale('log')
diff --git a/Retrieval/relscore_distribution.py b/Retrieval/relscore_distribution.py
index 1db4b38..aac52d5 100644
--- a/Retrieval/relscore_distribution.py
+++ b/Retrieval/relscore_distribution.py
@@ -1,29 +1,9 @@
 import os.path
 import pickle
-from collections import defaultdict
 from itertools import zip_longest
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV
-from sklearn.svm import LinearSVC
-
-import quapy as qp
-import quapy.functional as F
-from Retrieval.commons import RetrievedSamples, load_sample
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
-from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
-from quapy.protocol import AbstractProtocol
-from quapy.data.base import LabelledCollection
-
-from glob import glob
+from Retrieval.commons import RetrievedSamples, load_sample, DATA_SIZES
 from os.path import join
 from tqdm import tqdm
-
-from result_table.src.table import Table
 import numpy as np
 import matplotlib.pyplot as plt
 
@@ -35,12 +15,11 @@ Plots the distribution of (predicted) relevance score for the test samples and f
 
 
 data_home = 'data'
-Ks = [5, 10, 25, 50, 75, 100, 250, 500, 750, 1000]
 
 for class_name in ['num_sitelinks_category', 'relative_pageviews_category', 'years_category', 'continent', 'gender']:
     test_added = False
     Mtrs, Mtes, source = [], [], []
-    for data_size in ['10K', '50K', '100K', '500K', '1M', 'FULL']:
+    for data_size in DATA_SIZES:
 
         class_home = join(data_home, class_name, data_size)
         classifier_path = join('classifiers', 'FULL', f'classifier_{class_name}.pkl')