diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py
index 6989af1..01509ef 100644
--- a/quapy/data/preprocessing.py
+++ b/quapy/data/preprocessing.py
@@ -1,6 +1,7 @@
 import numpy as np
 from scipy.sparse import spmatrix
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 
 import quapy as qp
@@ -38,10 +39,10 @@ def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kw
     return Dataset(training, test, vectorizer.vocabulary_)
 
 
-def reduce_columns(dataset:Dataset, min_df=5, inplace=False):
+def reduce_columns(dataset: Dataset, min_df=5, inplace=False):
     """
     Reduces the dimensionality of the csr_matrix by removing the columns of words which are not present in at least
-    _min_occurrences_ instances
+    _min_df_ instances
     :param dataset: a Dataset in sparse format (any subtype of scipy.sparse.spmatrix)
     :param min_df: minimum number of instances below which the columns are removed
     :param inplace: whether or not to apply the transformation inplace, or to a new copy
@@ -70,6 +71,29 @@ def reduce_columns(dataset:Dataset, min_df=5, inplace=False):
     return Dataset(training, test)
 
 
+def standardize(dataset: Dataset, inplace=True):
+    """
+    Standardizes the columns of the dataset (z-scoring, i.e., subtracting the mean and dividing by the
+    standard deviation of each column). The scaler is fit on the training instances and then applied to
+    both training and test. Note that standardization is only defined for dense matrices; sklearn's
+    StandardScaler raises an error when asked to center sparse input.
+    :param dataset: a Dataset whose instances are dense arrays
+    :param inplace: if True (default), the dataset's instances are overwritten; otherwise a new Dataset
+        is returned and the original is left untouched
+    :return: the standardized Dataset
+    """
+    s = StandardScaler(copy=not inplace)
+    training = s.fit_transform(dataset.training.instances)
+    test = s.transform(dataset.test.instances)
+    if inplace:
+        # reassign explicitly: fit_transform may return a new array (e.g., after a dtype
+        # conversion) even when copy=False, in which case the dataset would otherwise be
+        # returned unmodified
+        dataset.training.instances = training
+        dataset.test.instances = test
+        return dataset
+    else:
+        return Dataset(training, test, dataset.vocabulary, dataset.name)
+
+
 def index(dataset: Dataset, min_df=5, inplace=False, **kwargs):
     """
     Indexes a dataset of strings. To index a document means to replace each different token by a unique numerical index.