diff --git a/src/data/supervised.py b/src/data/supervised.py index 5f97e7f..4ed7f59 100755 --- a/src/data/supervised.py +++ b/src/data/supervised.py @@ -12,6 +12,7 @@ def zscores(x, axis=0): #scipy.stats.zscores does not avoid division by 0, which def supervised_embeddings_tfidf(X,Y): tfidf_norm = X.sum(axis=0) + tfidf_norm[tfidf_norm==0] = 1 F = (X.T).dot(Y) / tfidf_norm.T return F diff --git a/src/util/util.py b/src/util/util.py new file mode 100644 index 0000000..e69de29