From a95511b4d905e5f9d1ce71764cce612a7bf047d6 Mon Sep 17 00:00:00 2001
From: andrea
Date: Mon, 16 Dec 2019 20:46:09 +0100
Subject: [PATCH] sketched results reader - removed first tier learners
 optimization

---
 src/FPEC_andrea.py             | 3 +--
 src/data/embeddings.py         | 6 +++---
 src/learning/learners.py       | 2 +-
 src/results/results_manager.py | 7 +++++++
 src/util/decompositions.py     | 2 +-
 src/util/util.py               | 2 ++
 6 files changed, 15 insertions(+), 7 deletions(-)
 create mode 100644 src/results/results_manager.py

diff --git a/src/FPEC_andrea.py b/src/FPEC_andrea.py
index 0ed414e..09514de 100644
--- a/src/FPEC_andrea.py
+++ b/src/FPEC_andrea.py
@@ -85,7 +85,6 @@ if __name__ == '__main__':
     lXtr, lytr = data.training()
     lXte, lyte = data.test()

-
     if op.set_c != -1:
         meta_parameters = None
     else:
@@ -152,7 +151,7 @@ if __name__ == '__main__':
         config=config,
         first_tier_learner=get_learner(calibrate=True),
         meta_learner=get_learner(calibrate=False, kernel='rbf'),
-        first_tier_parameters=get_params(dense=False),
+        first_tier_parameters=None,  # get_params(dense=False) --> first_tier should not be optimized
         meta_parameters=get_params(dense=True),
         n_jobs=op.n_jobs)

diff --git a/src/data/embeddings.py b/src/data/embeddings.py
index 91cb9ee..1e5da1e 100644
--- a/src/data/embeddings.py
+++ b/src/data/embeddings.py
@@ -220,6 +220,7 @@ class StorageEmbeddings:
             optimal_n = get_optimal_dim(self.lang_U, 'U')
             self.lang_U = run_pca(optimal_n, self.lang_U)
         elif max_label_space < nC:
+            print(f'Applying PCA to unsupervised matrix U')
             self.lang_U = run_pca(max_label_space, self.lang_U)
         return

@@ -258,7 +259,8 @@ class StorageEmbeddings:
             print(f'Applying PCA(n_components={i}')
             for lang in languages:
                 self.lang_S[lang] = stacked_pca.transform(self.lang_S[lang])
-        elif max_label_space < nC:
+        elif max_label_space <= nC:
+            print(f'Computing PCA on Supervised Matrix PCA(n_components:{max_label_space})')
             self.lang_S = run_pca(max_label_space, self.lang_S)
         return

@@ -276,7 +278,6 @@ class StorageEmbeddings:
             self._add_emebeddings_supervised(docs, labels, config['reduction'], config['max_label_space'], vocs)
         return self

-
     def predict(self, config, docs):
         if config['supervised'] and config['unsupervised']:
             return self._concatenate_embeddings(docs)
@@ -289,4 +290,3 @@ class StorageEmbeddings:
         for lang in docs.keys():
             _r[lang] = docs[lang].dot(self.lang_U[lang])
         return _r
-
diff --git a/src/learning/learners.py b/src/learning/learners.py
index 5d3f7fa..89420bb 100644
--- a/src/learning/learners.py
+++ b/src/learning/learners.py
@@ -549,4 +549,4 @@ class PolylingualEmbeddingsClassifier:
         return _joblib_transform_multiling(self.model.predict_proba, lWEte, n_jobs=self.n_jobs)

     def best_params(self):
-        return self.model.best_params()
\ No newline at end of file
+        return self.model.best_params()
diff --git a/src/results/results_manager.py b/src/results/results_manager.py
new file mode 100644
index 0000000..af074af
--- /dev/null
+++ b/src/results/results_manager.py
@@ -0,0 +1,7 @@
+import pandas as pd
+import numpy as np
+
+df = pd.read_csv("/home/andreapdr/funneling_pdr/src/results/results.csv", delimiter='\t')
+pivot = pd.pivot_table(df, values=['time', 'macrof1', 'microf1', 'macrok', 'microk'], index=['embed'], aggfunc=[np.mean, np.std])
+print(pivot)
+print('Finished ...')
\ No newline at end of file
diff --git a/src/util/decompositions.py b/src/util/decompositions.py
index 7b50ffc..9d14a0c 100644
--- a/src/util/decompositions.py
+++ b/src/util/decompositions.py
@@ -47,4 +47,4 @@ def get_optimal_dim(X, embed_type):
     plt.axvline(best_n, color='r', label='optimal N')
     plt.legend()
     plt.show()
-    return best_n
\ No newline at end of file
+    return best_n
diff --git a/src/util/util.py b/src/util/util.py
index e69de29..1d7b000 100644
--- a/src/util/util.py
+++ b/src/util/util.py
@@ -0,0 +1,2 @@
+def fill_missing_classes(lXtr, lytr):
+    pass
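
Below is a sketch, not part of the patch itself, of how the results reader introduced in src/results/results_manager.py could be generalized so the results file is passed on the command line instead of being hard-coded; the function name summarize_results and the CLI interface are illustrative assumptions. It relies only on what the patch already uses: the tab-separated results.csv and the columns 'embed', 'time', 'macrof1', 'microf1', 'macrok', 'microk'.

    import argparse

    import numpy as np
    import pandas as pd


    def summarize_results(csv_path):
        # results.csv is tab-separated, one row per evaluated configuration
        df = pd.read_csv(csv_path, delimiter='\t')
        # mean and standard deviation of every metric, grouped by embedding setup
        return pd.pivot_table(df,
                              values=['time', 'macrof1', 'microf1', 'macrok', 'microk'],
                              index=['embed'],
                              aggfunc=[np.mean, np.std])


    if __name__ == '__main__':
        parser = argparse.ArgumentParser(description='Summarize funneling results')
        parser.add_argument('csv_path', help='path to the tab-separated results.csv')
        args = parser.parse_args()
        print(summarize_results(args.csv_path))
        print('Finished ...')

It could then be run as, e.g., python results_manager.py src/results/results.csv, keeping the same pivot output as the sketched reader in the patch.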