sketched results reader - removed first tier learners optimization

andrea 2019-12-16 20:46:09 +01:00
parent 174aa8ca05
commit a95511b4d9
6 changed files with 15 additions and 7 deletions

View File

@@ -85,7 +85,6 @@ if __name__ == '__main__':
     lXtr, lytr = data.training()
     lXte, lyte = data.test()
-
 
     if op.set_c != -1:
        meta_parameters = None
    else:
@@ -152,7 +151,7 @@ if __name__ == '__main__':
         config=config,
         first_tier_learner=get_learner(calibrate=True),
         meta_learner=get_learner(calibrate=False, kernel='rbf'),
-        first_tier_parameters=get_params(dense=False),
+        first_tier_parameters=None,  # get_params(dense=False) --> first_tier should not be optimized
         meta_parameters=get_params(dense=True),
         n_jobs=op.n_jobs)
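Context for the headline change: with first_tier_parameters=None the first-tier learners are trained with their default hyperparameters and only the meta-learner is grid-searched. A minimal sketch of that dispatch, assuming a GridSearchCV-style optimization step (fit_learner and its signature are hypothetical, not code from this repo):

    from sklearn.model_selection import GridSearchCV

    def fit_learner(learner, X, y, parameters=None, n_jobs=1):
        # Hypothetical helper: grid-search only when a parameter grid is given.
        # With parameters=None (as for the first tier after this commit) the
        # learner is fit as-is; only the meta-learner gets optimized.
        if parameters is None:
            return learner.fit(X, y)
        search = GridSearchCV(learner, parameters, cv=5, n_jobs=n_jobs)
        search.fit(X, y)
        return search.best_estimator_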

View File

@@ -220,6 +220,7 @@ class StorageEmbeddings:
             optimal_n = get_optimal_dim(self.lang_U, 'U')
             self.lang_U = run_pca(optimal_n, self.lang_U)
         elif max_label_space < nC:
+            print(f'Applying PCA to unsupervised matrix U')
             self.lang_U = run_pca(max_label_space, self.lang_U)
         return
@@ -258,7 +259,8 @@ class StorageEmbeddings:
             print(f'Applying PCA(n_components={i}')
             for lang in languages:
                 self.lang_S[lang] = stacked_pca.transform(self.lang_S[lang])
-        elif max_label_space < nC:
+        elif max_label_space <= nC:
+            print(f'Computing PCA on Supervised Matrix PCA(n_components:{max_label_space})')
             self.lang_S = run_pca(max_label_space, self.lang_S)
         return
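Both branches above delegate to run_pca, and the comparison loosened from < to <=, so the supervised matrix is now reduced even when max_label_space equals the number of classes nC. A sketch of plausible run_pca behavior over a {language: matrix} dict, assuming sklearn's PCA fitted on the vertically stacked matrices (only the name and call sites appear in the diff; the body is an assumption):

    import numpy as np
    from sklearn.decomposition import PCA

    def run_pca(n_components, lang_matrices):
        # Assumed behavior: fit one PCA on all languages stacked together,
        # then project every language's matrix into the shared reduced space.
        pca = PCA(n_components=n_components)
        pca.fit(np.vstack(list(lang_matrices.values())))
        return {lang: pca.transform(X) for lang, X in lang_matrices.items()}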
@@ -276,7 +278,6 @@ class StorageEmbeddings:
         self._add_emebeddings_supervised(docs, labels, config['reduction'], config['max_label_space'], vocs)
         return self
-
 
     def predict(self, config, docs):
         if config['supervised'] and config['unsupervised']:
             return self._concatenate_embeddings(docs)
@@ -289,4 +290,3 @@ class StorageEmbeddings:
         for lang in docs.keys():
             _r[lang] = docs[lang].dot(self.lang_U[lang])
         return _r
-

View File

@@ -549,4 +549,4 @@ class PolylingualEmbeddingsClassifier:
         return _joblib_transform_multiling(self.model.predict_proba, lWEte, n_jobs=self.n_jobs)
 
     def best_params(self):
-        return self.model.best_params()
\ No newline at end of file
+        return self.model.best_params()
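The call above maps self.model.predict_proba over per-language matrices. A sketch consistent with that call site, assuming _joblib_transform_multiling parallelizes over languages with joblib (the body is an assumption; only the call site is shown in the diff):

    from joblib import Parallel, delayed

    def _joblib_transform_multiling(transformer, lX, n_jobs=-1):
        # Apply transformer (e.g., model.predict_proba) to each language's
        # matrix in parallel and reassemble the {language: result} dict.
        langs = list(lX.keys())
        outputs = Parallel(n_jobs=n_jobs)(delayed(transformer)(lX[lang]) for lang in langs)
        return {lang: out for lang, out in zip(langs, outputs)}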

View File

@@ -0,0 +1,7 @@
+import pandas as pd
+import numpy as np
+
+df = pd.read_csv("/home/andreapdr/funneling_pdr/src/results/results.csv", delimiter='\t')
+pivot = pd.pivot_table(df, values=['time', 'macrof1', 'microf1', 'macrok', 'microk'], index=['embed'], aggfunc=[np.mean, np.std])
+print(pivot)
+print('Finished ...')
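A toy run of the same pivot, useful to see the shape of the report; the rows below are fabricated and only the column names come from the script:

    import pandas as pd
    import numpy as np

    # Two runs per embedding type, mirroring the columns results.csv is read with.
    toy = pd.DataFrame({
        'embed':   ['MUSE', 'MUSE', 'Supervised', 'Supervised'],
        'time':    [120.5, 118.2, 95.0, 97.3],
        'macrof1': [0.51, 0.53, 0.55, 0.54],
        'microf1': [0.60, 0.61, 0.63, 0.62],
        'macrok':  [0.48, 0.50, 0.52, 0.51],
        'microk':  [0.58, 0.59, 0.61, 0.60],
    })
    # Same pivot as the script: per-embedding mean and std of every metric.
    print(pd.pivot_table(toy, values=['time', 'macrof1', 'microf1', 'macrok', 'microk'],
                         index=['embed'], aggfunc=[np.mean, np.std]))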

View File

@@ -47,4 +47,4 @@ def get_optimal_dim(X, embed_type):
     plt.axvline(best_n, color='r', label='optimal N')
     plt.legend()
     plt.show()
-    return best_n
\ No newline at end of file
+    return best_n
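The computation of best_n falls outside this hunk. One common criterion, given purely as an assumption, picks the smallest dimensionality whose cumulative explained variance clears a threshold:

    import numpy as np
    from sklearn.decomposition import PCA

    def best_n_by_variance(X, threshold=0.99):
        # Assumed criterion: smallest n whose components explain >= threshold
        # of the variance; the diff only shows best_n being plotted and returned.
        pca = PCA().fit(X)
        cumulative = np.cumsum(pca.explained_variance_ratio_)
        return int(np.searchsorted(cumulative, threshold) + 1)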

View File

@@ -0,0 +1,2 @@
+def fill_missing_classes(lXtr, lytr):
+    pass
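The helper is only stubbed in this commit. One plausible direction, sketched here as an assumption rather than the author's intent, pads each language's label matrix with zero columns so all languages share the same class set:

    import numpy as np

    def fill_missing_classes(lXtr, lytr):
        # Sketch only: align every language's label matrix to the widest class
        # set seen across languages by appending all-zero columns.
        n_classes = max(y.shape[1] for y in lytr.values())
        for lang, y in lytr.items():
            if y.shape[1] < n_classes:
                pad = np.zeros((y.shape[0], n_classes - y.shape[1]), dtype=y.dtype)
                lytr[lang] = np.hstack([y, pad])
        return lXtr, lytr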