Sketched results reader; removed first-tier learners optimization
This commit is contained in:
parent
174aa8ca05
commit
a95511b4d9
|
@ -85,7 +85,6 @@ if __name__ == '__main__':
|
||||||
lXtr, lytr = data.training()
|
lXtr, lytr = data.training()
|
||||||
lXte, lyte = data.test()
|
lXte, lyte = data.test()
|
||||||
|
|
||||||
|
|
||||||
if op.set_c != -1:
|
if op.set_c != -1:
|
||||||
meta_parameters = None
|
meta_parameters = None
|
||||||
else:
|
else:
|
||||||
|
@ -152,7 +151,7 @@ if __name__ == '__main__':
|
||||||
config=config,
|
config=config,
|
||||||
first_tier_learner=get_learner(calibrate=True),
|
first_tier_learner=get_learner(calibrate=True),
|
||||||
meta_learner=get_learner(calibrate=False, kernel='rbf'),
|
meta_learner=get_learner(calibrate=False, kernel='rbf'),
|
||||||
first_tier_parameters=get_params(dense=False),
|
first_tier_parameters=None, # get_params(dense=False),-->first_tier should not be optimized
|
||||||
meta_parameters=get_params(dense=True),
|
meta_parameters=get_params(dense=True),
|
||||||
n_jobs=op.n_jobs)
|
n_jobs=op.n_jobs)
|
||||||
|
|
||||||
|
|
|
@ -220,6 +220,7 @@ class StorageEmbeddings:
|
||||||
optimal_n = get_optimal_dim(self.lang_U, 'U')
|
optimal_n = get_optimal_dim(self.lang_U, 'U')
|
||||||
self.lang_U = run_pca(optimal_n, self.lang_U)
|
self.lang_U = run_pca(optimal_n, self.lang_U)
|
||||||
elif max_label_space < nC:
|
elif max_label_space < nC:
|
||||||
|
print(f'Applying PCA to unsupervised matrix U')
|
||||||
self.lang_U = run_pca(max_label_space, self.lang_U)
|
self.lang_U = run_pca(max_label_space, self.lang_U)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
@ -258,7 +259,8 @@ class StorageEmbeddings:
|
||||||
print(f'Applying PCA(n_components={i}')
|
print(f'Applying PCA(n_components={i}')
|
||||||
for lang in languages:
|
for lang in languages:
|
||||||
self.lang_S[lang] = stacked_pca.transform(self.lang_S[lang])
|
self.lang_S[lang] = stacked_pca.transform(self.lang_S[lang])
|
||||||
elif max_label_space < nC:
|
elif max_label_space <= nC:
|
||||||
|
print(f'Computing PCA on Supervised Matrix PCA(n_components:{max_label_space})')
|
||||||
self.lang_S = run_pca(max_label_space, self.lang_S)
|
self.lang_S = run_pca(max_label_space, self.lang_S)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
@ -276,7 +278,6 @@ class StorageEmbeddings:
|
||||||
self._add_emebeddings_supervised(docs, labels, config['reduction'], config['max_label_space'], vocs)
|
self._add_emebeddings_supervised(docs, labels, config['reduction'], config['max_label_space'], vocs)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
def predict(self, config, docs):
|
def predict(self, config, docs):
|
||||||
if config['supervised'] and config['unsupervised']:
|
if config['supervised'] and config['unsupervised']:
|
||||||
return self._concatenate_embeddings(docs)
|
return self._concatenate_embeddings(docs)
|
||||||
|
@ -289,4 +290,3 @@ class StorageEmbeddings:
|
||||||
for lang in docs.keys():
|
for lang in docs.keys():
|
||||||
_r[lang] = docs[lang].dot(self.lang_U[lang])
|
_r[lang] = docs[lang].dot(self.lang_U[lang])
|
||||||
return _r
|
return _r
|
||||||
|
|
||||||
|
|
|
@ -549,4 +549,4 @@ class PolylingualEmbeddingsClassifier:
|
||||||
return _joblib_transform_multiling(self.model.predict_proba, lWEte, n_jobs=self.n_jobs)
|
return _joblib_transform_multiling(self.model.predict_proba, lWEte, n_jobs=self.n_jobs)
|
||||||
|
|
||||||
def best_params(self):
|
def best_params(self):
|
||||||
return self.model.best_params()
|
return self.model.best_params()
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
df = pd.read_csv("/home/andreapdr/funneling_pdr/src/results/results.csv", delimiter='\t')
|
||||||
|
pivot = pd.pivot_table(df, values=['time', 'macrof1', 'microf1', 'macrok', 'microk'], index=['embed'], aggfunc=[np.mean, np.std])
|
||||||
|
print(pivot)
|
||||||
|
print('Finished ...')
|
|
@ -47,4 +47,4 @@ def get_optimal_dim(X, embed_type):
|
||||||
plt.axvline(best_n, color='r', label='optimal N')
|
plt.axvline(best_n, color='r', label='optimal N')
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.show()
|
plt.show()
|
||||||
return best_n
|
return best_n
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
def fill_missing_classes(lXtr, lytr):
|
||||||
|
pass
|
Loading…
Reference in New Issue