refactor
parent cfd3a609a2
commit 73d1e70ae9
@@ -3,7 +3,7 @@ import pickle
 from torchtext.vocab import Vectors
 import torch
 from abc import ABC, abstractmethod
-from learning.supervised import get_supervised_embeddings
+from embeddings.supervised import get_supervised_embeddings
 from util.decompositions import *
 from util.SIF_embed import *

@@ -1,6 +1,6 @@
 import numpy as np
 import time
-from learning.embeddings import WordEmbeddings, StorageEmbeddings
+from embeddings.embeddings import WordEmbeddings, StorageEmbeddings
 from scipy.sparse import issparse
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.model_selection import GridSearchCV
@@ -9,7 +9,7 @@ from joblib import Parallel, delayed
 from sklearn.feature_extraction.text import TfidfVectorizer
 from transformers.StandardizeTransformer import StandardizeTransformer
 from sklearn.decomposition import PCA
-from models.cnn_class import CNN_pdr
+from models.cnn_class_bu import CNN_pdr


 def _sort_if_sparse(X):
@@ -325,7 +325,7 @@ class MonolingualClassifier:
         return self.best_params_


-class AndreaCLF(FunnellingPolylingualClassifier):
+class FunnellingMultimodal(FunnellingPolylingualClassifier):
     def __init__(self,
                  we_path,
                  config,
@@ -627,7 +627,7 @@ class MonolingualNetSvm:
         :param word_index:
         :return: filtered embedding matrix
         """
-        from learning.embeddings import EmbeddingsAligned
+        from embeddings.embeddings import EmbeddingsAligned
         type = 'MUSE'
         path = '/home/andreapdr/CLESA/'
         MUSE = EmbeddingsAligned(type, path, lang, word_index.keys())

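The hunk above touches a helper whose docstring promises a "filtered embedding matrix": it loads MUSE-aligned vectors and keeps only the entries for words present in `word_index`. Below is a rough, self-contained illustration of that filtering step; the toy vectors, the helper name, and the dimensionality are hypothetical, and only the `word_index`-driven selection mirrors the diff.

```python
import numpy as np

# hypothetical pretrained aligned vectors: word -> 4-dimensional embedding
pretrained = {'dog': np.ones(4), 'cat': np.full(4, 2.0), 'house': np.full(4, 3.0)}

# word_index as produced by a vectorizer: word -> row id in the output matrix
word_index = {'dog': 0, 'tree': 1, 'cat': 2}

def filter_embedding_matrix(pretrained, word_index, dim=4):
    """Build a (len(word_index), dim) matrix with one row per indexed word.

    Out-of-vocabulary words (here 'tree') keep a zero row, which is one common
    way of obtaining the 'filtered embedding matrix' the docstring refers to.
    """
    out = np.zeros((len(word_index), dim))
    for word, idx in word_index.items():
        if word in pretrained:
            out[idx] = pretrained[word]
    return out

print(filter_embedding_matrix(pretrained, word_index).shape)  # (3, 4)
```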
@@ -126,13 +126,13 @@ if __name__ == '__main__':
     result_id = dataset_file + 'PolyEmbedd_andrea_' + _config_id + ('_optimC' if op.optimc else '')

     print(f'### PolyEmbedd_andrea_{_config_id}\n')
-    classifier = AndreaCLF(we_path=op.we_path,
-                           config=config,
-                           first_tier_learner=get_learner(calibrate=True),
-                           meta_learner=get_learner(calibrate=False, kernel='rbf'),
-                           first_tier_parameters=None,  # TODO get_params(dense=False),--> first_tier should not be optimized - or not?
-                           meta_parameters=get_params(dense=True),
-                           n_jobs=op.n_jobs)
+    classifier = FunnellingMultimodal(we_path=op.we_path,
+                                      config=config,
+                                      first_tier_learner=get_learner(calibrate=True),
+                                      meta_learner=get_learner(calibrate=False, kernel='rbf'),
+                                      first_tier_parameters=None,  # TODO get_params(dense=False),--> first_tier should not be optimized - or not?
+                                      meta_parameters=get_params(dense=True),
+                                      n_jobs=op.n_jobs)

     print('# Fitting ...')
     classifier.fit(lXtr, lytr)
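For readers unfamiliar with the two-tier configuration being instantiated above: the class names and arguments suggest a funnelling setup in which calibrated first-tier learners map each language's documents to posterior probabilities, and a single meta learner (an RBF-kernel SVM) is trained on that shared probability space. The following is a minimal sketch of that flow with synthetic data, not using the project's own classes; real funnelling typically obtains the meta-training posteriors via cross-validation, which this sketch skips for brevity.

```python
import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC, SVC

# hypothetical per-language training data: {lang: (X, y)}
rng = np.random.default_rng(0)
lXtr = {lang: rng.normal(size=(50, 20)) for lang in ('en', 'it')}
lytr = {lang: rng.integers(0, 2, size=50) for lang in ('en', 'it')}

# first tier: one calibrated classifier per language (cf. calibrate=True above)
first_tier = {lang: CalibratedClassifierCV(LinearSVC()).fit(X, lytr[lang])
              for lang, X in lXtr.items()}

# funnel: stack every language's posterior probabilities into one matrix
Z = np.vstack([first_tier[lang].predict_proba(X) for lang, X in lXtr.items()])
y = np.concatenate([lytr[lang] for lang in lXtr])

# meta learner: a single RBF-kernel SVM trained on the shared posterior space
meta = SVC(kernel='rbf').fit(Z, y)
print(meta.predict(Z[:5]).shape)  # (5,)
```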
@@ -33,4 +33,7 @@ def list_files(dir):
 def makedirs_if_not_exist(path):
     if not exists(path): makedirs(path)

+
+def create_if_not_exist(path):
+    if not exists(path): makedirs(path)

@@ -1,4 +1,8 @@
 import numpy as np
+from scipy.sparse import lil_matrix, issparse
+from sklearn.metrics import f1_score, accuracy_score
+
+


 """
@@ -166,3 +170,87 @@ def smoothmacroK(true_labels, posterior_probabilities):

 def smoothmicroK(true_labels, posterior_probabilities):
     return micro_average(true_labels, posterior_probabilities, K, metric_statistics=soft_single_metric_statistics)
+
+
+"""
+Scikit-learn provides a full set of evaluation metrics, but they treat special cases differently.
+I.e., when the number of true positives, false positives, and false negatives amount to 0, all
+affected metrics (precision, recall, and thus F1) output 0 in scikit-learn.
+We adhere to the common practice of outputting 1 in this case, since the classifier has correctly
+classified all examples as negatives.
+"""
+
+
+def evaluation(y_true, y_pred, classification_type):
+
+    if classification_type == 'multilabel':
+        eval_function = multilabel_eval
+    elif classification_type == 'singlelabel':
+        eval_function = singlelabel_eval
+
+    Mf1, mf1, accuracy = eval_function(y_true, y_pred)
+
+    return Mf1, mf1, accuracy
+
+
+def multilabel_eval(y, y_):
+
+    tp = y.multiply(y_)
+
+    fn = lil_matrix(y.shape)
+    true_ones = y==1
+    fn[true_ones]=1-tp[true_ones]
+
+    fp = lil_matrix(y.shape)
+    pred_ones = y_==1
+    if pred_ones.nnz>0:
+        fp[pred_ones]=1-tp[pred_ones]
+
+    #macro-f1
+    tp_macro = np.asarray(tp.sum(axis=0), dtype=int).flatten()
+    fn_macro = np.asarray(fn.sum(axis=0), dtype=int).flatten()
+    fp_macro = np.asarray(fp.sum(axis=0), dtype=int).flatten()
+
+    pos_pred = tp_macro+fp_macro
+    pos_true = tp_macro+fn_macro
+    prec=np.zeros(shape=tp_macro.shape,dtype=float)
+    rec=np.zeros(shape=tp_macro.shape,dtype=float)
+    np.divide(tp_macro, pos_pred, out=prec, where=pos_pred>0)
+    np.divide(tp_macro, pos_true, out=rec, where=pos_true>0)
+    den=prec+rec
+
+    macrof1=np.zeros(shape=tp_macro.shape,dtype=float)
+    np.divide(np.multiply(prec,rec),den,out=macrof1,where=den>0)
+    macrof1 *=2
+
+    macrof1[(pos_pred==0)*(pos_true==0)]=1
+    macrof1 = np.mean(macrof1)
+
+    #micro-f1
+    tp_micro = tp_macro.sum()
+    fn_micro = fn_macro.sum()
+    fp_micro = fp_macro.sum()
+    pos_pred = tp_micro + fp_micro
+    pos_true = tp_micro + fn_micro
+    prec = (tp_micro / pos_pred) if pos_pred>0 else 0
+    rec = (tp_micro / pos_true) if pos_true>0 else 0
+    den = prec+rec
+    microf1 = 2*prec*rec/den if den>0 else 0
+    if pos_pred==pos_true==0:
+        microf1=1
+
+    #accuracy
+    ndecisions = np.multiply(*y.shape)
+    tn = ndecisions - (tp_micro+fn_micro+fp_micro)
+    acc = (tp_micro+tn)/ndecisions
+
+    return macrof1,microf1,acc
+
+
+def singlelabel_eval(y, y_):
+    if issparse(y_): y_ = y_.toarray().flatten()
+    macrof1 = f1_score(y, y_, average='macro')
+    microf1 = f1_score(y, y_, average='micro')
+    acc = accuracy_score(y, y_)
+    return macrof1,microf1,acc
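The docstring in this new evaluation module motivates the special-case convention: a label with no positive examples and no positive predictions scores F1 = 1 rather than scikit-learn's 0. A small, self-contained example of the difference follows; the call to `multilabel_eval` at the end is only sketched in a comment, since the module's import path is not shown in this diff.

```python
import numpy as np
from sklearn.metrics import f1_score

# 2 documents, 2 labels; label 0 is never true and never predicted,
# label 1 is predicted perfectly
y_true = np.array([[0, 1],
                   [0, 0]])
y_pred = np.array([[0, 1],
                   [0, 0]])

# scikit-learn scores the empty label 0 as F1 = 0, dragging macro-F1 down to 0.5
print(f1_score(y_true, y_pred, average='macro'))  # 0.5

# multilabel_eval instead assigns that label F1 = 1 (every decision on it is a
# correct negative), so it would report macro-F1 = 1.0 on the same input, e.g.:
# Mf1, mf1, acc = multilabel_eval(csr_matrix(y_true), csr_matrix(y_pred))
# (import path for multilabel_eval not shown in the diff; it expects sparse input)
```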