refactor

commit 73d1e70ae9 (parent cfd3a609a2)
@@ -3,7 +3,7 @@ import pickle
 from torchtext.vocab import Vectors
 import torch
 from abc import ABC, abstractmethod
-from learning.supervised import get_supervised_embeddings
+from embeddings.supervised import get_supervised_embeddings
 from util.decompositions import *
 from util.SIF_embed import *
@@ -1,6 +1,6 @@
 import numpy as np
 import time
-from learning.embeddings import WordEmbeddings, StorageEmbeddings
+from embeddings.embeddings import WordEmbeddings, StorageEmbeddings
 from scipy.sparse import issparse
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.model_selection import GridSearchCV
@@ -9,7 +9,7 @@ from joblib import Parallel, delayed
 from sklearn.feature_extraction.text import TfidfVectorizer
 from transformers.StandardizeTransformer import StandardizeTransformer
 from sklearn.decomposition import PCA
-from models.cnn_class import CNN_pdr
+from models.cnn_class_bu import CNN_pdr


 def _sort_if_sparse(X):
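Note: `_sort_if_sparse` appears above only as an unchanged context line. For readers following along, a minimal sketch of what a helper with this name typically does (an assumption, since its body is not part of this diff, and it presumes CSR/CSC input):

from scipy.sparse import issparse

def _sort_if_sparse(X):
    # CSR/CSC matrices can end up with unsorted column indices after
    # slicing or stacking; sorting them in place keeps downstream
    # estimators (and equality checks) well behaved.
    if issparse(X) and not X.has_sorted_indices:
        X.sort_indices()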
@@ -325,7 +325,7 @@ class MonolingualClassifier:
         return self.best_params_


-class AndreaCLF(FunnellingPolylingualClassifier):
+class FunnellingMultimodal(FunnellingPolylingualClassifier):
     def __init__(self,
                  we_path,
                  config,
@@ -627,7 +627,7 @@ class MonolingualNetSvm:
         :param word_index:
         :return: filtered embedding matrix
         """
-        from learning.embeddings import EmbeddingsAligned
+        from embeddings.embeddings import EmbeddingsAligned
         type = 'MUSE'
         path = '/home/andreapdr/CLESA/'
         MUSE = EmbeddingsAligned(type, path, lang, word_index.keys())
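For reference, the `EmbeddingsAligned(type, path, lang, word_index.keys())` call loads MUSE-aligned vectors for `lang` and keeps only the rows matching the dataset vocabulary (the "filtered embedding matrix" of the docstring). A minimal sketch of that filtering step, with hypothetical names (`pretrained` standing in for a word-to-vector mapping) since the class body lies outside this diff:

import numpy as np

def filter_embedding_matrix(pretrained, word_index, dim=300):
    # One row per vocabulary word; out-of-vocabulary words keep a
    # zero vector so the matrix stays aligned with word_index.
    E = np.zeros((len(word_index), dim))
    for word, idx in word_index.items():
        if word in pretrained:
            E[idx] = pretrained[word]
    return E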
@@ -126,7 +126,7 @@ if __name__ == '__main__':
     result_id = dataset_file + 'PolyEmbedd_andrea_' + _config_id + ('_optimC' if op.optimc else '')

     print(f'### PolyEmbedd_andrea_{_config_id}\n')
-    classifier = AndreaCLF(we_path=op.we_path,
+    classifier = FunnellingMultimodal(we_path=op.we_path,
                            config=config,
                            first_tier_learner=get_learner(calibrate=True),
                            meta_learner=get_learner(calibrate=False, kernel='rbf'),
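`get_learner` is not shown in this diff; it is called once with `calibrate=True` for the first-tier (per-language) classifiers, whose posterior probabilities feed the meta-learner, and once with `kernel='rbf'` for the meta-level SVM. A plausible sketch, assuming it wraps scikit-learn's SVC (names and defaults are guesses, not the repository's actual code):

from sklearn.svm import SVC

def get_learner(calibrate=False, kernel='linear', C=1):
    # probability=True adds an internal cross-validated calibration
    # step so predict_proba returns usable posterior probabilities,
    # which the funnelling meta-learner consumes as features.
    return SVC(kernel=kernel, probability=calibrate, C=C)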
@@ -33,4 +33,7 @@ def list_files(dir):
 def makedirs_if_not_exist(path):
     if not exists(path): makedirs(path)

+def create_if_not_exist(path):
+    if not exists(path): makedirs(path)
+
@@ -1,4 +1,8 @@
 import numpy as np
+import numpy as np
+from scipy.sparse import lil_matrix, issparse
+from sklearn.metrics import f1_score, accuracy_score
+


 """
@@ -166,3 +170,87 @@ def smoothmacroK(true_labels, posterior_probabilities):
 def smoothmicroK(true_labels, posterior_probabilities):
     return micro_average(true_labels, posterior_probabilities, K, metric_statistics=soft_single_metric_statistics)

+
+"""
+Scikit learn provides a full set of evaluation metrics, but they treat special cases differently.
+I.e., when the number of true positives, false positives, and false negatives amount to 0, all
+affected metrics (precision, recall, and thus f1) output 0 in Scikit learn.
+We adhere to the common practice of outputting 1 in this case since the classifier has correctly
+classified all examples as negatives.
+"""
+
+
+def evaluation(y_true, y_pred, classification_type):
+    if classification_type == 'multilabel':
+        eval_function = multilabel_eval
+    elif classification_type == 'singlelabel':
+        eval_function = singlelabel_eval
+
+    Mf1, mf1, accuracy = eval_function(y_true, y_pred)
+
+    return Mf1, mf1, accuracy
+
+
+def multilabel_eval(y, y_):
+    tp = y.multiply(y_)
+
+    fn = lil_matrix(y.shape)
+    true_ones = y == 1
+    fn[true_ones] = 1 - tp[true_ones]
+
+    fp = lil_matrix(y.shape)
+    pred_ones = y_ == 1
+    if pred_ones.nnz > 0:
+        fp[pred_ones] = 1 - tp[pred_ones]
+
+    # macro-f1
+    tp_macro = np.asarray(tp.sum(axis=0), dtype=int).flatten()
+    fn_macro = np.asarray(fn.sum(axis=0), dtype=int).flatten()
+    fp_macro = np.asarray(fp.sum(axis=0), dtype=int).flatten()
+
+    pos_pred = tp_macro + fp_macro
+    pos_true = tp_macro + fn_macro
+    prec = np.zeros(shape=tp_macro.shape, dtype=float)
+    rec = np.zeros(shape=tp_macro.shape, dtype=float)
+    np.divide(tp_macro, pos_pred, out=prec, where=pos_pred > 0)
+    np.divide(tp_macro, pos_true, out=rec, where=pos_true > 0)
+    den = prec + rec
+
+    macrof1 = np.zeros(shape=tp_macro.shape, dtype=float)
+    np.divide(np.multiply(prec, rec), den, out=macrof1, where=den > 0)
+    macrof1 *= 2
+
+    macrof1[(pos_pred == 0) * (pos_true == 0)] = 1
+    macrof1 = np.mean(macrof1)
+
+    # micro-f1
+    tp_micro = tp_macro.sum()
+    fn_micro = fn_macro.sum()
+    fp_micro = fp_macro.sum()
+    pos_pred = tp_micro + fp_micro
+    pos_true = tp_micro + fn_micro
+    prec = (tp_micro / pos_pred) if pos_pred > 0 else 0
+    rec = (tp_micro / pos_true) if pos_true > 0 else 0
+    den = prec + rec
+    microf1 = 2 * prec * rec / den if den > 0 else 0
+    if pos_pred == pos_true == 0:
+        microf1 = 1
+
+    # accuracy
+    ndecisions = np.multiply(*y.shape)
+    tn = ndecisions - (tp_micro + fn_micro + fp_micro)
+    acc = (tp_micro + tn) / ndecisions
+
+    return macrof1, microf1, acc
+
+
+def singlelabel_eval(y, y_):
+    if issparse(y_): y_ = y_.toarray().flatten()
+    macrof1 = f1_score(y, y_, average='macro')
+    microf1 = f1_score(y, y_, average='micro')
+    acc = accuracy_score(y, y_)
+    return macrof1, microf1, acc
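A quick usage sketch of the new `evaluation` entry point on toy multilabel data (hypothetical matrices), illustrating the special case the docstring describes: a class with zero true positives, false positives, and false negatives scores 1 rather than 0:

import numpy as np
from scipy.sparse import csr_matrix

y_true = csr_matrix(np.array([[1, 1, 0],
                              [1, 0, 0]]))  # class 2 never occurs
y_pred = csr_matrix(np.array([[1, 1, 0],
                              [0, 0, 0]]))  # nor is it ever predicted

Mf1, mf1, acc = evaluation(y_true, y_pred, classification_type='multilabel')
# Per-class F1: class 0 -> 2/3, class 1 -> 1, class 2 -> 1 (vacuously
# correct), so Mf1 = (2/3 + 1 + 1) / 3 ~ 0.89, whereas a zero-division
# convention of 0 would report (2/3 + 1 + 0) / 3 ~ 0.56 instead.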