evaluate update

Andrea Pedrotti 2023-06-22 11:32:27 +02:00
parent 9437ccc837
commit 2554c58fac
1 changed file with 22 additions and 11 deletions


@@ -1,8 +1,9 @@
from joblib import Parallel, delayed
from collections import defaultdict
from evaluation.metrics import *
from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score
# from evaluation.metrics import *
import numpy as np
from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score, precision_score, recall_score
def evaluation_metrics(y, y_, clf_type):
@@ -13,13 +14,17 @@ def evaluation_metrics(y, y_, clf_type):
# TODO: we need logits top_k_accuracy_score(y, y_, k=10),
f1_score(y, y_, average="macro", zero_division=1),
f1_score(y, y_, average="micro"),
precision_score(y, y_, zero_division=1, average="macro"),
recall_score(y, y_, zero_division=1, average="macro"),
)
elif clf_type == "multilabel":
return (
macroF1(y, y_),
microF1(y, y_),
macroK(y, y_),
microK(y, y_),
f1_score(y, y_, average="macro", zero_division=1),
f1_score(y, y_, average="micro"),
0,
0,
# macroK(y, y_),
# microK(y, y_),
)
else:
raise ValueError("clf_type must be either 'singlelabel' or 'multilabel'")
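
For reference, a minimal sketch (not part of the commit) of what the reworked singlelabel branch now computes with the newly imported sklearn metrics. The accuracy term is an assumption inferred from the accuracy_score import and the later unpacking in log_eval; it sits outside the visible hunk. Toy labels only:

# Sketch only: toy check of the metrics wired in by this commit.
# The accuracy term is assumed (not visible in the hunk above).
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

y  = [0, 1, 2, 2, 1]   # gold labels
y_ = [0, 2, 2, 2, 0]   # predictions
print(
    accuracy_score(y, y_),
    f1_score(y, y_, average="macro", zero_division=1),
    f1_score(y, y_, average="micro"),
    precision_score(y, y_, zero_division=1, average="macro"),
    recall_score(y, y_, zero_division=1, average="macro"),
)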
@@ -48,8 +53,10 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
if clf_type == "multilabel":
for lang in l_eval.keys():
macrof1, microf1, macrok, microk = l_eval[lang]
metrics.append([macrof1, microf1, macrok, microk])
# macrof1, microf1, macrok, microk = l_eval[lang]
# metrics.append([macrof1, microf1, macrok, microk])
macrof1, microf1, precision, recall = l_eval[lang]
metrics.append([macrof1, microf1, precision, recall])
if phase != "validation":
print(f"Lang {lang}: macro-F1 = {macrof1:.3f} micro-F1 = {microf1:.3f}")
averages = np.mean(np.array(metrics), axis=0)
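
A minimal sketch (not part of the commit) of the per-language averaging done just above: each language contributes one [macro-F1, micro-F1, precision, recall] row, and np.mean over axis=0 gives the column-wise averages. The numbers and language names below are toy values:

# Sketch only: per-language rows averaged column-wise, as in the line above.
import numpy as np

metrics = [
    [0.61, 0.70, 0.65, 0.58],   # e.g. "en": macro-F1, micro-F1, precision, recall
    [0.55, 0.66, 0.60, 0.52],   # e.g. "it"
]
averages = np.mean(np.array(metrics), axis=0)  # column-wise averages over languages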
@@ -69,12 +76,15 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
# "acc10", # "accuracy-at-10",
"MF1", # "macro-F1",
"mF1", # "micro-F1",
"precision",
"recall"
]
for lang in l_eval.keys():
# acc, top5, top10, macrof1, microf1 = l_eval[lang]
acc, macrof1, microf1 = l_eval[lang]
acc, macrof1, microf1, precision, recall = l_eval[lang]
# metrics.append([acc, top5, top10, macrof1, microf1])
metrics.append([acc, macrof1, microf1])
# metrics.append([acc, macrof1, microf1])
metrics.append([acc, macrof1, microf1, precision, recall])
for m, v in zip(_metrics, l_eval[lang]):
lang_metrics[m][lang] = v
@@ -82,7 +92,8 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
if phase != "validation":
print(
# f"Lang {lang}: acc = {acc:.3f} acc-top5 = {top5:.3f} acc-top10 = {top10:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
# f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f} pr = {precision:.3f} re = {recall:.3f}"
)
averages = np.mean(np.array(metrics), axis=0)
if verbose:
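
A minimal sketch (not part of the commit) of the l_eval shape the updated singlelabel branch now expects: per-language 5-tuples (acc, macro-F1, micro-F1, precision, recall) instead of the earlier 3-tuples. Language keys and scores are illustrative only, and the call assumes the surrounding module is importable:

# Sketch only: per-language 5-tuples as unpacked in the singlelabel branch above.
l_eval = {
    "en": (0.81, 0.63, 0.80, 0.66, 0.61),  # acc, macro-F1, micro-F1, precision, recall
    "it": (0.76, 0.58, 0.75, 0.60, 0.57),
}
# log_eval(l_eval, clf_type="singlelabel")  # hypothetical call into this module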