evaluate update
This commit is contained in:
parent 9437ccc837
commit 2554c58fac
@@ -1,8 +1,9 @@
 from joblib import Parallel, delayed
 from collections import defaultdict
 
-from evaluation.metrics import *
-from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score
+# from evaluation.metrics import *
+import numpy as np
+from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score, precision_score, recall_score
 
 
 def evaluation_metrics(y, y_, clf_type):
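Note: a hedged aside on why zero_division=1 is passed to the new precision/recall calls (standard scikit-learn behavior; the labels below are invented, not project data). When a class receives no predictions, its precision is undefined, and zero_division=1 scores that term as 1.0 instead of 0.0 plus a warning:

# Invented labels illustrating zero_division=1 (not project data).
from sklearn.metrics import precision_score

y_true = [0, 0, 1]
y_pred = [0, 0, 0]  # class 1 is never predicted -> its precision term is undefined
# zero_division=1 counts the undefined term as 1.0 instead of 0.0 + a warning:
print(precision_score(y_true, y_pred, average="macro", zero_division=1))
# (2/3 + 1.0) / 2 ≈ 0.833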
@@ -13,13 +14,17 @@ def evaluation_metrics(y, y_, clf_type):
             # TODO: we need logits top_k_accuracy_score(y, y_, k=10),
             f1_score(y, y_, average="macro", zero_division=1),
             f1_score(y, y_, average="micro"),
+            precision_score(y, y_, zero_division=1, average="macro"),
+            recall_score(y, y_, zero_division=1, average="macro"),
         )
     elif clf_type == "multilabel":
         return (
-            macroF1(y, y_),
-            microF1(y, y_),
-            macroK(y, y_),
-            microK(y, y_),
+            f1_score(y, y_, average="macro", zero_division=1),
+            f1_score(y, y_, average="micro"),
+            0,
+            0,
+            # macroK(y, y_),
+            # microK(y, y_),
         )
     else:
         raise ValueError("clf_type must be either 'singlelabel' or 'multilabel'")
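Note: a minimal sketch of what the updated singlelabel branch returns, with invented labels. The tuple presumably leads with accuracy_score (log_eval later unpacks acc first), but only the tail is visible in this hunk:

# Illustration of the updated singlelabel return values; labels are invented.
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

y  = np.array([0, 1, 2, 1, 0])
y_ = np.array([0, 2, 2, 1, 0])

result = (
    accuracy_score(y, y_),  # assumed head of the tuple, not shown in the hunk
    f1_score(y, y_, average="macro", zero_division=1),
    f1_score(y, y_, average="micro"),
    precision_score(y, y_, zero_division=1, average="macro"),
    recall_score(y, y_, zero_division=1, average="macro"),
)
print(result)  # 5-tuple: acc, macro-F1, micro-F1, macro-precision, macro-recall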
@@ -48,8 +53,10 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
 
     if clf_type == "multilabel":
         for lang in l_eval.keys():
-            macrof1, microf1, macrok, microk = l_eval[lang]
-            metrics.append([macrof1, microf1, macrok, microk])
+            # macrof1, microf1, macrok, microk = l_eval[lang]
+            # metrics.append([macrof1, microf1, macrok, microk])
+            macrof1, microf1, precision, recall = l_eval[lang]
+            metrics.append([macrof1, microf1, precision, recall])
             if phase != "validation":
                 print(f"Lang {lang}: macro-F1 = {macrof1:.3f} micro-F1 = {microf1:.3f}")
         averages = np.mean(np.array(metrics), axis=0)
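Note: a small sketch of the per-language tuple the multilabel branch now unpacks (values invented). With the change to evaluation_metrics above, the last two slots are the hard-coded 0s that replaced macroK/microK, even though log_eval names them precision and recall:

import numpy as np

# Hypothetical l_eval: lang -> (macro-F1, micro-F1, 0, 0); values invented.
l_eval = {"en": (0.71, 0.78, 0.0, 0.0), "it": (0.65, 0.74, 0.0, 0.0)}

metrics = []
for lang in l_eval.keys():
    macrof1, microf1, precision, recall = l_eval[lang]
    metrics.append([macrof1, microf1, precision, recall])

averages = np.mean(np.array(metrics), axis=0)  # column-wise mean across languages
print(averages)  # [0.68 0.76 0.   0.  ]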
@@ -69,12 +76,15 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
             # "acc10", # "accuracy-at-10",
             "MF1", # "macro-F1",
             "mF1", # "micro-F1",
+            "precision",
+            "recall"
         ]
         for lang in l_eval.keys():
             # acc, top5, top10, macrof1, microf1 = l_eval[lang]
-            acc, macrof1, microf1 = l_eval[lang]
+            acc, macrof1, microf1, precision, recall = l_eval[lang]
             # metrics.append([acc, top5, top10, macrof1, microf1])
-            metrics.append([acc, macrof1, microf1])
+            # metrics.append([acc, macrof1, microf1])
+            metrics.append([acc, macrof1, microf1, precision, recall])
 
             for m, v in zip(_metrics, l_eval[lang]):
                 lang_metrics[m][lang] = v
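Note: a sketch of how the per-language dict is filled from the new 5-tuple. The leading "acc" entry of _metrics and the defaultdict(dict) shape of lang_metrics are assumptions (the hunk shows only the list's tail and the zip loop); values are invented:

from collections import defaultdict

# "acc" as the first metric name is an assumption; the hunk shows only the tail.
_metrics = ["acc", "MF1", "mF1", "precision", "recall"]
l_eval = {"en": (0.81, 0.69, 0.80, 0.72, 0.66)}  # invented values

lang_metrics = defaultdict(dict)  # assumed structure: metric -> {lang: value}
for lang in l_eval.keys():
    for m, v in zip(_metrics, l_eval[lang]):
        lang_metrics[m][lang] = v

print(dict(lang_metrics))
# {'acc': {'en': 0.81}, 'MF1': {'en': 0.69}, 'mF1': {'en': 0.8},
#  'precision': {'en': 0.72}, 'recall': {'en': 0.66}}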
@@ -82,7 +92,8 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
             if phase != "validation":
                 print(
                     # f"Lang {lang}: acc = {acc:.3f} acc-top5 = {top5:.3f} acc-top10 = {top10:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
-                    f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
+                    # f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
+                    f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f} pr = {precision:.3f} re = {recall:.3f}"
                 )
         averages = np.mean(np.array(metrics), axis=0)
         if verbose: