evaluate update

parent 9437ccc837
commit 2554c58fac
@@ -1,8 +1,9 @@
 from joblib import Parallel, delayed
 from collections import defaultdict

-from evaluation.metrics import *
-from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score
+# from evaluation.metrics import *
+import numpy as np
+from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score, precision_score, recall_score


 def evaluation_metrics(y, y_, clf_type):
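Note: the star import from the project's own evaluation.metrics module is commented out in favor of explicit scikit-learn calls. A minimal sanity check of the newly imported functions, on toy labels rather than project data:

    import numpy as np
    from sklearn.metrics import f1_score, precision_score, recall_score

    # zero_division=1 scores an undefined precision/recall/F1 as 1.0
    # instead of emitting a warning.
    y_true = np.array([0, 1, 2, 2, 1])
    y_pred = np.array([0, 2, 2, 2, 1])

    print(f1_score(y_true, y_pred, average="macro", zero_division=1))
    print(precision_score(y_true, y_pred, average="macro", zero_division=1))
    print(recall_score(y_true, y_pred, average="macro", zero_division=1))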
@@ -13,13 +14,17 @@ def evaluation_metrics(y, y_, clf_type):
             # TODO: we need logits top_k_accuracy_score(y, y_, k=10),
             f1_score(y, y_, average="macro", zero_division=1),
             f1_score(y, y_, average="micro"),
+            precision_score(y, y_, zero_division=1, average="macro"),
+            recall_score(y, y_, zero_division=1, average="macro"),
         )
     elif clf_type == "multilabel":
         return (
-            macroF1(y, y_),
-            microF1(y, y_),
-            macroK(y, y_),
-            microK(y, y_),
+            f1_score(y, y_, average="macro", zero_division=1),
+            f1_score(y, y_, average="micro"),
+            0,
+            0,
+            # macroK(y, y_),
+            # microK(y, y_),
         )
     else:
         raise ValueError("clf_type must be either 'singlelabel' or 'multilabel'")
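Note: the multilabel branch now uses scikit-learn's f1_score directly and returns literal zeros in the slots previously filled by macroK/microK, preserving the four-element tuple shape. A sketch of what the branch computes, assuming y and y_ are binary indicator matrices (the multilabel format f1_score accepts):

    import numpy as np
    from sklearn.metrics import f1_score

    # Illustrative multilabel targets: rows are samples, columns are labels.
    y = np.array([[1, 0, 1], [0, 1, 0]])
    y_ = np.array([[1, 0, 0], [0, 1, 1]])

    result = (
        f1_score(y, y_, average="macro", zero_division=1),
        f1_score(y, y_, average="micro"),
        0,  # placeholder for the disabled macroK
        0,  # placeholder for the disabled microK
    )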
@ -48,8 +53,10 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
|
||||||
|
|
||||||
if clf_type == "multilabel":
|
if clf_type == "multilabel":
|
||||||
for lang in l_eval.keys():
|
for lang in l_eval.keys():
|
||||||
macrof1, microf1, macrok, microk = l_eval[lang]
|
# macrof1, microf1, macrok, microk = l_eval[lang]
|
||||||
metrics.append([macrof1, microf1, macrok, microk])
|
# metrics.append([macrof1, microf1, macrok, microk])
|
||||||
|
macrof1, microf1, precision, recall = l_eval[lang]
|
||||||
|
metrics.append([macrof1, microf1, precision, recall])
|
||||||
if phase != "validation":
|
if phase != "validation":
|
||||||
print(f"Lang {lang}: macro-F1 = {macrof1:.3f} micro-F1 = {microf1:.3f}")
|
print(f"Lang {lang}: macro-F1 = {macrof1:.3f} micro-F1 = {microf1:.3f}")
|
||||||
averages = np.mean(np.array(metrics), axis=0)
|
averages = np.mean(np.array(metrics), axis=0)
|
||||||
|
|
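Note: precision and recall here unpack positions three and four of the multilabel tuple, which the change above hardwires to 0, so in multilabel mode these columns record placeholders rather than real scores:

    # Made-up F1 values; the trailing zeros come from evaluation_metrics.
    macrof1, microf1, precision, recall = (0.67, 0.67, 0, 0)
    assert (precision, recall) == (0, 0)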
@ -69,12 +76,15 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
|
||||||
# "acc10", # "accuracy-at-10",
|
# "acc10", # "accuracy-at-10",
|
||||||
"MF1", # "macro-F1",
|
"MF1", # "macro-F1",
|
||||||
"mF1", # "micro-F1",
|
"mF1", # "micro-F1",
|
||||||
|
"precision",
|
||||||
|
"recall"
|
||||||
]
|
]
|
||||||
for lang in l_eval.keys():
|
for lang in l_eval.keys():
|
||||||
# acc, top5, top10, macrof1, microf1 = l_eval[lang]
|
# acc, top5, top10, macrof1, microf1 = l_eval[lang]
|
||||||
acc, macrof1, microf1 = l_eval[lang]
|
acc, macrof1, microf1, precision, recall= l_eval[lang]
|
||||||
# metrics.append([acc, top5, top10, macrof1, microf1])
|
# metrics.append([acc, top5, top10, macrof1, microf1])
|
||||||
metrics.append([acc, macrof1, microf1])
|
# metrics.append([acc, macrof1, microf1])
|
||||||
|
metrics.append([acc, macrof1, microf1, precision, recall])
|
||||||
|
|
||||||
for m, v in zip(_metrics, l_eval[lang]):
|
for m, v in zip(_metrics, l_eval[lang]):
|
||||||
lang_metrics[m][lang] = v
|
lang_metrics[m][lang] = v
|
||||||
|
|
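Note: the header list and the per-language tuple now both carry five entries, so the zip pairs each name with its value. A hypothetical shape for l_eval in single-label mode (the leading "acc" header and all numbers are invented for illustration):

    from collections import defaultdict

    _metrics = ["acc", "MF1", "mF1", "precision", "recall"]
    l_eval = {
        "en": (0.91, 0.83, 0.90, 0.84, 0.82),
        "da": (0.88, 0.79, 0.87, 0.80, 0.78),
    }

    lang_metrics = defaultdict(dict)
    for lang in l_eval.keys():
        for m, v in zip(_metrics, l_eval[lang]):
            lang_metrics[m][lang] = v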
@ -82,7 +92,8 @@ def log_eval(l_eval, phase="training", clf_type="multilabel", verbose=True):
|
||||||
if phase != "validation":
|
if phase != "validation":
|
||||||
print(
|
print(
|
||||||
# f"Lang {lang}: acc = {acc:.3f} acc-top5 = {top5:.3f} acc-top10 = {top10:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
|
# f"Lang {lang}: acc = {acc:.3f} acc-top5 = {top5:.3f} acc-top10 = {top10:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
|
||||||
f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
|
# f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f}"
|
||||||
|
f"Lang {lang}: acc = {acc:.3f} macro-F1: {macrof1:.3f} micro-F1 = {microf1:.3f} pr = {precision:.3f} re = {recall:.3f}"
|
||||||
)
|
)
|
||||||
averages = np.mean(np.array(metrics), axis=0)
|
averages = np.mean(np.array(metrics), axis=0)
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
|
||||||
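Note: np.mean(..., axis=0) averages the per-language rows column-wise, so each entry of averages is one metric's mean across languages. With the invented numbers from the sketch above:

    import numpy as np

    metrics = [
        [0.91, 0.83, 0.90, 0.84, 0.82],  # en
        [0.88, 0.79, 0.87, 0.80, 0.78],  # da
    ]
    averages = np.mean(np.array(metrics), axis=0)
    # -> one mean per column: [acc, macro-F1, micro-F1, precision, recall]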