import pandas as pd
from sklearn.metrics import mean_absolute_error

from os.path import join

"""
|
|
MEA and classification is meaningful only in "ordinal" tasks e.g., sentiment classification.
|
|
Otherwise the distance between the categories has no semantics!
|
|
|
|
- NB: we want to get the macro-averaged class specific MAE!
|
|
"""
def main():
    # SETTINGS = ["p", "m", "w", "t", "mp", "mpw", "mpt", "mptw"]
    SETTINGS = ["mbert"]
    results = []
    for setting in SETTINGS:
        # evaluate() returns one row per language for this setting
        results.extend(evaluate(setting))
    # DataFrame.append is deprecated/removed in recent pandas; build the frame from the rows
    df = pd.DataFrame(
        results,
        columns=["lang", "neg_mae", "neutral_mae", "pos_mae", "macro_mae", "setting"],
    )
    print(df)


def evaluate(setting):
    result_dir = "results"
    # result_file = f"lang-specific.gfun.{setting}.webis.csv"
    result_file = "lang-specific.mbert.webis.csv"
    # print(f"- reading from: {result_file}")
    df = pd.read_csv(join(result_dir, result_file))
    langs = df.langs.unique()
    res = []
    for lang in langs:
        lang_mask = df.langs == lang
        selected_neg = df.labels == 0
        selected_neutral = df.labels == 1
        selected_pos = df.labels == 2
        neg = df[lang_mask & selected_neg]
        neutral = df[lang_mask & selected_neutral]
        pos = df[lang_mask & selected_pos]

        # print(f"{lang=}")
        # print(neg.shape, neutral.shape, pos.shape)

        # Class-specific MAE: error computed only on the samples of each true class
        neg_mae = round(mean_absolute_error(neg.labels, neg.preds), 3)
        neutral_mae = round(mean_absolute_error(neutral.labels, neutral.preds), 3)
        pos_mae = round(mean_absolute_error(pos.labels, pos.preds), 3)

        # Macro-average: unweighted mean of the three class-specific MAEs
        macro_mae = round((neg_mae + neutral_mae + pos_mae) / 3, 3)
        # print(f"{lang=} - {neg_mae=}, {neutral_mae=}, {pos_mae=}, {macro_mae=}")
        res.append([lang, neg_mae, neutral_mae, pos_mae, macro_mae, setting])
    return res


if __name__ == "__main__":
    main()