gfun_multimodal/compute_results.py


import pandas as pd
from sklearn.metrics import mean_absolute_error
from os.path import join
"""
MEA and classification is meaningful only in "ordinal" tasks e.g., sentiment classification.
Otherwise the distance between the categories has no semantics!
- NB: we want to get the macro-averaged class specific MAE!
"""
def main():
    # SETTINGS = ["p", "m", "w", "t", "mp", "mpw", "mpt", "mptw"]
    SETTINGS = ["mbert"]
    results = []
    for setting in SETTINGS:
        # evaluate() returns one row per language for the given setting
        results.extend(evaluate(setting))
    df = pd.DataFrame(
        results,
        columns=["lang", "neg_mae", "neutral_mae", "pos_mae", "macro_mae", "setting"],
    )
    print(df)


def evaluate(setting):
    result_dir = "results"
    # result_file = f"lang-specific.gfun.{setting}.webis.csv"
    result_file = "lang-specific.mbert.webis.csv"
    # print(f"- reading from: {result_file}")
    df = pd.read_csv(join(result_dir, result_file))
    langs = df.langs.unique()
    res = []
    for lang in langs:
        # Boolean masks selecting one language and one class at a time
        lang_mask = df.langs == lang
        selected_neg = df.labels == 0
        selected_neutral = df.labels == 1
        selected_pos = df.labels == 2
        neg = df[lang_mask & selected_neg]
        neutral = df[lang_mask & selected_neutral]
        pos = df[lang_mask & selected_pos]
        # print(f"{lang=}")
        # print(neg.shape, neutral.shape, pos.shape)
        # Class-specific MAE: each class contributes equally to the macro average
        neg_mae = round(mean_absolute_error(neg.labels, neg.preds), 3)
        neutral_mae = round(mean_absolute_error(neutral.labels, neutral.preds), 3)
        pos_mae = round(mean_absolute_error(pos.labels, pos.preds), 3)
        macro_mae = round((neg_mae + neutral_mae + pos_mae) / 3, 3)
        # print(f"{lang=} - {neg_mae=}, {neutral_mae=}, {pos_mae=}, {macro_mae=}")
        res.append([lang, neg_mae, neutral_mae, pos_mae, macro_mae, setting])
    return res


if __name__ == "__main__":
    main()
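
# Usage (a sketch, assuming results/lang-specific.mbert.webis.csv exists with one row
# per test document and at least the columns used above: langs, labels, preds):
#
#   $ python compute_results.py
#
# which prints a DataFrame with one row per language:
#   lang, neg_mae, neutral_mae, pos_mae, macro_mae, setting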