# QuAcc/quacc/plot.py

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

from quacc.environ import env


def _get_markers(n: int):
    ls = [
        "o",
        "v",
        "x",
        "+",
        "s",
        "D",
        "p",
        "h",
        "*",
        "^",
        "1",
        "2",
        "3",
        "4",
        "X",
        ">",
        "<",
        ".",
        "P",
        "d",
    ]
    if n > len(ls):
        ls = ls * (n / len(ls) + 1)
    return ls[:n]


def plot_delta(
    base_prevs,
    dict_vals,
    *,
    pos_class=1,
    metric="acc",
    name="default",
    train_prev=None,
    legend=True,
) -> Path:
    """Plot one line per method of mean value against test prevalence.

    The figure is written to ``env.PLOT_OUT_DIR / "<title>.png"`` and then
    closed, so repeated calls do not accumulate open figures in pyplot.

    Args:
        base_prevs: sequence of prevalence vectors; the component at index
            ``pos_class`` of each one is used as the x coordinate.
        dict_vals: mapping from method name to a sequence with one entry per
            element of ``base_prevs``; each entry is averaged over its last
            axis to obtain the y coordinate.
        pos_class: index of the class whose prevalence is plotted.
        metric: metric name, used as y-axis label and in the title/file name.
        name: free-form identifier embedded in the title/file name.
        train_prev: optional training prevalence vector; when given, the
            rounded percentage of ``train_prev[pos_class]`` is embedded in
            the title/file name.
        legend: whether to draw the legend to the right of the axes.

    Returns:
        Path of the saved PNG image.
    """
    if train_prev is not None:
        t_prev_pos = int(round(train_prev[pos_class] * 100))
        title = f"delta_{name}_{t_prev_pos}_{metric}"
    else:
        title = f"delta_{name}_{metric}"

    fig, ax = plt.subplots()
    try:
        ax.set_aspect("auto")
        ax.grid()

        # One color per method; switch to the larger qualitative palette when
        # there are more than ten methods.
        n_methods = len(dict_vals)
        cm = plt.get_cmap("tab20" if n_methods > 10 else "tab10")
        ax.set_prop_cycle(
            color=[cm(1.0 * i / n_methods) for i in range(n_methods)],
        )

        x_values = [bp[pos_class] for bp in base_prevs]
        for method, deltas in dict_vals.items():
            avg = np.array([np.mean(d, axis=-1) for d in deltas])
            ax.plot(
                x_values,
                avg,
                label=method,
                linestyle="-",
                marker="o",
                markersize=3,
                zorder=2,
            )

        ax.set(xlabel="test prevalence", ylabel=metric, title=title)

        if legend:
            ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
        output_path = env.PLOT_OUT_DIR / f"{title}.png"
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)

    return output_path


def plot_diagonal(
    reference,
    dict_vals,
    *,
    pos_class=1,
    metric="acc",
    name="default",
    train_prev=None,
    legend=True,
) -> Path:
    """Scatter each method's values against ``reference`` and overlay a line.

    For every method the raw points ``(reference, values)`` are drawn as
    markers. A straight segment is then drawn between the values
    interpolated at the smallest and at the largest reference value; this
    segment carries the legend label. The figure is written to
    ``env.PLOT_OUT_DIR / "<title>.png"`` and then closed.

    Args:
        reference: 1-D sequence of x coordinates shared by all methods.
        dict_vals: mapping from method name to a 1-D sequence of values with
            the same length as ``reference``.
        pos_class: index used to read ``train_prev`` for the title.
        metric: metric name, used as y-axis label and in the title/file name.
        name: free-form identifier embedded in the title/file name.
        train_prev: optional training prevalence vector; when given, the
            rounded percentage of ``train_prev[pos_class]`` is embedded in
            the title/file name.
        legend: whether to draw the legend to the right of the axes.

    Returns:
        Path of the saved PNG image.
    """
    if train_prev is not None:
        t_prev_pos = int(round(train_prev[pos_class] * 100))
        title = f"diagonal_{name}_{t_prev_pos}_{metric}"
    else:
        title = f"diagonal_{name}_{metric}"

    fig, ax = plt.subplots()
    try:
        ax.set_aspect("auto")
        ax.grid()

        # The cycle is doubled because every method is plotted twice (points,
        # then line) and both artists must share the same color.
        n_methods = len(dict_vals)
        cm = plt.get_cmap("tab10")
        ax.set_prop_cycle(
            marker=_get_markers(n_methods) * 2,
            color=[cm(1.0 * i / n_methods) for i in range(n_methods)] * 2,
        )

        reference = np.array(reference)
        # np.unique already returns its result sorted.
        x_ticks = np.unique(reference)
        # np.interp needs increasing x coordinates; a stable sort keeps the
        # original relative order of points sharing the same reference value.
        order = np.argsort(reference, kind="stable")
        sorted_reference = reference[order]

        for deltas in dict_vals.values():
            ax.plot(
                reference,
                np.array(deltas),
                linestyle="None",
                markersize=3,
                zorder=2,
            )

        x_interp = x_ticks[[0, -1]]
        for method, deltas in dict_vals.items():
            y_interp = np.interp(
                x_interp, sorted_reference, np.array(deltas)[order]
            )
            ax.plot(
                x_interp,
                y_interp,
                label=method,
                linestyle="-",
                markersize=0,
                zorder=1,
            )

        ax.set(xlabel="test prevalence", ylabel=metric, title=title)

        if legend:
            ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
        output_path = env.PLOT_OUT_DIR / f"{title}.png"
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)

    return output_path


def plot_shift(
    base_prevs,
    dict_vals,
    *,
    pos_class=1,
    metric="acc",
    name="default",
    train_prev=None,
    legend=True,
) -> Path:
    """Plot one line per method of mean value against prevalence shift.

    The shift of a sample is the absolute difference between its prevalence
    and the training prevalence for ``pos_class``, rounded to two decimals.
    Values sharing the same shift are averaged. The figure is written to
    ``env.PLOT_OUT_DIR / "<title>.png"`` and then closed.

    Args:
        base_prevs: sequence of prevalence vectors, one per sample.
        dict_vals: mapping from method name to a sequence of values aligned
            with ``base_prevs``.
        pos_class: index of the class whose prevalence is considered.
        metric: metric name, used as y-axis label and in the title/file name.
        name: free-form identifier embedded in the title/file name.
        train_prev: training prevalence vector (required).
        legend: whether to draw the legend to the right of the axes.

    Returns:
        Path of the saved PNG image.

    Raises:
        AttributeError: if ``train_prev`` is None.
    """
    if train_prev is None:
        raise AttributeError("train_prev cannot be None.")

    train_prev = train_prev[pos_class]
    t_prev_pos = int(round(train_prev * 100))
    title = f"shift_{name}_{t_prev_pos}_{metric}"

    fig, ax = plt.subplots()
    try:
        ax.set_aspect("auto")
        ax.grid()

        # One color per method; switch to the larger qualitative palette when
        # there are more than ten methods.
        n_methods = len(dict_vals)
        cm = plt.get_cmap("tab20" if n_methods > 10 else "tab10")
        ax.set_prop_cycle(
            color=[cm(1.0 * i / n_methods) for i in range(n_methods)],
        )

        # Rounding makes nearly identical shifts fall into the same bin.
        shifts = np.around(
            [abs(bp[pos_class] - train_prev) for bp in base_prevs], decimals=2
        )
        for method, deltas in dict_vals.items():
            delta_bins = {}
            for shift, delta in zip(shifts, deltas):
                delta_bins.setdefault(shift, []).append(delta)

            # Sorting the bin keys explicitly (instead of unpacking a zipped
            # sequence) also copes with a method that has no values.
            bp_unique = sorted(delta_bins)
            delta_avg = [np.mean(delta_bins[shift]) for shift in bp_unique]

            ax.plot(
                bp_unique,
                delta_avg,
                label=method,
                linestyle="-",
                marker="o",
                markersize=3,
                zorder=2,
            )

        ax.set(xlabel="test prevalence", ylabel=metric, title=title)

        if legend:
            ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
        output_path = env.PLOT_OUT_DIR / f"{title}.png"
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)

    return output_path