# QuaPy/ClassifierAccuracy/utils.py

import itertools
import os
from collections import defaultdict

import matplotlib.pyplot as plt
from pathlib import Path
from os import makedirs
from os.path import join
import numpy as np
import json
from scipy.stats import pearsonr
from sklearn.linear_model import LogisticRegression
from time import time
import quapy as qp
from glob import glob

from commons import cap_errors
from models_multiclass import ClassifierAccuracyPrediction, CAPContingencyTable


def plot_diagonal(cls_name, measure_name, results, base_dir='plots'):
    """Scatter estimated vs. true accuracy for every method and save the figure.

    One point cloud is drawn per key of ``results`` on the unit square, with
    the ``y = x`` diagonal as a dashed reference line. Each legend entry shows
    the method name followed by the mean of ``cap_errors(true, estimated)``.
    The image is written to
    ``<base_dir>/<measure_name>/diagonal_<cls_name>.png``.

    :param cls_name: classifier name; only used to build the output file name
    :param measure_name: name of the accuracy measure; used in the axis labels
        and as the name of the output sub-directory
    :param results: mapping ``method_name -> {'true_acc': [...], 'estim_acc': [...]}``
    :param base_dir: root directory for the plots (created if missing)
    """
    # makedirs creates the intermediate `base_dir` as well
    out_dir = join(base_dir, measure_name)
    makedirs(out_dir, exist_ok=True)

    # Create scatter plot
    fig = plt.figure(figsize=(10, 10))
    try:
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.plot([0, 1], [0, 1], color='black', linestyle='--')

        for method_name, accs in results.items():
            print(method_name, measure_name)
            xs = accs['true_acc']
            ys = accs['estim_acc']
            print('max xs', np.max(xs))
            print('max ys', np.max(ys))
            err = cap_errors(xs, ys).mean()
            plt.scatter(xs, ys, label=f'{method_name} {err:.3f}', alpha=0.6)

        plt.legend()

        # Add labels and title
        plt.xlabel(f'True {measure_name}')
        plt.ylabel(f'Estimated {measure_name}')

        plt.savefig(join(out_dir, 'diagonal_' + cls_name + '.png'))
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # without this, repeated calls accumulate open figures in memory
        plt.close(fig)


def getpath(cls_name, acc_name, dataset_name, method_name):
    """Return the relative path of the JSON result file for one experiment.

    The layout is ``results/<cls_name>/<acc_name>/<dataset_name>/<method_name>.json``.
    Any component may be a glob wildcard (e.g. ``'*'``) when the returned
    string is meant to be used as a glob pattern.
    """
    components = (cls_name, acc_name, dataset_name, method_name)
    return "results/{}/{}/{}/{}.json".format(*components)


def open_results(cls_name, acc_name, dataset_name='*', method_name='*'):
    """Load and merge the stored results that match the given selection.

    The arguments are turned into a glob pattern through :func:`getpath`;
    with the default wildcards, all datasets and all methods of the given
    classifier and accuracy measure are collected. Results are grouped by
    method (the file name without its ``.json`` extension), concatenating
    the accuracy lists of every matching file.

    :param cls_name: classifier name (path component)
    :param acc_name: accuracy measure name (path component)
    :param dataset_name: dataset name or glob wildcard
    :param method_name: method name or glob wildcard
    :return: a ``defaultdict`` mapping ``method -> {'true_acc': [...], 'estim_acc': [...]}``
    """
    path = getpath(cls_name, acc_name, dataset_name, method_name)
    results = defaultdict(lambda: {'true_acc': [], 'estim_acc': []})
    for file in glob(path):
        method = Path(file).name.replace('.json', '')
        # use a context manager so the handle is closed right away instead of
        # being left for the garbage collector
        with open(file, 'r') as fin:
            result = json.load(fin)
        results[method]['true_acc'].extend(result['true_acc'])
        results[method]['estim_acc'].extend(result['estim_acc'])
    return results


def save_json_file(path, data):
    """Serialize ``data`` as JSON into ``path``, creating parent folders on demand.

    :param path: destination file; an existing file is overwritten
    :param data: any JSON-serializable object
    """
    parent_dir = Path(path).parent
    os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'w') as fout:
        json.dump(data, fout)


def save_json_result(path, true_accs, estim_accs, t_train, t_test):
    """Store the outcome of one experiment as a JSON file.

    The file holds the keys ``t_train``, ``t_test_ave``, ``true_acc`` and
    ``estim_acc``, in that order.

    :param path: destination file, written through :func:`save_json_file`
    :param true_accs: value stored under ``true_acc``
    :param estim_accs: value stored under ``estim_acc``
    :param t_train: value stored under ``t_train``
    :param t_test: value stored under ``t_test_ave``
    """
    keys = ('t_train', 't_test_ave', 'true_acc', 'estim_acc')
    values = (t_train, t_test, true_accs, estim_accs)
    save_json_file(path, dict(zip(keys, values)))


def get_dataset_stats(path, test_prot, L, V):
    """Collect descriptive statistics of a dataset split and save them as JSON.

    The report contains the number of classes, the sizes and prevalences of
    the training (``L``) and validation (``V``) sets, the prevalence of every
    sample produced by ``test_prot()``, the ``qp.error.ae`` between the
    training prevalence and each test prevalence (``shifts``), and the
    protocol's ``sample_size`` and ``total()``.

    :param path: destination file, written through :func:`save_json_file`
    :param test_prot: callable yielding test samples that expose
        ``prevalence()``; must also provide ``sample_size`` and ``total()``
    :param L: training collection (``n_classes``, ``prevalence()``, ``len``)
    :param V: validation collection (``prevalence()``, ``len``)
    """
    test_prevs = [Ui.prevalence() for Ui in test_prot()]
    # the training prevalence does not depend on the test sample: compute it
    # once instead of once per sample
    train_prev = L.prevalence()
    shifts = [qp.error.ae(train_prev, Ui_prev) for Ui_prev in test_prevs]
    info = {
        'n_classes': L.n_classes,
        'n_train': len(L),
        'n_val': len(V),
        'train_prev': train_prev.tolist(),
        'val_prev': V.prevalence().tolist(),
        'test_prevs': [x.tolist() for x in test_prevs],
        'shifts': [x.tolist() for x in shifts],
        'sample_size': test_prot.sample_size,
        'num_samples': test_prot.total()
    }
    save_json_file(path, info)


def gen_tables():
    """Build a LaTeX table of CAP errors and compile it with ``pdflatex``.

    Method and dataset names come from the generators in ``commons``. For the
    first classifier returned by ``gen_classifiers()`` and the
    ``vanilla_accuracy`` metric, the function reads
    ``results/<classifier>/<metric>/<dataset>/<method>.json`` for every
    (method, dataset) pair and adds the ``cap_errors`` between true and
    estimated accuracies to a ``Table``. Pairs whose estimates contain NaN or
    values outside ``[0, 1]`` are reported on stdout and skipped.

    The table is written to ``tables/<classifier>_<metric>.tex`` together
    with a ``tables/main.tex`` wrapper, which is then compiled inside the
    ``tables`` directory; auxiliary LaTeX files are removed afterwards.
    The working directory of the calling process is left untouched.
    """
    import subprocess

    from commons import gen_datasets, gen_classifiers, gen_acc_measure, gen_CAP, gen_CAP_cont_table
    from tabular import Table

    # placeholder classifier: the generators are only consumed for their
    # method names here (the original trailing comma turned this into a tuple)
    mock_h = LogisticRegression()
    methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
    datasets = [dataset for dataset, _ in gen_datasets()]
    classifiers = [classifier for classifier, _ in gen_classifiers()]
    # NOTE: currently unused; the metric is hard-coded below
    measures = [measure for measure, _ in gen_acc_measure()]

    tables_dir = 'tables'
    os.makedirs(tables_dir, exist_ok=True)

    tex_doc = """
    \\documentclass[10pt,a4paper]{article}
    \\usepackage[utf8]{inputenc}
    \\usepackage{amsmath}
    \\usepackage{amsfonts}
    \\usepackage{amssymb}
    \\usepackage{graphicx}
    \\usepackage{tabularx}
    \\usepackage{color}
    \\usepackage{colortbl}
    \\usepackage{xcolor}
    \\begin{document}
    """

    classifier = classifiers[0]
    metric = "vanilla_accuracy"

    table = Table(datasets, methods)
    for method, dataset in itertools.product(methods, datasets):
        path = f'results/{classifier}/{metric}/{dataset}/{method}.json'
        with open(path, 'r') as fin:
            results = json.load(fin)
        true_acc = results['true_acc']
        estim_acc = np.asarray(results['estim_acc'])
        # an accuracy estimate must be a number in [0, 1]; anything else
        # invalidates the whole (method, dataset) entry
        if np.isnan(estim_acc).any() or (estim_acc > 1).any() or (estim_acc < 0).any():
            print(f'error in {method=} {dataset=}')
            continue
        errors = cap_errors(true_acc, estim_acc)
        table.add(dataset, method, errors)

    tex = table.latexTabular()
    table_name = f'{classifier}_{metric}.tex'
    with open(join(tables_dir, table_name), 'wt') as foo:
        foo.write('\\resizebox{\\textwidth}{!}{%\n')
        foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
        foo.write(tex)
        foo.write('\\end{tabular}%\n')
        foo.write('}\n')

    # the backslash must be escaped: a bare backslash before "i" is an
    # invalid escape sequence
    tex_doc += "\\input{" + table_name + "}\n"

    tex_doc += """
    \\end{document}
    """
    with open(join(tables_dir, 'main.tex'), 'wt') as foo:
        foo.write(tex_doc)

    print("[Tables Done] runing latex")
    # run inside `tables` via `cwd` rather than os.chdir, so the caller's
    # working directory (and its relative paths) is not altered
    try:
        subprocess.run(['pdflatex', 'main.tex'], cwd=tables_dir, check=False)
    except FileNotFoundError:
        print('pdflatex not found: skipping the compilation of main.tex')

    # remove LaTeX by-products; not all of them are produced on every run
    for ext in ('aux', 'bbl', 'blg', 'log', 'out', 'dvi'):
        try:
            os.remove(join(tables_dir, f'main.{ext}'))
        except FileNotFoundError:
            pass