# QuaPy/ClassifierAccuracy/utils.py

import itertools
import os
from collections import defaultdict
import matplotlib.pyplot as plt
from pathlib import Path
from os import makedirs
from os.path import join
import numpy as np
import json
from scipy.stats import pearsonr
from sklearn.linear_model import LogisticRegression
from time import time
import quapy as qp
from glob import glob
from commons import cap_errors
from models_multiclass import ClassifierAccuracyPrediction, CAPContingencyTable


def plot_diagonal(cls_name, measure_name, results, base_dir='plots'):

    makedirs(base_dir, exist_ok=True)
    makedirs(join(base_dir, measure_name), exist_ok=True)

    # Create scatter plot
    plt.figure(figsize=(10, 10))
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.plot([0, 1], [0, 1], color='black', linestyle='--')

    for method_name in results.keys():
        print(method_name, measure_name)
        xs = results[method_name]['true_acc']
        ys = results[method_name]['estim_acc']
        print('max xs', np.max(xs))
        print('max ys', np.max(ys))
        err = cap_errors(xs, ys).mean()
        # pear_cor, _ = pearsonr(xs, ys)
        plt.scatter(xs, ys, label=f'{method_name} {err:.3f}', alpha=0.6)

    plt.legend()

    # Add labels and title
    plt.xlabel(f'True {measure_name}')
    plt.ylabel(f'Estimated {measure_name}')

    # Display the plot
    # plt.show()
    plt.savefig(join(base_dir, measure_name, 'diagonal_' + cls_name + '.png'))


def getpath(cls_name, acc_name, dataset_name, method_name):
    return f"results/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"


def open_results(cls_name, acc_name, dataset_name='*', method_name='*'):
    path = getpath(cls_name, acc_name, dataset_name, method_name)
    results = defaultdict(lambda: {'true_acc': [], 'estim_acc': []})
    for file in glob(path):
        # print(file)
        method = Path(file).name.replace('.json', '')
        result = json.load(open(file, 'r'))
        results[method]['true_acc'].extend(result['true_acc'])
        results[method]['estim_acc'].extend(result['estim_acc'])
    return results
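

# Illustrative usage sketch (not part of the original module): loading all result
# files for one classifier/measure pair and plotting the true-vs-estimated diagonal.
# The classifier name 'LR' and the measure 'vanilla_accuracy' are assumed example
# values; they must match the folder names under ./results for this to work.
#
#   results = open_results(cls_name='LR', acc_name='vanilla_accuracy')
#   plot_diagonal(cls_name='LR', measure_name='vanilla_accuracy', results=results)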


def save_json_file(path, data):
    os.makedirs(Path(path).parent, exist_ok=True)
    with open(path, 'w') as f:
        json.dump(data, f)


def save_json_result(path, true_accs, estim_accs, t_train, t_test):
    result = {
        't_train': t_train,
        't_test_ave': t_test,
        'true_acc': true_accs,
        'estim_acc': estim_accs
    }
    save_json_file(path, result)


def get_dataset_stats(path, test_prot, L, V):
    test_prevs = [Ui.prevalence() for Ui in test_prot()]
    shifts = [qp.error.ae(L.prevalence(), Ui_prev) for Ui_prev in test_prevs]
    info = {
        'n_classes': L.n_classes,
        'n_train': len(L),
        'n_val': len(V),
        'train_prev': L.prevalence().tolist(),
        'val_prev': V.prevalence().tolist(),
        'test_prevs': [x.tolist() for x in test_prevs],
        'shifts': [x.tolist() for x in shifts],
        'sample_size': test_prot.sample_size,
        'num_samples': test_prot.total()
    }
    save_json_file(path, info)
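

# Illustrative sketch (not part of the original module) of how get_dataset_stats
# might be fed: the dataset name, split proportion, and protocol parameters below
# are assumptions for illustration, not values fixed by this repository.
#
#   train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test
#   L, V = train.split_stratified(train_prop=0.5)
#   test_prot = qp.protocol.UPP(test, sample_size=250, repeats=100,
#                               return_type='labelled_collection')
#   get_dataset_stats('dataset_stats/dry-bean.json', test_prot, L, V)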


def gen_tables():
    from commons import gen_datasets, gen_classifiers, gen_acc_measure, gen_CAP, gen_CAP_cont_table
    from tabular import Table

    # mock classifier, used only to enumerate the CAP method names
    mock_h = LogisticRegression()
    methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
    datasets = [dataset for dataset, _ in gen_datasets()]
    classifiers = [classifier for classifier, _ in gen_classifiers()]
    measures = [measure for measure, _ in gen_acc_measure()]

    os.makedirs('tables', exist_ok=True)

    tex_doc = """
\\documentclass[10pt,a4paper]{article}
\\usepackage[utf8]{inputenc}
\\usepackage{amsmath}
\\usepackage{amsfonts}
\\usepackage{amssymb}
\\usepackage{graphicx}
\\usepackage{tabularx}
\\usepackage{color}
\\usepackage{colortbl}
\\usepackage{xcolor}
\\begin{document}
"""

    classifier = classifiers[0]
    metric = "vanilla_accuracy"

    table = Table(datasets, methods)
    for method, dataset in itertools.product(methods, datasets):
        path = f'results/{classifier}/{metric}/{dataset}/{method}.json'
        results = json.load(open(path, 'r'))
        true_acc = results['true_acc']
        estim_acc = np.asarray(results['estim_acc'])
        if any(np.isnan(estim_acc)) or any(estim_acc > 1) or any(estim_acc < 0):
            print(f'error in {method=} {dataset=}')
            continue
        errors = cap_errors(true_acc, estim_acc)
        table.add(dataset, method, errors)

    tex = table.latexTabular()
    table_name = f'{classifier}_{metric}.tex'
    with open(f'./tables/{table_name}', 'wt') as foo:
        foo.write('\\resizebox{\\textwidth}{!}{%\n')
        foo.write('\\begin{tabular}{c|' + ('c' * len(methods)) + '}\n')
        foo.write(tex)
        foo.write('\\end{tabular}%\n')
        foo.write('}\n')
    tex_doc += "\\input{" + table_name + "}\n"

    tex_doc += """
\\end{document}
"""

    with open('./tables/main.tex', 'wt') as foo:
        foo.write(tex_doc)

    print("[Tables Done] running latex")
    os.chdir('./tables/')
    os.system('pdflatex main.tex')
    os.system('rm main.aux main.bbl main.blg main.log main.out main.dvi')
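

# Illustrative entry point (not part of the original module): a minimal sketch of
# running the table generation directly, assuming the ./results directory has
# already been populated by the experiment scripts in this folder.
if __name__ == '__main__':
    gen_tables()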