"""Utilities to plot, store and tabulate classifier-accuracy-prediction results.

Results are stored as JSON files laid out as
``results/<classifier>/<accuracy measure>/<dataset>/<method>.json``; each file
holds (at least) the keys ``true_acc`` and ``estim_acc``.
"""
import itertools
import json
import os
import subprocess
from collections import defaultdict
from glob import glob
from os import makedirs
from os.path import join
from pathlib import Path
from time import time

import matplotlib.pyplot as plt
import numpy as np
import quapy as qp
from scipy.stats import pearsonr
from sklearn.linear_model import LogisticRegression

from commons import cap_errors
from models_multiclass import ClassifierAccuracyPrediction, CAPContingencyTable


def _load_json(path):
    """Read and return the JSON content of *path*, closing the file afterwards."""
    with open(path, 'r', encoding='utf-8') as fin:
        return json.load(fin)


def plot_diagonal(cls_name, measure_name, results, base_dir='plots'):
    """Save a true-vs-estimated scatter plot with one point series per method.

    The figure is written to ``<base_dir>/<measure_name>/diagonal_<cls_name>.png``
    (directories are created if needed). The dashed diagonal marks perfect
    estimation.

    :param cls_name: classifier name, used only in the output file name
    :param measure_name: accuracy measure name, used for the sub-directory and
        the axis labels
    :param results: mapping ``method name -> {'true_acc': ..., 'estim_acc': ...}``
    :param base_dir: root directory for the plots
    """
    out_dir = join(base_dir, measure_name)
    makedirs(base_dir, exist_ok=True)
    makedirs(out_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 10))
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.plot([0, 1], [0, 1], color='black', linestyle='--')

    for method_name, method_results in results.items():
        print(method_name, measure_name)
        xs = method_results['true_acc']
        ys = method_results['estim_acc']
        print('max xs', np.max(xs))
        print('max ys', np.max(ys))
        # the mean of whatever cap_errors returns is shown in the legend entry
        err = cap_errors(xs, ys).mean()
        plt.scatter(xs, ys, label=f'{method_name} {err:.3f}', alpha=0.6)

    plt.legend()
    plt.xlabel(f'True {measure_name}')
    plt.ylabel(f'Estimated {measure_name}')

    plt.savefig(join(out_dir, 'diagonal_' + cls_name + '.png'))
    # release the figure; otherwise pyplot keeps every figure alive when this
    # function is called repeatedly
    plt.close(fig)


def getpath(cls_name, acc_name, dataset_name, method_name):
    """Return the path of the JSON result file for the given experiment."""
    return f"results/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"


def open_results(cls_name, acc_name, dataset_name='*', method_name='*'):
    """Load and merge, per method, every result file matching the arguments.

    ``dataset_name`` and ``method_name`` are interpolated into a glob pattern,
    so the default ``'*'`` matches every dataset / method.

    :return: a ``defaultdict`` mapping ``method name -> {'true_acc': list,
        'estim_acc': list}``, where the lists concatenate the values found in
        all the matching files of that method
    """
    pattern = getpath(cls_name, acc_name, dataset_name, method_name)
    results = defaultdict(lambda: {'true_acc': [], 'estim_acc': []})
    for file_path in glob(pattern):
        # the method name is the file name without the '.json' extension
        method = Path(file_path).name.replace('.json', '')
        content = _load_json(file_path)
        results[method]['true_acc'].extend(content['true_acc'])
        results[method]['estim_acc'].extend(content['estim_acc'])
    return results


def save_json_file(path, data):
    """Dump *data* as JSON into *path*, creating parent directories if needed."""
    os.makedirs(Path(path).parent, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as fout:
        json.dump(data, fout)


def save_json_result(path, true_accs, estim_accs, t_train, t_test):
    """Store one experiment outcome (accuracies and timings) as JSON in *path*.

    ``t_test`` is stored under the key ``'t_test_ave'``.
    """
    save_json_file(path, {
        't_train': t_train,
        't_test_ave': t_test,
        'true_acc': true_accs,
        'estim_acc': estim_accs,
    })


def get_dataset_stats(path, test_prot, L, V):
    """Store descriptive statistics of a dataset as JSON in *path*.

    :param path: destination JSON file
    :param test_prot: callable yielding the test samples when invoked; must
        also expose ``sample_size`` and ``total()``
    :param L: training collection (must expose ``prevalence()``, ``n_classes``
        and ``len``)
    :param V: validation collection (must expose ``prevalence()`` and ``len``)
    """
    # NOTE(review): prevalence() is assumed to be a pure computation, so it is
    # evaluated once and reused below -- confirm against the collection class
    train_prev = L.prevalence()
    test_prevs = [Ui.prevalence() for Ui in test_prot()]
    # shift of every test sample w.r.t. the training prevalence, as given by
    # quapy's `ae` error function
    shifts = [qp.error.ae(train_prev, Ui_prev) for Ui_prev in test_prevs]
    info = {
        'n_classes': L.n_classes,
        'n_train': len(L),
        'n_val': len(V),
        'train_prev': train_prev.tolist(),
        'val_prev': V.prevalence().tolist(),
        'test_prevs': [x.tolist() for x in test_prevs],
        'shifts': [x.tolist() for x in shifts],
        'sample_size': test_prot.sample_size,
        'num_samples': test_prot.total(),
    }
    save_json_file(path, info)


def gen_tables():
    """Generate the LaTeX table of errors (methods x datasets) and compile it.

    Reads ``results/<classifier>/vanilla_accuracy/<dataset>/<method>.json`` for
    the first classifier returned by ``gen_classifiers`` and for every
    combination of method and dataset, writes ``tables/<classifier>_vanilla_accuracy.tex``
    plus a wrapper document ``tables/main.tex``, and finally runs ``pdflatex``
    inside ``tables/`` (best effort) and removes the auxiliary LaTeX files.
    The working directory of the process is left untouched.

    Combinations whose estimated accuracies contain NaN or fall outside
    [0, 1] are reported on stdout and skipped.

    :raises FileNotFoundError: if a result file is missing
    """
    from commons import gen_datasets, gen_classifiers, gen_CAP, gen_CAP_cont_table
    from tabular import Table

    # Only the method names are needed here, so an unfitted classifier is used
    # as a placeholder. (A trailing comma previously turned it into a 1-tuple.)
    mock_h = LogisticRegression()
    methods = [method for method, _ in gen_CAP(mock_h, None)]
    methods += [method for method, _ in gen_CAP_cont_table(mock_h)]
    datasets = [dataset for dataset, _ in gen_datasets()]
    classifiers = [classifier for classifier, _ in gen_classifiers()]

    tables_dir = Path('tables')
    os.makedirs(tables_dir, exist_ok=True)

    tex_doc = "\n".join([
        "",
        "\\documentclass[10pt,a4paper]{article}",
        "\\usepackage[utf8]{inputenc}",
        "\\usepackage{amsmath}",
        "\\usepackage{amsfonts}",
        "\\usepackage{amssymb}",
        "\\usepackage{graphicx}",
        "\\usepackage{tabularx}",
        "\\usepackage{color}",
        "\\usepackage{colortbl}",
        "\\usepackage{xcolor}",
        "\\begin{document}",
        "",
    ])

    classifier = classifiers[0]
    metric = "vanilla_accuracy"

    table = Table(datasets, methods)
    for method, dataset in itertools.product(methods, datasets):
        results = _load_json(f'results/{classifier}/{metric}/{dataset}/{method}.json')
        true_acc = results['true_acc']
        estim_acc = np.asarray(results['estim_acc'])
        invalid = (
            np.isnan(estim_acc).any()
            or (estim_acc > 1).any()
            or (estim_acc < 0).any()
        )
        if invalid:
            print(f'error in {method=} {dataset=}')
            continue
        table.add(dataset, method, cap_errors(true_acc, estim_acc))

    tex = table.latexTabular()
    table_name = f'{classifier}_{metric}.tex'
    with open(tables_dir / table_name, 'wt', encoding='utf-8') as foo:
        foo.write('\\resizebox{\\textwidth}{!}{%\n')
        foo.write('\\begin{tabular}{c|' + ('c' * len(methods)) + '}\n')
        foo.write(tex)
        foo.write('\\end{tabular}%\n')
        foo.write('}\n')

    tex_doc += "\\input{" + table_name + "}\n"
    tex_doc += "\n\\end{document}\n"

    with open(tables_dir / 'main.tex', 'wt', encoding='utf-8') as foo:
        foo.write(tex_doc)

    print("[Tables Done] runing latex")
    # Run pdflatex from within tables/ (so that \input resolves) without
    # changing the working directory of the whole process. Compilation is
    # best effort: a failing or missing pdflatex does not abort the function.
    try:
        subprocess.run(['pdflatex', 'main.tex'], cwd=tables_dir, check=False)
    except FileNotFoundError:
        print('pdflatex not found; skipping the compilation of main.tex')

    for extension in ('aux', 'bbl', 'blg', 'log', 'out', 'dvi'):
        (tables_dir / f'main.{extension}').unlink(missing_ok=True)