QuaPy/ClassifierAccuracy/utils.py

165 lines
5.1 KiB
Python

import itertools
import os
from collections import defaultdict
import matplotlib.pyplot as plt
from pathlib import Path
from os import makedirs
from os.path import join
import numpy as np
import json
from scipy.stats import pearsonr
from sklearn.linear_model import LogisticRegression
from time import time
import quapy as qp
from glob import glob
from commons import cap_errors
from models_multiclass import ClassifierAccuracyPrediction, CAPContingencyTable
def plot_diagonal(cls_name, measure_name, results, base_dir='plots'):
    """Scatter true vs. estimated accuracy per method and save the figure.

    One point cloud is drawn for every key of ``results``. The legend entry
    of each cloud shows the method name followed by the mean of
    ``cap_errors(true_acc, estim_acc)`` for that method. The image is written
    to ``<base_dir>/<measure_name>/diagonal_<cls_name>.png``.

    Args:
        cls_name: classifier name; only used in the output file name.
        measure_name: name of the accuracy measure; used for the axis labels
            and as the output sub-directory.
        results: mapping ``method_name -> {'true_acc': [...],
            'estim_acc': [...]}``.
        base_dir: root directory in which the plots are stored.
    """
    out_dir = join(base_dir, measure_name)
    # makedirs creates intermediate directories, so base_dir is covered too.
    makedirs(out_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 10))
    try:
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        # Reference diagonal: points on it have estimated == true accuracy.
        plt.plot([0, 1], [0, 1], color='black', linestyle='--')

        for method_name, method_results in results.items():
            print(method_name, measure_name)
            xs = method_results['true_acc']
            ys = method_results['estim_acc']
            print('max xs', np.max(xs))
            print('max ys', np.max(ys))
            err = cap_errors(xs, ys).mean()
            plt.scatter(xs, ys, label=f'{method_name} {err:.3f}', alpha=0.6)

        plt.legend()
        plt.xlabel(f'True {measure_name}')
        plt.ylabel(f'Estimated {measure_name}')

        plt.savefig(join(out_dir, 'diagonal_'+cls_name+'.png'))
    finally:
        # Release the figure; otherwise pyplot keeps every figure alive and
        # repeated calls accumulate memory.
        plt.close(fig)
def getpath(cls_name, acc_name, dataset_name, method_name):
    """Return the JSON result path for a classifier/measure/dataset/method.

    The layout is ``results/<cls_name>/<acc_name>/<dataset_name>/<method_name>.json``.
    Any component may be a glob wildcard such as ``'*'``; the string is only
    assembled here, never resolved against the file system.
    """
    template = "results/{}/{}/{}/{}.json"
    return template.format(cls_name, acc_name, dataset_name, method_name)
def open_results(cls_name, acc_name, dataset_name='*', method_name='*'):
    """Load and merge stored results, grouped by method.

    Every file matching ``getpath(cls_name, acc_name, dataset_name,
    method_name)`` is read (the defaults ``'*'`` match all datasets and all
    methods). Files are keyed by their base name without the ``.json``
    extension, so results of the same method coming from different datasets
    are concatenated.

    Args:
        cls_name: classifier name component of the result path.
        acc_name: accuracy-measure name component of the result path.
        dataset_name: dataset name or glob pattern.
        method_name: method name or glob pattern.

    Returns:
        A ``defaultdict`` mapping ``method -> {'true_acc': list,
        'estim_acc': list}``.
    """
    path = getpath(cls_name, acc_name, dataset_name, method_name)
    results = defaultdict(lambda: {'true_acc': [], 'estim_acc': []})
    for file in glob(path):
        # stem drops only the trailing ".json", unlike str.replace which
        # would also remove a ".json" occurring inside the name.
        method = Path(file).stem
        # Context manager so the handle is closed deterministically.
        with open(file, 'r') as fin:
            result = json.load(fin)
        results[method]['true_acc'].extend(result['true_acc'])
        results[method]['estim_acc'].extend(result['estim_acc'])
    return results
def save_json_file(path, data):
    """Write ``data`` as JSON to ``path``, creating parent directories.

    Args:
        path: destination file; missing parent directories are created.
        data: any JSON-serializable object.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w') as handle:
        json.dump(data, handle)
def save_json_result(path, true_accs, estim_accs, t_train, t_test):
    """Store the outcome of one experiment as a JSON file.

    The written object has the keys ``'t_train'``, ``'t_test_ave'``,
    ``'true_acc'`` and ``'estim_acc'`` (in that order).

    Args:
        path: destination file, passed on to ``save_json_file``.
        true_accs: true accuracy values, stored under ``'true_acc'``.
        estim_accs: estimated accuracy values, stored under ``'estim_acc'``.
        t_train: training time, stored under ``'t_train'``.
        t_test: test time, stored under ``'t_test_ave'``.
    """
    payload = dict(
        t_train=t_train,
        t_test_ave=t_test,
        true_acc=true_accs,
        estim_acc=estim_accs,
    )
    save_json_file(path, payload)
def get_dataset_stats(path, test_prot, L, V):
    """Collect summary statistics of a dataset split and save them as JSON.

    Args:
        path: destination file, passed on to ``save_json_file``.
        test_prot: callable that yields the test samples when invoked; it
            must also expose ``sample_size`` and ``total()``.
        L: training collection (provides ``n_classes``, ``prevalence()`` and
            ``len``).
        V: validation collection (provides ``prevalence()`` and ``len``).
    """
    # Prevalence of every sample generated by the test protocol.
    test_prevs = []
    for sample in test_prot():
        test_prevs.append(sample.prevalence())

    # qp.error.ae between the training prevalence and each test prevalence.
    shifts = []
    for sample_prev in test_prevs:
        shifts.append(qp.error.ae(L.prevalence(), sample_prev))

    info = {}
    info['n_classes'] = L.n_classes
    info['n_train'] = len(L)
    info['n_val'] = len(V)
    info['train_prev'] = L.prevalence().tolist()
    info['val_prev'] = V.prevalence().tolist()
    # .tolist() turns the numpy values into plain Python objects for JSON.
    info['test_prevs'] = [prev.tolist() for prev in test_prevs]
    info['shifts'] = [shift.tolist() for shift in shifts]
    info['sample_size'] = test_prot.sample_size
    info['num_samples'] = test_prot.total()

    save_json_file(path, info)
def gen_tables():
    """Generate a LaTeX table of CAP errors and run ``pdflatex`` on it.

    For the first classifier yielded by ``gen_classifiers`` and the
    ``vanilla_accuracy`` measure, the stored result of every
    (method, dataset) pair is loaded, converted into errors with
    ``cap_errors`` and added to a ``Table``. The tabular is written to
    ``tables/<classifier>_<metric>.tex``, a wrapping document to
    ``tables/main.tex``, and ``pdflatex main.tex`` is then run inside
    ``tables``. Pairs whose estimates contain NaN or values outside
    ``[0, 1]`` are reported and skipped.

    The working directory of the calling process is left unchanged.
    """
    import subprocess

    from commons import gen_datasets, gen_classifiers, gen_CAP, gen_CAP_cont_table
    from tabular import Table

    # Only the method names are needed here, so an unfitted estimator is
    # handed to the generators. It must be the estimator itself, not a
    # one-element tuple (which a trailing comma would produce).
    mock_h = LogisticRegression()
    methods = [method for method, _ in gen_CAP(mock_h, None)]
    methods += [method for method, _ in gen_CAP_cont_table(mock_h)]
    datasets = [dataset for dataset, _ in gen_datasets()]
    classifiers = [classifier for classifier, _ in gen_classifiers()]

    tables_dir = Path('tables')
    os.makedirs(tables_dir, exist_ok=True)

    tex_doc = """
\\documentclass[10pt,a4paper]{article}
\\usepackage[utf8]{inputenc}
\\usepackage{amsmath}
\\usepackage{amsfonts}
\\usepackage{amssymb}
\\usepackage{graphicx}
\\usepackage{tabularx}
\\usepackage{color}
\\usepackage{colortbl}
\\usepackage{xcolor}
\\begin{document}
"""

    classifier = classifiers[0]
    metric = "vanilla_accuracy"

    table = Table(datasets, methods)
    for method, dataset in itertools.product(methods, datasets):
        path = getpath(classifier, metric, dataset, method)
        # Context manager so the handle is closed on every iteration.
        with open(path, 'r') as fin:
            results = json.load(fin)
        true_acc = results['true_acc']
        estim_acc = np.asarray(results['estim_acc'])
        invalid = (
            np.isnan(estim_acc).any()
            or (estim_acc > 1).any()
            or (estim_acc < 0).any()
        )
        if invalid:
            print(f'error in {method=} {dataset=}')
            continue
        errors = cap_errors(true_acc, estim_acc)
        table.add(dataset, method, errors)

    tex = table.latexTabular()
    table_name = f'{classifier}_{metric}.tex'
    with open(tables_dir / table_name, 'wt') as foo:
        foo.write('\\resizebox{\\textwidth}{!}{%\n')
        foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
        foo.write(tex)
        foo.write('\\end{tabular}%\n')
        foo.write('}\n')

    # "\\input" spelled with an explicit backslash: "\i" is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    tex_doc += "\\input{" + table_name + "}\n"
    tex_doc += """
\\end{document}
"""
    with open(tables_dir / 'main.tex', 'wt') as foo:
        foo.write(tex_doc)

    print("[Tables Done] runing latex")
    # Run pdflatex with cwd=tables instead of os.chdir, so the caller's
    # working directory is not altered as a side effect.
    try:
        subprocess.run(['pdflatex', 'main.tex'], cwd=tables_dir, check=False)
    except FileNotFoundError:
        # Compilation stays best-effort, as it was with os.system.
        print('pdflatex not found; skipping PDF compilation')

    # Remove LaTeX by-products without relying on a shell "rm".
    for suffix in ('aux', 'bbl', 'blg', 'log', 'out', 'dvi'):
        (tables_dir / f'main.{suffix}').unlink(missing_ok=True)