import itertools
import os.path
from collections import defaultdict
from time import time

import quapy as qp
from quapy.protocol import UPP

from utils import *
from models_multiclass import *
from commons import *

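
# trains a CAP method on the validation collection V and returns the fitted
# method together with its wall-clock training time (in seconds)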
def fit_method(method, V):
    tinit = time()
    method.fit(V)
    t_train = time() - tinit
    return method, t_train

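
# applies a fitted CAP method to every test sample generated by the protocol,
# returning one accuracy estimate per sample along with the average prediction
# time per sample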
def predictionsCAP(method, test_prot):
    tinit = time()
    estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
    t_test_ave = (time() - tinit) / test_prot.total()
    return estim_accs, t_test_ave

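
# applies a fitted contingency-table method to every test sample, predicting
# one contingency table per sample; every accuracy measure is then derived
# from the same tables, so the predictions are computed only once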
def predictionsCAPcont_table(method, test_prot, gen_acc_measure):
    estim_accs_dict = {}
    tinit = time()
    estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
    for acc_name, acc_fn in gen_acc_measure():
        estim_accs_dict[acc_name] = [acc_fn(cont_table) for cont_table in estim_tables]
    t_test_ave = (time() - tinit) / test_prot.total()
    return estim_accs_dict, t_test_ave

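
# returns True if at least one accuracy measure has no stored result for the
# given (classifier, dataset, method) combination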
def any_missing(cls_name, dataset_name, method_name):
    for acc_name, _ in gen_acc_measure():
        if not os.path.exists(getpath(cls_name, acc_name, dataset_name, method_name)):
            return True
    return False

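
# experimental configuration: each sample drawn by the protocol contains 250
# instances, and NUM_TEST samples are generated for every dataset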
qp.environ['SAMPLE_SIZE'] = 250
NUM_TEST = 100

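
# main experimental loop: for every (classifier, dataset) pair, the classifier
# h is trained on L, while CAP methods are fitted on the validation set V and
# evaluated on test samples drawn from U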
for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifiers(), gen_datasets()):
    print(f'training {cls_name} in {dataset_name}')
    h.fit(*L.Xy)

    # test generation protocol
    test_prot = UPP(U, repeats=NUM_TEST, return_type='labelled_collection', random_state=0)

    # compute some stats of the dataset
    get_dataset_stats(f'dataset_stats/{dataset_name}.json', test_prot, L, V)

    # precompute the actual accuracy values
    true_accs = {}
    for acc_name, acc_fn in gen_acc_measure():
        true_accs[acc_name] = [true_acc(h, acc_fn, Ui) for Ui in test_prot()]

    # instances of ClassifierAccuracyPrediction are bound to a specific evaluation
    # measure, so they must be fitted anew inside the loop over accuracy measures
    for acc_name, acc_fn in gen_acc_measure():
        for (method_name, method) in gen_CAP(h, acc_fn):
            result_path = getpath(cls_name, acc_name, dataset_name, method_name)
            if os.path.exists(result_path):
                print(f'\t{method_name}-{acc_name} exists, skipping')
                continue

            print(f'\t{method_name}-{acc_name} computing...')
            method, t_train = fit_method(method, V)
            estim_accs, t_test_ave = predictionsCAP(method, test_prot)
            save_json_result(result_path, true_accs[acc_name], estim_accs, t_train, t_test_ave)

    # instances of CAPContingencyTable are instead measure-agnostic: the contingency
    # tables are predicted once, and all accuracy measures are then derived from
    # them, which speeds things up
    for (method_name, method) in gen_CAP_cont_table(h):
        if not any_missing(cls_name, dataset_name, method_name):
            print(f'\tmethod {method_name} has all results already computed. Skipping.')
            continue

        print(f'\tmethod {method_name} computing...')

        method, t_train = fit_method(method, V)
        estim_accs_dict, t_test_ave = predictionsCAPcont_table(method, test_prot, gen_acc_measure)
        for acc_name in estim_accs_dict:
            result_path = getpath(cls_name, acc_name, dataset_name, method_name)
            save_json_result(result_path, true_accs[acc_name], estim_accs_dict[acc_name], t_train, t_test_ave)

    print()

# generate diagonal plots (estimated vs. true accuracy) for every
# classifier-measure combination
for (cls_name, _), (acc_name, _) in itertools.product(gen_classifiers(), gen_acc_measure()):
    results = open_results(cls_name, acc_name)
    plot_diagonal(cls_name, acc_name, results)