"""Helpers for classifier-accuracy-prediction experiments.

Contains the train/validation/test splitter, generators enumerating the
classifiers, datasets, prediction methods and accuracy measures to try, and
the accuracy measures themselves, all computed from contingency tables.
"""
from collections import defaultdict
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import numpy as np
from time import time
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, LinearSVC
from method.aggregative import PACC, EMQ, ACC
from utils import *
import quapy.data.datasets
import quapy as qp
from models_multiclass import *
from quapy.data import LabelledCollection
from quapy.protocol import UPP
from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS


def split(data: LabelledCollection):
    """Split `data` into (train, val, test) with two chained stratified splits.

    The first split keeps 66% of `data` as train+validation and the rest as
    test; the second one halves the train+validation part. Both splits use
    `random_state=0`, so the partition is reproducible.

    :param data: the collection to partition
    :return: a tuple `(train, val, test)`
    """
    train_val, test = data.split_stratified(train_prop=0.66, random_state=0)
    train, val = train_val.split_stratified(train_prop=0.5, random_state=0)
    return train, val, test


def gen_classifiers():
    """Yield `(name, classifier)` pairs for the classifiers to experiment with."""
    yield 'LR', LogisticRegression()
    # Alternative classifiers, currently disabled:
    #yield 'NB', GaussianNB()
    #yield 'SVM(rbf)', SVC()
    #yield 'SVM(linear)', LinearSVC()


def gen_datasets()-> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
    """Yield `(dataset_name, (train, val, test))` for every dataset listed in
    `UCI_MULTICLASS_DATASETS`; each dataset is fetched and partitioned with
    `split` lazily, as the generator is consumed."""
    for dataset_name in UCI_MULTICLASS_DATASETS:
        dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
        yield dataset_name, split(dataset)


def gen_CAP(h, acc_fn)->[str, ClassifierAccuracyPrediction]:
    """Yield `(name, method)` pairs of accuracy-prediction methods that are
    constructed from a classifier and an accuracy function.

    :param h: the classifier handed to every method
    :param acc_fn: the accuracy function handed to every method
    """
    yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
    yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
    yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
    yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')


def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
    """Yield `(name, method)` pairs of contingency-table-based methods.

    Every method is constructed with `acc_fn=None`.

    :param h: the classifier handed to every method
    """
    acc_fn = None
    # yield 'Naive', NaiveCAP(h, acc_fn)
    yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
    #yield 'CT-PPSh-ACC', ContTableWithHTransferCAP(h, acc_fn, ACC)
    yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
    yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)


def gen_acc_measure():
    """Yield `(name, function)` pairs of the accuracy measures to evaluate;
    each function maps a contingency table to a score."""
    yield 'vanilla_accuracy', vanilla_acc_fn
    yield 'macro-F1', macrof1


def true_acc(h:BaseEstimator, acc_fn: callable, U: LabelledCollection):
    """Compute the actual accuracy of classifier `h` on the labelled set `U`.

    :param h: a classifier exposing `predict`
    :param acc_fn: a function mapping a contingency table to a score
    :param U: labelled collection providing `X`, `y` and `classes_`
    :return: `acc_fn` applied to the confusion matrix of `h` on `U`
    """
    y_pred = h.predict(U.X)
    y_true = U.y
    # labels=U.classes_ keeps the table shape fixed even when some class is
    # absent from both y_true and y_pred
    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
    return acc_fn(conf_table)


def vanilla_acc_fn(cont_table):
    """Return the proportion of the table's mass lying on its diagonal
    (i.e., the fraction of correctly classified instances)."""
    return np.diag(cont_table).sum() / cont_table.sum()


def _f1_bin(tp, fp, fn):
    """Return F1 = 2tp / (2tp + fp + fn), defined as 1 when all counts are 0.

    :param tp: true positives
    :param fp: false positives
    :param fn: false negatives
    """
    if tp + fp + fn == 0:
        # nothing to find and nothing wrongly found: a perfect score
        return 1
    else:
        return (2 * tp) / (2 * tp + fp + fn)


def macrof1(cont_table):
    """Return the macro-averaged F1 of a square contingency table.

    Rows are read as true classes and columns as predicted classes. A 2x2
    table is treated as a binary problem and only the F1 of the class with
    index 1 is returned (no averaging takes place).

    :param cont_table: square 2-d array of counts
    :return: the mean of the per-class F1 scores (binary F1 for 2x2 tables)
    """
    n = cont_table.shape[0]

    if n==2:
        tp = cont_table[1,1]
        fp = cont_table[0,1]
        fn = cont_table[1,0]
        return _f1_bin(tp, fp, fn)

    f1_per_class = []
    for i in range(n):
        tp = cont_table[i,i]
        fp = cont_table[:,i].sum() - tp  # predicted as i, but of another class
        fn = cont_table[i,:].sum() - tp  # of class i, but predicted otherwise
        f1_per_class.append(_f1_bin(tp, fp, fn))

    return np.mean(f1_per_class)


def microf1(cont_table):
    """Return the micro-averaged F1 of a square contingency table.

    Rows are read as true classes and columns as predicted classes. A 2x2
    table is treated as a binary problem and the F1 of the class with index 1
    is returned. Otherwise, the true positives, false positives and false
    negatives of all classes are pooled before computing a single F1.

    :param cont_table: square 2-d array of counts
    :return: the F1 computed on the pooled counts (binary F1 for 2x2 tables)
    """
    n = cont_table.shape[0]

    if n == 2:
        tp = cont_table[1, 1]
        fp = cont_table[0, 1]
        fn = cont_table[1, 0]
        return _f1_bin(tp, fp, fn)

    # Per-class counts are taken with respect to each class' own diagonal
    # entry and reduced to scalars before being handed to _f1_bin.
    tp_per_class = np.diag(cont_table)
    fp = (cont_table.sum(axis=0) - tp_per_class).sum()
    fn = (cont_table.sum(axis=1) - tp_per_class).sum()
    tp = tp_per_class.sum()
    return _f1_bin(tp, fp, fn)


def cap_errors(true_acc, estim_acc):
    """Return the element-wise absolute error between true and estimated
    accuracies.

    :param true_acc: scalar or array-like with the true accuracy values
    :param estim_acc: scalar or array-like with the estimated accuracy values
    :return: `|true_acc - estim_acc|` as computed by numpy
    """
    true_acc = np.asarray(true_acc)
    estim_acc = np.asarray(estim_acc)
    # squared-error alternative, currently disabled:
    #return (true_acc - estim_acc)**2
    return np.abs(true_acc - estim_acc)