# QuaPy/ClassifierAccuracy/commons.py

from collections import defaultdict
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import numpy as np
from time import time
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, LinearSVC
from method.aggregative import PACC, EMQ, ACC
from utils import *
import quapy.data.datasets
import quapy as qp
from models_multiclass import *
from quapy.data import LabelledCollection
from quapy.protocol import UPP
from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS


def split(data: LabelledCollection):
    # stratified split: ~1/3 held out for test, the remaining 2/3 halved into train and val
    train_val, test = data.split_stratified(train_prop=0.66, random_state=0)
    train, val = train_val.split_stratified(train_prop=0.5, random_state=0)
    return train, val, test


def gen_classifiers():
    yield 'LR', LogisticRegression()
    #yield 'NB', GaussianNB()
    #yield 'SVM(rbf)', SVC()
    #yield 'SVM(linear)', LinearSVC()


def gen_datasets() -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
    for dataset_name in UCI_MULTICLASS_DATASETS:
        dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
        yield dataset_name, split(dataset)


def gen_CAP(h, acc_fn) -> [str, ClassifierAccuracyPrediction]:
    yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
    yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
    yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
    yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')


def gen_CAP_cont_table(h) -> [str, CAPContingencyTable]:
    acc_fn = None
    # yield 'Naive', NaiveCAP(h, acc_fn)
    yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
    #yield 'CT-PPSh-ACC', ContTableWithHTransferCAP(h, acc_fn, ACC)
    yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
    yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)


def gen_acc_measure():
    yield 'vanilla_accuracy', vanilla_acc_fn
    yield 'macro-F1', macrof1


def true_acc(h: BaseEstimator, acc_fn: callable, U: LabelledCollection):
    # true accuracy of classifier h on the test collection U, obtained by applying
    # acc_fn to the confusion matrix of true vs. predicted labels
    y_pred = h.predict(U.X)
    y_true = U.y
    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
    return acc_fn(conf_table)


def vanilla_acc_fn(cont_table):
    return np.diag(cont_table).sum() / cont_table.sum()


def _f1_bin(tp, fp, fn):
    if tp + fp + fn == 0:
        # degenerate case (no positives at all): F1 is taken to be 1
        return 1
    else:
        return (2 * tp) / (2 * tp + fp + fn)


def macrof1(cont_table):
    n = cont_table.shape[0]

    if n == 2:
        tp = cont_table[1, 1]
        fp = cont_table[0, 1]
        fn = cont_table[1, 0]
        return _f1_bin(tp, fp, fn)

    f1_per_class = []
    for i in range(n):
        tp = cont_table[i, i]
        fp = cont_table[:, i].sum() - tp
        fn = cont_table[i, :].sum() - tp
        f1_per_class.append(_f1_bin(tp, fp, fn))
    return np.mean(f1_per_class)


def microf1(cont_table):
    n = cont_table.shape[0]

    if n == 2:
        tp = cont_table[1, 1]
        fp = cont_table[0, 1]
        fn = cont_table[1, 0]
        return _f1_bin(tp, fp, fn)

    # micro-averaging: pool the per-class counts across classes, then compute F1 once
    tp, fp, fn = 0, 0, 0
    for i in range(n):
        tp_i = cont_table[i, i]
        tp += tp_i
        fp += cont_table[:, i].sum() - tp_i
        fn += cont_table[i, :].sum() - tp_i
    return _f1_bin(tp, fp, fn)
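

# Hand-worked check of the two F1 variants on a small 3-class confusion matrix
# (rows = true labels, columns = predictions), useful when modifying the code above:
#
#   C = np.array([[5, 1, 0],
#                 [0, 3, 2],
#                 [1, 0, 4]])
#
#   per-class F1 = 10/12, 6/9, 8/11  ->  macrof1(C) ~= 0.742
#   pooled counts tp=12, fp=4, fn=4  ->  microf1(C)  = 24/32 = 0.75
#   (for single-label data, micro-F1 coincides with vanilla accuracy: 12/16 = 0.75)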


def cap_errors(true_acc, estim_acc):
    true_acc = np.asarray(true_acc)
    estim_acc = np.asarray(estim_acc)
    #return (true_acc - estim_acc)**2
    return np.abs(true_acc - estim_acc)
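

# --------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): wires the helpers above together
# for a single dataset and classifier. The UPP arguments used here
# (sample_size, repeats, return_type='labelled_collection') are assumptions
# about the quapy.protocol API and may need adjusting for your QuaPy version.
if __name__ == '__main__':
    dataset_name, (train, val, test) = next(gen_datasets())
    cls_name, h = next(gen_classifiers())
    h.fit(*train.Xy)

    acc_name, acc_fn = next(gen_acc_measure())

    # true accuracy on the full test split
    print(f'{cls_name} on {dataset_name}: {acc_name}={true_acc(h, acc_fn, test):.4f}')

    # true accuracy across test samples generated under prior probability shift
    prot = UPP(test, sample_size=100, repeats=10, random_state=0, return_type='labelled_collection')
    for i, Ui in enumerate(prot()):
        print(f'sample {i}: {acc_name}={true_acc(h, acc_fn, Ui):.4f}')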