118 lines
3.5 KiB
Python
118 lines
3.5 KiB
Python
from collections import defaultdict
|
|
|
|
from sklearn.base import BaseEstimator
|
|
from sklearn.linear_model import LogisticRegression
|
|
import numpy as np
|
|
from time import time
|
|
from sklearn.metrics import confusion_matrix
|
|
from sklearn.naive_bayes import GaussianNB
|
|
from sklearn.svm import SVC, LinearSVC
|
|
|
|
from method.aggregative import PACC, EMQ, ACC
|
|
from utils import *
|
|
|
|
import quapy.data.datasets
|
|
import quapy as qp
|
|
from models_multiclass import *
|
|
from quapy.data import LabelledCollection
|
|
from quapy.protocol import UPP
|
|
from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS
|
|
|
|
|
|
def split(data: LabelledCollection):
    """Partition ``data`` into training, validation and test collections.

    Two consecutive stratified splits are applied, both with
    ``random_state=0``: the first one with ``train_prop=0.66`` separates the
    test part, and the second one with ``train_prop=0.5`` divides the
    remainder into training and validation parts.

    :param data: the labelled collection to partition
    :return: a tuple ``(train, val, test)``
    """
    remainder, held_out = data.split_stratified(train_prop=0.66, random_state=0)
    fit_part, val_part = remainder.split_stratified(train_prop=0.5, random_state=0)
    return fit_part, val_part, held_out
|
|
|
|
|
|
def gen_classifiers():
    """Yield ``(name, classifier)`` pairs for the classifiers under study.

    Every classifier is instantiated with its default arguments at the
    moment it is yielded. Only logistic regression is currently enabled; the
    remaining candidates are kept below as disabled entries.
    """
    enabled = (
        ('LR', LogisticRegression),
        # ('NB', GaussianNB),
        # ('SVM(rbf)', SVC),
        # ('SVM(linear)', LinearSVC),
    )
    for label, estimator_cls in enabled:
        yield label, estimator_cls()
|
|
|
|
|
|
def gen_datasets() -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
    """Yield ``(dataset_name, (train, val, test))`` for each UCI multiclass dataset.

    The names are taken from ``UCI_MULTICLASS_DATASETS``. Each dataset is
    fetched only when the generator reaches it, and is partitioned with
    :func:`split` before being yielded.
    """
    for name in UCI_MULTICLASS_DATASETS:
        yield name, split(fetch_UCIMulticlassLabelledCollection(name))
|
|
|
|
|
|
def gen_CAP(h, acc_fn) -> [str, ClassifierAccuracyPrediction]:
    """Yield ``(name, method)`` pairs of accuracy-prediction methods.

    All methods receive the classifier ``h`` and the accuracy function
    ``acc_fn``; they differ in the quantifier class (``ACC`` or ``EMQ``) and
    in the extra keyword arguments passed to the constructor.

    :param h: the classifier whose accuracy is to be predicted
    :param acc_fn: the accuracy function handed to every method
    """
    # Each entry defers construction, so a method is only built once the
    # consumer actually asks for it.
    factories = (
        ('SebCAP', lambda: SebastianiCAP(h, acc_fn, ACC)),
        ('SebCAPweight', lambda: SebastianiCAP(h, acc_fn, ACC, alpha=0)),
        ('PabCAP', lambda: PabloCAP(h, acc_fn, ACC)),
        ('PabCAP-SLD-median', lambda: PabloCAP(h, acc_fn, EMQ, aggr='median')),
    )
    for method_name, build in factories:
        yield method_name, build()
|
|
|
|
def gen_CAP_cont_table(h) -> [str, CAPContingencyTable]:
    """Yield ``(name, method)`` pairs of contingency-table based methods.

    Every method is built with ``acc_fn=None``.
    NOTE(review): presumably these methods estimate a contingency table from
    which the accuracy measure is computed afterwards -- confirm against the
    classes in ``models_multiclass``.

    :param h: the classifier whose contingency table is to be estimated
    """
    acc_fn = None
    # Construction is deferred so that each method (and the quantifier it
    # wraps) is only instantiated when the consumer reaches it.
    factories = (
        # disabled: ('Naive', lambda: NaiveCAP(h, acc_fn)),
        ('CT-PPS-EMQ', lambda: ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))),
        # disabled: ('CT-PPSh-ACC', lambda: ContTableWithHTransferCAP(h, acc_fn, ACC)),
        ('Equations-ACCh', lambda: NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)),
        # disabled: ('Equations-ACC', lambda: NsquaredEquationsCAP(h, acc_fn, ACC)),
        ('Equations-SLD', lambda: NsquaredEquationsCAP(h, acc_fn, EMQ)),
    )
    for method_name, build in factories:
        yield method_name, build()
|
|
|
|
def gen_acc_measure():
    """Yield ``(name, function)`` pairs of the accuracy measures to evaluate.

    Each function takes a contingency table and returns a score.
    """
    yield from (
        ('vanilla_accuracy', vanilla_acc_fn),
        ('macro-F1', macrof1),
    )
|
|
|
|
|
|
def true_acc(h: BaseEstimator, acc_fn: callable, U: LabelledCollection):
    """Compute the actual value of ``acc_fn`` for classifier ``h`` on ``U``.

    The classifier predicts the labels of ``U.X``; the predictions are
    compared with ``U.y`` in a confusion matrix whose label order is
    ``U.classes_``, and the matrix is passed to ``acc_fn``.

    :param h: a fitted classifier exposing ``predict``
    :param acc_fn: a function mapping a confusion matrix to a score
    :param U: the labelled collection on which ``h`` is evaluated
    :return: whatever ``acc_fn`` returns for the confusion matrix
    """
    predictions = h.predict(U.X)
    contingency = confusion_matrix(U.y, predictions, labels=U.classes_)
    return acc_fn(contingency)
|
|
|
|
|
|
def vanilla_acc_fn(cont_table):
    """Return plain accuracy: the diagonal mass of ``cont_table`` over its total.

    :param cont_table: a square array of counts (or proportions) in which the
        diagonal cells hold the correctly classified instances
    :return: sum of the diagonal divided by the sum of all cells
    """
    correct = np.diag(cont_table).sum()
    total = cont_table.sum()
    return correct / total
|
|
|
|
|
|
def _f1_bin(tp, fp, fn):
|
|
if tp + fp + fn == 0:
|
|
return 1
|
|
else:
|
|
return (2 * tp) / (2 * tp + fp + fn)
|
|
|
|
|
|
def macrof1(cont_table):
    """Return the macro-averaged F1 of a contingency table.

    Rows are read as true classes and columns as predicted classes. With
    exactly two classes the function returns the F1 of class 1 only, not an
    average over both classes. Otherwise F1 is computed for every class in a
    one-vs-rest fashion and the per-class scores are averaged.

    :param cont_table: a square 2-D array indexed as ``[true, predicted]``
    :return: the F1 of class 1 (binary case) or the mean per-class F1
    """
    n_classes = cont_table.shape[0]

    if n_classes == 2:
        return _f1_bin(cont_table[1, 1], cont_table[0, 1], cont_table[1, 0])

    def class_f1(k):
        # One-vs-rest counts for class k: the rest of column k are false
        # positives, the rest of row k are false negatives.
        hits = cont_table[k, k]
        false_pos = cont_table[:, k].sum() - hits
        false_neg = cont_table[k, :].sum() - hits
        return _f1_bin(hits, false_pos, false_neg)

    return np.mean([class_f1(k) for k in range(n_classes)])
|
|
|
|
|
|
def microf1(cont_table):
    """Return the micro-averaged F1 of a contingency table.

    Rows are read as true classes and columns as predicted classes. With
    exactly two classes the function returns the F1 of class 1. Otherwise the
    one-vs-rest true positives, false positives and false negatives of every
    class are pooled, and a single F1 is computed from the pooled counts.

    :param cont_table: a square 2-D array indexed as ``[true, predicted]``
    :return: the F1 of class 1 (binary case) or the F1 of the pooled counts
    """
    n = cont_table.shape[0]

    if n == 2:
        tp = cont_table[1, 1]
        fp = cont_table[0, 1]
        fn = cont_table[1, 0]
        return _f1_bin(tp, fp, fn)

    tp, fp, fn = 0, 0, 0
    for i in range(n):
        # Subtract this class's own diagonal cell (not the running total) and
        # reduce the column/row to a scalar, so the pooled counts stay scalars.
        tp_i = cont_table[i, i]
        tp += tp_i
        fp += cont_table[:, i].sum() - tp_i
        fn += cont_table[i, :].sum() - tp_i
    return _f1_bin(tp, fp, fn)
|
|
|
|
|
|
def cap_errors(true_acc, estim_acc):
    """Return the element-wise absolute error between true and estimated accuracies.

    Both arguments are converted to numpy arrays first, so scalars and
    sequences are accepted.

    :param true_acc: true accuracy value(s)
    :param estim_acc: estimated accuracy value(s)
    :return: ``|true_acc - estim_acc|``, computed element-wise
    """
    reference = np.asarray(true_acc)
    estimate = np.asarray(estim_acc)
    # The squared error, (reference - estimate) ** 2, is the alternative
    # that is not in use.
    return np.absolute(reference - estimate)