added DOC and ATC
This commit is contained in:
parent b9fed349f0
commit 86d9ce849c
@@ -1,7 +1,10 @@
-from commons import *
+from ClassifierAccuracy.util.commons import *
+from ClassifierAccuracy.util.plotting import plot_diagonal
 
 PROBLEM = 'multiclass'
-basedir = PROBLEM
+ORACLE = False
+basedir = PROBLEM+('-oracle' if ORACLE else '')
+
 
 if PROBLEM == 'binary':
     qp.environ['SAMPLE_SIZE'] = 1000
@@ -31,15 +34,15 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
     # instances of ClassifierAccuracyPrediction are bound to the evaluation measure, so they
     # must be nested in the acc-for
     for acc_name, acc_fn in gen_acc_measure():
-        for (method_name, method) in gen_CAP(h, acc_fn):
+        for (method_name, method) in gen_CAP(h, acc_fn, with_oracle=ORACLE):
             result_path = getpath(basedir, cls_name, acc_name, dataset_name, method_name)
             if os.path.exists(result_path):
                 print(f'\t{method_name}-{acc_name} exists, skipping')
                 continue
 
-            print(f'\t{method_name}-{acc_name} computing...')
+            print(f'\t{method_name} computing...')
             method, t_train = fit_method(method, V)
-            estim_accs, t_test_ave = predictionsCAP(method, test_prot)
+            estim_accs, t_test_ave = predictionsCAP(method, test_prot, ORACLE)
             save_json_result(result_path, true_accs[acc_name], estim_accs, t_train, t_test_ave)
 
     # instances of CAPContingencyTable instead are generic, and the evaluation measure can
@@ -52,7 +55,7 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
         print(f'\tmethod {method_name} computing...')
 
         method, t_train = fit_method(method, V)
-        estim_accs_dict, t_test_ave = predictionsCAPcont_table(method, test_prot, gen_acc_measure)
+        estim_accs_dict, t_test_ave = predictionsCAPcont_table(method, test_prot, gen_acc_measure, ORACLE)
         for acc_name in estim_accs_dict.keys():
             result_path = getpath(basedir, cls_name, acc_name, dataset_name, method_name)
             save_json_result(result_path, true_accs[acc_name], estim_accs_dict[acc_name], t_train, t_test_ave)
@@ -63,11 +66,9 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
 print('generating plots')
 for (cls_name, _), (acc_name, _) in itertools.product(gen_classifiers(), gen_acc_measure()):
     methods = get_method_names()
-    results = open_results(basedir, cls_name, acc_name, method_name=methods)
-    plot_diagonal(cls_name, acc_name, results, base_dir=f'plots/{basedir}/all')
+    plot_diagonal(basedir, cls_name, acc_name)
     for dataset_name, _ in gen_datasets(only_names=True):
-        results = open_results(basedir, cls_name, acc_name, dataset_name=dataset_name, method_name=methods)
-        plot_diagonal(cls_name, acc_name, results, base_dir=f'plots/{basedir}/{dataset_name}')
+        plot_diagonal(basedir, cls_name, acc_name, dataset_name=dataset_name)
 
 print('generating tables')
 gen_tables(basedir, datasets=[d for d,_ in gen_datasets(only_names=True)])
@@ -1,3 +1,3 @@
-from commons import gen_tables
+from ClassifierAccuracy.util.commons import gen_tables
 
 gen_tables()
@@ -2,7 +2,7 @@ from copy import deepcopy
 
 import numpy as np
 from sklearn.base import BaseEstimator
-from sklearn.linear_model import LogisticRegression
+from sklearn.linear_model import LogisticRegression, LinearRegression
 
 import quapy as qp
 from sklearn import clone
@@ -29,11 +29,15 @@ class ClassifierAccuracyPrediction(ABC):
     def fit(self, val: LabelledCollection):
         ...
 
-    def predict(self, X):
+    @abstractmethod
+    def predict(self, X, oracle_prev=None):
         """
         Evaluates the accuracy function on the predicted contingency table
 
         :param X: test data
+        :param oracle_prev: np.ndarray with the class prevalence of the test set as estimated by
+            an oracle. This is meant to test the effect of the errors in CAP that are explained by
+            the errors in quantification performance
         :return: float
         """
         return ...
@@ -51,28 +55,30 @@ class CAPContingencyTable(ClassifierAccuracyPrediction):
         self.h = h
         self.acc = acc
 
-    @abstractmethod
-    def fit(self, val: LabelledCollection):
-        ...
-
-    def predict(self, X):
+    def predict(self, X, oracle_prev=None):
         """
         Evaluates the accuracy function on the predicted contingency table
 
         :param X: test data
+        :param oracle_prev: np.ndarray with the class prevalence of the test set as estimated by
+            an oracle. This is meant to test the effect of the errors in CAP that are explained by
+            the errors in quantification performance
         :return: float
         """
-        cont_table = self.predict_ct(X)
+        cont_table = self.predict_ct(X, oracle_prev)
         raw_acc = self.acc(cont_table)
         norm_acc = np.clip(raw_acc, 0, 1)
         return norm_acc
 
     @abstractmethod
-    def predict_ct(self, X):
+    def predict_ct(self, X, oracle_prev=None):
         """
         Predicts the contingency table for the test data
 
         :param X: test data
+        :param oracle_prev: np.ndarray with the class prevalence of the test set as estimated by
+            an oracle. This is meant to test the effect of the errors in CAP that are explained by
+            the errors in quantification performance
         :return: a contingency table
         """
         ...
@@ -92,13 +98,14 @@ class NaiveCAP(CAPContingencyTable):
         self.cont_table = confusion_matrix(y_true, y_pred=y_hat, labels=val.classes_)
         return self
 
-    def predict_ct(self, test):
+    def predict_ct(self, test, oracle_prev=None):
         """
         This method disregards the test set, under the assumption that it is IID wrt the training, meaning that
         the confusion matrix for the test data should coincide with the one computed for training (using any cross
         validation strategy).
 
         :param test: test collection (ignored)
+        :param oracle_prev: ignored
         :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
         """
         return self.cont_table
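For context: a CAPContingencyTable method only predicts a contingency table; the accuracy measure itself is a plain function of that table. A minimal sketch of what such an acc_fn can look like (vanilla accuracy as the normalized trace; the actual measures produced by gen_acc_measure live elsewhere in the repository and are not shown in this diff):

import numpy as np

def vanilla_accuracy_from_ct(cont_table):
    # fraction of the mass lying on the diagonal of the (true class x predicted class) table
    cont_table = np.asarray(cont_table, dtype=float)
    return cont_table.trace() / cont_table.sum()

NaiveCAP simply returns the validation confusion matrix as its prediction for the test contingency table, so under prior probability shift its estimate does not move at all; the quantification-based variants below correct for exactly that.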
@@ -133,17 +140,23 @@ class ContTableTransferCAP(CAPContingencyTableQ):
     def fit(self, val: LabelledCollection):
         y_hat = self.h.predict(val.X)
         y_true = val.y
-        self.cont_table = confusion_matrix(y_true, y_pred=y_hat, labels=val.classes_)
+        self.cont_table = confusion_matrix(y_true, y_pred=y_hat, labels=val.classes_, normalize='all')
         self.train_prev = val.prevalence()
         self.quantifier_fit(val)
         return self
 
-    def predict_ct(self, test):
+    def predict_ct(self, test, oracle_prev=None):
         """
         :param test: test collection (ignored)
+        :param oracle_prev: np.ndarray with the class prevalence of the test set as estimated by
+            an oracle. This is meant to test the effect of the errors in CAP that are explained by
+            the errors in quantification performance
         :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
         """
-        prev_hat = self.q.quantify(test)
+        if oracle_prev is None:
+            prev_hat = self.q.quantify(test)
+        else:
+            prev_hat = oracle_prev
         adjustment = prev_hat / self.train_prev
         return self.cont_table * adjustment[:, np.newaxis]
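The transfer in ContTableTransferCAP is a row-wise reweighting: the validation contingency table is normalized to sum to 1 (normalize='all'), and each row (true class) is rescaled by the ratio between the prevalence estimated for the test set and the training prevalence of that class. A small numeric sketch of the adjustment, with made-up numbers that are not taken from the commit:

import numpy as np

# normalized validation contingency table (rows: true class, cols: predicted class)
cont_table = np.array([[0.45, 0.05],
                       [0.10, 0.40]])
train_prev = np.array([0.5, 0.5])   # class prevalence in validation
prev_hat = np.array([0.2, 0.8])     # prevalence estimated (or given by the oracle) for the test set

adjustment = prev_hat / train_prev            # [0.4, 1.6]
test_ct = cont_table * adjustment[:, np.newaxis]
print(test_ct)        # [[0.18, 0.02], [0.16, 0.64]]
print(test_ct.sum())  # still 1.0; the row masses now match the estimated test prevalence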
@@ -212,9 +225,12 @@ class NsquaredEquationsCAP(CAPContingencyTableQ):
 
         return A, b
 
-    def predict_ct(self, test):
+    def predict_ct(self, test, oracle_prev):
         """
         :param test: test collection (ignored)
+        :param oracle_prev: np.ndarray with the class prevalence of the test set as estimated by
+            an oracle. This is meant to test the effect of the errors in CAP that are explained by
+            the errors in quantification performance
         :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
         """
 
@@ -222,7 +238,10 @@ class NsquaredEquationsCAP(CAPContingencyTableQ):
 
         h_label_preds = self.h.predict(test)
         cc_prev_estim = F.prevalence_from_labels(h_label_preds, self.h.classes_)
-        q_prev_estim = self.q.quantify(test)
+        if oracle_prev is None:
+            q_prev_estim = self.q.quantify(test)
+        else:
+            q_prev_estim = oracle_prev
 
         A = self.A
         b = self.partial_b
@@ -255,13 +274,14 @@ class NsquaredEquationsCAP(CAPContingencyTableQ):
 
 class SebastianiCAP(ClassifierAccuracyPrediction):
 
-    def __init__(self, h, acc_fn, q_class, n_val_samples=500, alpha=0.3):
+    def __init__(self, h, acc_fn, q_class, n_val_samples=500, alpha=0.3, predict_train_prev=True):
         self.h = h
         self.acc = acc_fn
         self.q = q_class(h)
         self.n_val_samples = n_val_samples
         self.alpha = alpha
         self.sample_size = qp.environ['SAMPLE_SIZE']
+        self.predict_train_prev = predict_train_prev
 
     def fit(self, val: LabelledCollection):
         v2, v1 = val.split_stratified(train_prop=0.5)
@@ -272,11 +292,17 @@ class SebastianiCAP(ClassifierAccuracyPrediction):
         self.sigma_acc = [self.true_acc(sigma_i) for sigma_i in gen_samples()]
 
         # precompute prevalence predictions on samples
-        gen_samples.on_preclassified_instances(self.q.classify(v2.X), in_place=True)
-        self.sigma_pred_prevs = [self.q.aggregate(sigma_i.X) for sigma_i in gen_samples()]
+        if self.predict_train_prev:
+            gen_samples.on_preclassified_instances(self.q.classify(v2.X), in_place=True)
+            self.sigma_pred_prevs = [self.q.aggregate(sigma_i.X) for sigma_i in gen_samples()]
+        else:
+            self.sigma_pred_prevs = [sigma_i.prevalence() for sigma_i in gen_samples()]
 
-    def predict(self, X):
-        test_pred_prev = self.q.quantify(X)
+    def predict(self, X, oracle_prev=None):
+        if oracle_prev is None:
+            test_pred_prev = self.q.quantify(X)
+        else:
+            test_pred_prev = oracle_prev
 
         if self.alpha > 0:
             # select samples from V2 with predicted prevalence close to the predicted prevalence for U
@@ -316,8 +342,11 @@ class PabloCAP(ClassifierAccuracyPrediction):
         label_predictions = self.h.predict(val.X)
         self.pre_classified = LabelledCollection(instances=label_predictions, labels=val.labels)
 
-    def predict(self, X):
-        pred_prev = F.smooth(self.q.quantify(X))
+    def predict(self, X, oracle_prev=None):
+        if oracle_prev is None:
+            pred_prev = F.smooth(self.q.quantify(X))
+        else:
+            pred_prev = oracle_prev
         X_size = X.shape[0]
         acc_estim = []
         for _ in range(self.n_val_samples):
@@ -334,25 +363,83 @@ class PabloCAP(ClassifierAccuracyPrediction):
             raise ValueError('unknown aggregation function')
 
 
+def get_posteriors_from_h(h, X):
+    if hasattr(h, 'predict_proba'):
+        P = h.predict_proba(X)
+    else:
+        n_classes = len(h.classes_)
+        dec_scores = h.decision_function(X)
+        if n_classes == 1:
+            dec_scores = np.vstack([-dec_scores, dec_scores]).T
+        P = scipy.special.softmax(dec_scores, axis=1)
+    return P
+
+
+def max_conf(P, keepdims=False):
+    mc = P.max(axis=1, keepdims=keepdims)
+    return mc
+
+
+def neg_entropy(P, keepdims=False):
+    ne = scipy.stats.entropy(P, axis=1)
+    if keepdims:
+        ne = ne.reshape(-1, 1)
+    return ne
+
+
 class QuAcc:
 
     def _get_X_dot(self, X):
         h = self.h
-        if hasattr(h, 'predict_proba'):
-            P = h.predict_proba(X)[:, 1:]
-        else:
-            n_classes = len(h.classes_)
-            P = h.decision_function(X).reshape(-1, n_classes)
 
-        X_dot = safehstack(X, P)
+        P = get_posteriors_from_h(h, X)
+
+        add_covs = []
+
+        if self.add_posteriors:
+            add_covs.append(P[:, 1:])
+
+        if self.add_maxconf:
+            mc = max_conf(P, keepdims=True)
+            add_covs.append(mc)
+
+        if self.add_negentropy:
+            ne = neg_entropy(P, keepdims=True)
+            add_covs.append(ne)
+
+        if self.add_maxinfsoft:
+            lgP = np.log(P)
+            mis = np.max(lgP - lgP.mean(axis=1, keepdims=True), axis=1, keepdims=True)
+            add_covs.append(mis)
+
+        if len(add_covs) > 0:
+            X_dot = np.hstack(add_covs)
+
+        if self.add_X:
+            X_dot = safehstack(X, add_covs)
+
         return X_dot
 
 
 class QuAcc1xN2(CAPContingencyTableQ, QuAcc):
 
-    def __init__(self, h: BaseEstimator, acc: callable, q_class: AggregativeQuantifier):
+    def __init__(self,
+                 h: BaseEstimator,
+                 acc: callable,
+                 q_class: AggregativeQuantifier,
+                 add_X=True,
+                 add_posteriors=True,
+                 add_maxconf=False,
+                 add_negentropy=False,
+                 add_maxinfsoft=False):
         self.h = h
         self.acc = acc
         self.q = EmptySaveQuantifier(q_class)
+        self.add_X = add_X
+        self.add_posteriors = add_posteriors
+        self.add_maxconf = add_maxconf
+        self.add_negentropy = add_negentropy
+        self.add_maxinfsoft = add_maxinfsoft
 
     def fit(self, val: LabelledCollection):
         pred_labels = self.h.predict(val.X)
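The helpers above reduce a posterior matrix P (one row per instance, one column per class) to per-instance confidence scores, which QuAcc._get_X_dot can stack, together with the posteriors themselves and optionally the raw covariates X, into the enriched representation X_dot on which the quantifier is trained. A quick illustrative sketch of what the two scores look like on a toy matrix (note that, despite its name, the neg_entropy helper in this commit returns the plain entropy of each row):

import numpy as np
import scipy.stats

P = np.array([[0.9, 0.1],    # confident prediction
              [0.5, 0.5]])   # maximally uncertain prediction

mc = P.max(axis=1)                    # [0.9, 0.5]  -> higher means more confident
ent = scipy.stats.entropy(P, axis=1)  # [0.325, 0.693] (natural log), largest for the uniform row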
@@ -367,17 +454,30 @@ class QuAcc1xN2(CAPContingencyTableQ, QuAcc):
         val_dot = LabelledCollection(X_dot, y_dot, classes=classes_dot)
         self.q.fit(val_dot)
 
-    def predict_ct(self, X):
+    def predict_ct(self, X, oracle_prev=None):
         X_dot = self._get_X_dot(X)
         return self.q.quantify(X_dot)
 
 
 class QuAccNxN(CAPContingencyTableQ, QuAcc):
 
-    def __init__(self, h: BaseEstimator, acc: callable, q_class: AggregativeQuantifier):
+    def __init__(self,
+                 h: BaseEstimator,
+                 acc: callable,
+                 q_class: AggregativeQuantifier,
+                 add_X=True,
+                 add_posteriors=True,
+                 add_maxconf=False,
+                 add_negentropy=False,
+                 add_maxinfsoft=False):
         self.h = h
         self.acc = acc
         self.q_class = q_class
+        self.add_X = add_X
+        self.add_posteriors = add_posteriors
+        self.add_maxconf = add_maxconf
+        self.add_negentropy = add_negentropy
+        self.add_maxinfsoft = add_maxinfsoft
 
     def fit(self, val: LabelledCollection):
         pred_labels = self.h.predict(val.X)
@@ -394,7 +494,7 @@ class QuAccNxN(CAPContingencyTableQ, QuAcc):
             q_i.fit(data_i)
             self.q.append(q_i)
 
-    def predict_ct(self, X):
+    def predict_ct(self, X, oracle_prev=None):
         classes = self.h.classes_
         pred_labels = self.h.predict(X)
         X_dot = self._get_X_dot(X)
@@ -449,3 +549,194 @@ class EmptySaveQuantifier(BaseQuantifier):
     def num_non_empty_classes(self):
         return len(self.old_class_idx)
 
+
+# Baselines:
+
+class ATC(ClassifierAccuracyPrediction):
+
+    VALID_FUNCTIONS = {'maxconf', 'neg_entropy'}
+
+    def __init__(self, h, acc_fn, scoring_fn='maxconf'):
+        assert scoring_fn in ATC.VALID_FUNCTIONS, \
+            f'unknown scoring function, use any from {ATC.VALID_FUNCTIONS}'
+        #assert acc_fn == 'vanilla_accuracy', \
+        #    'use acc_fn=="vanilla_accuracy"; other metrics are not yet tested in ATC'
+        self.h = h
+        self.acc_fn = acc_fn
+        self.scoring_fn = scoring_fn
+
+    def get_scores(self, P):
+        if self.scoring_fn == 'maxconf':
+            scores = max_conf(P)
+        else:
+            scores = neg_entropy(P)
+        return scores
+
+    def fit(self, val: LabelledCollection):
+        P = get_posteriors_from_h(self.h, val.X)
+        pred_labels = np.argmax(P, axis=1)
+        true_labels = val.y
+        scores = self.get_scores(P)
+        _, self.threshold = self.__find_ATC_threshold(scores=scores, labels=(pred_labels==true_labels))
+
+    def predict(self, X, oracle_prev=None):
+        P = get_posteriors_from_h(self.h, X)
+        scores = self.get_scores(P)
+        #assert self.acc_fn == 'vanilla_accuracy', \
+        #    'use acc_fn=="vanilla_accuracy"; other metrics are not yet tested in ATC'
+        return self.__get_ATC_acc(self.threshold, scores)
+
+    def __find_ATC_threshold(self, scores, labels):
+        # code copy-pasted from https://github.com/saurabhgarg1996/ATC_code/blob/master/ATC_helper.py
+        sorted_idx = np.argsort(scores)
+
+        sorted_scores = scores[sorted_idx]
+        sorted_labels = labels[sorted_idx]
+
+        fp = np.sum(labels == 0)
+        fn = 0.0
+
+        min_fp_fn = np.abs(fp - fn)
+        thres = 0.0
+        for i in range(len(labels)):
+            if sorted_labels[i] == 0:
+                fp -= 1
+            else:
+                fn += 1
+
+            if np.abs(fp - fn) < min_fp_fn:
+                min_fp_fn = np.abs(fp - fn)
+                thres = sorted_scores[i]
+
+        return min_fp_fn, thres
+
+    def __get_ATC_acc(self, thres, scores):
+        # code copy-pasted from https://github.com/saurabhgarg1996/ATC_code/blob/master/ATC_helper.py
+        return np.mean(scores >= thres)
+
+
+class DoC(ClassifierAccuracyPrediction):
+
+    def __init__(self, h, sample_size, num_samples=100):
+        self.h = h
+        self.sample_size = sample_size
+        self.num_samples = num_samples
+
+    def _get_post_stats(self, X, y):
+        P = get_posteriors_from_h(self.h, X)
+        mc = max_conf(P)
+        pred_labels = np.argmax(P, axis=-1)
+        acc = (y == pred_labels).mean()
+        return mc, acc
+
+    def _doc(self, mc1, mc2):
+        return mc2.mean() - mc1.mean()
+
+    def train_regression(self, v2_mcs, v2_accs):
+        docs = [self._doc(self.v1_mc, v2_mc_i) for v2_mc_i in v2_mcs]
+        target = [self.v1_acc - v2_acc_i for v2_acc_i in v2_accs]
+        docs = np.asarray(docs).reshape(-1, 1)
+        target = np.asarray(target)
+        lin_reg = LinearRegression()
+        return lin_reg.fit(docs, target)
+
+    def predict_regression(self, test_mc):
+        docs = np.asarray([self._doc(self.v1_mc, test_mc)]).reshape(-1, 1)
+        pred_acc = self.reg_model.predict(docs)
+        return self.v1_acc - pred_acc
+
+    def fit(self, val: LabelledCollection):
+        v1, v2 = val.split_stratified(train_prop=0.5, random_state=0)
+
+        self.v1_mc, self.v1_acc = self._get_post_stats(*v1.Xy)
+
+        v2_prot = UPP(v2, sample_size=self.sample_size, repeats=self.num_samples, return_type='labelled_collection')
+        v2_stats = [self._get_post_stats(*sample.Xy) for sample in v2_prot()]
+        v2_mcs, v2_accs = list(zip(*v2_stats))
+
+        self.reg_model = self.train_regression(v2_mcs, v2_accs)
+
+    def predict(self, X, oracle_prev=None):
+        P = get_posteriors_from_h(self.h, X)
+        mc = max_conf(P)
+        acc_pred = self.predict_regression(mc)[0]
+        return acc_pred
+
+
+"""
+def doc(self,
+        c_model: BaseEstimator,
+        validation: LabelledCollection,
+        protocol: AbstractStochasticSeededProtocol,
+        predict_method="predict_proba"):
+
+    c_model_predict = getattr(c_model, predict_method)
+    f1_average = "binary" if validation.n_classes == 2 else "macro"
+
+    val1, val2 = validation.split_stratified(train_prop=0.5, random_state=env._R_SEED)
+    val1_probs = c_model_predict(val1.X)
+    val1_mc = np.max(val1_probs, axis=-1)
+    val1_preds = np.argmax(val1_probs, axis=-1)
+    val1_acc = metrics.accuracy_score(val1.y, val1_preds)
+    val1_f1 = metrics.f1_score(val1.y, val1_preds, average=f1_average)
+    val2_protocol = APP(
+        val2,
+        n_prevalences=21,
+        repeats=100,
+        return_type="labelled_collection",
+    )
+    val2_prot_mc = []
+    val2_prot_preds = []
+    val2_prot_y = []
+    for v2 in val2_protocol():
+        _probs = c_model_predict(v2.X)
+        _mc = np.max(_probs, axis=-1)
+        _preds = np.argmax(_probs, axis=-1)
+        val2_prot_mc.append(_mc)
+        val2_prot_preds.append(_preds)
+        val2_prot_y.append(v2.y)
+
+    val_scores = np.array([doclib.get_doc(val1_mc, v2_mc) for v2_mc in val2_prot_mc])
+    val_targets_acc = np.array(
+        [
+            val1_acc - metrics.accuracy_score(v2_y, v2_preds)
+            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
+        ]
+    )
+    reg_acc = LinearRegression().fit(val_scores[:, np.newaxis], val_targets_acc)
+    val_targets_f1 = np.array(
+        [
+            val1_f1 - metrics.f1_score(v2_y, v2_preds, average=f1_average)
+            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
+        ]
+    )
+    reg_f1 = LinearRegression().fit(val_scores[:, np.newaxis], val_targets_f1)
+
+    report = EvaluationReport(name="doc")
+    for test in protocol():
+        test_probs = c_model_predict(test.X)
+        test_preds = np.argmax(test_probs, axis=-1)
+        test_mc = np.max(test_probs, axis=-1)
+        acc_score = (
+            val1_acc
+            - reg_acc.predict(np.array([[doclib.get_doc(val1_mc, test_mc)]]))[0]
+        )
+        f1_score = (
+            val1_f1 - reg_f1.predict(np.array([[doclib.get_doc(val1_mc, test_mc)]]))[0]
+        )
+        meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
+        meta_f1 = abs(
+            f1_score - metrics.f1_score(test.y, test_preds, average=f1_average)
+        )
+        report.append_row(
+            test.prevalence(),
+            acc=meta_acc,
+            acc_score=acc_score,
+            f1=meta_f1,
+            f1_score=f1_score,
+        )
+
+    return report
+
+def get_doc(probs1, probs2):
+    return np.mean(probs2) - np.mean(probs1)
+"""
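To make the two new baselines concrete, here is a minimal usage sketch. It is not part of the commit: it assumes a trained scikit-learn classifier h, a validation LabelledCollection val, a test feature matrix X_test, and an acc_fn as yielded by gen_acc_measure(), mirroring how gen_CAP wires these classes up further down in this diff.

# ATC: learns a threshold on a confidence score (max posterior by default) on validation
# data and estimates test accuracy as the fraction of test scores above that threshold
atc = ATC(h, acc_fn, scoring_fn='maxconf')
atc.fit(val)
atc_estimate = atc.predict(X_test)

# DoC: regresses the drop in accuracy on the "difference of confidences" between the
# first validation split and prevalence-shifted samples drawn from the second (UPP protocol)
doc = DoC(h, sample_size=500, num_samples=100)   # sample_size matches qp.environ['SAMPLE_SIZE'] in the scripts
doc.fit(val)
doc_estimate = doc.predict(X_test)

Both baselines accept (and ignore) oracle_prev in predict, so they can be iterated by the same experiment loop as the quantification-based methods.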
@@ -14,4 +14,19 @@ A Classifier Accuracy Prediction (CAP) method is method tha receives as input:
 And implements:
 - fit: trains the CAP
 - predict: predicts the evaluation measure on unseen data (provided, calls predict_ct and acc_func)
 - predict_ct: predicts the contingency table
+
+Important:
+- When the quantifiers' hyperparameters are optimized, we should make sure that the
+    classifier is not being reused, or that the hyperparameters do not include any from
+    the underlying classifier
+
+TODO:
+- Add additional covariates [done, check]
+- Add model selection for CAP
+- Add Doc
+- Add ATC
+- Add APP in training and adapt plots and tables
+- Add plots: error by drift, etc
+- Add characterization of classifiers in terms of accuracy and use this as a variable
+    analyzing results
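To illustrate the interface described above, a minimal (purely hypothetical) CAP implementation would look roughly like this; the method signatures follow the abstract classes in models_multiclass.py, while the body is only a placeholder:

from quapy.data import LabelledCollection

class ConstantCAP(ClassifierAccuracyPrediction):
    """Toy CAP that always predicts the accuracy observed on the validation set."""

    def __init__(self, h, acc_fn):
        self.h = h
        self.acc = acc_fn

    def fit(self, val: LabelledCollection):
        # measure the classifier's accuracy once, on validation data
        self.val_acc = (self.h.predict(val.X) == val.y).mean()
        return self

    def predict(self, X, oracle_prev=None):
        # ignore the test data (and the oracle prevalence) altogether
        return self.val_acc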
@@ -3,23 +3,30 @@ import json
 import os
 from collections import defaultdict
 from glob import glob
-from os import makedirs
-from os.path import join
 from pathlib import Path
 from time import time
+import numpy as np
 
-import matplotlib.pyplot as plt
 from sklearn.datasets import fetch_rcv1
+from sklearn.model_selection import GridSearchCV
+
+from ClassifierAccuracy.models_multiclass import *
+from quapy.method.aggregative import EMQ, ACC, KDEyML
 
-from quapy.method.aggregative import EMQ, ACC
-from models_multiclass import *
 from quapy.data import LabelledCollection
 from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS
 from quapy.data.datasets import fetch_reviews
 
 
 def gen_classifiers():
+    param_grid = {
+        'C': np.logspace(-4, -4, 9),
+        'class_weight': ['balanced', None]
+    }
+
     yield 'LR', LogisticRegression()
+    #yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
     #yield 'NB', GaussianNB()
     #yield 'SVM(rbf)', SVC()
     #yield 'SVM(linear)', LinearSVC()
@@ -27,6 +34,8 @@ def gen_classifiers():
 
 def gen_multi_datasets(only_names=False)-> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
     for dataset_name in UCI_MULTICLASS_DATASETS:
+        if dataset_name == 'wine-quality':
+            continue
         if only_names:
             yield dataset_name, None
         else:
@@ -56,21 +65,31 @@ def gen_bin_datasets(only_names=False) -> [str,[LabelledCollection,LabelledColle
         yield cat, (L, V, U)
 
 
-def gen_CAP(h, acc_fn)->[str, ClassifierAccuracyPrediction]:
+def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
     #yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
-    yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ)
+    yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
+    #yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
     #yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
     #yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
-    yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
+    #yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
+    yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
+    #yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
+    yield 'DoC', DoC(h, sample_size=qp.environ['SAMPLE_SIZE'])
 
 
 def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
     acc_fn = None
     yield 'Naive', NaiveCAP(h, acc_fn)
-    yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
-    yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
+    #yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
+    #yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
+    yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
+    #yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
+    #yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
+    #yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
+    yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
+    #yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
+    #yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
     #yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
-    yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
     #yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
     #yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
     # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
@@ -100,17 +119,23 @@ def fit_method(method, V):
     return method, t_train
 
 
-def predictionsCAP(method, test_prot):
+def predictionsCAP(method, test_prot, oracle=False):
     tinit = time()
-    estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
+    if not oracle:
+        estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
+    else:
+        estim_accs = [method.predict(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()]
     t_test_ave = (time() - tinit) / test_prot.total()
     return estim_accs, t_test_ave
 
 
-def predictionsCAPcont_table(method, test_prot, gen_acc_measure):
+def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
     estim_accs_dict = {}
     tinit = time()
-    estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
+    if not oracle:
+        estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
+    else:
+        estim_tables = [method.predict_ct(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()]
     for acc_name, acc_fn in gen_acc_measure():
         estim_accs_dict[acc_name] = [acc_fn(cont_table) for cont_table in estim_tables]
     t_test_ave = (time() - tinit) / test_prot.total()
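The oracle flag only changes what is handed to predict/predict_ct: in oracle runs the method receives the true prevalence of each test sample instead of estimating it via quantification, which isolates the part of the CAP error attributable to quantification error. A hypothetical one-sample helper (names follow the code above, not part of the commit) makes the distinction explicit:

def predict_one(method, Ui, oracle=False):
    # Ui is one test LabelledCollection, as yielded by test_prot()
    if not oracle:
        return method.predict(Ui.X)                           # prevalence estimated internally
    return method.predict(Ui.X, oracle_prev=Ui.prevalence())  # true prevalence handed in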
@@ -184,35 +209,6 @@ def cap_errors(true_acc, estim_acc):
     return np.abs(true_acc - estim_acc)
 
 
-def plot_diagonal(cls_name, measure_name, results, base_dir='plots'):
-
-    makedirs(base_dir, exist_ok=True)
-    makedirs(join(base_dir, measure_name), exist_ok=True)
-
-    # Create scatter plot
-    plt.figure(figsize=(10, 10))
-    plt.xlim(0, 1)
-    plt.ylim(0, 1)
-    plt.plot([0, 1], [0, 1], color='black', linestyle='--')
-
-    for method_name in results.keys():
-        xs = results[method_name]['true_acc']
-        ys = results[method_name]['estim_acc']
-        err = cap_errors(xs, ys).mean()
-        #pear_cor, _ = 0, 0 #pearsonr(xs, ys)
-        plt.scatter(xs, ys, label=f'{method_name} {err:.3f}', alpha=0.6)
-
-    plt.legend()
-
-    # Add labels and title
-    plt.xlabel(f'True {measure_name}')
-    plt.ylabel(f'Estimated {measure_name}')
-
-    # Display the plot
-    # plt.show()
-    plt.savefig(join(base_dir, measure_name, 'diagonal_'+cls_name+'.png'))
-
-
 def getpath(basedir, cls_name, acc_name, dataset_name, method_name):
     return f"results/{basedir}/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"
 
@@ -275,7 +271,7 @@ def gen_tables(basedir, datasets):
     classifiers = [classifier for classifier, _ in gen_classifiers()]
     measures = [measure for measure, _ in gen_acc_measure()]
 
-    os.makedirs('tables', exist_ok=True)
+    os.makedirs('./tables', exist_ok=True)
 
     tex_doc = """
     \\documentclass[10pt,a4paper]{article}
|
@ -151,6 +151,8 @@ class LabelledCollection:
|
||||||
indexes_sample = []
|
indexes_sample = []
|
||||||
for class_, n_requested in n_requests.items():
|
for class_, n_requested in n_requests.items():
|
||||||
n_candidates = len(self.index[class_])
|
n_candidates = len(self.index[class_])
|
||||||
|
#print(n_candidates)
|
||||||
|
#print(n_requested, 'rq')
|
||||||
index_sample = self.index[class_][
|
index_sample = self.index[class_][
|
||||||
np.random.choice(n_candidates, size=n_requested, replace=True)
|
np.random.choice(n_candidates, size=n_requested, replace=True)
|
||||||
] if n_requested > 0 else []
|
] if n_requested > 0 else []
|
||||||
|
|
|
@@ -211,8 +211,9 @@ class GridSearchQ(BaseQuantifier):
         self._sout(f'error={status}')
 
     def fit(self, training: LabelledCollection):
-        """ Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
-        the error metric.
+        """
+        Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
+        the error metric.
 
         :param training: the training set on which to optimize the hyperparameters
         :return: self