Compare commits

...

2 Commits

Author SHA1 Message Date
Lorenzo Volpi bd2ea3334b gitignore updated 2024-04-11 13:00:42 +02:00
Lorenzo Volpi 0778450f7d gitignore updated 2024-04-11 12:58:19 +02:00
4 changed files with 22 additions and 422 deletions

37
.gitignore vendored
View File

@@ -1,30 +1,37 @@
*.code-workspace
quavenv/*
*.pdf
#virtual envs
quavenv/*
.venv/*
#vscode config
.vscode/*
__pycache__/*
baselines/__pycache__/*
baselines/densratio/__pycache__/*
qcdash/__pycache__/*
qcpanel/__pycache__/*
quacc/__pycache__/*
quacc/*/__pycache__/*
tests/__pycache__/*
tests/*/__pycache__/*
tests/*/*/__pycache__/*
htmlcov/*
test*.py
#cache files
*__pycache__*
# coverage files
htmlcov/*
*.coverage
.coverage
scp_sync.py
#results
out/*
output/*
# !output/main/
#test stuff
test*.py
#pyenv
.python-version
poetry.lock
#poetry
poetry.lock
scp_sync.py
selected_gs.py
accuracy_predictiion*.py

View File

@@ -1,90 +0,0 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

import quapy as qp
from method.kdey import KDEyML, KDEyCS, KDEyHD
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy

datasets = qp.datasets.UCI_DATASETS

# target = 'f1'
target = 'acc'

errors = []

# dataset_name = datasets[-2]
for dataset_name in datasets:

    if dataset_name in ['balance.2', 'acute.a', 'acute.b', 'iris.1']:
        continue

    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test

    print(f'dataset name = {dataset_name}')
    print(f'#train = {len(train)}')
    print(f'#test = {len(test)}')

    cls = LogisticRegression()

    train, val = train.split_stratified(random_state=0)

    cls.fit(*train.Xy)
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    for sample in APP(test, n_prevalences=11, repeats=1, sample_size=100, return_type='labelled_collection')():
        print('='*80)
        y_hat = cls.predict(sample.instances)
        y = sample.labels

        if target == 'acc':
            acc = (y_hat==y).mean()
        else:
            acc = f1_score(y, y_hat, zero_division=0)

        q = EMQ(cls)
        q.fit(train, fit_classifier=False)
        # q = EMQ(cls)
        # q.fit(train, val_split=val, fit_classifier=False)
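        # M_hat[i, j] estimates P(pred=i | true=j) on the validation split; under
        # prior-probability shift these class-conditional rates are assumed to carry
        # over to the test sample, so multiplying column j by the estimated test
        # prevalence p_hat[j] gives the joint contingency table (rows=predicted, cols=true)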
        M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)

        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        tp = cont_table_hat[1,1]
        tn = cont_table_hat[0,0]
        fn = cont_table_hat[0,1]
        fp = cont_table_hat[1,0]

        if target == 'acc':
            acc_hat = (tp+tn)
        else:
            den = (2*tp + fn + fp)
            if den > 0:
                acc_hat = 2*tp / den
            else:
                acc_hat = 0

        error = abs(acc - acc_hat)
        errors.append(error)

        print('true_prev: ', sample.prevalence())
        print('estim_prev: ', p_hat)
        print('M-true:\n', M_true)
        print('M-hat:\n', M_hat)
        print('cont_table:\n', cont_table_hat)
        print(f'classifier accuracy={acc:.3f}')
        print(f'estimated accuracy={acc_hat:.3f}')
        print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
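
In short, the deleted script above estimates a classifier's accuracy on a shifted test sample by combining a quantifier's prevalence estimate with the misclassification rates measured on validation. A minimal standalone sketch of that computation, with made-up numbers rather than anything taken from the repository:

import numpy as np

# illustrative values only (not from the experiments above)
M_hat = np.asarray([[0.9, 0.2],    # P(pred=0 | true=0), P(pred=0 | true=1)
                    [0.1, 0.8]])   # P(pred=1 | true=0), P(pred=1 | true=1)
p_hat = np.asarray([0.3, 0.7])     # estimated test prevalence: Q(Y=0), Q(Y=1)

cont_table = p_hat * M_hat         # joint table, rows=predicted, cols=true
tn, fn = cont_table[0, 0], cont_table[0, 1]
fp, tp = cont_table[1, 0], cont_table[1, 1]

print(cont_table.sum())            # 1.0
print(tp + tn)                     # estimated accuracy: 0.56 + 0.27 = 0.83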

View File

@@ -1,269 +0,0 @@
import numpy as np
import scipy.special
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

import quapy as qp
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
from sklearn import clone
import quapy.functional as F

# datasets = qp.datasets.UCI_DATASETS
datasets = ['imdb']

# target = 'f1'
target = 'acc'

errors = []
def method_1(cls, train, val, sample, y=None, y_hat=None):
    """
    Converts a misclassification matrix computed in validation (i.e., in the train distribution P) into
    the corresponding equivalent misclassification matrix in test (i.e., in the test distribution Q)
    by relying on the PPS assumptions.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # q = EMQ(LogisticRegression(class_weight='balanced'))
    # q.fit(val, fit_classifier=True)
    q = EMQ(cls)
    q.fit(train, fit_classifier=False)
    # q = KDEyML(cls)
    # q.fit(train, val_split=val, fit_classifier=False)

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
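    # note: M_true is computed from the test labels and is only used in the diagnostic
    # printouts below; the returned estimate relies on M_hat (validation) and p_hat alone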
    p_hat = q.quantify(sample.instances)
    cont_table_hat = p_hat * M_hat
    # cont_table_hat = np.clip(cont_table_hat, 0, 1)
    # cont_table_hat = cont_table_hat / cont_table_hat.sum()

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('M-true:\n', M_true)
    print('M-hat:\n', M_hat)
    print('cont_table:\n', cont_table_hat)
    print('cont_table Sum :\n', cont_table_hat.sum())

    tp = cont_table_hat[1, 1]
    tn = cont_table_hat[0, 0]
    fn = cont_table_hat[0, 1]
    fp = cont_table_hat[1, 0]

    return tn, fn, fp, tp
def method_2(cls, train, val, sample, y=None, y_hat=None):
    """
    Assume P and Q are the training and test distributions.

    Solves the following system of linear equations:
    tp + fp = CC (the classify & count estimate, observed)
    fn + tp = Q(Y=1) (this is not observed but is estimated via quantification)
    tp + fp + fn + tn = 1 (trivial)

    There are 4 unknowns and 3 equations. The fourth required one is established
    by assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that
    this implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
    In particular, we consider that the tpr in P (estimated via validation, hereafter tpr) and
    in Q (unknown, hereafter tpr_Q) should be the same. This means:
    tpr = tpr_Q = tp / (tp + fn)
    after some manipulation:
    tp (tpr-1) + fn (tpr) = 0 <-- our last equation

    Note that the last equation relies on the estimate tpr. It is likely that, the more
    positives we have, the more reliable this estimate is. This suggests that, in cases
    in which we have more negatives in the validation set than positives, it might be
    convenient to resort to the true negative rate (tnr) instead. This gives rise to
    the alternative fourth equation:
    tn (tnr-1) + fp (tnr) = 0

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    q = ACC(cls)
    q.fit(train, val_split=val, fit_classifier=False)
    p_hat = q.quantify(sample.instances)
    pos_prev = p_hat[1]
    # pos_prev = sample.prevalence()[1]

    cc = CC(cls)
    cc.fit(train, fit_classifier=False)
    cc_prev = cc.quantify(sample.instances)[1]

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    cont_table_true = sample.prevalence() * M_true
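    # solve A x = b for the unknowns x = [tn, fn, fp, tp]:
    #   row 1: fp + tp = cc_prev          (classify & count estimate of P(pred=1))
    #   row 2: fn + tp = pos_prev         (quantifier's estimate of Q(Y=1))
    #   row 3: tn + fn + fp + tp = 1      (the table sums to 1)
    #   row 4: tpr*fn + (tpr-1)*tp = 0    (or the tnr analogue, whichever rate is
    #                                      estimated from more validation examples)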
    if val.prevalence()[1] > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        tpr_hat = M_hat[1, 1]
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        tnr_hat = M_hat[0, 0]
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat-1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [cc_prev, pos_prev, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    cont_table_estim = np.asarray([
        [tn, fn],
        [fp, tp]
    ])

    # if (cont_table_estim < 0).any() or (cont_table_estim>1).any():
    #     cont_table_estim = scipy.special.softmax(cont_table_estim)

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('true_cont_table:\n', cont_table_true)
    print('estim_cont_table:\n', cont_table_estim)
    # print('true_tpr', M_true[1,1])
    # print('estim_tpr', tpr_hat)

    return tn, fn, fp, tp
def method_3(cls, train, val, sample, y=None, y_hat=None):
    """
    This is just method 2, but without involving any of quapy's quantifiers.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """
    classes = val.classes_
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)

    y_hat_test = cls.predict(sample.instances)
    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]

    tpr_hat = M_hat[1,1]
    fpr_hat = M_hat[1,0]
    tnr_hat = M_hat[0,0]
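    # ACC-style adjusted-count correction: with pos_prev_cc = P(pred=1) on the test sample,
    # pos_prev_cc = tpr*Q(Y=1) + fpr*(1 - Q(Y=1)), hence Q(Y=1) = (pos_prev_cc - fpr) / (tpr - fpr)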
    pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)

    pos_prev_val = val.prevalence()[1]

    if pos_prev_val > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat-1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [pos_prev_cc, pos_prev_test_hat, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    return tn, fn, fp, tp


def cls_eval_from_counters(tn, fn, fp, tp):
    if target == 'acc':
        acc_hat = (tp + tn)
    else:
        den = (2 * tp + fn + fp)
        if den > 0:
            acc_hat = 2 * tp / den
        else:
            acc_hat = 0
    return acc_hat


def cls_eval_from_labels(y, y_hat):
    if target == 'acc':
        acc = (y_hat == y).mean()
    else:
        acc = f1_score(y, y_hat, zero_division=0)
    return acc
for dataset_name in datasets:

    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test

    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)

    for train in train_prot():

        if np.product(train.prevalence()) == 0:
            # skip experiments with no positives or no negatives in training
            continue

        cls = LogisticRegression(class_weight='balanced')

        train, val = train.split_stratified(train_prop=0.5, random_state=0)

        print(f'dataset name = {dataset_name}')
        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')

        cls.fit(*train.Xy)

        for sample in APP(test, n_prevalences=21, repeats=10, sample_size=1000, return_type='labelled_collection')():
            print('='*80)
            y_hat = cls.predict(sample.instances)
            y = sample.labels

            acc_true = cls_eval_from_labels(y, y_hat)

            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)
            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)

            error = abs(acc_true - acc_hat)
            errors.append(error)

            print(f'classifier accuracy={acc_true:.3f}')
            print(f'estimated accuracy={acc_hat:.3f}')
            print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
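
The system described in the docstring of method_2 has a unique solution once the four constraints are in place. A small standalone check, with made-up values for the observed quantities (not taken from any run above):

import numpy as np

# illustrative values only
tpr_hat = 0.8     # P(pred=1 | true=1) estimated on validation
cc_prev = 0.46    # classify & count estimate of P(pred=1) on the test sample
pos_prev = 0.50   # quantifier's estimate of Q(Y=1)

# unknowns ordered as x = [tn, fn, fp, tp]
A = np.asarray([
    [0, 0, 1, 1],                    # fp + tp = cc_prev
    [0, 1, 0, 1],                    # fn + tp = pos_prev
    [1, 1, 1, 1],                    # the table sums to 1
    [0, tpr_hat, 0, tpr_hat - 1],    # tpr*fn + (tpr-1)*tp = 0, i.e. tp/(tp+fn) = tpr
])
b = np.asarray([cc_prev, pos_prev, 1, 0])

tn, fn, fp, tp = np.linalg.solve(A, b)
print(tn, fn, fp, tp)    # -> 0.44, 0.10, 0.06, 0.40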

View File

@@ -1,48 +0,0 @@
import numpy as np

from quacc.evaluation.report import DatasetReport

datasets = [
    "imdb/imdb.pickle",
    "rcv1_CCAT/rcv1_CCAT.pickle",
    "rcv1_GCAT/rcv1_GCAT.pickle",
    "rcv1_MCAT/rcv1_MCAT.pickle",
]

gs = {
    "sld_lr_gs": [
        "bin_sld_lr_gs",
        "mul_sld_lr_gs",
        "m3w_sld_lr_gs",
    ],
    "kde_lr_gs": [
        "bin_kde_lr_gs",
        "mul_kde_lr_gs",
        "m3w_kde_lr_gs",
    ],
}
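# for each dataset report, count how often each member of a grid-search group obtains
# the best (lowest) fit score across the report's entries (dr.crs), and print the
# selection frequencies as percentages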
for dst in datasets:
    dr = DatasetReport.unpickle("output/main/" + dst)
    print(f"{dst}\n")
    for name, methods in gs.items():
        print(f"{name}")
        sel_methods = [
            {k: v for k, v in cr.fit_scores.items() if k in methods} for cr in dr.crs
        ]
        best_methods = [
            list(ms.keys())[np.argmin(list(ms.values()))] for ms in sel_methods
        ]
        m_cnt = []
        for m in methods:
            m_cnt.append((np.array(best_methods) == m).nonzero()[0].shape[0])
        m_cnt = np.array(m_cnt)
        m_freq = m_cnt / len(best_methods)
        for n in methods:
            print(n, end="\t")
        print()
        for v in m_freq:
            print(f"{v*100:.2f}", end="\t")
        print("\n\n")