57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
import warnings
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
import quapy as qp
|
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
|
from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
|
from method.confidence import ConfidenceIntervals
|
|
from quapy.functional import strprev
|
|
from quapy.method.aggregative import KDEyML
|
|
from quapy.protocol import UPP
|
|
import quapy.functional as F
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
from scipy.stats import dirichlet
|
|
|
|
|
|
|
|
|
|
def main():
    """Run a single BayesianKDEy debugging experiment and print its results.

    The script fits a ``BayesianKDEy`` quantifier (logistic-regression
    classifier, Aitchison kernel, bandwidth 0.3) on the training split of the
    first dataset listed in ``qp.datasets.UCI_BINARY_DATASETS``, draws one
    500-instance sample from the test split with the requested prevalence
    values (0.2, 0.8), and prints:

    * the true prevalence of the sample and the estimate from ``predict``,
      with its MAE;
    * the estimate returned by ``predict_conf`` (reported as the mean
      posterior) with its MAE, the accompanying confidence region, whether
      that region covers the true prevalence, and its amplitude estimated
      with 50,000 Monte Carlo draws.

    When the training data has exactly three classes, the prevalence samples
    held by the quantifier are additionally plotted.
    """
    sample_size = 500
    qp.environ["SAMPLE_SIZE"] = sample_size

    classifier = LogisticRegression()
    bayes_kdey = BayesianKDEy(classifier, bandwidth=.3, kernel='aitchison', mcmc_seed=0)

    train, test = qp.datasets.fetch_UCIBinaryDataset(qp.datasets.UCI_BINARY_DATASETS[0]).train_test

    # Alternative (multiclass) dataset, kept for quick switching:
    # train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test

    # NOTE(review): the exact extent of this seeded block is an assumption --
    # it is taken to cover fitting, sampling, prediction and the Monte Carlo
    # amplitude estimate so that the whole run is reproducible; confirm.
    with qp.util.temp_seed(0):
        print('fitting KDEy')
        bayes_kdey.fit(*train.Xy)

        # Test sample drawn at a prevalence chosen by hand; the commented
        # lines below are alternative sampling strategies.
        shifted = test.sampling(sample_size, 0.2, 0.8)
        # shifted = test.sampling(500, *test.prevalence()[::-1])
        # shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))

        # Plain point estimate.
        prev_hat = bayes_kdey.predict(shifted.X)
        print(f'true_prev={strprev(shifted.prevalence())}')
        mae = qp.error.mae(shifted.prevalence(), prev_hat)
        print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')

        # Estimate accompanied by a confidence region.
        prev_hat, conf_interval = bayes_kdey.predict_conf(shifted.X)
        mae = qp.error.mae(shifted.prevalence(), prev_hat)
        print(f'mean posterior {strprev(prev_hat)}, {mae=:.4f}')
        print(f'CI={conf_interval}')
        print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
        print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.3f}%')

    # The plot is only produced for three-class problems.
    if train.n_classes == 3:
        plot_prev_points(
            bayes_kdey.prevalence_samples,
            true_prev=shifted.prevalence(),
            point_estim=prev_hat,
            train_prev=train.prevalence(),
        )
        # plot_prev_points_matplot(samples)

    # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
    # print(report.mean(numeric_only=True))


if __name__ == '__main__':
    main()
|
|
|
|
|