QuaPy/BayesianKDEy/quick_experiment_bayesian_k...

57 lines
2.2 KiB
Python

import warnings
from sklearn.linear_model import LogisticRegression
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from method.confidence import ConfidenceIntervals
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet
if __name__ == '__main__':
qp.environ["SAMPLE_SIZE"] = 500
cls = LogisticRegression()
bayes_kdey = BayesianKDEy(cls, bandwidth=.3, kernel='aitchison', mcmc_seed=0)
datasets = qp.datasets.UCI_BINARY_DATASETS
train, test = qp.datasets.fetch_UCIBinaryDataset(datasets[0]).train_test
# train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
with qp.util.temp_seed(0):
print('fitting KDEy')
bayes_kdey.fit(*train.Xy)
shifted = test.sampling(500, *[0.2, 0.8])
# shifted = test.sampling(500, *test.prevalence()[::-1])
# shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
prev_hat = bayes_kdey.predict(shifted.X)
mae = qp.error.mae(shifted.prevalence(), prev_hat)
print(f'true_prev={strprev(shifted.prevalence())}')
print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
prev_hat, conf_interval = bayes_kdey.predict_conf(shifted.X)
mae = qp.error.mae(shifted.prevalence(), prev_hat)
print(f'mean posterior {strprev(prev_hat)}, {mae=:.4f}')
print(f'CI={conf_interval}')
print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.3f}%')
if train.n_classes == 3:
plot_prev_points(bayes_kdey.prevalence_samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
# plot_prev_points_matplot(samples)
# report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
# print(report.mean(numeric_only=True))