"""Quick demo of ``BayesianKDEy`` on a UCI dataset.

Fits the quantifier on the training split, draws one test sample with a
shifted class prevalence, and prints:

* the point estimate returned by ``predict`` and its MAE;
* the estimate and confidence-interval object returned by ``predict_conf``,
  whether that object covers the true prevalence, and its amplitude.

When the dataset has exactly three classes, the prevalence samples kept by the
quantifier are additionally plotted.
"""
import warnings
from sklearn.linear_model import LogisticRegression
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from method.confidence import ConfidenceIntervals
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet

# Size of the test sample; also registered as QuaPy's global SAMPLE_SIZE so the
# two values cannot drift apart.
SAMPLE_SIZE = 500

# Class prevalence requested for the shifted test sample (binary case).
TARGET_PREVALENCE = (0.2, 0.8)


def main():
    """Run the demo end to end and print the results to stdout."""
    qp.environ["SAMPLE_SIZE"] = SAMPLE_SIZE

    cls = LogisticRegression()
    bayes_kdey = BayesianKDEy(cls, bandwidth=.3, kernel='aitchison', mcmc_seed=0)

    datasets = qp.datasets.UCI_BINARY_DATASETS
    train, test = qp.datasets.fetch_UCIBinaryDataset(datasets[0]).train_test
    # Alternative (multiclass) dataset:
    # train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test

    # NOTE(review): the original indentation was lost, so the exact extent of
    # the seeded block is reconstructed. Fitting, sampling and every estimate
    # are kept inside it; the optional plot is placed after it -- confirm.
    with qp.util.temp_seed(0):
        print('fitting KDEy')
        bayes_kdey.fit(*train.Xy)

        shifted = test.sampling(SAMPLE_SIZE, *TARGET_PREVALENCE)
        # Alternative sampling strategies:
        # shifted = test.sampling(500, *test.prevalence()[::-1])
        # shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))

        # The true prevalence of the sample is needed several times below;
        # compute it once.
        true_prev = shifted.prevalence()

        # Point estimate.
        prev_hat = bayes_kdey.predict(shifted.X)
        mae = qp.error.mae(true_prev, prev_hat)
        print(f'true_prev={strprev(true_prev)}')
        print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')

        # Estimate accompanied by a confidence-interval object.
        prev_hat, conf_interval = bayes_kdey.predict_conf(shifted.X)
        mae = qp.error.mae(true_prev, prev_hat)
        print(f'mean posterior {strprev(prev_hat)}, {mae=:.4f}')
        print(f'CI={conf_interval}')
        print(f'\tcontains true={conf_interval.coverage(true_value=true_prev)==1}')
        print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.3f}%')

    # The plot is only produced for three-class problems, so it is skipped for
    # the binary dataset selected above.
    if train.n_classes == 3:
        plot_prev_points(
            bayes_kdey.prevalence_samples,
            true_prev=true_prev,
            point_estim=prev_hat,
            train_prev=train.prevalence(),
        )
        # plot_prev_points_matplot(samples)

    # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
    # print(report.mean(numeric_only=True))


if __name__ == '__main__':
    main()