step rate adaptation
parent 3dba708fe4
commit ccb634fae5
@@ -11,7 +11,7 @@ from tqdm import tqdm
 from scipy.stats import dirichlet
 
 
-def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
+def bayesian(kdey, data, probabilistic_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
     """
     Bayes:
     P(prev|data) = P(data|prev) P(prev) / P(data)
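With a flat prior (log_prior below returns 0), the unnormalized log-posterior the sampler explores is

    log P(prev|data) = sum_j log( sum_i prev_i * p_kde(x_j | class i) ) + log P(prev) + const

that is, each test point contributes the log of a prevalence-weighted mixture of the per-class KDE densities.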
@@ -26,6 +26,7 @@ def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000,
         return np.exp(kde.score_samples(X))
 
     X = probabilistic_classifier.predict_proba(data)
+    kdes = kdey.mix_densities
     test_densities = np.asarray([pdf(kde_i, X) for kde_i in kdes])
 
     def log_likelihood(prev, epsilon=1e-10):
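Given the precomputed test_densities matrix (one row per class KDE, one column per test point), the log_likelihood referenced above plausibly reduces to a single matrix-vector product; a minimal self-contained sketch, with epsilon guarding log(0) as in the signature:

    import numpy as np

    def log_likelihood(prev, test_densities, epsilon=1e-10):
        # mixture density of each test point: sum_i prev[i] * p(x_j | class i)
        mixture = prev @ test_densities         # shape: (n_test,)
        return np.log(mixture + epsilon).sum()  # epsilon avoids log(0)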
@@ -44,9 +45,9 @@ def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000,
     def log_prior(prev):
         return 0
 
-    def sample_neighbour(prev):
-        dir_noise = np.random.normal(scale=0.05, size=len(prev))
-        # neighbour = F.normalize_prevalence(prev + dir_noise, method='clip')
+    def sample_neighbour(prev, step_size=0.05):
+        # random-walk Metropolis-Hastings
+        dir_noise = np.random.normal(scale=step_size, size=len(prev))
         neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
         return neighbour
 
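The proposal is a Gaussian random walk whose scale is now the tunable step_size; 'mapsimplex' then maps the perturbed vector back onto the probability simplex. QuaPy's exact normalization is not shown in this diff; a standard choice with the same contract, the Euclidean projection of Duchi et al. (2008), sketched as a hypothetical stand-in:

    import numpy as np

    def project_to_simplex(v):
        # Euclidean projection onto {w : w >= 0, sum(w) = 1}
        u = np.sort(v)[::-1]
        css = np.cumsum(u)
        rho = np.nonzero(u + (1.0 - css) / (np.arange(len(v)) + 1) > 0)[0][-1]
        theta = (1.0 - css[rho]) / (rho + 1)
        return np.maximum(v + theta, 0.0)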
@@ -54,22 +55,33 @@ def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000,
     current_prev = F.uniform_prevalence(n_classes) if init is None else init
     current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
 
-    # Metropolis-Hastings
+    # Metropolis-Hastings with adaptive rate
+    step_size = 0.05
+    target_acceptance = 0.3
+    adapt_rate = 0.01
+    acceptance_history = []
+
     samples = []
-    for _ in tqdm(range(MAX_ITER), total=MAX_ITER):
-        proposed_prev = sample_neighbour(current_prev)
+    for i in tqdm(range(MAX_ITER), total=MAX_ITER):
+        proposed_prev = sample_neighbour(current_prev, step_size)
 
         # probability of acceptance
         proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
         acceptance = proposed_likelihood - current_likelihood
 
-        # decide acceptance
-        if np.log(np.random.rand()) < acceptance:
-            # accept
+        accepted = np.log(np.random.rand()) < acceptance
+        if accepted:
             current_prev = proposed_prev
             current_likelihood = proposed_likelihood
 
         samples.append(current_prev)
+        acceptance_history.append(1. if accepted else 0.)
+
+        if i < warmup and i%10==0 and len(acceptance_history)>=100:
+            recent_accept_rate = np.mean(acceptance_history[-100:])
+            # print(f'{i=} recent_accept_rate={recent_accept_rate:.4f} (current step_size={step_size:.4f})')
+            step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
 
     # remove "warmup" initial iterations
     samples = np.asarray(samples[warmup:])
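The adaptation rule is multiplicative: an acceptance rate above the 0.3 target widens the proposal, one below it shrinks it, so the chain settles near the target rate. A minimal sketch of the same rule on a toy 1D standard-normal target (all names here are illustrative, not from the patch):

    import numpy as np

    rng = np.random.default_rng(0)
    log_target = lambda x: -0.5 * x**2       # unnormalized log N(0,1)

    x, step_size = 0.0, 5.0                  # deliberately oversized start
    target_acceptance, adapt_rate = 0.3, 0.01
    history = []
    for i in range(10_000):
        proposed = x + rng.normal(scale=step_size)
        accepted = np.log(rng.random()) < log_target(proposed) - log_target(x)
        if accepted:
            x = proposed
        history.append(1.0 if accepted else 0.0)
        if i % 10 == 0 and len(history) >= 100:
            recent = np.mean(history[-100:])
            # widen when accepting too often, shrink when too rarely
            step_size *= np.exp(adapt_rate * (recent - target_acceptance))

    print(f'{step_size=:.3f}, recent acceptance={np.mean(history[-1000:]):.2f}')

Freezing the adaptation once i reaches warmup, as the patch does, means the retained samples come from a fixed-kernel Metropolis-Hastings chain, which sidesteps the validity issues of unbounded adaptation.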
@@ -81,31 +93,34 @@ if __name__ == '__main__':
     cls = LogisticRegression()
     kdey = KDEyML(cls)
 
-    train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean', standardize=True).train_test
+    train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
 
     with qp.util.temp_seed(2):
         print('fitting KDEy')
         kdey.fit(*train.Xy)
 
         # shifted = test.sampling(500, *[0.7, 0.1, 0.2])
-        shifted = test.sampling(500, *test.prevalence()[::-1])
+        # shifted = test.sampling(500, *test.prevalence()[::-1])
+        shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
         prev_hat = kdey.predict(shifted.X)
         mae = qp.error.mae(shifted.prevalence(), prev_hat)
-        print(f'true_prev={strprev(shifted.prevalence())}, prev_hat={strprev(prev_hat)}, {mae=:.4f}')
+        print(f'true_prev={strprev(shifted.prevalence())}')
+        print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
 
-        kdes = kdey.mix_densities
         h = kdey.classifier
-        samples = bayesian(kdes, shifted.X, h, init=None, MAX_ITER=5_000, warmup=1_000)
+        samples = bayesian(kdey, shifted.X, h, init=None, MAX_ITER=5_000, warmup=3_000)
 
-        print(f'mean posterior {strprev(samples.mean(axis=0))}')
         conf_interval = ConfidenceIntervals(samples, confidence_level=0.95)
+        print()
+        mae = qp.error.mae(shifted.prevalence(), conf_interval.point_estimate())
+        print(f'mean posterior {strprev(samples.mean(axis=0))}, {mae=:.4f}')
         print(f'CI={conf_interval}')
         print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
         print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.20f}%')
 
         if train.n_classes == 3:
             plot_prev_points(samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
             # plot_prev_points_matplot(samples)
 
 
     # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
     # print(report.mean(numeric_only=True))
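Aside from QuaPy's ConfidenceIntervals helper, per-class credible intervals can be read directly off the array returned by bayesian (shape (n_samples, n_classes)); a simple equal-tailed percentile sketch, which is a simplification and not necessarily how ConfidenceIntervals builds its region:

    import numpy as np

    def credible_intervals(samples, confidence_level=0.95):
        # equal-tailed per-class intervals from posterior samples
        alpha = (1.0 - confidence_level) / 2.0
        lo = np.percentile(samples, 100 * alpha, axis=0)
        hi = np.percentile(samples, 100 * (1 - alpha), axis=0)
        return lo, hi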
@@ -27,7 +27,7 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
 
     # Plot
     fig, ax = plt.subplots(figsize=(6, 6))
-    ax.scatter(*cartesian(prevs), s=50, alpha=0.05, edgecolors='none', label='samples')
+    ax.scatter(*cartesian(prevs), s=10, alpha=0.5, edgecolors='none', label='samples')
     ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
     ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
     ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
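The cartesian helper is defined outside this diff; for 3-class prevalences it presumably converts barycentric coordinates to 2D positions on the probability triangle. One common convention, offered as a hypothetical reconstruction:

    import numpy as np

    def cartesian(prevs):
        # vertices: class 0 at (0,0), class 1 at (1,0), class 2 at (0.5, sqrt(3)/2)
        p = np.atleast_2d(prevs)
        x = p[:, 1] + 0.5 * p[:, 2]
        y = (np.sqrt(3) / 2) * p[:, 2]
        return x, y

Returning (x, y) as a pair matches the unpacking ax.scatter(*cartesian(...)) used above.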