reduction kdey

This commit is contained in:
Alejandro Moreo Fernandez 2024-09-25 13:34:34 +02:00
parent 4a3b18b3a3
commit da006ee89a
2 changed files with 51 additions and 16 deletions

View File

@ -13,6 +13,7 @@ import quapy.functional as F
from sklearn.metrics.pairwise import rbf_kernel from sklearn.metrics.pairwise import rbf_kernel
from scipy import optimize from scipy import optimize
from tqdm import tqdm from tqdm import tqdm
import quapy.functional as F
epsilon = 1e-10 epsilon = 1e-10
@ -102,6 +103,7 @@ class KDEyMLauto(KDEyML):
bounds = [(0.00001, 1)] bounds = [(0.00001, 1)]
r = optimize.minimize(neg_loglikelihood_bandwidth, x0=[current_bandwidth], method='SLSQP', bounds=bounds) r = optimize.minimize(neg_loglikelihood_bandwidth, x0=[current_bandwidth], method='SLSQP', bounds=bounds)
print(f'iterations-bandwidth={r.nit}') print(f'iterations-bandwidth={r.nit}')
assert r.success, 'Process did not converge!'
return r.x[0] return r.x[0]
def optim_minimize_both(self, current_bandwidth, current_prev, tr_posteriors, tr_y, te_posteriors, classes): def optim_minimize_both(self, current_bandwidth, current_prev, tr_posteriors, tr_y, te_posteriors, classes):
@ -120,6 +122,7 @@ class KDEyMLauto(KDEyML):
prevalence_bandwidth = np.append(current_prev, current_bandwidth) prevalence_bandwidth = np.append(current_prev, current_bandwidth)
r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints) r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints)
print(f'iterations-both={r.nit}') print(f'iterations-both={r.nit}')
assert r.success, 'Process did not converge!'
prev_band = r.x prev_band = r.x
current_prevalence = prev_band[:-1] current_prevalence = prev_band[:-1]
current_bandwidth = prev_band[-1] current_bandwidth = prev_band[-1]
@ -141,6 +144,7 @@ class KDEyMLauto(KDEyML):
prevalence_bandwidth = np.concatenate((current_prev, current_bandwidth)) prevalence_bandwidth = np.concatenate((current_prev, current_bandwidth))
r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints) r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints)
print(f'iterations-both-fine={r.nit}') print(f'iterations-both-fine={r.nit}')
assert r.success, 'Process did not converge!'
prev_band = r.x prev_band = r.x
current_prevalence = prev_band[:n_classes] current_prevalence = prev_band[:n_classes]
current_bandwidth = prev_band[n_classes:] current_bandwidth = prev_band[n_classes:]
@ -213,7 +217,7 @@ class KDEyMLauto(KDEyML):
init_prev = np.full(fill_value=1 / n_classes, shape=(n_classes,)) init_prev = np.full(fill_value=1 / n_classes, shape=(n_classes,))
def neglikelihood_band(bandwidth): def neglikelihood_band(bandwidth):
mix_densities = self.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth) mix_densities = self.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth[0])
test_densities = [self.pdf(kde_i, te_posteriors) for kde_i in mix_densities] test_densities = [self.pdf(kde_i, te_posteriors) for kde_i in mix_densities]
def neg_loglikelihood_prev(prev): def neg_loglikelihood_prev(prev):
@ -225,10 +229,11 @@ class KDEyMLauto(KDEyML):
return neglikelihood return neglikelihood
bounds = [(0.0001, 1)] bounds = [(0.0001, 0.2)]
r = optimize.minimize(neglikelihood_band, x0=[0.001], method='SLSQP', bounds=bounds) r = optimize.minimize(neglikelihood_band, x0=[0.001], method='SLSQP', bounds=bounds)
best_band = r.x[0] best_band = r.x[0]
assert r.success, 'Process did not converge!'
print(f'solved in nit={r.nit}') print(f'solved in nit={r.nit}')
return best_band return best_band
@ -247,8 +252,9 @@ def optim_minimize(loss: Callable, init_prev: np.ndarray, return_loss=False):
# solutions are bounded to those contained in the unit-simplex # solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1] bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1 constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(loss, x0=init_prev, method='SLSQP', bounds=bounds, constraints=constraints, tol=1e-10) r = optimize.minimize(loss, x0=init_prev, method='SLSQP', bounds=bounds, constraints=constraints)
# print(f'iterations-prevalence={r.nit}') # print(f'iterations-prevalence={r.nit}')
assert r.success, 'Process did not converge!'
if return_loss: if return_loss:
return r.x, r.fun return r.x, r.fun
else: else:
@ -299,27 +305,36 @@ class KDEyMLauto2(KDEyML):
if self.target == 'likelihood+': if self.target == 'likelihood+':
def neg_loglikelihood_band_(bandwidth): def neg_loglikelihood_band_(bandwidth):
bandwidth=bandwidth[0]
mix_densities = self.get_mixture_components(*train.Xy, train.classes_, bandwidth) mix_densities = self.get_mixture_components(*train.Xy, train.classes_, bandwidth)
loss_accum = 0 loss_accum = 0
for (sample, prevtrue) in prot():
test_densities2 = [self.pdf(kde_i, sample) for kde_i in mix_densities]
for (sample, prev) in tqdm(prot(), total=repeats): def neg_loglikelihood_prev(prev):
test_densities = [self.pdf(kde_i, sample) for kde_i in mix_densities] test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities2))
def neg_loglikelihood_prev_(prev):
test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities))
test_loglikelihood = np.log(test_mixture_likelihood + epsilon) test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
return -np.sum(test_loglikelihood) nll = -np.sum(test_loglikelihood)
# print(f'\t\tprev={F.strprev(prev)} got {nll=}')
return nll
pred_prev, loss_val = optim_minimize(neg_loglikelihood_prev_, init_prev, return_loss=True) init_prev = np.full(fill_value=1 / n_classes, shape=(n_classes,))
loss_accum += loss_val pred_prev, neglikelihood = optim_minimize(neg_loglikelihood_prev, init_prev, return_loss=True)
# print(f'\t\tprev={F.strprev(pred_prev)} (true={F.strprev(prev)}) got {neglikelihood=}')
loss_accum += neglikelihood
print(f'\t{bandwidth=:.8f} got {loss_accum=:.8f}')
return loss_accum return loss_accum
bounds = [tuple((0.0001, 0.2))] bounds = [tuple((0.0001, 0.2))]
init_bandwidth = 0.05 init_bandwidth = 0.1
r = optimize.minimize(neg_loglikelihood_band_, x0=[init_bandwidth], method='SLSQP', bounds=bounds) r = optimize.minimize(neg_loglikelihood_band_, x0=[init_bandwidth], method='Nelder-Mead', bounds=bounds, tol=1)
best_band = r.x[0] best_band = r.x[0]
best_loss_val = r.fun
nit = r.nit nit = r.nit
assert r.success, 'Process did not converge!'
#found bandwidth=0.00994664 after nit=3 iterations loss_val=-212247.24305)
else: else:
best_band = None best_band = None
@ -350,5 +365,24 @@ class KDEyMLauto2(KDEyML):
best_band = bandwidth best_band = bandwidth
nit=20 nit=20
print(f'found bandwidth={best_band:.4f} after {nit=} iterations') # (loss_val={best_loss_val:.5f})') print(f'found bandwidth={best_band:.8f} after {nit=} iterations loss_val={best_loss_val:.5f})')
self.bandwidth_ = best_band self.bandwidth_ = best_band
class KDEyMLred(KDEyML):
    """
    KDEyML variant that fits its mixture components on a reduced training sample.

    Before building the mixture components, the collection passed to
    :meth:`aggregation_fit` is subsampled to at most
    ``min(reduction * n_classes, max_reduced)`` elements; collections that are
    already that small are used as they are.

    :param classifier: passed through ``qp._get_classifier``; its result is
        stored as ``self.classifier``
    :param val_split: stored as-is in ``self.val_split``
        (NOTE(review): presumably consumed by the parent class -- confirm)
    :param bandwidth: checked by ``KDEBase._check_bandwidth`` and later handed to
        ``get_mixture_components``
    :param random_state: seed forwarded to the subsampling step so that the
        reduction is reproducible; ``None`` leaves the sampling unseeded
    :param reduction: number of elements per class allowed in the reduced sample
    :param max_reduced: overall cap on the size of the reduced sample
    """

    def __init__(self, classifier: BaseEstimator = None, val_split=5, bandwidth=0.1, random_state=None,
                 reduction=100, max_reduced=500):
        self.classifier = qp._get_classifier(classifier)
        self.val_split = val_split
        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
        self.reduction = reduction
        self.max_reduced = max_reduced
        self.random_state = random_state

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        """
        Build the mixture components from a (possibly subsampled) collection.

        :param classif_predictions: collection used to build the mixture components;
            it is subsampled when it holds more than
            ``min(reduction * n_classes, max_reduced)`` elements
        :param data: collection whose ``classes_`` are passed to
            ``get_mixture_components``
        :return: self
        """
        n_classes = classif_predictions.n_classes
        tr_length = min(self.reduction * n_classes, self.max_reduced)
        if len(classif_predictions) > tr_length:
            # Forward the seed: without it the reduction differs between runs even
            # when the caller supplied a random_state.
            classif_predictions = classif_predictions.sampling(tr_length, random_state=self.random_state)
        self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.classes_, self.bandwidth)
        return self

View File

@ -7,7 +7,7 @@ import numpy as np
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
import quapy as qp import quapy as qp
from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2 from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2, KDEyMLred
from quapy.method.aggregative import PACC, EMQ, KDEyML from quapy.method.aggregative import PACC, EMQ, KDEyML
from quapy.model_selection import GridSearchQ from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP from quapy.protocol import UPP
@ -35,10 +35,11 @@ METHODS = [
('PACC', PACC(newLR()), wrap_hyper(logreg_grid)), ('PACC', PACC(newLR()), wrap_hyper(logreg_grid)),
('EMQ', EMQ(newLR()), wrap_hyper(logreg_grid)), ('EMQ', EMQ(newLR()), wrap_hyper(logreg_grid)),
('KDEy-ML', KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}), ('KDEy-ML', KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}),
('KDEy-MLred', KDEyMLred(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}),
('KDEy-ML-scott', KDEyML(newLR(), bandwidth='scott'), wrap_hyper(logreg_grid)), ('KDEy-ML-scott', KDEyML(newLR(), bandwidth='scott'), wrap_hyper(logreg_grid)),
('KDEy-ML-silver', KDEyML(newLR(), bandwidth='silverman'), wrap_hyper(logreg_grid)), ('KDEy-ML-silver', KDEyML(newLR(), bandwidth='silverman'), wrap_hyper(logreg_grid)),
('KDEy-ML-autoLike', KDEyMLauto2(newLR(), bandwidth='auto', target='likelihood'), wrap_hyper(logreg_grid)), ('KDEy-ML-autoLike', KDEyMLauto2(newLR(), bandwidth='auto', target='likelihood'), wrap_hyper(logreg_grid)),
('KDEy-ML-autoLike+', KDEyMLauto2(newLR(), bandwidth='auto', target='likelihood+'), wrap_hyper(logreg_grid)), # ('KDEy-ML-autoLike+', KDEyMLauto2(newLR(), bandwidth='auto', target='likelihood+'), wrap_hyper(logreg_grid)), <-- does not work
('KDEy-ML-autoAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mae'), wrap_hyper(logreg_grid)), ('KDEy-ML-autoAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mae'), wrap_hyper(logreg_grid)),
('KDEy-ML-autoRAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae'), wrap_hyper(logreg_grid)), ('KDEy-ML-autoRAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae'), wrap_hyper(logreg_grid)),
] ]