QuaPy/distribution_matching/method_kdey_closed_efficien...

from cgi import test
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from scipy.stats import multivariate_normal
import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
    DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
from time import time
from sklearn.metrics.pairwise import rbf_kernel


def gram_matrix_mix_sum(bandwidth, X, Y=None, reduce=True):
    # this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
    # to contain pairwise evaluations of N(x|mu,Sigma1+Sigma2) with mu=y and Sigma1 and Sigma2 are 
    # two "scalar matrices" (h^2) I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth)
    variance = 2 * (bandwidth**2)
    nRows,nD = X.shape
    gamma = 1/(2*variance)
    gram = rbf_kernel(X, Y, gamma=gamma)

    norm_factor = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD)))
    gram *= norm_factor
    if Y is None:
        # ignores the diagonal
        aggr = (2 * np.triu(gram, 1)).sum()
    else:
        aggr = gram.sum()
    return aggr


class KDEyclosed_efficient(AggregativeProbabilisticQuantifier):

    def __init__(self, classifier: BaseEstimator, val_split=0.4, bandwidth=0.1, n_jobs=None, random_state=0):
        self.classifier = classifier
        self.val_split = val_split
        self.bandwidth = bandwidth
        self.n_jobs = n_jobs
        self.random_state=random_state

    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
        """

        :param data: the training set
        :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
        :param val_split: either a float in (0,1) indicating the proportion of training instances to use for
         validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
         indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
         to estimate the parameters
        """

        # print('[fit] enter')
        if val_split is None:
            val_split = self.val_split

        self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
            data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
        )

        assert all(sorted(np.unique(y)) == np.arange(data.n_classes)), \
            'label name gaps not allowed in current implementation'

        n = data.n_classes
        h = self.bandwidth

        P = posteriors
        counts_inv = 1 / (data.counts())

        nD = P.shape[1]
        C = ((2 * np.pi) ** (-nD / 2)) * (self.bandwidth ** (-nD))
        tr_tr_sums = np.zeros(shape=(n,n), dtype=float)
        self.tr_C = []
        for i in range(n):
            for j in range(n):
                if i > j:
                    tr_tr_sums[i,j] = tr_tr_sums[j,i]
                else:
                    if i == j:
                        tr_tr_sums[i, j] = gram_matrix_mix_sum(h, P[y == i])
                        self.tr_C.append(C * sum(y == i))
                    else:
                        block = gram_matrix_mix_sum(h, P[y == i], P[y == j])
                        tr_tr_sums[i, j] = block
        self.tr_C = np.asarray(self.tr_C)
        self.Ptr = posteriors
        self.ytr = y
        self.tr_tr_sums = tr_tr_sums
        self.counts_inv = counts_inv

        return self


    def aggregate(self, posteriors: np.ndarray):

        # print('[aggregate] enter')

        Ptr = self.Ptr
        Pte = posteriors

        K,nD = Pte.shape
        Kinv = (1/K)
        h = self.bandwidth
        n = Ptr.shape[1]
        y = self.ytr
        tr_tr_sums = self.tr_tr_sums

        C = ((2 * np.pi) ** (-nD / 2)) * (self.bandwidth ** (-nD))
        partC = 0.5*np.log(gram_matrix_mix_sum(h, Pte) * Kinv * Kinv + C*Kinv)


        tr_te_sums = np.zeros(shape=n, dtype=float)
        for i in range(n):
            tr_te_sums[i] = gram_matrix_mix_sum(h, Ptr[y==i], Pte) * self.counts_inv[i] * Kinv

        def match(alpha):
            partA = -np.log((alpha * tr_te_sums).sum())
            alpha_l = alpha * self.counts_inv
            partB = 0.5 * np.log((alpha_l[:,np.newaxis] * tr_tr_sums * alpha_l).sum() + (self.tr_C*(alpha_l**2)).sum())
            return partA + partB + partC

        # print('[aggregate] starts search')

        # the initial point is set as the uniform distribution
        uniform_distribution = np.full(fill_value=1 / n, shape=(n,))
        # uniform_distribution = [0.2, 0.8]

        # solutions are bounded to those contained in the unit-simplex
        bounds = tuple((0, 1) for _ in range(n))  # values in [0,1]
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
        r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        # print('[aggregate] end')
        return r.x
KDE with closed form working 2023-10-13 17:34:26 +02:00			`from cgi import test`
			`import os`
			`import sys`
			`from typing import Union, Callable`
			`import numpy as np`
			`from sklearn.base import BaseEstimator`
			`from sklearn.linear_model import LogisticRegression`
			`import pandas as pd`
			`from sklearn.model_selection import GridSearchCV`
			`from sklearn.neighbors import KernelDensity`
			`from scipy.stats import multivariate_normal`
			`import quapy as qp`
			`from quapy.data import LabelledCollection`
			`from quapy.protocol import APP, UPP`
			`from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \`
			`DistributionMatching, _get_divergence`
			`import scipy`
			`from scipy import optimize`
			`from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional`
			`from time import time`
			`from sklearn.metrics.pairwise import rbf_kernel`


			`def gram_matrix_mix_sum(bandwidth, X, Y=None, reduce=True):`
			`# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))`
			`# to contain pairwise evaluations of N(x\|mu,Sigma1+Sigma2) with mu=y and Sigma1 and Sigma2 are`
			`# two "scalar matrices" (h^2) I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth)`
			`variance = 2 * (bandwidth**2)`
			`nRows,nD = X.shape`
			`gamma = 1/(2*variance)`
			`gram = rbf_kernel(X, Y, gamma=gamma)`

			`norm_factor = 1/np.sqrt(((2np.pi)nD) (variance**(nD)))`
			`gram *= norm_factor`
			`if Y is None:`
			`# ignores the diagonal`
			`aggr = (2 * np.triu(gram, 1)).sum()`
			`else:`
			`aggr = gram.sum()`
			`return aggr`


			`class KDEyclosed_efficient(AggregativeProbabilisticQuantifier):`

			`def __init__(self, classifier: BaseEstimator, val_split=0.4, bandwidth=0.1, n_jobs=None, random_state=0):`
			`self.classifier = classifier`
			`self.val_split = val_split`
			`self.bandwidth = bandwidth`
			`self.n_jobs = n_jobs`
			`self.random_state=random_state`

			`def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):`
			`"""`

			`:param data: the training set`
			`:param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)`
			`:param val_split: either a float in (0,1) indicating the proportion of training instances to use for`
			`validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection`
			`indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV`
			`to estimate the parameters`
			`"""`

			`# print('[fit] enter')`
			`if val_split is None:`
			`val_split = self.val_split`

			`self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(`
			`data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs`
			`)`

			`assert all(sorted(np.unique(y)) == np.arange(data.n_classes)), \`
			`'label name gaps not allowed in current implementation'`

			`n = data.n_classes`
			`h = self.bandwidth`

			`P = posteriors`
			`counts_inv = 1 / (data.counts())`

			`nD = P.shape[1]`
			`C = ((2 * np.pi) ** (-nD / 2)) * (self.bandwidth ** (-nD))`
			`tr_tr_sums = np.zeros(shape=(n,n), dtype=float)`
			`self.tr_C = []`
			`for i in range(n):`
			`for j in range(n):`
			`if i > j:`
			`tr_tr_sums[i,j] = tr_tr_sums[j,i]`
			`else:`
			`if i == j:`
			`tr_tr_sums[i, j] = gram_matrix_mix_sum(h, P[y == i])`
			`self.tr_C.append(C * sum(y == i))`
			`else:`
			`block = gram_matrix_mix_sum(h, P[y == i], P[y == j])`
			`tr_tr_sums[i, j] = block`
			`self.tr_C = np.asarray(self.tr_C)`
			`self.Ptr = posteriors`
			`self.ytr = y`
			`self.tr_tr_sums = tr_tr_sums`
			`self.counts_inv = counts_inv`

			`return self`


			`def aggregate(self, posteriors: np.ndarray):`

			`# print('[aggregate] enter')`

			`Ptr = self.Ptr`
			`Pte = posteriors`

			`K,nD = Pte.shape`
			`Kinv = (1/K)`
			`h = self.bandwidth`
			`n = Ptr.shape[1]`
			`y = self.ytr`
			`tr_tr_sums = self.tr_tr_sums`

			`C = ((2 * np.pi) ** (-nD / 2)) * (self.bandwidth ** (-nD))`
			`partC = 0.5np.log(gram_matrix_mix_sum(h, Pte) Kinv * Kinv + C*Kinv)`


			`tr_te_sums = np.zeros(shape=n, dtype=float)`
			`for i in range(n):`
			`tr_te_sums[i] = gram_matrix_mix_sum(h, Ptr[y==i], Pte) * self.counts_inv[i] * Kinv`

			`def match(alpha):`
			`partA = -np.log((alpha * tr_te_sums).sum())`
			`alpha_l = alpha * self.counts_inv`
			`partB = 0.5 * np.log((alpha_l[:,np.newaxis] * tr_tr_sums * alpha_l).sum() + (self.tr_C(alpha_l*2)).sum())`
			`return partA + partB + partC`

			`# print('[aggregate] starts search')`

			`# the initial point is set as the uniform distribution`
			`uniform_distribution = np.full(fill_value=1 / n, shape=(n,))`
			`# uniform_distribution = [0.2, 0.8]`

			`# solutions are bounded to those contained in the unit-simplex`
			`bounds = tuple((0, 1) for _ in range(n)) # values in [0,1]`
			`constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1`
			`r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)`
			`# print('[aggregate] end')`
			`return r.x`