changes to rbf and white to allow new parameter gradient structure

James Hensman 2014-01-24 14:06:16 +00:00
parent ad5a967b39
commit 7b5e8a9ffc
6 changed files with 43 additions and 46 deletions


@@ -24,7 +24,7 @@ class ExactGaussianInference(object):
         """
         find a matrix L which satisfies LL^T = YY^T.
         Note that L may have fewer columns than Y, else L=Y.
         """
         N, D = Y.shape
         if (N>D):
@@ -33,22 +33,26 @@ class ExactGaussianInference(object):
         #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
         raise NotImplementedError, 'TODO' #TODO

-    def inference(self, K, likelihood, Y, Y_metadata=None):
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
         """
         Returns a Posterior class containing essential quantities of the posterior
         """
         YYT_factor = self.get_YYTfactor(Y)
+        K = kern.K(X)
         Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
         alpha, _ = dpotrs(LW, YYT_factor, lower=1)
-        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
         log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
-        dL_dtheta_lik = likelihood._gradients(np.diag(dL_dK))
-        return Posterior(log_marginal, dL_dK, dL_dtheta_lik, LW, alpha, K)
+        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
+        kern.update_gradients_full(dL_dK)
+        likelihood.update_gradients(np.diag(dL_dK))
+        return Posterior(log_marginal, dL_dK, LW, alpha, K)
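
Note: the quantities in inference() are the standard Gaussian-process marginal likelihood identities. With W = K + (likelihood covariance) and alpha = W^{-1} * YYT_factor, dL_dK = 0.5*(alpha alpha^T - D * W^{-1}), and the diagonal handed to the likelihood sums to the noise-variance gradient. A minimal, self-contained numpy check of these identities on toy data (not the GPy API):

import numpy as np

# Toy check of the identities used in inference() above:
#   log p(Y|X) = 0.5*(-N*D*log(2*pi) - D*log|W| - tr(W^{-1} Y Y^T))
#   dL/dK      = 0.5*(alpha alpha^T - D * W^{-1}),  alpha = W^{-1} Y
# and d(log p)/d(noise) = tr(dL/dK), the sum of the np.diag(dL_dK)
# vector that gets passed on to the likelihood.
rng = np.random.RandomState(0)
X = rng.randn(20, 1)
Y = rng.randn(20, 2)
K = np.exp(-0.5 * (X - X.T) ** 2)   # a fixed toy kernel matrix
noise = 0.1

def log_marginal(noise):
    W = K + noise * np.eye(20)
    _, logdet = np.linalg.slogdet(W)
    return 0.5 * (-Y.size * np.log(2 * np.pi) - Y.shape[1] * logdet
                  - np.sum(Y * np.linalg.solve(W, Y)))

Wi = np.linalg.inv(K + noise * np.eye(20))
alpha = Wi.dot(Y)
dL_dK = 0.5 * (alpha.dot(alpha.T) - Y.shape[1] * Wi)

eps = 1e-6
numeric = (log_marginal(noise + eps) - log_marginal(noise - eps)) / (2 * eps)
print(numeric, np.sum(np.diag(dL_dK)))   # should agree to ~1e-6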


@@ -17,7 +17,7 @@ from ..util.linalg import mdot, jitchol, pddet, dpotrs
 from functools import partial as partial_func
 import warnings

-class Laplace(likelihood):
+class LaplaceInference(object):
     """Laplace approximation to a posterior"""

     def __init__(self, data, noise_model, extra_data=None):


@@ -14,7 +14,7 @@ class Posterior(object):
     schemes and the model classes.
     """
-    def __init__(self, log_marginal, dL_dK, dL_dtheta_lik, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
+    def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
         """
         log_marginal: log p(Y|X)
         dL_dK: d/dK log p(Y|X)
@@ -56,7 +56,7 @@ class Posterior(object):
         if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):
             pass # we have sufficient to compute the posterior
         else:
-            raise ValueError, "insufficient onformation to compute the posterior"
+            raise ValueError, "insufficient information to compute the posterior"
         #option 1:
         self._woodbury_chol = woodbury_chol


@@ -80,6 +80,9 @@ class RBF(Kernpart):
         self._X, self._X2 = np.empty(shape=(2, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

     def K(self, X, X2, target):
         self._K_computations(X, X2)
         target += self.variance * self._K_dvar
@@ -87,6 +90,20 @@ class RBF(Kernpart):
     def Kdiag(self, X, target):
         np.add(target, self.variance, target)

+    def psi0(self, Z, mu, S, target):
+        target += self.variance
+
+    def psi1(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += self._psi1
+
+    def psi2(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += self._psi2
+
     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, X2)
         self.variance.gradient = np.sum(self._K_dvar * dL_dK)
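
Note: this hunk is the heart of the commit message's "new parameter gradient structure": instead of every kernel packing derivatives into a flat target array (the old dK_dtheta style, still visible in the white-kernel diff below), each Param object carries its own .gradient attribute, written in place by update_gradients_full. A rough self-contained sketch of the pattern; the names are illustrative, not GPy internals:

import numpy as np

class Param(object):
    # Stand-in for GPy's Param: a named value with a .gradient slot.
    def __init__(self, name, value):
        self.name, self.value, self.gradient = name, float(value), None

class ToyRBF(object):
    def __init__(self, variance, lengthscale):
        self.variance = Param('variance', variance)
        self.lengthscale = Param('lengthscale', lengthscale)

    def K(self, X):
        r2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
        return self.variance.value * np.exp(-0.5 * r2 / self.lengthscale.value ** 2)

    def update_gradients_full(self, dL_dK, X):
        # chain rule, dL/dtheta = sum_ij dL_dK[i,j] * dK[i,j]/dtheta,
        # written straight onto each Param instead of into a shared array
        r2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
        K = self.K(X)
        self.variance.gradient = np.sum(dL_dK * K) / self.variance.value
        self.lengthscale.gradient = np.sum(dL_dK * K * r2) / self.lengthscale.value ** 3

X = np.random.randn(5, 2)
k = ToyRBF(1.0, 0.5)
k.update_gradients_full(np.ones((5, 5)), X)
print(k.variance.gradient, k.lengthscale.gradient)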
@@ -150,9 +167,7 @@ class RBF(Kernpart):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)

-    def dK_dX(self, dL_dK, X, X2, target):
+    def _gradients_X(self, dL_dK, X, X2, target):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:
@@ -165,22 +180,13 @@ class RBF(Kernpart):
     def dKdiag_dX(self, dL_dKdiag, X, target):
         pass

     #---------------------------------------#
     #             PSI statistics            #
     #---------------------------------------#

-    def psi0(self, Z, mu, S, target):
-        target += self.variance
-
     def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
         pass

-    def psi1(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi1
-
     def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
         denominator = (self.lengthscale2 * (self._psi1_denom))
@@ -193,10 +199,6 @@ class RBF(Kernpart):
         target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
         target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)

-    def psi2(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi2
-
     def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
         term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
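
Note: the psi statistics being moved up in this file are expectations of the kernel under a Gaussian variational density q(x_n) = N(mu_n, diag(S_n)), used by the variational sparse-GP path; psi0 is just the variance per data point, and psi1 has the usual closed form for the RBF kernel (as in Titsias & Lawrence, 2010). A hedged sketch of what the psi1 computation amounts to, using my own variable names rather than the cached _psi_computations internals:

import numpy as np

# psi1[n, m] = E_{q(x_n)}[k(x_n, Z[m])] for an RBF kernel.
# mu, S: (N, D) means/variances of q; Z: (M, D) inducing inputs;
# lengthscale2: squared lengthscale(s), scalar or shape (D,).
def rbf_psi1(variance, lengthscale2, Z, mu, S):
    denom = S[:, None, :] + lengthscale2                   # (N, 1, D)
    dist2 = (mu[:, None, :] - Z[None, :, :]) ** 2          # (N, M, D)
    scale = np.prod(np.sqrt(lengthscale2 / denom), axis=-1)
    return variance * scale * np.exp(-0.5 * np.sum(dist2 / denom, axis=-1))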
@@ -283,6 +285,7 @@ class RBF(Kernpart):
         num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
         X, X2 = param_to_array(X, X2)
         weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
+        return target


@@ -19,18 +19,7 @@ class White(Kernpart):
         self.input_dim = input_dim
         self.variance = Param('variance', variance)
         self.add_parameters(self.variance)
-        # self._set_params(np.array([variance]).flatten())
         self._psi1 = 0 # TODO: more elegance here

-    # def _get_params(self):
-    #     return self.variance
-    #
-    # def _set_params(self,x):
-    #     assert x.shape==(1,)
-    #     self.variance = x
-    #
-    # def _get_param_names(self):
-    #     return ['variance']
-
     def K(self,X,X2,target):
         if X2 is None:
@@ -39,14 +28,19 @@ class White(Kernpart):
     def Kdiag(self,X,target):
         target += self.variance

-    def dK_dtheta(self,dL_dK,X,X2,target):
-        if X2 is None:
-            target += np.trace(dL_dK)
+    def update_gradients_full(self, dL_dK, X):
+        self.variance.gradient = np.trace(dL_dK)
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        raise NotImplementedError
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        raise NotImplementedError

     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target += np.sum(dL_dKdiag)

-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         pass

     def dKdiag_dX(self,dL_dKdiag,X,target):
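
Note: the np.trace in the new update_gradients_full is the whole chain rule for white noise: K = variance * I, so dK/dvariance is the identity and sum(dL_dK * I) collapses to the trace (the old dK_dtheta accumulated the same quantity, but only when X2 was None, since the white kernel vanishes off the diagonal block). A one-off numeric confirmation, outside any GPy class:

import numpy as np

# L(v) = <dL_dK, v * I>; its derivative in v is trace(dL_dK)
dL_dK = np.random.randn(4, 4)
L = lambda v: np.sum(dL_dK * (v * np.eye(4)))
eps = 1e-6
print((L(0.3 + eps) - L(0.3 - eps)) / (2 * eps), np.trace(dL_dK))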


@@ -52,12 +52,8 @@ class Gaussian(Likelihood):
     def covariance_matrix(self, Y, Y_metadata=None):
         return np.eye(Y.shape[0]) * self.variance

-    def _gradients(self, partial):
-        """
-        Return the derivative of the log marginal likelihood wrt self.variance,
-        given the appropriate partial derivative
-        """
-        return np.sum(partial)
+    def set_gradients(self, partial):
+        self.variance.gradient = np.sum(partial)

     def _preprocess_values(self, Y):
         """