[grads x] diagonal entries fixed and add kernel adjusted

2026-06-11 15:15:15 +02:00 · 2016-06-08 13:45:32 +01:00 · 2016-06-08 13:45:32 +01:00 · 0c6e3bc88f
commit 0c6e3bc88f
parent 787168a394
4 changed files with 44 additions and 24 deletions
--- a/GPy/kern/src/linear.py
+++ b/GPy/kern/src/linear.py
@ -101,22 +101,40 @@ class Linear(Kern):
            #return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
            return dL_dK.dot(X2)*self.variances #np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)

-    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
-        #if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
+    def gradients_XX(self, dL_dK, X, X2=None):
+        """
+        Given the derivative of the objective wrt K (dL_dK), compute the second derivative of K wrt X and X2:
+
+        returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors, thus
+        the returned array is of shape [NxNxQxQ].
+
+        ..math:
+            \frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
+
+        ..returns:
+            dL2_dXdX2:  [NxMxQxQ] for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
+                        Thus, we return the second derivative in X2.
+        """
        if X2 is None:
-            return 2*self.variances
-        else:
-            return self.variances
+            X2 = X
+        return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
+        #if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
+        #if X2 is None:
+        #    return np.ones(np.repeat(X.shape, 2)) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
+        #else:
+        #    return np.ones((X.shape[0], X2.shape[0], X.shape[1], X.shape[1])) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]


    def gradients_X_diag(self, dL_dKdiag, X):
        return 2.*self.variances*dL_dKdiag[:,None]*X

-    def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
-        dims = X.shape
-        if cov:
-            dims += (X.shape[1],)
-        return 2*np.ones(dims)*self.variances
+    def gradients_XX_diag(self, dL_dKdiag, X):
+        return np.zeros((X.shape[0], X.shape[1], X.shape[1]))
+
+        #dims = X.shape
+        #if cov:
+        #    dims += (X.shape[1],)
+        #return 2*np.ones(dims)*self.variances

    def input_sensitivity(self, summarize=True):
        return np.ones(self.input_dim) * self.variances
--- a/GPy/kern/src/rbf.py
+++ b/GPy/kern/src/rbf.py
@ -39,6 +39,8 @@ class RBF(Stationary):
    def dK2_drdr(self, r):
        return (r**2-1)*self.K_of_r(r)

+    def dK2_drdr_diag(self):
+        return -self.variance # as the diagonal of r is always filled with zeros
    def __getstate__(self):
        dc = super(RBF, self).__getstate__()
        if self.useGPU:
--- a/GPy/kern/src/static.py
+++ b/GPy/kern/src/static.py
@ -25,18 +25,13 @@ class Static(Kern):
    def gradients_X_diag(self, dL_dKdiag, X):
        return np.zeros(X.shape)

-    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
+    def gradients_XX(self, dL_dK, X, X2=None):
        if X2 is None:
            X2 = X
-        if cov:
-            return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
-        else:
-            return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
+        return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
+
    def gradients_XX_diag(self, dL_dKdiag, X, cov=False):
-        if cov:
-            return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
-        else:
-            return np.zeros(X.shape, dtype=np.float64)
+        return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)

    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        return np.zeros(Z.shape)
@ -195,7 +190,7 @@ class Fixed(Static):

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))
-    
+
    def psi2(self, Z, variational_posterior):
        return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)

@ -259,5 +254,4 @@ class Precomputed(Fixed):

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))
-        
-        
+
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@ -85,6 +85,11 @@ class Stationary(Kern):
    def dK2_drdr(self, r):
        raise NotImplementedError("implement second derivative of covariance wrt r to use this method")

+    @Cache_this(limit=3, ignore_args=())
+    def dK2_drdr_diag(self):
+        "Second order derivative of K in r_{i,i}. The diagonal entries of r are always zero, so r is not taken as an argument here."
+        raise NotImplementedError("implement second derivative of covariance wrt r_diag to use this method")
+
    @Cache_this(limit=3, ignore_args=())
    def K(self, X, X2=None):
        """
@ -253,7 +258,8 @@ class Stationary(Kern):
        dist = X[:,None,:] - X2[None,:,:]
        dist = (dist[:,:,:,None]*dist[:,:,None,:])
        I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
-        grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
+        grad = (((dL_dK*(tmp1*invdist2 - tmp2))[:,:,None,None] * dist)/l2[None,None,:,None]
+                - (dL_dK*tmp1)[:,:,None,None] * I)/l2[None,None,None,:]
        return grad

    def gradients_XX_diag(self, dL_dK_diag, X):
@ -270,7 +276,7 @@ class Stationary(Kern):
        assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"

        l4 =  np.ones(X.shape[1])*self.lengthscale**2
-        return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
+        return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
        #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)

    def _gradients_X_pure(self, dL_dK, X, X2=None):