From 0c6e3bc88f325280af5bcfa01bc83564afe1b113 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 8 Jun 2016 13:45:32 +0100
Subject: [PATCH] [grads x] diagonal entries fixed and add kernel adjusted

---
 GPy/kern/src/linear.py     | 38 ++++++++++++++++++++++++++++----------
 GPy/kern/src/rbf.py        |  2 ++
 GPy/kern/src/static.py     | 18 ++++++------------
 GPy/kern/src/stationary.py | 10 ++++++++--
 4 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/GPy/kern/src/linear.py b/GPy/kern/src/linear.py
index 9d9d5933..e7089fe1 100644
--- a/GPy/kern/src/linear.py
+++ b/GPy/kern/src/linear.py
@@ -101,22 +101,40 @@ class Linear(Kern):
             #return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
             return dL_dK.dot(X2)*self.variances #np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
 
-    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
-        #if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
+    def gradients_XX(self, dL_dK, X, X2=None):
+        r"""
+        Given the derivative of the objective wrt K (dL_dK), compute the second derivative of K wrt X and X2:
+
+        returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors, thus
+        the returned array is of shape [NxMxQxQ] (M = N if X2 is None).
+
+        .. math::
+            \frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
+
+        :returns:
+            dL2_dXdX2:  [NxMxQxQ] for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
+                        Thus, we return the second derivative in X2.
+        """
         if X2 is None:
-            return 2*self.variances
-        else:
-            return self.variances
+            X2 = X
+        return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
+        #if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
+        #if X2 is None:
+        #    return np.ones(np.repeat(X.shape, 2)) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
+        #else:
+        #    return np.ones((X.shape[0], X2.shape[0], X.shape[1], X.shape[1])) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
 
 
     def gradients_X_diag(self, dL_dKdiag, X):
         return 2.*self.variances*dL_dKdiag[:,None]*X
 
-    def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
-        dims = X.shape
-        if cov:
-            dims += (X.shape[1],)
-        return 2*np.ones(dims)*self.variances
+    def gradients_XX_diag(self, dL_dKdiag, X):
+        return np.zeros((X.shape[0], X.shape[1], X.shape[1]))
+
+        #dims = X.shape
+        #if cov:
+        #    dims += (X.shape[1],)
+        #return 2*np.ones(dims)*self.variances
 
     def input_sensitivity(self, summarize=True):
         return np.ones(self.input_dim) * self.variances
diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py
index ff86561d..7a15abe8 100644
--- a/GPy/kern/src/rbf.py
+++ b/GPy/kern/src/rbf.py
@@ -39,6 +39,8 @@ class RBF(Stationary):
     def dK2_drdr(self, r):
         return (r**2-1)*self.K_of_r(r)
 
+    def dK2_drdr_diag(self):
+        return -self.variance # as the diagonal of r is always filled with zeros
     def __getstate__(self):
         dc = super(RBF, self).__getstate__()
         if self.useGPU:
diff --git a/GPy/kern/src/static.py b/GPy/kern/src/static.py
index 995f3b5e..5cf4a1c9 100644
--- a/GPy/kern/src/static.py
+++ b/GPy/kern/src/static.py
@@ -25,18 +25,13 @@ class Static(Kern):
     def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)
 
-    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
+    def gradients_XX(self, dL_dK, X, X2=None):
         if X2 is None:
             X2 = X
-        if cov:
-            return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
-        else:
-            return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
+        return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
+
     def gradients_XX_diag(self, dL_dKdiag, X, cov=False):
-        if cov:
-            return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
-        else:
-            return np.zeros(X.shape, dtype=np.float64)
+        return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
 
     def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return np.zeros(Z.shape)
@@ -195,7 +190,7 @@ class Fixed(Static):
 
     def update_gradients_diag(self, dL_dKdiag, X):
         self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))
-    
+
     def psi2(self, Z, variational_posterior):
         return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
 
@@ -259,5 +254,4 @@ class Precomputed(Fixed):
 
     def update_gradients_diag(self, dL_dKdiag, X):
         self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))
-        
-        
\ No newline at end of file
+
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 141a1347..3bf75a4b 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -85,6 +85,11 @@ class Stationary(Kern):
     def dK2_drdr(self, r):
         raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
 
+    @Cache_this(limit=3, ignore_args=())
+    def dK2_drdr_diag(self):
+        "Second order derivative of K wrt r, evaluated at the diagonal entries r_{i,i}. These are always zero, so r is not passed as an argument."
+        raise NotImplementedError("implement second derivative of covariance wrt r_diag to use this method")
+
     @Cache_this(limit=3, ignore_args=())
     def K(self, X, X2=None):
         """
@@ -253,7 +258,8 @@ class Stationary(Kern):
         dist = X[:,None,:] - X2[None,:,:]
         dist = (dist[:,:,:,None]*dist[:,:,None,:])
         I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
-        grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
+        grad = (((dL_dK*(tmp1*invdist2 - tmp2))[:,:,None,None] * dist)/l2[None,None,:,None]
+                - (dL_dK*tmp1)[:,:,None,None] * I)/l2[None,None,None,:]
         return grad
 
     def gradients_XX_diag(self, dL_dK_diag, X):
@@ -270,7 +276,7 @@ class Stationary(Kern):
         assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
 
         l4 =  np.ones(X.shape[1])*self.lengthscale**2
-        return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
+        return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
         #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
 
     def _gradients_X_pure(self, dL_dK, X, X2=None):