From 5d19039d90de84c4392e741c687a1b7772ca4eb4 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 4 May 2016 09:11:04 +0100
Subject: [PATCH] [gradients xx] getting there

---
 GPy/core/gp.py                          |  2 +-
 GPy/kern/src/kernel_slice_operations.py |  4 +-
 GPy/kern/src/stationary.py              | 97 +++++++++++++------------
 3 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 1c615cde..dbe66698 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -449,7 +449,7 @@ class GP(Model):
         :param bool covariance: whether to include the covariance of the wishart embedding.
         :param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
         """
-        G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
+        G = self.predict_wishart_embedding(Xnew, kern, mean, covariance)
         if dimensions is None:
             dimensions = self.get_most_significant_input_dimensions()[:2]
         G = G[:, dimensions][:,:,dimensions]
diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py
index 315f5437..3f9f6ff3 100644
--- a/GPy/kern/src/kernel_slice_operations.py
+++ b/GPy/kern/src/kernel_slice_operations.py
@@ -70,11 +70,11 @@ class _Slice_wrap(object):
                 ret[:, self.k._all_dims_active] = return_val
             elif len(self.shape) == 3: # derivative for X2!=None
                 if self.diag:
-                    ret[:, :, self.k._all_dims_active][:, self.k._all_dims_active] = return_val
+                    ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
                 else:
                     ret[:, :, self.k._all_dims_active] = return_val
             elif len(self.shape) == 4: # second order derivative
-                ret[:, :, self.k._all_dims_active][:, :, :, self.k._all_dims_active] = return_val
+                ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
             return ret
         return return_val
 
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 34256d95..2c93bd68 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -223,7 +223,7 @@ class Stationary(Kern):
         Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
 
         cov = True: returns the full covariance matrix [QxQ] of the input dimensionfor each pair or vectors
-        cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimensionfor each pair 
+        cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimensionfor each pair
                     or vectors (computationally more efficient if the full covariance matrix is not needed)
         ..math:
             \frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
@@ -247,20 +247,21 @@ class Stationary(Kern):
             X2 = X
             tmp1 -= np.eye(X.shape[0])*self.variance
         else:
-            #tmp1[X==X2.T] -= self.variance # Old version, to be removed 
+            #tmp1[X==X2.T] -= self.variance # Old version, to be removed
                                             # (seems to have a bug: it is subtracted to the first X1 anyway)
             tmp1[invdist2==0.] -= self.variance
-        
+
         if cov: # full covariance
             grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
             for q in range(self.input_dim):
+                tmpdist = (X[:,[q]]-X2[:,[q]].T)
                 for r in range(self.input_dim):
-                    tmpdist2 = (X[:,[q]]-X2[:,[q]].T)*(X[:,[r]]-X2[:,[r]].T) # Introduce temporary distance
+                    tmpdist2 = tmpdist*(X[:,[r]]-X2[:,[r]].T) # Introduce temporary distance
                     if r==q:
                         grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r] - tmp1)/l2[q])
                     else:
                         grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r])/l2[q])
-        else: 
+        else:
             # Diagonal covariance, old code
             grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
             #grad = np.empty(X.shape, dtype=np.float64)
@@ -336,18 +337,18 @@ class Exponential(Stationary):
     def dK_dr(self, r):
         return -self.K_of_r(r)
 
-#    def sde(self): 
-#        """ 
-#        Return the state space representation of the covariance. 
-#        """ 
-#        F  = np.array([[-1/self.lengthscale]]) 
-#        L  = np.array([[1]]) 
-#        Qc = np.array([[2*self.variance/self.lengthscale]]) 
-#        H = np.array([[1]]) 
-#        Pinf = np.array([[self.variance]]) 
-#        # TODO: return the derivatives as well 
-#        
-#        return (F, L, Qc, H, Pinf)  
+#    def sde(self):
+#        """
+#        Return the state space representation of the covariance.
+#        """
+#        F  = np.array([[-1/self.lengthscale]])
+#        L  = np.array([[1]])
+#        Qc = np.array([[2*self.variance/self.lengthscale]])
+#        H = np.array([[1]])
+#        Pinf = np.array([[self.variance]])
+#        # TODO: return the derivatives as well
+#
+#        return (F, L, Qc, H, Pinf)
 
 
 
@@ -416,41 +417,41 @@ class Matern32(Stationary):
         F1lower = np.array([f(lower) for f in F1])[:, None]
         return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
 
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
+    def sde(self):
+        """
+        Return the state space representation of the covariance.
+        """
         variance = float(self.variance.values)
         lengthscale = float(self.lengthscale.values)
-        foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array([[0, 1], [-foo**2, -2*foo]]) 
-        L    = np.array([[0], [1]]) 
-        Qc   = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]]) 
-        H    = np.array([[1, 0]]) 
-        Pinf = np.array([[variance, 0],  
-        [0,              3.*variance/(lengthscale**2)]]) 
-        # Allocate space for the derivatives 
+        foo  = np.sqrt(3.)/lengthscale
+        F    = np.array([[0, 1], [-foo**2, -2*foo]])
+        L    = np.array([[0], [1]])
+        Qc   = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
+        H    = np.array([[1, 0]])
+        Pinf = np.array([[variance, 0],
+        [0,              3.*variance/(lengthscale**2)]])
+        # Allocate space for the derivatives
         dF    = np.empty([F.shape[0],F.shape[1],2])
-        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
-        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
-        # The partial derivatives 
-        dFvariance       = np.zeros([2,2]) 
-        dFlengthscale    = np.array([[0,0], 
-        [6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]]) 
-        dQcvariance      = np.array([12.*np.sqrt(3)/lengthscale**3]) 
-        dQclengthscale   = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance]) 
-        dPinfvariance    = np.array([[1,0],[0,3./lengthscale**2]]) 
-        dPinflengthscale = np.array([[0,0], 
-        [0,-6*variance/lengthscale**3]]) 
-        # Combine the derivatives 
-        dF[:,:,0]    = dFvariance 
-        dF[:,:,1]    = dFlengthscale 
-        dQc[:,:,0]   = dQcvariance 
-        dQc[:,:,1]   = dQclengthscale 
-        dPinf[:,:,0] = dPinfvariance 
-        dPinf[:,:,1] = dPinflengthscale 
+        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2])
+        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
+        # The partial derivatives
+        dFvariance       = np.zeros([2,2])
+        dFlengthscale    = np.array([[0,0],
+        [6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
+        dQcvariance      = np.array([12.*np.sqrt(3)/lengthscale**3])
+        dQclengthscale   = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
+        dPinfvariance    = np.array([[1,0],[0,3./lengthscale**2]])
+        dPinflengthscale = np.array([[0,0],
+        [0,-6*variance/lengthscale**3]])
+        # Combine the derivatives
+        dF[:,:,0]    = dFvariance
+        dF[:,:,1]    = dFlengthscale
+        dQc[:,:,0]   = dQcvariance
+        dQc[:,:,1]   = dQclengthscale
+        dPinf[:,:,0] = dPinfvariance
+        dPinf[:,:,1] = dPinflengthscale
 
-        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
+        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
 
 class Matern52(Stationary):
     """