From aa116517cf3b95c3ac6848af8baa6122a8d24ab3 Mon Sep 17 00:00:00 2001
From: Mark Pullin
Date: Tue, 6 Feb 2018 20:51:59 +0000
Subject: [PATCH] Make predictive_gradients more efficient

---
 GPy/core/gp.py | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index d6f42c1c..8041751b 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -376,13 +376,16 @@ class GP(Model):
 
     def predictive_gradients(self, Xnew, kern=None):
         """
-        Compute the derivatives of the predicted latent function with respect to X*
+        Compute the derivatives of the predicted latent function with respect
+        to X*
 
         Given a set of points at which to predict X* (size [N*,Q]), compute the
         derivatives of the mean and variance. Resulting arrays are sized:
-         dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
+         dmu_dX* -- [N*, Q ,D], where D is the number of outputs in this GP
+            (usually one).
 
-        Note that this is not the same as computing the mean and variance of the derivative of the function!
+        Note that this is not the same as computing the mean and variance of
+        the derivative of the function!
 
         dv_dX* -- [N*, Q], (since all outputs have the same variance)
        :param X: The points at which to get the predictive gradients
@@ -393,25 +396,32 @@ class GP(Model):
         """
         if kern is None:
             kern = self.kern
-        mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
+        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
         for i in range(self.output_dim):
-            mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
+            mean_jac[:, :, i] = kern.gradients_X(
+                self.posterior.woodbury_vector[:, i:i+1].T, Xnew,
+                self._predictive_variable)
 
-        # gradients wrt the diagonal part k_{xx}
-        dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
-        #grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
+        # Gradients wrt the diagonal part k_{xx}
+        dv_dX = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)
+
+        # Grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
 
         if self.posterior.woodbury_inv.ndim == 3:
-            tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
-            tmp[:] = dv_dX[:,:,None]
+            var_jac = np.empty(dv_dX.shape +
+                               (self.posterior.woodbury_inv.shape[2],))
+            var_jac[:] = dv_dX[:, :, None]
             for i in range(self.posterior.woodbury_inv.shape[2]):
-                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
-                tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
+                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
+                                   self.posterior.woodbury_inv[:, :, i])
+                var_jac[:, :, i] += kern.gradients_X(alpha, Xnew,
+                                                     self._predictive_variable)
         else:
-            tmp = dv_dX
-            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
-            tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)
-        return mean_jac, tmp
+            var_jac = dv_dX
+            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
+                               self.posterior.woodbury_inv)
+            var_jac += kern.gradients_X(alpha, Xnew, self._predictive_variable)
+        return mean_jac, var_jac
 
     def predict_jacobian(self, Xnew, kern=None, full_cov=False):
         """
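
Reviewer note (not part of the patch): the old code built an N*xN* identity
matrix just to differentiate the diagonal k_{xx} term of the predictive
variance, i.e. it computed the gradient of tr(K(X*,X*)) = sum_i k(x*_i, x*_i)
through the dense kern.gradients_X path, costing O(N*^2) time and memory.
Since only diag(K(X*,X*)) enters the variance, kern.gradients_X_diag with a
vector of ones yields the same quantity in O(N*). The snippet below is a
minimal sanity-check sketch of both the equivalence and the end-to-end
gradients against finite differences; the toy data, kernel choices, seed,
and tolerances are illustrative assumptions, not part of the patch. The GPy
calls used (gradients_X, gradients_X_diag, predictive_gradients, predict,
GPRegression, RBF, Linear) are the standard public API.

    import numpy as np
    import GPy

    np.random.seed(0)
    N, Q = 20, 3
    X = np.random.randn(N, Q)
    Y = np.sin(X[:, :1]) + 0.05*np.random.randn(N, 1)
    Xnew = np.random.randn(5, Q)

    # (1) Old vs. new computation of the diagonal k_{xx} gradient. A Linear
    # kernel is used because its diagonal is non-constant; for stationary
    # kernels (e.g. RBF) both sides are identically zero.
    kern = GPy.kern.Linear(Q)
    old = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)        # dense identity
    new = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)  # ones vector
    assert np.allclose(old, new)

    # (2) Finite-difference check of predictive_gradients end to end. The
    # constant likelihood noise in predict()'s variance does not affect the
    # derivative, so the latent-variance gradient can be compared directly.
    model = GPy.models.GPRegression(X, Y, GPy.kern.RBF(Q) + GPy.kern.Linear(Q))
    dmu_dX, dv_dX = model.predictive_gradients(Xnew)  # shapes (5, Q, 1), (5, Q)
    eps = 1e-6
    for n in range(Xnew.shape[0]):
        for q in range(Q):
            Xp, Xm = Xnew.copy(), Xnew.copy()
            Xp[n, q] += eps
            Xm[n, q] -= eps
            (mu_p, v_p), (mu_m, v_m) = model.predict(Xp), model.predict(Xm)
            assert np.isclose(dmu_dX[n, q, 0], (mu_p[n, 0] - mu_m[n, 0])/(2*eps),
                              atol=1e-4)
            assert np.isclose(dv_dX[n, q], (v_p[n, 0] - v_m[n, 0])/(2*eps),
                              atol=1e-4)

Check (1) exercises exactly the line this patch changes: with weight matrix
I, the old gradients_X call differentiates the trace of the kernel matrix,
which is what gradients_X_diag computes directly without materialising the
N*xN* array. Check (2) guards against regressions in the renamed var_jac
accumulation for the standard (2-dimensional woodbury_inv) case.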