From bf56a2d85e319fe8aa437497721f3365677aa26f Mon Sep 17 00:00:00 2001
From: alessandratosi
Date: Tue, 3 May 2016 15:16:01 +0100
Subject: [PATCH] fixed covariance computation in predict_jacobian

---
 GPy/core/gp.py | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 1c615cde..fc37ab47 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -335,8 +335,7 @@ class GP(Model):
             dv_dX += kern.gradients_X(alpha, Xnew, self._predictive_variable)
         return mean_jac, dv_dX
 
-
-    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
+    def predict_jacobian(self, Xnew, kern=None, full_cov=False):
         """
         Compute the derivatives of the posterior of the GP.
 
@@ -354,15 +353,11 @@ class GP(Model):
 
         :param X: The points at which to get the predictive gradients.
         :type X: np.ndarray (Xnew x self.input_dim)
         :param kern: The kernel to compute the jacobian for.
-        :param boolean full_cov: whether to return the full covariance of the jacobian.
+        :param boolean full_cov: whether to return the cross-covariance terms between
+            the N* Jacobian vectors
         :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
-
-        Note: We always return sum in input_dim gradients, as the off-diagonals
-            in the input_dim are not needed for further calculations.
-            This is a compromise for increase in speed. Mathematically the jacobian would
-            have another dimension in Q.
         """
         if kern is None:
             kern = self.kern
@@ -378,27 +373,28 @@ class GP(Model):
             dK_dXnew_full[i] = kern.gradients_X(one, Xnew, self._predictive_variable[[i]])
         if full_cov:
-            dK2_dXdX = kern.gradients_XX(one, Xnew, cov=False)
+            dK2_dXdX = kern.gradients_XX(one, Xnew)
         else:
-            dK2_dXdX = kern.gradients_XX_diag(one, Xnew, cov=False)
+            dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
+            for i in range(Xnew.shape[0]):
+                dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
         def compute_cov_inner(wi):
             if full_cov:
-                # full covariance gradients:
-                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+                var_jac = dK2_dXdX - np.einsum('qnm,msr->nsqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full) # n,s = Xnew.shape[0], m = pred_var.shape[0]
             else:
-                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+                var_jac = dK2_dXdX - np.einsum('qnm,mnr->nqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
             return var_jac
         if self.posterior.woodbury_inv.ndim == 3: # Missing data:
             if full_cov:
-                var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
+                var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
+                for d in range(self.posterior.woodbury_inv.shape[2]):
+                    var_jac[:, :, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
+            else:
+                var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
                 for d in range(self.posterior.woodbury_inv.shape[2]):
                     var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
-            else:
-                var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
-                for d in range(self.posterior.woodbury_inv.shape[2]):
-                    var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
         else:
             var_jac = compute_cov_inner(self.posterior.woodbury_inv)
         return mean_jac, var_jac
@@ -422,10 +418,11 @@ class GP(Model):
         mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
         mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
         Sigma = np.zeros(mumuT.shape)
-        if var_jac.ndim == 3:
+        if var_jac.ndim == 4: # Missing data
             Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
         else:
-            Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
+            Sigma = self.output_dim*var_jac
+
         G = 0.
         if mean:
             G += mumuT
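
The patch replaces the old contractions ('qnm,miq->niq' and 'qim,miq->iq'), which
summed away one input-dimension axis of the Jacobian covariance, with contractions
that keep the full Q x Q block per test point (and, under full_cov=True, the
cross-covariance blocks between test points). The numpy sketch below checks how the
two new einsum calls relate to each other; the sizes M, N, Q and the names dK and W
are illustrative stand-ins for dK_dXnew_full and woodbury_inv, not GPy API:

    import numpy as np

    M, N, Q = 7, 4, 3                 # training points, test points, input dims (made up)
    dK = np.random.randn(M, N, Q)     # stands in for dK_dXnew_full
    W = np.random.randn(M, M)
    W = W @ W.T                       # stands in for a symmetric woodbury_inv

    # full_cov=True correction: cross-covariance between Jacobians at points n and s
    full = np.einsum('qnm,msr->nsqr', dK.T.dot(W), dK)   # shape (N, N, Q, Q)
    # full_cov=False correction: one Q x Q block per test point
    diag = np.einsum('qnm,mnr->nqr', dK.T.dot(W), dK)    # shape (N, Q, Q)

    # The diagonal contraction is exactly the n == s slice of the full one
    assert np.allclose(diag, full[np.arange(N), np.arange(N)])

A minimal usage sketch of the resulting output shapes, assuming GPy with this patch
applied and a single-output GPRegression model (the data here is arbitrary):

    import numpy as np
    import GPy

    X = np.random.rand(20, 2)
    Y = np.sin(3 * X[:, :1]) + np.cos(2 * X[:, 1:])
    m = GPy.models.GPRegression(X, Y)
    Xstar = np.random.rand(5, 2)

    mu_jac, var_jac = m.predict_jacobian(Xstar, full_cov=False)
    print(mu_jac.shape)        # (5, 2, 1): (N*, Q, D)
    print(var_jac.shape)       # (5, 2, 2): per-point Q x Q covariance blocks

    _, var_jac_full = m.predict_jacobian(Xstar, full_cov=True)
    print(var_jac_full.shape)  # (5, 5, 2, 2): cross-covariances between test points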