Merge branch 'gradientsxx' of github.com:SheffieldML/GPy into gradientsxx

Max Zwiessele 2016-05-04 12:26:19 +01:00
commit da30663228


@@ -335,8 +335,7 @@ class GP(Model):
         dv_dX += kern.gradients_X(alpha, Xnew, self._predictive_variable)
         return mean_jac, dv_dX
 
-    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
+    def predict_jacobian(self, Xnew, kern=None, full_cov=False):
         """
         Compute the derivatives of the posterior of the GP.
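
The only change in this hunk is the default for full_cov flipping from True to False. A minimal sketch of the caller-visible effect, assuming a fitted GPy model m (hypothetical name):

    mean_jac, var_jac = m.predict_jacobian(Xnew)                 # new default: per-point Jacobian covariances
    mean_jac, var_jac = m.predict_jacobian(Xnew, full_cov=True)  # opt back in to the cross-covariance blocks

Callers that relied on the old default now need the explicit keyword argument.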
@@ -354,15 +353,11 @@ class GP(Model):
         :param X: The points at which to get the predictive gradients.
         :type X: np.ndarray (Xnew x self.input_dim)
         :param kern: The kernel to compute the jacobian for.
-        :param boolean full_cov: whether to return the full covariance of the jacobian.
+        :param boolean full_cov: whether to return the cross-covariance terms between
+            the N* Jacobian vectors
         :returns: dmu_dX, dv_dX
         :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q, (D))]
-
-        Note: We always return the sum over the input_dim gradients, as the off-diagonals
-        in the input_dim are not needed for further calculations.
-        This is a compromise for an increase in speed. Mathematically the jacobian
-        would have another dimension in Q.
         """
         if kern is None:
             kern = self.kern
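
The docstring's shape conventions can be checked directly. A runnable sketch under the following assumptions: the kernel implements gradients_XX (the stationary kernels do), GPy.models.GPRegression and GPy.kern.RBF are used as standard GPy API, and the sizes are arbitrary:

    import numpy as np
    import GPy

    N, Ns, Q = 20, 5, 3                                  # training points, test points (N*), input dims
    X, Y = np.random.randn(N, Q), np.random.randn(N, 1)  # a single output, D = 1
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(Q))

    Xnew = np.random.randn(Ns, Q)
    mean_jac, var_jac = m.predict_jacobian(Xnew)         # full_cov=False after this change
    print(mean_jac.shape)                                # (N*, Q, D) = (5, 3, 1)
    print(var_jac.shape)                                 # per-point Q x Q blocks: (5, 3, 3)

With full_cov=True the variance instead carries (N*, N*, Q, Q) cross-covariance blocks; a trailing output axis appears only in the missing-data branch below.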
@@ -378,27 +373,28 @@ class GP(Model):
             dK_dXnew_full[i] = kern.gradients_X(one, Xnew, self._predictive_variable[[i]])
         if full_cov:
-            dK2_dXdX = kern.gradients_XX(one, Xnew, cov=False)
+            dK2_dXdX = kern.gradients_XX(one, Xnew)
         else:
-            dK2_dXdX = kern.gradients_XX_diag(one, Xnew, cov=False)
+            dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
+            for i in range(Xnew.shape[0]):
+                dK2_dXdX[i:i+1, :, :] = kern.gradients_XX(one, Xnew[i:i+1, :])
         def compute_cov_inner(wi):
             if full_cov:
                 # full covariance gradients:
-                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+                var_jac = dK2_dXdX - np.einsum('qnm,msr->nsqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)  # n, s = Xnew.shape[0]; m = pred_var.shape[0]
             else:
-                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+                var_jac = dK2_dXdX - np.einsum('qnm,mnr->nqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
             return var_jac
         if self.posterior.woodbury_inv.ndim == 3:  # Missing data:
             if full_cov:
-                var_jac = np.empty((Xnew.shape[0], Xnew.shape[0], Xnew.shape[1], self.output_dim))
+                var_jac = np.empty((Xnew.shape[0], Xnew.shape[0], Xnew.shape[1], Xnew.shape[1], self.output_dim))
                 for d in range(self.posterior.woodbury_inv.shape[2]):
+                    var_jac[:, :, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
+            else:
+                var_jac = np.empty((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1], self.output_dim))
+                for d in range(self.posterior.woodbury_inv.shape[2]):
                     var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
-            else:
-                var_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
-                for d in range(self.posterior.woodbury_inv.shape[2]):
-                    var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
         else:
             var_jac = compute_cov_inner(self.posterior.woodbury_inv)
         return mean_jac, var_jac
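
The two einsum contractions above are per-test-point quadratic forms in the kernel gradients: writing J_n = dK_dXnew_full[:, n, :], the diagonal case computes J_n^T W J_n and the full_cov case the cross blocks J_n^T W J_s. A standalone numpy check of that reading; every name and size here is a stand-in:

    import numpy as np

    M, N, Q = 6, 4, 3                      # predictive points, test points, input dims (arbitrary)
    dK = np.random.randn(M, N, Q)          # plays the role of dK_dXnew_full
    W = np.random.randn(M, M)              # plays the role of woodbury_inv

    # diagonal case: out[n] = J_n^T W J_n
    diag = np.einsum('qnm,mnr->nqr', dK.T.dot(W), dK)
    ref = np.stack([dK[:, n, :].T @ W @ dK[:, n, :] for n in range(N)])
    assert np.allclose(diag, ref)

    # full_cov case: out[n, s] = J_n^T W J_s, the cross-covariance blocks
    full = np.einsum('qnm,msr->nsqr', dK.T.dot(W), dK)
    ref_full = np.stack([[dK[:, n, :].T @ W @ dK[:, s, :] for s in range(N)] for n in range(N)])
    assert np.allclose(full, ref_full)
    assert np.allclose(np.einsum('nnqr->nqr', full), diag)   # its diagonal recovers the first case

This also shows why the replaced 'qim,miq->iq' form could only return the Q diagonal entries per point, never a full Q x Q block.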
@@ -422,10 +418,11 @@ class GP(Model):
         mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
         mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
         Sigma = np.zeros(mumuT.shape)
-        if var_jac.ndim == 3:
+        if var_jac.ndim == 4:  # Missing data
             Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
         else:
-            Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
+            Sigma = self.output_dim*var_jac
         G = 0.
         if mean:
             G += mumuT
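
One way to read the mumuT/Sigma split above: for a GP whose D output dimensions share the Jacobian covariance Sigma_*, the expected outer product of the Q-by-D Jacobian J_* at a test point decomposes as (a restatement for context, not taken from the diff)

    \mathbb{E}\big[J_* J_*^\top\big] = \sum_{d=1}^{D} \mu_{J,d}\,\mu_{J,d}^\top + D\,\Sigma_*

mumuT accumulates the first term (the einsum sums the outer products of the mean Jacobians over the output axis d), and Sigma = self.output_dim*var_jac is the second. The missing-data branch sums per-output variances instead, since there each output d carries its own covariance.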