From bf56a2d85e319fe8aa437497721f3365677aa26f Mon Sep 17 00:00:00 2001
From: alessandratosi
Date: Tue, 3 May 2016 15:16:01 +0100
Subject: [PATCH] fixed covariance computation in predict_jacobian

---
 GPy/core/gp.py | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 1c615cde..fc37ab47 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -335,8 +335,7 @@ class GP(Model):
             dv_dX += kern.gradients_X(alpha, Xnew, self._predictive_variable)
         return mean_jac, dv_dX
 
-
-    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
+    def predict_jacobian(self, Xnew, kern=None, full_cov=False):
         """
         Compute the derivatives of the posterior of the GP.
 
@@ -354,15 +353,11 @@ class GP(Model):
 
         :param X: The points at which to get the predictive gradients.
         :type X: np.ndarray (Xnew x self.input_dim)
         :param kern: The kernel to compute the jacobian for.
-        :param boolean full_cov: whether to return the full covariance of the jacobian.
+        :param boolean full_cov: whether to return the cross-covariance terms between
+            the N* Jacobian vectors
         :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
-
-        Note: We always return sum in input_dim gradients, as the off-diagonals
-            in the input_dim are not needed for further calculations.
-            This is a compromise for increase in speed. Mathematically the jacobian would
-            have another dimension in Q.
         """
         if kern is None:
             kern = self.kern
@@ -378,27 +373,28 @@ class GP(Model):
             dK_dXnew_full[i] = kern.gradients_X(one, Xnew, self._predictive_variable[[i]])
         if full_cov:
-            dK2_dXdX = kern.gradients_XX(one, Xnew, cov=False)
+            dK2_dXdX = kern.gradients_XX(one, Xnew)
         else:
-            dK2_dXdX = kern.gradients_XX_diag(one, Xnew, cov=False)
+            dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
+            for i in range(Xnew.shape[0]):
+                dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
         def compute_cov_inner(wi):
             if full_cov:
-                # full covariance gradients:
-                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+                var_jac = dK2_dXdX - np.einsum('qnm,msr->nsqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full) # n,s = Xnew.shape[0], m = pred_var.shape[0]
             else:
-                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+                var_jac = dK2_dXdX - np.einsum('qnm,mnr->nqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
             return var_jac
         if self.posterior.woodbury_inv.ndim == 3: # Missing data:
             if full_cov:
-                var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
+                var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
+                for d in range(self.posterior.woodbury_inv.shape[2]):
+                    var_jac[:, :, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
+            else:
+                var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
                 for d in range(self.posterior.woodbury_inv.shape[2]):
                     var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
-            else:
-                var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
-                for d in range(self.posterior.woodbury_inv.shape[2]):
-                    var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
         else:
             var_jac = compute_cov_inner(self.posterior.woodbury_inv)
         return mean_jac, var_jac
@@ -422,10 +418,11 @@ class GP(Model):
         mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
         mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
         Sigma = np.zeros(mumuT.shape)
-        if var_jac.ndim == 3:
+        if var_jac.ndim == 4: # Missing data
             Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
         else:
-            Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
+            Sigma = self.output_dim*var_jac
+
         G = 0.
         if mean:
             G += mumuT
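
The patch replaces the old contractions ('qnm,miq->niq' and 'qim,miq->iq'), which
summed away one input-dimension axis of the Jacobian covariance, with contractions
that keep the full Q x Q block per test point (and, under full_cov=True, the
cross-covariance blocks between test points). The numpy sketch below checks how the
two new einsum calls relate to each other; the sizes M, N, Q and the names dK and W
are illustrative stand-ins for dK_dXnew_full and woodbury_inv, not GPy API:

    import numpy as np

    M, N, Q = 7, 4, 3                 # training points, test points, input dims (made up)
    dK = np.random.randn(M, N, Q)     # stands in for dK_dXnew_full
    W = np.random.randn(M, M)
    W = W @ W.T                       # stands in for a symmetric woodbury_inv

    # full_cov=True correction: cross-covariance between Jacobians at points n and s
    full = np.einsum('qnm,msr->nsqr', dK.T.dot(W), dK)   # shape (N, N, Q, Q)
    # full_cov=False correction: one Q x Q block per test point
    diag = np.einsum('qnm,mnr->nqr', dK.T.dot(W), dK)    # shape (N, Q, Q)

    # The diagonal contraction is exactly the n == s slice of the full one
    assert np.allclose(diag, full[np.arange(N), np.arange(N)])

A minimal usage sketch of the resulting output shapes, assuming GPy with this patch
applied and a single-output GPRegression model (the data here is arbitrary):

    import numpy as np
    import GPy

    X = np.random.rand(20, 2)
    Y = np.sin(3 * X[:, :1]) + np.cos(2 * X[:, 1:])
    m = GPy.models.GPRegression(X, Y)
    Xstar = np.random.rand(5, 2)

    mu_jac, var_jac = m.predict_jacobian(Xstar, full_cov=False)
    print(mu_jac.shape)        # (5, 2, 1): (N*, Q, D)
    print(var_jac.shape)       # (5, 2, 2): per-point Q x Q covariance blocks

    _, var_jac_full = m.predict_jacobian(Xstar, full_cov=True)
    print(var_jac_full.shape)  # (5, 5, 2, 2): cross-covariances between test points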