Make predictive_gradients more efficient

Mark Pullin 2018-02-06 20:51:59 +00:00
parent a6c154753d
commit aa116517cf

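The efficiency gain comes from how the gradient of the diagonal term k_{xx} is computed: instead of materialising an N*-by-N* identity matrix and running the dense kern.gradients_X routine, the new code passes a length-N* vector of ones to kern.gradients_X_diag, which only touches the diagonal of the kernel. A minimal sketch of the two calls, assuming GPy is importable; the RBF kernel and the random test points are purely illustrative:

    import numpy as np
    import GPy

    kern = GPy.kern.RBF(input_dim=2)   # illustrative kernel choice
    Xnew = np.random.rand(5, 2)        # hypothetical prediction points, [N*, Q]

    # Old form: allocates an N*-by-N* identity and runs the dense gradient routine.
    dv_dX_old = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)

    # New form: only the diagonal k(x*, x*) is needed, so a length-N* weight
    # vector and the cheaper diagonal routine suffice.
    dv_dX_new = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)

    print(dv_dX_old.shape, dv_dX_new.shape)   # both (5, 2), i.e. [N*, Q]
    print(np.allclose(dv_dX_old, dv_dX_new))  # should agree; both are zero for a stationary kernel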

@@ -376,13 +376,16 @@ class GP(Model):
     def predictive_gradients(self, Xnew, kern=None):
         """
-        Compute the derivatives of the predicted latent function with respect to X*
+        Compute the derivatives of the predicted latent function with respect
+        to X*

         Given a set of points at which to predict X* (size [N*,Q]), compute the
         derivatives of the mean and variance. Resulting arrays are sized:
-         dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
+         dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP
+            (usually one).

-        Note that this is not the same as computing the mean and variance of the derivative of the function!
+        Note that this is not the same as computing the mean and variance of
+        the derivative of the function!

          dv_dX* -- [N*, Q], (since all outputs have the same variance)

         :param X: The points at which to get the predictive gradients
@@ -393,25 +396,32 @@ class GP(Model):
         """

         if kern is None:
             kern = self.kern
-        mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
+        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
         for i in range(self.output_dim):
-            mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
+            mean_jac[:, :, i] = kern.gradients_X(
+                self.posterior.woodbury_vector[:, i:i+1].T, Xnew,
+                self._predictive_variable)

-        # gradients wrt the diagonal part k_{xx}
-        dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
-        #grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
+        # Gradients wrt the diagonal part k_{xx}
+        dv_dX = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)
+
+        # Grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
         if self.posterior.woodbury_inv.ndim == 3:
-            tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
-            tmp[:] = dv_dX[:,:,None]
+            var_jac = np.empty(dv_dX.shape +
+                               (self.posterior.woodbury_inv.shape[2],))
+            var_jac[:] = dv_dX[:, :, None]
             for i in range(self.posterior.woodbury_inv.shape[2]):
-                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
-                tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
+                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
+                                   self.posterior.woodbury_inv[:, :, i])
+                var_jac[:, :, i] += kern.gradients_X(alpha, Xnew,
+                                                     self._predictive_variable)
         else:
-            tmp = dv_dX
-            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
-            tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)
-        return mean_jac, tmp
+            var_jac = dv_dX
+            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
+                               self.posterior.woodbury_inv)
+            var_jac += kern.gradients_X(alpha, Xnew, self._predictive_variable)
+        return mean_jac, var_jac

     def predict_jacobian(self, Xnew, kern=None, full_cov=False):
         """