diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index ff5d49d3..531137ae 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -88,6 +88,8 @@ class Kern(Parameterized): return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2] def gradients_X(self, dL_dK, X, X2): raise NotImplementedError + def gradients_X_X2(self, dL_dK, X, X2): + return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X) def gradients_XX(self, dL_dK, X, X2): raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel") def gradients_XX_diag(self, dL_dKdiag, X): diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 8c06d0c0..f274eef3 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -19,6 +19,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'gradients_X', _slice_gradients_X) + put_clean(dct, 'gradients_X_X2', _slice_gradients_X) put_clean(dct, 'gradients_XX', _slice_gradients_XX) put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py index b65fb2e0..c495b77b 100644 --- a/GPy/kern/_src/mlp.py +++ b/GPy/kern/_src/mlp.py @@ -5,6 +5,7 @@ from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np +from ...util.linalg import tdot from ...util.caching import Cache_this four_over_tau = 2./np.pi @@ -40,6 +41,7 @@ class MLP(Kern): self.link_parameters(self.variance, self.weight_variance, self.bias_variance) + @Cache_this(limit=20, ignore_args=()) def K(self, X, X2=None): if X2 is None: X_denom = np.sqrt(self._comp_prod(X)+1.) @@ -51,6 +53,7 @@ class MLP(Kern): XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:] return self.variance*four_over_tau*np.arcsin(XTX) + @Cache_this(limit=20, ignore_args=()) def Kdiag(self, X): """Compute the diagonal of the covariance matrix for X.""" X_prod = self._comp_prod(X) @@ -73,6 +76,10 @@ class MLP(Kern): """Derivative of the covariance matrix with respect to X""" return self._comp_grads(dL_dK, X, X2)[3] + def gradients_X_X2(self, dL_dK, X, X2): + """Derivative of the covariance matrix with respect to X""" + return self._comp_grads(dL_dK, X, X2)[3:] + def gradients_X_diag(self, dL_dKdiag, X): """Gradient of diagonal of covariance with respect to X""" return self._comp_grads_diag(dL_dKdiag, X)[3] diff --git a/GPy/kern/_src/psi_comp/gaussherm.py b/GPy/kern/_src/psi_comp/gaussherm.py index afbca545..8e54e6a0 100644 --- a/GPy/kern/_src/psi_comp/gaussherm.py +++ b/GPy/kern/_src/psi_comp/gaussherm.py @@ -80,8 +80,9 @@ class PSICOMP_GH(PSICOMP): dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i] kern.update_gradients_full(dL_dkfu, X, Z) dtheta += kern.gradient - dX += kern.gradients_X(dL_dkfu, X, Z) - dZ += kern.gradients_X(dL_dkfu.T, Z, X) + dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z) + dX += dX_i + dZ += dZ_i dmu += dX dS += dX*self.locs[i]/(2.*S_sq) kern.gradient[:] = dtheta_old