diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py
index ec09f157..2ad515dd 100644
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@@ -85,13 +85,20 @@ class Add(CombinationKernel):
         [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
         return target

-    def gradients_XX(self, dL_dK, X, X2):
-        if X2 is None:
-            target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
-        else:
-            target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
-        [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
-        return target
+    # def gradients_XX(self, dL_dK, X, X2, cov=True):
+    #     if cov:  # full covariance
+    #         if X2 is None:
+    #             target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
+    #         else:
+    #             target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
+    #     else:  # diagonal covariance
+    #         if X2 is None:
+    #             target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
+    #         else:
+    #             target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
+
+    #     [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts]
+    #     return target

     def gradients_XX_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape)
diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py
index 393cf1c2..b64d145b 100644
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@@ -132,7 +132,7 @@ class Kern(Parameterized):
         raise NotImplementedError
     def gradients_X_X2(self, dL_dK, X, X2):
         return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
-    def gradients_XX(self, dL_dK, X, X2):
+    def gradients_XX(self, dL_dK, X, X2, cov=True):
        """
        .. math::
diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py
index 57b34de9..c0d46c0f 100644
--- a/GPy/kern/src/kernel_slice_operations.py
+++ b/GPy/kern/src/kernel_slice_operations.py
@@ -24,7 +24,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
         put_clean(dct, 'gradients_X', _slice_gradients_X)
         put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
-        put_clean(dct, 'gradients_XX', _slice_gradients_XX)
+        # put_clean(dct, 'gradients_XX', _slice_gradients_XX)
         put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
         put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)

@@ -112,18 +112,23 @@ def _slice_gradients_X(f):
         return ret
     return wrap

-def _slice_gradients_XX(f):
-    @wraps(f)
-    def wrap(self, dL_dK, X, X2=None):
-        if X2 is None:
-            N, M = X.shape[0], X.shape[0]
-        else:
-            N, M = X.shape[0], X2.shape[0]
-        with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
-            #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
-            ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
-        return ret
-    return wrap
+# def _slice_gradients_XX(f):
+#     @wraps(f)
+#     def wrap(self, dL_dK, X, X2=None, cov=True):
+#         if X2 is None:
+#             N, M = X.shape[0], X.shape[0]
+#         else:
+#             N, M = X.shape[0], X2.shape[0]
+#         if cov:  # full covariance
+#             with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s:
+#                 ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
+#         else:  # diagonal covariance
+#             with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
+#                 ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
+#         return ret
+#     return wrap

 def _slice_gradients_X_diag(f):
     @wraps(f)
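A minimal standalone sketch of the accumulation the commented-out Add.gradients_XX above would perform once re-enabled; add_gradients_XX is a hypothetical helper name used only for illustration, not part of the patch:

import numpy as np

def add_gradients_XX(parts, dL_dK, X, X2=None, cov=True):
    # Sum the second-derivative contributions of the parts of an additive kernel.
    M = X.shape[0] if X2 is None else X2.shape[0]
    if cov:
        # full covariance over input dimensions: a QxQ block per pair of points
        target = np.zeros((X.shape[0], M, X.shape[1], X.shape[1]))
    else:
        # diagonal only: a length-Q vector per pair of points
        target = np.zeros((X.shape[0], M, X.shape[1]))
    for p in parts:
        target += p.gradients_XX(dL_dK, X, X2, cov=cov)
    return target

Because differentiation is linear, the second derivative of a sum kernel is simply the sum of the parts' second derivatives, so only the accumulator shape depends on the cov flag.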
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 5e137abb..90aa4297 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -218,45 +218,60 @@ class Stationary(Kern):
         else:
             return self._gradients_X_pure(dL_dK, X, X2)

-    def gradients_XX(self, dL_dK, X, X2=None):
+    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
         """
         Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:

+        cov=True:  returns the full [QxQ] covariance matrix over the input dimensions for each pair of vectors
+        cov=False: returns only the diagonal of that [QxQ] covariance matrix for each pair of vectors
+                   (computationally cheaper when the full covariance matrix is not needed)
+
         ..math:
-            \frac{\partial^2 K}{\partial X\partial X2}
+            \frac{\partial^2 K}{\partial X2^2} = - \frac{\partial^2 K}{\partial X\partial X2}

         ..returns:
-            dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
-            Thus, we return the second derivative in X2.
+            dL2_dXdX2: [NxMxQ] when cov=False, or [NxMxQxQ] when cov=True,
+            for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None).
+            Thus, we return the second derivative in X2.
         """
-        # The off diagonals in Q are always zero, this should also be true for the Linear kernel...
         # According to multivariable chain rule, we can chain the second derivative through r:
         # d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
         invdist = self._inv_dist(X, X2)
         invdist2 = invdist**2
-
-        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
+        dL_dr = self.dK_dr_via_X(X, X2)  # the product with dL_dK is performed later
         tmp1 = dL_dr * invdist
-
-        dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
+        dL_drdr = self.dK2_drdr_via_X(X, X2)  # the product with dL_dK is performed later
         tmp2 = dL_drdr * invdist2
-
-        l2 = np.ones(X.shape[1]) * self.lengthscale**2
+        l2 = np.ones(X.shape[1])*self.lengthscale**2
         if X2 is None:
             X2 = X
             tmp1 -= np.eye(X.shape[0])*self.variance
         else:
-            tmp1[X==X2.T] -= self.variance
-
-        grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
-        #grad = np.empty(X.shape, dtype=np.float64)
-        for q in range(self.input_dim):
-            tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
-            grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
-            #grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
-            #np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
-            #np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
+            # Old version, to be removed: tmp1[X==X2.T] -= self.variance
+            # (seems buggy: the mask does not reliably pick out coincident input points)
+            tmp1[invdist2==0.] -= self.variance
+
+        if cov:  # full covariance: [N, M, Q, Q]
+            grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
+            for q in range(self.input_dim):
+                for r in range(self.input_dim):
+                    # cross-dimension product of distances, (X_q - X2_q)(X_r - X2_r)
+                    tmpdist2 = (X[:,[q]]-X2[:,[q]].T)*(X[:,[r]]-X2[:,[r]].T)
+                    if r == q:
+                        grad[:, :, q, r] = dL_dK*(((tmp1*invdist2 - tmp2)*tmpdist2/l2[r] - tmp1)/l2[q])
+                    else:
+                        grad[:, :, q, r] = dL_dK*(((tmp1*invdist2 - tmp2)*tmpdist2/l2[r])/l2[q])
+        else:  # diagonal covariance: [N, M, Q]
+            grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
+            for q in range(self.input_dim):
+                tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
+                grad[:, :, q] = dL_dK*(((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q])
         return grad

     def gradients_XX_diag(self, dL_dK, X):
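A usage sketch of the patched Stationary.gradients_XX, assuming an RBF kernel (a Stationary subclass in GPy) and the cov-aware signature introduced above; the sizes N, M, Q and the consistency check are illustrative only, not a test shipped with the patch:

import numpy as np
import GPy

N, M, Q = 5, 4, 3
X = np.random.randn(N, Q)
X2 = np.random.randn(M, Q)
dL_dK = np.random.randn(N, M)

k = GPy.kern.RBF(input_dim=Q, ARD=True)
full = k.gradients_XX(dL_dK, X, X2, cov=True)    # expected shape (N, M, Q, Q)
diag = k.gradients_XX(dL_dK, X, X2, cov=False)   # expected shape (N, M, Q)

assert full.shape == (N, M, Q, Q)
assert diag.shape == (N, M, Q)
# cov=False computes only the q==r terms, so it should match the per-pair
# diagonal of the full QxQ blocks up to floating-point error.
assert np.allclose(np.einsum('nmqq->nmq', full), diag)

With cov=False the double loop over input dimensions collapses to a single loop, which is the computational saving the docstring refers to.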