diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py
index 2ad515dd..9b83d633 100644
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@@ -85,20 +85,20 @@ class Add(CombinationKernel):
         [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
         return target
 
-    # def gradients_XX(self, dL_dK, X, X2, cov=True):
-    #     if cov==True: # full covarance
-    #         if X2 is None:
-    #             target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
-    #         else:
-    #             target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
-    #     else: # diagonal covariance
-    #         if X2 is None:
-    #             target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
-    #         else:
-    #             target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
+    def gradients_XX(self, dL_dK, X, X2, cov=True):
+        if cov==True: # full covariance
+            if X2 is None:
+                target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
+            else:
+                target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
+        else: # diagonal covariance
+            if X2 is None:
+                target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
+            else:
+                target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
 
-    #     [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=True)) for p in self.parts]
-    #     return target
+        [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov)) for p in self.parts]
+        return target
 
     def gradients_XX_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape)
diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py
index b64d145b..37307e6b 100644
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@@ -132,7 +132,7 @@ class Kern(Parameterized):
         raise NotImplementedError
     def gradients_X_X2(self, dL_dK, X, X2):
         return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
-    def gradients_XX(self, dL_dK, X, X2, cov='False'):
+    def gradients_XX(self, dL_dK, X, X2, cov=True):
         """
         .. math::
diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py
index c0d46c0f..ddb16ea1 100644
--- a/GPy/kern/src/kernel_slice_operations.py
+++ b/GPy/kern/src/kernel_slice_operations.py
@@ -24,7 +24,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
         put_clean(dct, 'gradients_X', _slice_gradients_X)
         put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
-#        put_clean(dct, 'gradients_XX', _slice_gradients_XX)
+        put_clean(dct, 'gradients_XX', _slice_gradients_XX)
         put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
         put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
 
@@ -112,23 +112,23 @@ def _slice_gradients_X(f):
         return ret
     return wrap
 
-# def _slice_gradients_XX(f):
-#     @wraps(f)
-#     def wrap(self, dL_dK, X, X2=None, cov=True):
-#         if X2 is None:
-#             N, M = X.shape[0], X.shape[0]
-#         else:
-#             N, M = X.shape[0], X2.shape[0]
-#         if cov==True: # full covariance
-#             with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s:
-#                 #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
-#                 ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
-#         else: # diagonal covariance
-#             with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
-#                 #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
-#                 ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
-#         return ret
-#     return wrap
+def _slice_gradients_XX(f):
+    @wraps(f)
+    def wrap(self, dL_dK, X, X2=None, cov=True):
+        if X2 is None:
+            N, M = X.shape[0], X.shape[0]
+        else:
+            N, M = X.shape[0], X2.shape[0]
+        if cov==True: # full covariance
+            with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s:
+                #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
+                ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=True))
+        else: # diagonal covariance
+            with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
+                #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
+                ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=False))
+        return ret
+    return wrap
 
 def _slice_gradients_X_diag(f):
     @wraps(f)
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 90aa4297..4de52d91 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -242,7 +242,6 @@ class Stationary(Kern):
         dL_drdr = self.dK2_drdr_via_X(X, X2) # * dL_dK we perofrm this product later
         tmp2 = dL_drdr * invdist2
         l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)
-        print ['l2',l2]
 
         if X2 is None:
             X2 = X
@@ -262,7 +261,7 @@ class Stationary(Kern):
                     else:
                         grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r])/l2[q])
         else:
-            # Diagonal covariance
+            # Diagonal covariance, old code
             grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
             #grad = np.empty(X.shape, dtype=np.float64)
             for q in range(self.input_dim):
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index ae9aebfb..f47e9805 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -101,7 +101,21 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
     def parameters_changed(self):
         self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
 
+class Kern_check_d2K_dXdX(Kern_check_model):
+    """This class allows gradient checks for the second derivative of a kernel with respect to X. """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
+        self.X = Param('X',X)
+        self.link_parameter(self.X)
+    def log_likelihood(self):
+        return np.sum(self.kernel.gradients_X(self.dL_dK,self.X, self.X2))
+
+    def parameters_changed(self):
+        self.X.gradient[:] = self.kernel.gradients_XX(self.dL_dK, self.X, self.X2,cov=True).sum(0).sum(1)
+
+# class Kern_check_d2Kdiag_dXdX(Kern_check_model):
+#     """This class allows gradient checks for the second derivative of a kernel diagonal with respect to X. """
 
 def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
     """
@@ -239,6 +253,49 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
         testmodel.checkgrad(verbose=True)
         assert(result)
         return False
 
+    if verbose:
+        print("Checking second derivative of K(X, X) wrt X.")
+    try:
+        testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=None)
+        if fixed_X_dims is not None:
+            testmodel.X[:,fixed_X_dims].fix()
+        result = testmodel.checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print(("gradients_XX not implemented for " + kern.name))
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print(("Second derivative of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:"))
+        testmodel.checkgrad(verbose=True)
+        assert(result)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking second derivative of K(X, X2) wrt X.")
+    try:
+        testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=X2)
+        if fixed_X_dims is not None:
+            testmodel.X[:,fixed_X_dims].fix()
+        result = testmodel.checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print(("gradients_XX not implemented for " + kern.name))
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print(("Second derivative of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:"))
+        testmodel.checkgrad(verbose=True)
+        assert(result)
+        pass_checks = False
+        return False
+
+#    if verbose:
+#        print("Checking gradients of dKdiag(X, X) wrt X.")
+
     return pass_checks