From af286ba5280614ecc8371b71ed53cc6447d1183b Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Fri, 22 Apr 2016 15:46:30 +0100 Subject: [PATCH] [slicing] fixed slicing for second order derivatives --- GPy/core/gp.py | 4 +- GPy/kern/src/add.py | 6 +- GPy/kern/src/kern.py | 22 ++-- GPy/kern/src/kernel_slice_operations.py | 71 +++++++---- GPy/kern/src/linear.py | 14 ++- GPy/kern/src/static.py | 46 ++++--- GPy/kern/src/stationary.py | 12 +- GPy/testing/kernel_tests.py | 157 +++++++++++++++++++++--- 8 files changed, 250 insertions(+), 82 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 1434573a..1c615cde 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -378,9 +378,9 @@ class GP(Model): dK_dXnew_full[i] = kern.gradients_X(one, Xnew, self._predictive_variable[[i]]) if full_cov: - dK2_dXdX = kern.gradients_XX(one, Xnew) + dK2_dXdX = kern.gradients_XX(one, Xnew, cov=False) else: - dK2_dXdX = kern.gradients_XX_diag(one, Xnew) + dK2_dXdX = kern.gradients_XX_diag(one, Xnew, cov=False) def compute_cov_inner(wi): if full_cov: diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py index 7c03d064..bb04495c 100644 --- a/GPy/kern/src/add.py +++ b/GPy/kern/src/add.py @@ -96,12 +96,12 @@ class Add(CombinationKernel): target = np.zeros((X.shape[0], X.shape[0], X.shape[1])) else: target = np.zeros((X.shape[0], X2.shape[0], X.shape[1])) - [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts] + [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts] return target - def gradients_XX_diag(self, dL_dKdiag, X): + def gradients_XX_diag(self, dL_dKdiag, X, cov=True): target = np.zeros(X.shape) - [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts] + [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X, cov=cov)) for p in self.parts] return target @Cache_this(limit=3, force_kwargs=['which_parts']) diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index 37307e6b..6731a1c3 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -15,10 +15,10 @@ class Kern(Parameterized): # This adds input slice support. The rather ugly code for slicing can be # found in kernel_slice_operations # __meataclass__ is ignored in Python 3 - needs to be put in the function definiton - #__metaclass__ = KernCallsViaSlicerMeta - #Here, we use the Python module six to support Py3 and Py2 simultaneously + # __metaclass__ = KernCallsViaSlicerMeta + # Here, we use the Python module six to support Py3 and Py2 simultaneously #=========================================================================== - _support_GPU=False + _support_GPU = False def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw): """ The base class for a kernel: a positive definite function @@ -62,7 +62,7 @@ class Kern(Parameterized): self.psicomp = PSICOMP_GH() def __setstate__(self, state): - self._all_dims_active = np.arange(0, max(state['active_dims'])+1) + self._all_dims_active = np.arange(0, max(state['active_dims']) + 1) super(Kern, self).__setstate__(state) @property @@ -132,14 +132,14 @@ class Kern(Parameterized): raise NotImplementedError def gradients_X_X2(self, dL_dK, X, X2): return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X) - def gradients_XX(self, dL_dK, X, X2, cov='True'): + def gradients_XX(self, dL_dK, X, X2, cov=True): """ .. 
math:: \\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2} """ raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel") - def gradients_XX_diag(self, dL_dKdiag, X): + def gradients_XX_diag(self, dL_dKdiag, X, cov=True): """ The diagonal of the second derivative w.r.t. X and X2 """ @@ -292,11 +292,11 @@ class Kern(Parameterized): """ assert isinstance(other, Kern), "only kernels can be multiplied to kernels..." from .prod import Prod - #kernels = [] - #if isinstance(self, Prod): kernels.extend(self.parameters) - #else: kernels.append(self) - #if isinstance(other, Prod): kernels.extend(other.parameters) - #else: kernels.append(other) + # kernels = [] + # if isinstance(self, Prod): kernels.extend(self.parameters) + # else: kernels.append(self) + # if isinstance(other, Prod): kernels.extend(other.parameters) + # else: kernels.append(other) return Prod([self, other], name) def _check_input_dim(self, X): diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py index 921ac518..315f5437 100644 --- a/GPy/kern/src/kernel_slice_operations.py +++ b/GPy/kern/src/kernel_slice_operations.py @@ -25,7 +25,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): put_clean(dct, 'gradients_X', _slice_gradients_X) put_clean(dct, 'gradients_X_X2', _slice_gradients_X) put_clean(dct, 'gradients_XX', _slice_gradients_XX) - put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag) + put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'psi0', _slice_psi) @@ -38,15 +38,16 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct) class _Slice_wrap(object): - def __init__(self, k, X, X2=None, ret_shape=None): + def __init__(self, k, X, X2=None, diag=False, ret_shape=None): self.k = k + self.diag = diag if ret_shape is None: self.shape = X.shape else: self.shape = ret_shape - assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape) + assert X.ndim == 2, "need at least column vectors as inputs to kernels for now, given X.shape={!s}".format(X.shape) if X2 is not None: - assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape) + assert X2.ndim == 2, "need at least column vectors as inputs to kernels for now, given X2.shape={!s}".format(X2.shape) if (self.k._all_dims_active is not None) and (self.k._sliced_X == 0): self.k._check_active_dims(X) self.X = self.k._slice_X(X) @@ -67,10 +68,13 @@ class _Slice_wrap(object): ret = np.zeros(self.shape) if len(self.shape) == 2: ret[:, self.k._all_dims_active] = return_val - elif len(self.shape) == 3: - ret[:, :, self.k._all_dims_active] = return_val - elif len(self.shape) == 4: - ret[:, :, :, self.k._all_dims_active] = return_val + elif len(self.shape) == 3: # derivative for X2!=None + if self.diag: + ret[:, :, self.k._all_dims_active][:, self.k._all_dims_active] = return_val + else: + ret[:, :, self.k._all_dims_active] = return_val + elif len(self.shape) == 4: # second order derivative + ret[:, :, self.k._all_dims_active][:, :, :, self.k._all_dims_active] = return_val return ret return return_val @@ -114,24 +118,6 @@ def _slice_gradients_X(f): return ret return wrap -def _slice_gradients_XX(f): - @wraps(f) - def wrap(self, dL_dK, X, X2=None, 
cov=True): - if X2 is None: - N, M = X.shape[0], X.shape[0] - else: - N, M = X.shape[0], X2.shape[0] - if cov: # full covariance - #with _Slice_wrap(self, X, X2, ret_shape=None) as s: - with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s: - ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov)) - else: # diagonal covariance - #with _Slice_wrap(self, X, X2, ret_shape=None) as s: - with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s: - ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov)) - return ret - return wrap - def _slice_gradients_X_diag(f): @wraps(f) def wrap(self, dL_dKdiag, X): @@ -140,6 +126,39 @@ def _slice_gradients_X_diag(f): return ret return wrap +def _slice_gradients_XX(f): + @wraps(f) + def wrap(self, dL_dK, X, X2=None, cov=True): + if X2 is None: + N, M = X.shape[0], X.shape[0] + Q1 = Q2 = X.shape[1] + else: + N, M = X.shape[0], X2.shape[0] + Q1, Q2 = X.shape[1], X2.shape[1] + if cov: # full covariance + #with _Slice_wrap(self, X, X2, ret_shape=None) as s: + with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s: + ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov)) + else: # diagonal covariance + #with _Slice_wrap(self, X, X2, ret_shape=None) as s: + with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s: + ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov)) + return ret + return wrap + +def _slice_gradients_XX_diag(f): + @wraps(f) + def wrap(self, dL_dKdiag, X, cov=True): + N, Q = X.shape + if cov: # full covariance + with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s: + ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov)) + else: # diagonal covariance + with _Slice_wrap(self, X, None, ret_shape=(N, Q)) as s: + ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov)) + return ret + return wrap + def _slice_psi(f): @wraps(f) def wrap(self, Z, variational_posterior): diff --git a/GPy/kern/src/linear.py b/GPy/kern/src/linear.py index cd0fb937..9d9d5933 100644 --- a/GPy/kern/src/linear.py +++ b/GPy/kern/src/linear.py @@ -102,17 +102,21 @@ class Linear(Kern): return dL_dK.dot(X2)*self.variances #np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK) def gradients_XX(self, dL_dK, X, X2=None, cov=True): - if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2 + #if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2 if X2 is None: - return 2*np.ones(X.shape)*self.variances + return 2*self.variances else: - return np.ones(X.shape)*self.variances + return self.variances + def gradients_X_diag(self, dL_dKdiag, X): return 2.*self.variances*dL_dKdiag[:,None]*X - def gradients_XX_diag(self, dL_dKdiag, X): - return 2*np.ones(X.shape)*self.variances + def gradients_XX_diag(self, dL_dKdiag, X, cov=True): + dims = X.shape + if cov: + dims += (X.shape[1],) + return 2*np.ones(dims)*self.variances def input_sensitivity(self, summarize=True): return np.ones(self.input_dim) * self.variances diff --git a/GPy/kern/src/static.py b/GPy/kern/src/static.py index 1745dc23..995f3b5e 100644 --- a/GPy/kern/src/static.py +++ b/GPy/kern/src/static.py @@ -6,6 +6,7 @@ from .kern import Kern import numpy as np from ...core.parameterization import Param from paramz.transformations import Logexp +from paramz.caching import Cache_this class Static(Kern): def __init__(self, input_dim, variance, active_dims, name): @@ -28,11 +29,14 @@ class Static(Kern): if X2 is None: X2 = X if cov: - return np.zeros((X.shape[0], X2.shape[0], X.shape[1],X.shape[1]), dtype=np.float64) + return np.zeros((X.shape[0], X2.shape[0], X.shape[1], 
X.shape[1]), dtype=np.float64) else: return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) - def gradients_XX_diag(self, dL_dKdiag, X): - return np.zeros(X.shape) + def gradients_XX_diag(self, dL_dKdiag, X, cov=False): + if cov: + return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64) + else: + return np.zeros(X.shape, dtype=np.float64) def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return np.zeros(Z.shape) @@ -175,17 +179,23 @@ class Fixed(Static): super(Fixed, self).__init__(input_dim, variance, active_dims, name) self.fixed_K = covariance_matrix def K(self, X, X2): - return self.variance * self.fixed_K + if X2 is None: + return self.variance * self.fixed_K + else: + return np.zeros((X.shape[0], X2.shape[0])) def Kdiag(self, X): return self.variance * self.fixed_K.diagonal() def update_gradients_full(self, dL_dK, X, X2=None): - self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K) + if X2 is None: + self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K) + else: + self.variance.gradient = 0 def update_gradients_diag(self, dL_dKdiag, X): self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K)) - + def psi2(self, Z, variational_posterior): return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64) @@ -227,21 +237,27 @@ class Precomputed(Fixed): :param variance: the variance of the kernel :type variance: float """ + assert input_dim==1, "Precomputed only implemented in one dimension. Use multiple Precomputed kernels to have more dimensions by making use of active_dims" super(Precomputed, self).__init__(input_dim, covariance_matrix, variance, active_dims, name) - def K(self, X, X2=None): + + @Cache_this(limit=2) + def _index(self, X, X2): if X2 is None: - return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')] + i1 = i2 = X.astype('int').flat else: - return self.variance * self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')] + i1, i2 = X.astype('int').flat, X2.astype('int').flat + return self.fixed_K[i1,:][:,i2] + + def K(self, X, X2=None): + return self.variance * self._index(X, X2) def Kdiag(self, X): - return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')].diagonal() + return self.variance * self._index(X,None).diagonal() def update_gradients_full(self, dL_dK, X, X2=None): - if X2 is None: - self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')]) - else: - self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')]) + self.variance.gradient = np.einsum('ij,ij', dL_dK, self._index(X, X2)) def update_gradients_diag(self, dL_dKdiag, X): - self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')]) \ No newline at end of file + self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None)) + + \ No newline at end of file diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index ae302266..8f6d1804 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -273,17 +273,19 @@ class Stationary(Kern): #np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q]) return grad - def gradients_XX_diag(self, dL_dK, X): + def gradients_XX_diag(self, dL_dK, X, cov=True): """ - Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2: + Given the derivative of the objective dL_dK, compute 
the second derivative of K wrt X: ..math: - \frac{\partial^2 K}{\partial X\partial X2} + \frac{\partial^2 K}{\partial X\partial X} ..returns: - dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] + dL2_dXdX: [NxQ], for X [NxQ] if cov is False, [NxQxQ] if cov is True """ - return np.ones(X.shape) * self.variance/self.lengthscale**2 + if cov: + return np.zeros(X.shape+(X.shape[1],)) + return np.zeros(X.shape)#np.ones(X.shape) * self.variance/self.lengthscale**2 def _gradients_X_pure(self, dL_dK, X, X2=None): invdist = self._inv_dist(X, X2) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 262b7d45..f2b95be8 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -104,7 +104,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX): def parameters_changed(self): self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) -class Kern_check_d2K_dXdX(Kern_check_model): +class Kern_check_d2K_dXdX_cov(Kern_check_model): """This class allows gradient checks for the secondderivative of a kernel with respect to X. """ def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) @@ -115,8 +115,55 @@ class Kern_check_d2K_dXdX(Kern_check_model): return np.sum(self.kernel.gradients_X(self.dL_dK,self.X, self.X2)) def parameters_changed(self): - self.X.gradient[:] = self.kernel.gradients_XX(self.dL_dK, self.X, self.X2) + #if self.kernel.name == 'rbf': + # import ipdb;ipdb.set_trace() + grads = self.kernel.gradients_XX(self.dL_dK, self.X, self.X2, cov=True) + self.X.gradient[:] = grads.sum(-1).sum(1) +class Kern_check_d2K_dXdX_no_cov(Kern_check_model): + """This class allows gradient checks for the secondderivative of a kernel with respect to X. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.X = Param('X',X) + self.link_parameter(self.X) + + def log_likelihood(self): + return np.sum(self.kernel.gradients_X(self.dL_dK,self.X, self.X2)) + + def parameters_changed(self): + #if self.kernel.name == 'rbf': + # import ipdb;ipdb.set_trace() + grads = self.kernel.gradients_XX(self.dL_dK, self.X, self.X2, cov=False) + self.X.gradient[:] = grads.sum(1) + + +class Kern_check_d2Kdiag_dXdX_cov(Kern_check_model): + """This class allows gradient checks for the secondderivative of a kernel with respect to X. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.X = Param('X',X) + self.link_parameter(self.X) + + def log_likelihood(self): + return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(),self.X)) + + def parameters_changed(self): + grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X, cov=True) + self.X.gradient[:] = grads.sum(-1) + +class Kern_check_d2Kdiag_dXdX_no_cov(Kern_check_model): + """This class allows gradient checks for the secondderivative of a kernel with respect to X. 
""" + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.X = Param('X',X) + self.link_parameter(self.X) + + def log_likelihood(self): + return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(),self.X)) + + def parameters_changed(self): + grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X, cov=False) + self.X.gradient[:] = grads # class Kern_check_d2Kdiag_dXdX(Kern_check_model): # """This class allows gradient checks for the secondderivative of a kernel diagonal with respect to X. """ @@ -260,7 +307,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if verbose: print("Checking gradients of dK(X, X) wrt X.") try: - testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=None) + testmodel = Kern_check_d2K_dXdX_no_cov(kern, X=X, X2=None) if fixed_X_dims is not None: testmodel.X[:,fixed_X_dims].fix() result = testmodel.checkgrad(verbose=verbose) @@ -276,11 +323,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb assert(result) pass_checks = False return False - + if verbose: print("Checking gradients of dK(X, X2) wrt X.") try: - testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=X2) + testmodel = Kern_check_d2K_dXdX_no_cov(kern, X=X, X2=X2) if fixed_X_dims is not None: testmodel.X[:,fixed_X_dims].fix() result = testmodel.checkgrad(verbose=verbose) @@ -297,6 +344,87 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb pass_checks = False return False + if verbose: + print("Checking gradients of dK(X, X) wrt X with full cov in dimensions") + try: + testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=None) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print(("gradients_X not implemented for " + kern.name)) + if result and verbose: + print("Check passed.") + if not result: + print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:")) + testmodel.checkgrad(verbose=True) + assert(result) + pass_checks = False + return False + + if verbose: + print("Checking gradients of dK(X, X2) wrt X with full cov in dimensions") + try: + testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=X2) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print(("gradients_X not implemented for " + kern.name)) + if result and verbose: + print("Check passed.") + if not result: + print(("Gradient of dK(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")) + testmodel.checkgrad(verbose=True) + assert(result) + pass_checks = False + return False + + + if verbose: + print("Checking gradients of dKdiag(X, X) wrt X.") + try: + testmodel = Kern_check_d2Kdiag_dXdX_no_cov(kern, X=X, X2=None) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print(("gradients_X not implemented for " + kern.name)) + if result and verbose: + print("Check passed.") + if not result: + print(("Gradient of dKdiag(X, X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:")) + testmodel.checkgrad(verbose=True) + assert(result) + pass_checks = False + return False + + if verbose: + print("Checking gradients of dKdiag(X, X) wrt X with cov in dimensions") + try: + testmodel = Kern_check_d2Kdiag_dXdX_cov(kern, X=X, X2=None) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print(("gradients_X not implemented for " + kern.name)) + if result and verbose: + print("Check passed.") + if not result: + print(("Gradient of dKdiag(X, X) wrt X with cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:")) + testmodel.checkgrad(verbose=True) + assert(result) + pass_checks = False + return False + return pass_checks @@ -304,8 +432,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb class KernelGradientTestsContinuous(unittest.TestCase): def setUp(self): self.N, self.D = 10, 5 - self.X = np.random.randn(self.N,self.D) - self.X2 = np.random.randn(self.N+10,self.D) + self.X = np.random.randn(self.N,self.D+1) + self.X2 = np.random.randn(self.N+10,self.D+1) continuous_kerns = ['RBF', 'Linear'] self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns] @@ -354,7 +482,7 @@ class KernelGradientTestsContinuous(unittest.TestCase): def test_Add_dims(self): k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k.randomize() - self.assertRaises(IndexError, k.K, self.X) + self.assertRaises(IndexError, k.K, self.X[:, :self.D]) k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k.randomize() # assert it runs: @@ -369,7 +497,7 @@ class KernelGradientTestsContinuous(unittest.TestCase): self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) def test_RBF(self): - k = GPy.kern.RBF(self.D, ARD=True) + k = GPy.kern.RBF(self.D-1, ARD=True) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) @@ -384,9 +512,8 @@ class KernelGradientTestsContinuous(unittest.TestCase): self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) def test_Fixed(self): - Xall = np.concatenate([self.X, self.X]) - cov = np.dot(Xall, Xall.T) - X = np.arange(self.N).reshape(1,self.N) + cov = np.dot(self.X, self.X.T) + X = np.arange(self.N).reshape(self.N, 1) k = GPy.kern.Fixed(1, cov) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=None, verbose=verbose)) @@ -409,11 +536,11 @@ class KernelGradientTestsContinuous(unittest.TestCase): def test_Precomputed(self): Xall = np.concatenate([self.X, self.X2]) cov = np.dot(Xall, Xall.T) - X = np.arange(self.N).reshape(1,self.N) - X2 = np.arange(self.N,2*self.N+10).reshape(1,self.N+10) + X = np.arange(self.N).reshape(self.N, 1) + X2 = np.arange(self.N,2*self.N+10).reshape(self.N+10, 1) k = GPy.kern.Precomputed(1, cov) k.randomize() - self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=X2, verbose=verbose)) + self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=X2, verbose=verbose, fixed_X_dims=[0])) class KernelTestsMiscellaneous(unittest.TestCase): def setUp(self):
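
Note (not part of the patch): the change above adds a `cov` keyword to `gradients_XX` and `gradients_XX_diag` and fixes how the slicing wrappers shape their return arrays. The following is a minimal sketch of how the new call signatures could be exercised once this patch is applied; the array sizes N, M, Q and the use of the RBF kernel are illustrative assumptions, and the expected shapes are taken from the `ret_shape` logic in kernel_slice_operations.py (full cross-dimension blocks with cov=True, per-dimension diagonals with cov=False), mirroring what the new tests in kernel_tests.py check.

    # Sketch only: assumes GPy with this patch applied and that the chosen
    # kernel (RBF here, as in the tests above) supports the cov keyword.
    import numpy as np
    import GPy

    N, M, Q = 10, 15, 3                      # illustrative sizes
    X = np.random.randn(N, Q)
    X2 = np.random.randn(M, Q)
    dL_dK = np.ones((N, M))                  # dummy objective gradient
    dL_dKdiag = np.ones(N)

    k = GPy.kern.RBF(Q, ARD=True)

    # Second derivatives of K(X, X2) w.r.t. X:
    d2_full = k.gradients_XX(dL_dK, X, X2, cov=True)    # expected (N, M, Q, Q)
    d2_diag = k.gradients_XX(dL_dK, X, X2, cov=False)   # expected (N, M, Q)

    # Second derivatives of Kdiag(X) w.r.t. X:
    d2k_full = k.gradients_XX_diag(dL_dKdiag, X, cov=True)   # expected (N, Q, Q)
    d2k_diag = k.gradients_XX_diag(dL_dKdiag, X, cov=False)  # expected (N, Q)

    print(d2_full.shape, d2_diag.shape, d2k_full.shape, d2k_diag.shape)

Summing the cov=True blocks over their trailing dimension(s) recovers the cov=False quantities, which is the reduction Kern_check_d2K_dXdX_cov and Kern_check_d2Kdiag_dXdX_cov rely on in the tests above.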