diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py
index ef171459..7a87b98d 100644
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@@ -174,7 +174,7 @@ class FITC(SparseGP):
     def dL_dZ(self):
         dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
-        dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
+        dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
         dL_dZ += self._dpsi1_dX
         dL_dZ += self._dKmm_dX
         return dL_dZ
diff --git a/GPy/core/model.py b/GPy/core/model.py
index c31ea209..2965cf94 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
@@ -456,8 +456,8 @@ class Model(Parameterized):
             gradient = self.objective_function_gradients(x)
             numerical_gradient = (f1 - f2) / (2 * dx)
-            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
-
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
+
             return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance
         else:
             # check the gradient of each parameter individually, and do some pretty printing
@@ -496,7 +496,7 @@ class Model(Parameterized):
                 gradient = self.objective_function_gradients(x)[i]
                 numerical_gradient = (f1 - f2) / (2 * step)
-                ratio = (f1 - f2) / (2 * step * gradient)
+                ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
                 difference = np.abs((f1 - f2) / 2 / step - gradient)
 
                 if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
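For reference, the ratio test that checkgrad relies on can be sketched in
isolation. This is a minimal standalone NumPy illustration (the names
objective and gradient_fn are hypothetical, not GPy API); it shows why
guarding the denominator with np.where matters when an analytic gradient is
exactly zero:

    import numpy as np

    def ratio_check(objective, gradient_fn, x, step=1e-6, tolerance=1e-3):
        """Compare analytic and central-difference gradients via their ratio."""
        g = gradient_fn(x)
        numerical = np.array([(objective(x + step * e) - objective(x - step * e)) / (2 * step)
                              for e in np.eye(x.size)])
        # Guard against zero analytic gradients, which would otherwise
        # produce a divide-by-zero in the ratio.
        ratio = numerical / np.where(g == 0., 1e-32, g)
        return np.all(np.abs(1. - ratio) < tolerance)

    # e.g. ratio_check(lambda x: np.sum(x**2), lambda x: 2*x, np.ones(3))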
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 40cfd404..abcdf72c 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -240,7 +240,7 @@ class SparseGP(GPBase):
         """
         The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
             dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
             dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
diff --git a/GPy/core/transformations.py b/GPy/core/transformations.py
index 419bc54e..59c6a563 100644
--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@@ -18,9 +18,11 @@ class transformation(object):
     def gradfactor(self, f):
         """ df_dx evaluated at self.f(x)=f"""
         raise NotImplementedError
+
     def initialize(self, f):
         """ produce a sensible initial value for f(x)"""
         raise NotImplementedError
+
     def __str__(self):
         raise NotImplementedError
@@ -42,15 +44,13 @@ class logexp(transformation):
 class negative_logexp(transformation):
     domain = NEGATIVE
     def f(self, x):
-        return -logexp.f(x) #np.log(1. + np.exp(x))
+        return -logexp.f(x)
     def finv(self, f):
-        return logexp.finv(-f) #np.log(np.exp(-f) - 1.)
+        return logexp.finv(-f)
     def gradfactor(self, f):
         return -logexp.gradfactor(-f)
-        #ef = np.exp(-f)
-        #return -(ef - 1.) / ef
     def initialize(self, f):
-        return -logexp.initialize(f) #np.abs(f)
+        return -logexp.initialize(f)
     def __str__(self):
         return '(-ve)'
@@ -82,7 +82,6 @@ class logexp_clipped(logexp):
         return '(+ve_c)'
 
 class exponent(transformation):
-    # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
     domain = POSITIVE
     def f(self, x):
         return np.where(x < lim_val, np.where(x > -lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index f7c0fd67..d788b4f5 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -5,7 +5,6 @@ import numpy as np
 from kern import kern
 import parts
-
 def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
     """
     Construct an RBF kernel
@@ -103,6 +102,12 @@ def gibbs(input_dim,variance=1., mapping=None):
     part = parts.gibbs.Gibbs(input_dim,variance,mapping)
     return kern(input_dim, [part])
 
+def hetero(input_dim, mapping=None, transform=None):
+    """
+    """
+    part = parts.hetero.Hetero(input_dim,mapping,transform)
+    return kern(input_dim, [part])
+
 def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
     """
     Construct a polynomial kernel
@@ -135,6 +140,7 @@ def white(input_dim,variance=1.):
     part = parts.white.White(input_dim,variance)
     return kern(input_dim, [part])
 
+
 def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
     """
     Construct an exponential kernel
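A note on the dropped factor of two: for a symmetric Gram matrix K(Z, Z),
each inducing point appears in both a row and a column, so the chain rule
contributes two "stripes" to the derivative of sum(W * K(Z, Z)) wrt Z. The
convention in this patch moves that factor inside dK_dX (triggered by X2
being None) rather than leaving it to every caller. A minimal standalone
NumPy check of the identity, using a plain unit-variance RBF Gram matrix
rather than the GPy API:

    import numpy as np

    def rbf_gram(A, B):
        sq = np.sum((A[:, None, :] - B[None, :, :])**2, -1)
        return np.exp(-0.5 * sq)

    Z = np.random.randn(4, 2)
    W = np.random.randn(4, 4)
    W = W + W.T   # dL_dKmm is symmetric
    eps = 1e-6

    # full derivative of sum(W * K(Z, Z)) wrt Z[0, 0]: Z appears twice
    Zp, Zm = Z.copy(), Z.copy()
    Zp[0, 0] += eps
    Zm[0, 0] -= eps
    full = (np.sum(W * rbf_gram(Zp, Zp)) - np.sum(W * rbf_gram(Zm, Zm))) / (2 * eps)

    # derivative wrt the first argument only, second argument held fixed
    one_sided = (np.sum(W * rbf_gram(Zp, Z)) - np.sum(W * rbf_gram(Zm, Z))) / (2 * eps)

    # both the row and column 'stripes' contribute, hence the factor of two
    assert np.isclose(full, 2 * one_sided, rtol=1e-4)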
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index bddb5036..9143854e 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -12,7 +12,9 @@ from matplotlib.transforms import offset_copy
 class kern(Parameterized):
     def __init__(self, input_dim, parts=[], input_slices=None):
         """
-        This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
+        This is the main kernel class for GPy. It handles multiple
+        (additive) kernel functions, and keeps track of various things
+        like which parameters live where.
 
         The technical code for kernels is divided into _parts_ (see
         e.g. rbf.py). This object contains a list of parts, which are
@@ -33,6 +35,11 @@ class kern(Parameterized):
 
         self.input_dim = input_dim
 
+        part_names = [k.name for k in self.parts]
+        self.name=''
+        for name in part_names:
+            self.name += name + '+'
+        self.name = self.name[:-1]
         # deal with input_slices
         if input_slices is None:
             self.input_slices = [slice(None) for p in self.parts]
@@ -333,10 +340,8 @@ class kern(Parameterized):
         :type X: np.ndarray (num_samples x input_dim)
         :param X2: Observed data inputs (optional, defaults to X)
         :type X2: np.ndarray (num_inducing x input_dim)"""
-        if X2 is None:
-            X2 = X
         target = np.zeros_like(X)
-        if X2 is None:
+        if X2 is None:
             [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         else:
             [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@@ -653,17 +658,85 @@ def kern_test(kern, X=None, X2=None, verbose=False):
     :param X2: X2 input values to test the covariance function.
     :type X2: ndarray
     """
+    pass_checks = True
     if X==None:
         X = np.random.randn(10, kern.input_dim)
     if X2==None:
         X2 = np.random.randn(20, kern.input_dim)
-    result = [Kern_check_model(kern, X=X).is_positive_definite(),
-              Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
-              Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
-              Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
-              Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
-              Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
-    # Need to check
-    #Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
-    # but currently I think these aren't implemented.
-    return np.all(result)
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt X.")
+    result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    return pass_checks
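With kern_test now naming the specific check that fails (and re-running it
verbosely via the new kern.name attribute), a kernel implementation can be
validated in a single call, e.g.:

    import GPy
    kern = GPy.kern.rbf(3)
    assert GPy.kern.kern_test(kern, verbose=True)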
diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/parts/Matern32.py
index 60f0b6e9..40da79f0 100644
--- a/GPy/kern/parts/Matern32.py
+++ b/GPy/kern/parts/Matern32.py
@@ -98,9 +98,13 @@ class Matern32(Kernpart):
 
     def dK_dX(self, dL_dK, X, X2, target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
-        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+        if X2 is None:
+            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
+            ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+
+        else:
+            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
+            ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
         dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
diff --git a/GPy/kern/parts/Matern52.py b/GPy/kern/parts/Matern52.py
index e02cb9bf..4bf4a1a8 100644
--- a/GPy/kern/parts/Matern52.py
+++ b/GPy/kern/parts/Matern52.py
@@ -98,9 +98,12 @@ class Matern52(Kernpart):
 
     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
-        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
+        if X2 is None:
+            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
+            ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
+        else:
+            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
+            ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 053e280f..4b7c2d9b 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -5,6 +5,8 @@ import exponential
 import finite_dimensional
 import fixed
 import gibbs
+import hetero
+import hierarchical
 import independent_outputs
 import linear
 import Matern32
@@ -19,8 +21,7 @@ import prod
 import rational_quadratic
 import rbfcos
 import rbf
+import rbf_inv
 import spline
 import symmetric
 import white
-import hierarchical
-import rbf_inv
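The np.where(dist != 0., dist, np.inf) idiom in the Matérn derivatives above
is worth noting: on the diagonal of K(X, X) the distance is zero, and
dividing the (also zero) difference vector by np.inf yields the correct
derivative of 0 rather than a NaN from 0/0. A two-line illustration:

    import numpy as np
    diff, dist = np.zeros(3), np.zeros(3)
    print(diff / np.where(dist != 0., dist, np.inf))   # [0. 0. 0.], no NaN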
diff --git a/GPy/kern/parts/gibbs.py b/GPy/kern/parts/gibbs.py
index 7ddd64f4..f47144e1 100644
--- a/GPy/kern/parts/gibbs.py
+++ b/GPy/kern/parts/gibbs.py
@@ -9,7 +9,7 @@ import GPy
 
 class Gibbs(Kernpart):
     """
-    Gibbs and MacKay non-stationary covariance function.
+    Gibbs non-stationary covariance function.
 
     .. math::
 
@@ -25,7 +25,10 @@
        with input location. This leads to an additional term in front
        of the kernel.
 
-    The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
+    The parameters are :math:`\sigma^2`, the process variance, and
+    the parameters of l(x), a function that can be specified by the
+    user; by default a multi-layer perceptron is used.
 
     :param input_dim: the number of input dimensions
     :type input_dim: int
@@ -37,6 +40,15 @@
     :type ARD: Boolean
     :rtype: Kernpart object
 
+    See Mark Gibbs's thesis for more details: Gibbs,
+    M. N. (1997). Bayesian Gaussian Processes for Regression and
+    Classification. PhD thesis, Department of Physics, University of
+    Cambridge. See also page 93 of Gaussian Processes for Machine
+    Learning by Rasmussen and Williams. Note that we do not
+    constrain the lengthscale to be positive by default; this allows
+    anticorrelation to occur. The positive constraint can be included
+    by the user manually.
+
     """
     def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
@@ -89,12 +101,18 @@
         """Derivative of the covariance matrix with respect to X."""
         # First account for gradients arising from presence of X in exponent.
         self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :]
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory
        dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
         target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
         # Now account for gradients arising from presence of X in lengthscale.
         self._dK_computations(dL_dK)
-        target += self.mapping.df_dX(self._dL_dl[:, None], X)
+        if X2 is None:
+            target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
+        else:
+            target += self.mapping.df_dX(self._dL_dl[:, None], X)
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X."""
@@ -102,7 +120,8 @@
 
     def dKdiag_dtheta(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to parameters."""
-        pass
+        target[0] += np.sum(dL_dKdiag)
+
 
     def _K_computations(self, X, X2=None):
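For orientation, the Gibbs covariance with an input-dependent lengthscale
l(x) has, in one dimension, the form given in Rasmussen and Williams
(eq. 4.32): a prefactor sqrt(2*l(x)*l(x') / (l(x)^2 + l(x')^2)) multiplying
exp(-(x - x')^2 / (l(x)^2 + l(x')^2)). A minimal standalone NumPy sketch of
that formula (the lengthscale function here is an arbitrary stand-in, not
the MLP mapping used by this part):

    import numpy as np

    def gibbs_k(x, x2, variance=1., l=lambda x: 1. + 0.5 * x**2):
        # x, x2: 1-D arrays of inputs; l: positive lengthscale function
        lx, lx2 = l(x)[:, None], l(x2)[None, :]
        w2 = lx**2 + lx2**2
        prefactor = np.sqrt(2. * lx * lx2 / w2)
        return variance * prefactor * np.exp(-(x[:, None] - x2[None, :])**2 / w2)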
diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/parts/kernpart.py
index edbdc091..f4b6783e 100644
--- a/GPy/kern/parts/kernpart.py
+++ b/GPy/kern/parts/kernpart.py
@@ -59,6 +59,45 @@ class Kernpart(object):
     def dK_dX(self, dL_dK, X, X2, target):
         raise NotImplementedError
+
+
+class Kernpart_stationary(Kernpart):
+    def __init__(self, input_dim, lengthscale=None, ARD=False):
+        self.input_dim = input_dim
+        self.ARD = ARD
+        if not ARD:
+            self.num_params = 2
+            if lengthscale is not None:
+                self.lengthscale = np.asarray(lengthscale)
+                assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
+            else:
+                self.lengthscale = np.ones(1)
+        else:
+            self.num_params = self.input_dim + 1
+            if lengthscale is not None:
+                self.lengthscale = np.asarray(lengthscale)
+                assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
+            else:
+                self.lengthscale = np.ones(self.input_dim)
+
+        # initialize cache
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))
+
+    def _set_params(self, x):
+        self.lengthscale = x
+        self.lengthscale2 = np.square(self.lengthscale)
+        # reset cached results
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
+
+    def dKdiag_dtheta(self, dL_dKdiag, X, target):
+        # For stationary covariances the diagonal is constant: its
+        # derivative wrt the lengthscale is 0, and its derivative wrt
+        # the variance (target[0]) is 1.
+        target[0] += np.sum(dL_dKdiag)
+
+
 class Kernpart_inner(Kernpart):
     def __init__(self,input_dim):
         """
@@ -74,3 +113,5 @@ class Kernpart_inner(Kernpart):
         # initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3, 1))
         self._X, self._X2, self._params = np.empty(shape=(3, 1))
+
+
diff --git a/GPy/kern/parts/linear.py b/GPy/kern/parts/linear.py
index e20270ad..ffcbcf5e 100644
--- a/GPy/kern/parts/linear.py
+++ b/GPy/kern/parts/linear.py
@@ -99,7 +99,10 @@ class Linear(Kernpart):
         target += tmp.sum()
 
     def dK_dX(self, dL_dK, X, X2, target):
-        target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
+        if X2 is None:
+            target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
+        else:
+            target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         target += 2.*self.variances*dL_dKdiag[:,None]*X
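Kernpart_stationary factors the lengthscale bookkeeping (ARD vs. single
lengthscale, parameter counts, cache resets) out of the individual
stationary parts. A hypothetical minimal subclass, just to show the intended
division of labour (Toy is not a GPy kernel; only K is assumed to be
kernel-specific here):

    import numpy as np
    from kernpart import Kernpart_stationary

    class Toy(Kernpart_stationary):
        """A stationary part: only the covariance itself is bespoke."""
        def __init__(self, input_dim, lengthscale=None, ARD=False):
            Kernpart_stationary.__init__(self, input_dim, lengthscale, ARD)
            self.name = 'toy'

        def K(self, X, X2, target):
            if X2 is None: X2 = X
            sq = np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)
            target += np.exp(-0.5 * sq)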
diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/parts/mlp.py
index 72fd376c..f4825f3d 100644
--- a/GPy/kern/parts/mlp.py
+++ b/GPy/kern/parts/mlp.py
@@ -110,9 +110,13 @@ class MLP(Kernpart):
         arg = self._K_asin_arg
         numer = self._K_numer
         denom = self._K_denom
-        vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
         denom3 = denom*denom*denom
-        target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+        if X2 is not None:
+            vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
+            target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+        else:
+            vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
+            target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X"""
diff --git a/GPy/kern/parts/poly.py b/GPy/kern/parts/poly.py
index b01f3a01..cdc65210 100644
--- a/GPy/kern/parts/poly.py
+++ b/GPy/kern/parts/poly.py
@@ -103,7 +103,10 @@ class POLY(Kernpart):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         arg = self._K_poly_arg
-        target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
+        if X2 is None:
+            target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
+        else:
+            target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X"""
diff --git a/GPy/kern/parts/rational_quadratic.py b/GPy/kern/parts/rational_quadratic.py
index d92b43db..61473f9c 100644
--- a/GPy/kern/parts/rational_quadratic.py
+++ b/GPy/kern/parts/rational_quadratic.py
@@ -70,10 +70,12 @@ class RationalQuadratic(Kernpart):
 
     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist2 = np.square((X-X2.T)/self.lengthscale)
-
-        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        if X2 is None:
+            dist2 = np.square((X-X.T)/self.lengthscale)
+            dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        else:
+            dist2 = np.square((X-X2.T)/self.lengthscale)
+            dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
         target += np.sum(dL_dK*dX,1)[:,np.newaxis]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py
index 345134bd..1b65133f 100644
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@@ -138,7 +138,10 @@ class RBF(Kernpart):
 
     def dK_dX(self, dL_dK, X, X2, target):
         self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
         dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/parts/rbf_inv.py
index 649da044..0433e96c 100644
--- a/GPy/kern/parts/rbf_inv.py
+++ b/GPy/kern/parts/rbf_inv.py
@@ -133,7 +133,10 @@ class RBFInv(RBF):
 
     def dK_dX(self, dL_dK, X, X2, target):
         self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
         dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
diff --git a/GPy/models/bcgplvm.py b/GPy/models/bcgplvm.py
index b6246c32..9f5866c3 100644
--- a/GPy/models/bcgplvm.py
+++ b/GPy/models/bcgplvm.py
@@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
         GP._set_params(self, x[self.mapping.num_params:])
 
     def _log_likelihood_gradients(self):
-        dL_df = 2.*self.kern.dK_dX(self.dL_dK, self.X)
+        dL_df = self.kern.dK_dX(self.dL_dK, self.X)
         dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
         return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index c2a7768c..ad78d51f 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -61,7 +61,7 @@ class GPLVM(GP):
         GP._set_params(self, x[self.X.size:])
 
     def _log_likelihood_gradients(self):
-        dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
+        dL_dX = self.kern.dK_dX(self.dL_dK, self.X)
         return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))
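Since the factor of two now lives inside kern.dK_dX, callers like GPLVM take
the kernel gradient directly. The change is easy to sanity-check from the
model level with the built-in gradient checker; roughly (the exact GPLVM
constructor signature may differ between GPy versions):

    import numpy as np
    import GPy

    Y = np.random.randn(20, 4)
    m = GPy.models.GPLVM(Y, input_dim=2, kernel=GPy.kern.rbf(2))
    assert m.checkgrad()   # latent-X gradients now correct without the 2x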
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 65a8da77..3f81f472 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -5,7 +5,7 @@ import unittest
 import numpy as np
 import GPy
-
+verbose = False
 
 class KernelTests(unittest.TestCase):
     def test_kerneltie(self):
@@ -19,25 +19,54 @@ class KernelTests(unittest.TestCase):
         self.assertTrue(m.checkgrad())
 
     def test_rbfkernel(self):
-        verbose = False
-        kern = GPy.kern.rbf(5)
-        self.assertTrue(GPy.kern.Kern_check_model(kern).is_positive_definite())
-        self.assertTrue(GPy.kern.Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
-        self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
-        self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose))
+        kern = GPy.kern.rbf(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_rbf_invkernel(self):
+        kern = GPy.kern.rbf_inv(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_Matern32kernel(self):
+        kern = GPy.kern.Matern32(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_Matern52kernel(self):
+        kern = GPy.kern.Matern52(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_linearkernel(self):
+        kern = GPy.kern.linear(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_periodic_exponentialkernel(self):
+        kern = GPy.kern.periodic_exponential(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_periodic_Matern32kernel(self):
+        kern = GPy.kern.periodic_Matern32(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_periodic_Matern52kernel(self):
+        kern = GPy.kern.periodic_Matern52(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_rational_quadratickernel(self):
+        kern = GPy.kern.rational_quadratic(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
     def test_gibbskernel(self):
-        verbose = False
         kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
+    def test_heterokernel(self):
+        kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp())
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
     def test_mlpkernel(self):
-        verbose = False
         kern = GPy.kern.mlp(5)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
     def test_polykernel(self):
-        verbose = False
         kern = GPy.kern.poly(5, degree=4)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
@@ -48,9 +77,8 @@ class KernelTests(unittest.TestCase):
         X = np.random.rand(30, 4)
         K = np.dot(X, X.T)
         kernel = GPy.kern.fixed(4, K)
-        Y = np.ones((30,1))
-        m = GPy.models.GPRegression(X,Y,kernel=kernel)
-        self.assertTrue(m.checkgrad())
+        self.assertTrue(GPy.kern.kern_test(kernel, X=X, X2=X, verbose=verbose))
 
     def test_coregionalisation(self):
         X1 = np.random.rand(50,1)*8
@@ -63,9 +91,8 @@ class KernelTests(unittest.TestCase):
 
         k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
         k2 = GPy.kern.coregionalise(2,1)
-        k = k1.prod(k2,tensor=True)
-        m = GPy.models.GPRegression(X,Y,kernel=k)
-        self.assertTrue(m.checkgrad())
+        kern = k1**k2
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
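The expanded suite can be run on its own with the standard unittest runner,
assuming GPy is importable on the path:

    python -m unittest GPy.testing.kernel_tests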