diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 327bf69c..81dce75f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs try: from constructors import rbf_sympy, sympykern # these depend on sympy except: diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 9c2464a7..f2e4b57b 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from prod import prod as prodpart -from prod_orthogonal import prod_orthogonal as prod_orthogonalpart from symmetric import symmetric as symmetric_part from coregionalise import coregionalise as coregionalise_part from rational_quadratic import rational_quadratic as rational_quadraticpart @@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper) return kern(D, [part]) -def prod(k1,k2): +def prod(k1,k2,tensor=False): """ Construct a product kernel over D from two kernels over D @@ -263,19 +262,8 @@ def prod(k1,k2): :type k1, k2: kernpart :rtype: kernel object """ - part = prodpart(k1,k2) - return kern(k1.D, [part]) - -def prod_orthogonal(k1,k2): - """ - Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2. - - :param k1, k2: the kernels to multiply - :type k1, k2: kernpart - :rtype: kernel object - """ - part = prod_orthogonalpart(k1,k2) - return kern(k1.D+k2.D, [part]) + part = prodpart(k1,k2,tensor) + return kern(part.D, [part]) def symmetric(k): """ diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 5075b428..7383ecf4 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -7,7 +7,6 @@ import pylab as pb from ..core.parameterised import parameterised from kernpart import kernpart import itertools -from prod_orthogonal import prod_orthogonal from prod import prod from ..util.linalg import symmetrify @@ -84,96 +83,72 @@ class kern(parameterised): count += p.Nparam def __add__(self, other): - assert self.D == other.D - newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) - # transfer constraints: - newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] - newkern.constraints = self.constraints + other.constraints - newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] - newkern.fixed_values = self.fixed_values + other.fixed_values - newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] - return newkern + """ + Shortcut for `add`. + """ + return self.add(other) - def add(self, other): + def add(self, other,tensor=False): """ Add another kernel to this one. Both kernels are defined on the same _space_ :param other: the other kernel to be added :type other: GPy.kern """ - return self + other + if tensor: + D = self.D + other.D + self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices] + other_input_indices = [sl.indices(other.D) for sl in other.input_slices] + other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices] - def add_orthogonal(self, other): - """ - Add another kernel to this one. Both kernels are defined on separate spaces - :param other: the other kernel to be added - :type other: GPy.kern - """ - # deal with input slices - D = self.D + other.D - self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices] - other_input_indices = [sl.indices(other.D) for sl in other.input_slices] - other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices] + newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) - newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) - - # transfer constraints: - newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] - newkern.constraints = self.constraints + other.constraints - newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] - newkern.fixed_values = self.fixed_values + other.fixed_values - newkern.constraints = self.constraints + other.constraints - newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers - newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] + # transfer constraints: + newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] + newkern.constraints = self.constraints + other.constraints + newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] + newkern.fixed_values = self.fixed_values + other.fixed_values + newkern.constraints = self.constraints + other.constraints + newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] + else: + assert self.D == other.D + newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) + # transfer constraints: + newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] + newkern.constraints = self.constraints + other.constraints + newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] + newkern.fixed_values = self.fixed_values + other.fixed_values + newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] return newkern def __mul__(self, other): """ - Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces. + Shortcut for `prod`. """ return self.prod(other) - def prod(self, other): + def prod(self, other,tensor=False): """ - multiply two kernels defined on the same spaces. + multiply two kernels (either on the same space, or on the tensor product of the input space) :param other: the other kernel to be added :type other: GPy.kern """ K1 = self.copy() K2 = other.copy() - newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)] - slices = [] - for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.D, [False] * K2.D + for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices): + s1, s2 = [False]*K1.D, [False]*K2.D s1[sl1], s2[sl2] = [True], [True] - slices += [s1 + s2] + slices += [s1+s2] + + newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] + + if tensor: + newkern = kern(K1.D + K2.D, newkernparts, slices) + else: + newkern = kern(K1.D, newkernparts, slices) - newkern = kern(K1.D, newkernparts, slices) newkern._follow_constrains(K1, K2) - - return newkern - - def prod_orthogonal(self, other): - """ - multiply two kernels. Both kernels are defined on separate spaces. - :param other: the other kernel to be added - :type other: GPy.kern - """ - K1 = self.copy() - K2 = other.copy() - - newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)] - - slices = [] - for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.D, [False] * K2.D - s1[sl1], s2[sl2] = [True], [True] - slices += [s1 + s2] - - newkern = kern(K1.D + K2.D, newkernparts, slices) - newkern._follow_constrains(K1, K2) - return newkern def _follow_constrains(self, K1, K2): @@ -469,9 +444,9 @@ class kern(parameterised): return target_mu, target_S - def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs): - if which_functions == 'all': - which_functions = [True] * self.Nparts + def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): + if which_parts == 'all': + which_parts = [True] * self.Nparts if self.D == 1: if x is None: x = np.zeros((1, 1)) @@ -488,7 +463,7 @@ class kern(parameterised): raise ValueError, "Bad limits for plotting" Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None] - Kx = self.K(Xnew, x, slices2=which_functions) + Kx = self.K(Xnew, x, which_parts) pb.plot(Xnew, Kx, *args, **kwargs) pb.xlim(xmin, xmax) pb.xlabel("x") @@ -514,7 +489,7 @@ class kern(parameterised): xg = np.linspace(xmin[0], xmax[0], resolution) yg = np.linspace(xmin[1], xmax[1], resolution) Xnew = np.vstack((xx.flatten(), yy.flatten())).T - Kx = self.K(Xnew, x, slices2=which_functions) + Kx = self.K(Xnew, x, which_parts) Kx = Kx.reshape(resolution, resolution).T pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) pb.xlim(xmin[0], xmax[0]) diff --git a/GPy/kern/prod.py b/GPy/kern/prod.py index c16d6034..f61906bb 100644 --- a/GPy/kern/prod.py +++ b/GPy/kern/prod.py @@ -4,108 +4,108 @@ from kernpart import kernpart import numpy as np import hashlib -#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb) class prod(kernpart): """ - Computes the product of 2 kernels that are defined on the same space + Computes the product of 2 kernels :param k1, k2: the kernels to multiply :type k1, k2: kernpart + :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces + :type tensor: Boolean :rtype: kernel object """ - def __init__(self,k1,k2): - assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension" - self.D = k1.D + def __init__(self,k1,k2,tensor=False): self.Nparam = k1.Nparam + k2.Nparam self.name = k1.name + '' + k2.name self.k1 = k1 self.k2 = k2 + if tensor: + self.D = k1.D + k2.D + self.slice1 = slice(0,self.k1.D) + self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D) + else: + assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension." + self.D = k1.D + self.slice1 = slice(0,self.D) + self.slice2 = slice(0,self.D) + + self._X, self._X2, self._params = np.empty(shape=(3,1)) self._set_params(np.hstack((k1._get_params(),k2._get_params()))) def _get_params(self): """return the value of the parameters.""" - return self.params + return np.hstack((self.k1._get_params(), self.k2._get_params())) def _set_params(self,x): """set the value of the parameters.""" self.k1._set_params(x[:self.k1.Nparam]) self.k2._set_params(x[self.k1.Nparam:]) - self.params = x def _get_param_names(self): """return parameter names.""" return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()] def K(self,X,X2,target): - """Compute the covariance matrix between X and X2.""" - if X2 is None: - target1 = np.zeros((X.shape[0],X2.shape[0])) - target2 = np.zeros((X.shape[0],X2.shape[0])) - else: - target1 = np.zeros((X.shape[0],X.shape[0])) - target2 = np.zeros((X.shape[0],X.shape[0])) - self.k1.K(X,X2,target1) - self.k2.K(X,X2,target2) - target += target1 * target2 - - def Kdiag(self,X,target): - """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros((X.shape[0],)) - target2 = np.zeros((X.shape[0],)) - self.k1.Kdiag(X,target1) - self.k2.Kdiag(X,target2) - target += target1 * target2 + self._K_computations(X,X2) + target += self._K1 * self._K2 def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - K1 = np.zeros((X.shape[0],X2.shape[0])) - K2 = np.zeros((X.shape[0],X2.shape[0])) - self.k1.K(X,X2,K1) - self.k2.K(X,X2,K2) + self._K_computations(X,X2) + if X2 is None: + self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:]) + else: + self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:]) - k1_target = np.zeros(self.k1.Nparam) - k2_target = np.zeros(self.k2.Nparam) - self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target) - self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + target1 = np.zeros(X.shape[0]) + target2 = np.zeros(X.shape[0]) + self.k1.Kdiag(X[:,self.slice1],target1) + self.k2.Kdiag(X[:,self.slice2],target2) + target += target1 * target2 - target[:self.k1.Nparam] += k1_target - target[self.k1.Nparam:] += k2_target + def dKdiag_dtheta(self,dL_dKdiag,X,target): + K1 = np.zeros(X.shape[0]) + K2 = np.zeros(X.shape[0]) + self.k1.Kdiag(X[:,self.slice1],K1) + self.k2.Kdiag(X[:,self.slice2],K2) + self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam]) + self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:]) def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" - if X2 is None: X2 = X - K1 = np.zeros((X.shape[0],X2.shape[0])) - K2 = np.zeros((X.shape[0],X2.shape[0])) - self.k1.K(X,X2,K1) - self.k2.K(X,X2,K2) + self._K_computations(X,X2) + self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target) + self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target) - self.k1.dK_dX(dL_dK*K2, X, X2, target) - self.k2.dK_dX(dL_dK*K1, X, X2, target) + def dKdiag_dX(self, dL_dKdiag, X, target): + K1 = np.zeros(X.shape[0]) + K2 = np.zeros(X.shape[0]) + self.k1.Kdiag(X[:,self.slice1],K1) + self.k2.Kdiag(X[:,self.slice2],K2) - def dKdiag_dX(self,dL_dKdiag,X,target): - target1 = np.zeros((X.shape[0],)) - target2 = np.zeros((X.shape[0],)) - self.k1.Kdiag(X,target1) - self.k2.Kdiag(X,target2) + self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target) + self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target) - self.k1.dKdiag_dX(dL_dKdiag*target2, X, target) - self.k2.dKdiag_dX(dL_dKdiag*target1, X, target) - - def dKdiag_dtheta(self,dL_dKdiag,X,target): - """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros((X.shape[0],)) - target2 = np.zeros((X.shape[0],)) - self.k1.Kdiag(X,target1) - self.k2.Kdiag(X,target2) - - k1_target = np.zeros(self.k1.Nparam) - k2_target = np.zeros(self.k2.Nparam) - self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target) - self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target) - - target[:self.k1.Nparam] += k1_target - target[self.k1.Nparam:] += k2_target + def _K_computations(self,X,X2): + if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): + self._X = X.copy() + self._params == self._get_params().copy() + if X2 is None: + self._X2 = None + self._K1 = np.zeros((X.shape[0],X.shape[0])) + self._K2 = np.zeros((X.shape[0],X.shape[0])) + self.k1.K(X[:,self.slice1],None,self._K1) + self.k2.K(X[:,self.slice2],None,self._K2) + else: + self._X2 = X2.copy() + self._K1 = np.zeros((X.shape[0],X2.shape[0])) + self._K2 = np.zeros((X.shape[0],X2.shape[0])) + self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) + self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) diff --git a/doc/Figures/tuto_kern_overview_mANOVA.png b/doc/Figures/tuto_kern_overview_mANOVA.png index f7ff5f50..255a3a80 100644 Binary files a/doc/Figures/tuto_kern_overview_mANOVA.png and b/doc/Figures/tuto_kern_overview_mANOVA.png differ diff --git a/doc/Figures/tuto_kern_overview_mANOVAdec.png b/doc/Figures/tuto_kern_overview_mANOVAdec.png index 6a1b7b73..45b7ae2a 100644 Binary files a/doc/Figures/tuto_kern_overview_mANOVAdec.png and b/doc/Figures/tuto_kern_overview_mANOVAdec.png differ diff --git a/doc/Figures/tuto_kern_overview_multadd.png b/doc/Figures/tuto_kern_overview_multadd.png index a6aa888a..72b6797b 100644 Binary files a/doc/Figures/tuto_kern_overview_multadd.png and b/doc/Figures/tuto_kern_overview_multadd.png differ diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst index 27f895ba..391881d8 100644 --- a/doc/tuto_kernel_overview.rst +++ b/doc/tuto_kernel_overview.rst @@ -55,18 +55,18 @@ In ``GPy``, kernel objects can be added or multiplied. In both cases, two kinds * a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)` * a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)` -These two options are available in GPy under the name ``prod`` and ``prod_orthogonal`` (resp. ``add`` and ``add_orthogonal`` for the addition). Here is a quick example :: +These two options are available in GPy using the flag ``tensor`` in the ``add`` and ``prod`` functions. Here is a quick example :: k1 = GPy.kern.rbf(1,1.,2.) k2 = GPy.kern.Matern32(1, 0.5, 0.2) # Product of kernels - k_prod = k1.prod(k2) - k_prodorth = k1.prod_orthogonal(k2) + k_prod = k1.prod(k2) # By default, tensor=False + k_prodtens = k1.prod(k2,tensor=True) # Sum of kernels - k_add = k1.add(k2) - k_addorth = k1.add_orthogonal(k2) + k_add = k1.add(k2) # By default, tensor=False + k_addtens = k1.add(k2,tensor=True) .. # plots pb.figure(figsize=(8,8)) @@ -74,21 +74,21 @@ These two options are available in GPy under the name ``prod`` and ``prod_orthog k_prod.plot() pb.title('prod') pb.subplot(2,2,2) - k_prodorth.plot() - pb.title('prod_orthogonal') + k_prodtens.plot() + pb.title('tensor prod') pb.subplot(2,2,3) k_add.plot() - pb.title('add') + pb.title('sum') pb.subplot(2,2,4) - k_addorth.plot() - pb.title('add_orthogonal') + k_addtens.plot() + pb.title('tensor sum') pb.subplots_adjust(wspace=0.3, hspace=0.3) .. figure:: Figures/tuto_kern_overview_multadd.png :align: center :height: 500px -A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay :: +A shortcut for ``add`` and ``prod`` (with default flag ``tensor=False``) is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay :: k1 = GPy.kern.rbf(1,1.,2) k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5) @@ -113,7 +113,7 @@ A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` ope :align: center :height: 300px -In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions denied on the same space. For example, the following code :: +In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions defined on the same space. For example, the following code :: k = (k1+k2)*(k1+k2) print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name @@ -184,7 +184,7 @@ Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel fo k_cst = GPy.kern.bias(1,variance=1.) k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3) - Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat) + Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True) print Kanova Printing the resulting kernel outputs the following :: @@ -236,14 +236,14 @@ The submodels can be represented with the option ``which_function`` of ``plot``: pb.subplot(1,5,2) pb.ylabel("= ",rotation='horizontal',fontsize='30') pb.subplot(1,5,3) - m.plot(which_functions=[False,True,False,False]) + m.plot(which_parts=[False,True,False,False]) pb.ylabel("cst +",rotation='horizontal',fontsize='30') pb.subplot(1,5,4) - m.plot(which_functions=[False,False,True,False]) + m.plot(which_parts=[False,False,True,False]) pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.subplot(1,5,5) pb.ylabel("+ ",rotation='horizontal',fontsize='30') - m.plot(which_functions=[False,False,False,True]) + m.plot(which_parts=[False,False,False,True]) .. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight') @@ -252,7 +252,8 @@ The submodels can be represented with the option ``which_function`` of ``plot``: :height: 250px -.. import pylab as pb +.. # code + import pylab as pb import numpy as np import GPy pb.ion()