diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index d8239910..96abab39 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -42,3 +42,4 @@ from .src.sde_standard_periodic import sde_StdPeriodic from .src.sde_static import sde_White, sde_Bias from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_brownian import sde_Brownian +from .src.multioutput_kern import MultioutputKern \ No newline at end of file diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index b9971b8c..c08489e2 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -185,6 +185,9 @@ class Kern(Parameterized): def update_gradients_full(self, dL_dK, X, X2): """Set the gradients of all parameters when doing full (N) inference.""" raise NotImplementedError + + def reset_gradients(self): + raise NotImplementedError def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): """ @@ -345,7 +348,7 @@ class CombinationKernel(Kern): A combination kernel combines (a list of) kernels and works on those. Examples are the HierarchicalKernel or Add and Prod kernels. """ - def __init__(self, kernels, name, extra_dims=[]): + def __init__(self, kernels, name, extra_dims=[], link_parameters=True): """ Abstract super class for combination kernels. A combination kernel combines (a list of) kernels and works on those. @@ -369,7 +372,8 @@ class CombinationKernel(Kern): self._all_dims_active = np.array(np.concatenate((np.arange(effective_input_dim), extra_dims if extra_dims is not None else [])), dtype=int) self.extra_dims = extra_dims - self.link_parameters(*kernels) + if link_parameters: + self.link_parameters(*kernels) def _to_dict(self): input_dict = super(CombinationKernel, self)._to_dict() diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py new file mode 100644 index 00000000..b7feaadb --- /dev/null +++ b/GPy/kern/src/multioutput_kern.py @@ -0,0 +1,131 @@ +from .kern import Kern, CombinationKernel +import numpy as np +from functools import reduce, partial +from .independent_outputs import index_to_slices +from paramz.caching import Cache_this + +class ZeroKern(Kern): + def __init__(self): + super(ZeroKern, self).__init__(1, None, name='ZeroKern',useGPU=False) + + def K(self, X ,X2=None): + if X2 is None: + X2 = X + return np.zeros((X.shape[0],X2.shape[0])) + + def update_gradients_full(self,dL_dK, X, X2=None): + return np.zeros(dL_dK.shape) + + def gradients_X(self,dL_dK, X, X2=None): + return np.zeros((X.shape[0],X.shape[1])) + +class MultioutputKern(CombinationKernel): + """ + Multioutput kernel is a meta class for combining different kernels for multioutput GPs. + + As an example let us have inputs x1 for output 1 with covariance k1 and x2 for output 2 with covariance k2. + In addition, we need to define the cross covariances k12(x1,x2) and k21(x2,x1). Then the kernel becomes: + k([x1,x2],[x1,x2]) = [k1(x1,x1) k12(x1, x2); k21(x2, x1), k2(x2,x2)] + + For the kernel, the kernels of outputs are given as list in param "kernels" and cross covariances are + given in param "cross_covariances" as a dictionary of tuples (i,j) as keys. If no cross covariance is given, + it defaults to zero, as in k12(x1,x2)=0. + + In the cross covariance dictionary, the value needs to be a struct with elements + -'kernel': a member of Kernel class that stores the hyper parameters to be updated when optimizing the GP + -'K': function defining the cross covariance + -'update_gradients_full': a function to be used for updating gradients + -'gradients_X': gives a gradient of the cross covariance with respect to the first input + """ + def __init__(self, kernels, cross_covariances={}, name='MultioutputKern'): + #kernels contains a list of kernels as input, + if not isinstance(kernels, list): + self.single_kern = True + self.kern = kernels + kernels = [kernels] + else: + self.single_kern = False + self.kern = kernels + + # The combination kernel ALLWAYS puts the extra dimension last. + # Thus, the index dimension of this kernel is always the last dimension + # after slicing. This is why the index_dim is just the last column: + self.index_dim = -1 + + super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False) + + nl = len(kernels) + #build covariance structure + covariance = [[None for i in range(nl)] for j in range(nl)] + linked = [] + for i in range(0,nl): + unique=True + for j in range(0,nl): + if i==j or (kernels[i] is kernels[j]): + covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X} + if i>j: + unique=False + elif cross_covariances.get((i,j)) is not None: #cross covariance is given + covariance[i][j] = cross_covariances.get((i,j)) + else: # zero covariance structure + kern = ZeroKern() + covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X} + if unique is True: + linked.append(i) + self.covariance = covariance + self.link_parameters(*[kernels[i] for i in linked]) + + @Cache_this(limit=3, ignore_args=()) + def K(self, X ,X2=None): + if X2 is None: + X2 = X + slices = index_to_slices(X[:,self.index_dim]) + slices2 = index_to_slices(X2[:,self.index_dim]) + target = np.zeros((X.shape[0], X2.shape[0])) + [[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] + return target + + @Cache_this(limit=3, ignore_args=()) + def Kdiag(self,X): + slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern + target = np.zeros(X.shape[0]) + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] + return target + + def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2): + gradient = cov_struct['kern'].gradient.copy() + cov_struct['update_gradients_full'](dL_dK, X, X2) + cov_struct['kern'].gradient += gradient + + def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X): + gradient = kern.gradient.copy() + kern.update_gradients_diag(dL_dKdiag, X) + kern.gradient += gradient + + def reset_gradients(self): + for kern in self.kern: kern.reset_gradients() + + def update_gradients_full(self,dL_dK, X, X2=None): + self.reset_gradients() + slices = index_to_slices(X[:,self.index_dim]) + if X2 is not None: + slices2 = index_to_slices(X2[:,self.index_dim]) + [[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + else: + [[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] + + def update_gradients_diag(self, dL_dKdiag, X): + self.reset_gradients() + slices = index_to_slices(X[:,self.index_dim]) + [[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] + + def gradients_X(self,dL_dK, X, X2=None): + slices = index_to_slices(X[:,self.index_dim]) + target = np.zeros((X.shape[0], X.shape[1]) ) + if X2 is not None: + slices2 = index_to_slices(X2[:,self.index_dim]) + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + else: + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] + return target \ No newline at end of file diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 4e8ddb77..81129a75 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -171,6 +171,13 @@ class Stationary(Kern): ret[:] = self.variance return ret + def reset_gradients(self): + self.variance.gradient = 0. + if not self.ARD: + self.lengthscale.gradient = 0. + else: + self.lengthscale.gradient = np.zeros(self.input_dim) + def update_gradients_diag(self, dL_dKdiag, X): """ Given the derivative of the objective with respect to the diagonal of @@ -182,7 +189,7 @@ class Stationary(Kern): self.variance.gradient = np.sum(dL_dKdiag) self.lengthscale.gradient = 0. - def update_gradients_full(self, dL_dK, X, X2=None): + def update_gradients_full(self, dL_dK, X, X2=None, reset=True): """ Given the derivative of the objective wrt the covariance matrix (dL_dK), compute the gradient wrt the parameters of this kernel, @@ -632,7 +639,7 @@ class RatQuad(Stationary): def dK_dr(self, r): r2 = np.square(r) # return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.) - return-self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.)) + return -self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.)) def update_gradients_full(self, dL_dK, X, X2=None): super(RatQuad, self).update_gradients_full(dL_dK, X, X2) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 053fce35..e1c9d934 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -482,6 +482,17 @@ class KernelGradientTestsContinuous(unittest.TestCase): k = GPy.kern.StdPeriodic(self.D) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + + def test_MultioutputKern(self): + k1 = GPy.kern.RBF(self.D, ARD=True) + k1.randomize() + k2 = GPy.kern.RBF(self.D, ARD=True) + k2.randomize() + + k = GPy.kern.MultioutputKern([k1, k2]) + Xt,_,_ = GPy.util.multioutput.build_XY([self.X, self.X]) + X2t,_,_ = GPy.util.multioutput.build_XY([self.X2, self.X2]) + self.assertTrue(check_kernel_gradient_functions(k, X=Xt, X2=X2t, verbose=verbose, fixed_X_dims=-1)) def test_Precomputed(self): Xall = np.concatenate([self.X, self.X2])