From 397f3ead2cb40fd3486134b4b671ebce426ee237 Mon Sep 17 00:00:00 2001 From: Siivola Eero Date: Wed, 27 Dec 2017 15:25:50 +0200 Subject: [PATCH 1/5] Multioutput kernel + initial test --- GPy/kern/__init__.py | 1 + GPy/kern/src/kern.py | 3 +++ GPy/testing/kernel_tests.py | 7 +++++++ 3 files changed, 11 insertions(+) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index d8239910..96abab39 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -42,3 +42,4 @@ from .src.sde_standard_periodic import sde_StdPeriodic from .src.sde_static import sde_White, sde_Bias from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_brownian import sde_Brownian +from .src.multioutput_kern import MultioutputKern \ No newline at end of file diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index b9971b8c..bac85d58 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -206,6 +206,9 @@ class Kern(Parameterized): dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0] self.gradient[:] = dtheta + def reset_gradients(self): + raise NotImplementedError + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, psi0=None, psi1=None, psi2=None): """ diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 053fce35..0d4db63b 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -482,6 +482,13 @@ class KernelGradientTestsContinuous(unittest.TestCase): k = GPy.kern.StdPeriodic(self.D) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + + def test_MultioutputKern(self): + k1 = GPy.kern.RBF(self.D-1, ARD=True) + k1.randomize() + k2 = GPy.kern.RBF(self.D-1, ARD=True) + k2.randomize() + k = GPy.kern.MultioutputKern([k1,k2],) def test_Precomputed(self): Xall = np.concatenate([self.X, self.X2]) From 09a96fe8d7267710e302eed32b140e7b8907369f Mon Sep 17 00:00:00 2001 From: Siivola Eero Date: Wed, 27 Dec 2017 15:35:55 +0200 Subject: [PATCH 2/5] Multioutput kernel + initial test --- GPy/kern/src/multioutput_kern.py | 88 ++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 GPy/kern/src/multioutput_kern.py diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py new file mode 100644 index 00000000..4614ef5a --- /dev/null +++ b/GPy/kern/src/multioutput_kern.py @@ -0,0 +1,88 @@ +from .kern import Kern, CombinationKernel +import numpy as np +from functools import reduce, partial +from GPy.util.multioutput import index_to_slices +from paramz.caching import Cache_this + +class MultioutputKern(CombinationKernel): + def __init__(self, kernels, cross_covariances, name='MultioutputKern'): + #kernels contains a list of kernels as input, + if not isinstance(kernels, list): + self.single_kern = True + self.kern = kernels + kernels = [kernels] + else: + self.single_kern = False + self.kern = kernels + + # The combination kernel ALLWAYS puts the extra dimension last. + # Thus, the index dimension of this kernel is always the last dimension + # after slicing. This is why the index_dim is just the last column: + self.index_dim = -1 + + super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_params=False) + + nl = len(kernels) + #build covariance structure + covariance = [[None for i in range(nl)] for j in range(nl)] + linked = [] + for i in range(0,nl): + unique=True + for j in range(0,nl): + if i==j or (kernels[i] is kernels[j]): + covariance[i][j] = {'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X} + if i>j: + unique=False + elif cross_covariances.get((i,j)) is not None: #cross covariance is given + covariance[i][j] = cross_covariances.get((i,j)) + else: # zero matrix + covariance[i][j] = {'K': lambda x, x2: np.zeros((x.shape[0],x2.shape[0])), 'update_gradients_full': lambda x, x2: np.zeros((x.shape[0],x2.shape[0])), 'gradients_X': lambda x, x2: np.zeros((x.shape[0],x.shape[1]))} + if unique is True: + linked.append(i) + self.covariance = covariance + self.link_parameters(*[kernels[i] for i in linked]) + + @Cache_this(limit=3, ignore_args=()) + def K(self, X ,X2=None): + if X2 is None: + X2 = X + slices = index_to_slices(X[:,self.index_dim]) + slices2 = index_to_slices(X2[:,self.index_dim]) + target = np.zeros((X.shape[0], X2.shape[0])) + [[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] + return target + + @Cache_this(limit=3, ignore_args=()) + def Kdiag(self,X): + slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern + target = np.zeros(X.shape[0]) + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] + return target + + def reset_gradients(self): + for kern in self.kern: kern.reset_gradients() + + def update_gradients_full(self,dL_dK,X,X2=None, reset=True): + if reset: + self.reset_gradients() + if X2 is None: + X2 = X + slices = index_to_slices(X[:,self.index_dim]) + slices2 = index_to_slices(X2[:,self.index_dim]) + [[[[ self.covariance[i][j]['update_gradients_full'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:], False) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + + def update_gradients_diag(self, dL_dKdiag, X): + for kern in self.kerns: kern.reset_gradients() + slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern + [[ self.kerns[i].update_gradients_diag(dL_dKdiag[slices[i][k]], X[slices[i][k],:], False) for k in range(len(slices[i]))] for i in range(len(slices))] + + def gradients_X(self,dL_dK, X, X2=None): + if X2 is None: + X2 = X + slices = index_to_slices(X[:,self.index_dim]) + slices2 = index_to_slices(X2[:,self.index_dim]) + target = np.zeros((X.shape[0], X.shape[1]) ) + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + return target \ No newline at end of file From e0ec5077211a4424b063a29f770f92b107f538cf Mon Sep 17 00:00:00 2001 From: Eero Siivola Date: Thu, 28 Dec 2017 18:06:34 +0200 Subject: [PATCH 3/5] Added multioutput kern and tests --- GPy/kern/src/kern.py | 8 +++-- GPy/kern/src/kernel_slice_operations.py | 8 ++--- GPy/kern/src/multioutput_kern.py | 39 +++++++++++++------------ GPy/kern/src/rbf.py | 8 ++--- GPy/kern/src/stationary.py | 27 +++++++++++------ GPy/testing/kernel_tests.py | 11 +++++-- 6 files changed, 60 insertions(+), 41 deletions(-) diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index bac85d58..6a7aea19 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -185,6 +185,9 @@ class Kern(Parameterized): def update_gradients_full(self, dL_dK, X, X2): """Set the gradients of all parameters when doing full (N) inference.""" raise NotImplementedError + + def reset_gradients(self): + raise NotImplementedError def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): """ @@ -348,7 +351,7 @@ class CombinationKernel(Kern): A combination kernel combines (a list of) kernels and works on those. Examples are the HierarchicalKernel or Add and Prod kernels. """ - def __init__(self, kernels, name, extra_dims=[]): + def __init__(self, kernels, name, extra_dims=[], link_parameters=True): """ Abstract super class for combination kernels. A combination kernel combines (a list of) kernels and works on those. @@ -372,7 +375,8 @@ class CombinationKernel(Kern): self._all_dims_active = np.array(np.concatenate((np.arange(effective_input_dim), extra_dims if extra_dims is not None else [])), dtype=int) self.extra_dims = extra_dims - self.link_parameters(*kernels) + if link_parameters: + self.link_parameters(*kernels) def _to_dict(self): input_dict = super(CombinationKernel, self)._to_dict() diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py index 327436e7..50a1e2e8 100644 --- a/GPy/kern/src/kernel_slice_operations.py +++ b/GPy/kern/src/kernel_slice_operations.py @@ -97,17 +97,17 @@ def _slice_Kdiag(f): def _slice_update_gradients_full(f): @wraps(f) - def wrap(self, dL_dK, X, X2=None): + def wrap(self, dL_dK, X, X2=None, *a, **kw): with _Slice_wrap(self, X, X2) as s: - ret = f(self, dL_dK, s.X, s.X2) + ret = f(self, dL_dK, s.X, s.X2, *a, **kw) return ret return wrap def _slice_update_gradients_diag(f): @wraps(f) - def wrap(self, dL_dKdiag, X): + def wrap(self, dL_dKdiag, X, *a, **kw): with _Slice_wrap(self, X, None) as s: - ret = f(self, dL_dKdiag, s.X) + ret = f(self, dL_dKdiag, s.X, *a, **kw) return ret return wrap diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py index 4614ef5a..c196e2cd 100644 --- a/GPy/kern/src/multioutput_kern.py +++ b/GPy/kern/src/multioutput_kern.py @@ -1,11 +1,11 @@ from .kern import Kern, CombinationKernel import numpy as np from functools import reduce, partial -from GPy.util.multioutput import index_to_slices +from .independent_outputs import index_to_slices from paramz.caching import Cache_this class MultioutputKern(CombinationKernel): - def __init__(self, kernels, cross_covariances, name='MultioutputKern'): + def __init__(self, kernels, cross_covariances={}, name='MultioutputKern'): #kernels contains a list of kernels as input, if not isinstance(kernels, list): self.single_kern = True @@ -20,7 +20,7 @@ class MultioutputKern(CombinationKernel): # after slicing. This is why the index_dim is just the last column: self.index_dim = -1 - super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_params=False) + super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False) nl = len(kernels) #build covariance structure @@ -36,7 +36,7 @@ class MultioutputKern(CombinationKernel): elif cross_covariances.get((i,j)) is not None: #cross covariance is given covariance[i][j] = cross_covariances.get((i,j)) else: # zero matrix - covariance[i][j] = {'K': lambda x, x2: np.zeros((x.shape[0],x2.shape[0])), 'update_gradients_full': lambda x, x2: np.zeros((x.shape[0],x2.shape[0])), 'gradients_X': lambda x, x2: np.zeros((x.shape[0],x.shape[1]))} + covariance[i][j] = {'K': lambda x, x2: np.zeros((x.shape[0],x2.shape[0])), 'update_gradients_full': lambda dl_dk, x, x2, reset: np.zeros(dl_dk.shape), 'gradients_X': lambda dl_dk, x, x2: np.zeros((x.shape[0],x.shape[1]))} if unique is True: linked.append(i) self.covariance = covariance @@ -63,26 +63,27 @@ class MultioutputKern(CombinationKernel): def reset_gradients(self): for kern in self.kern: kern.reset_gradients() - def update_gradients_full(self,dL_dK,X,X2=None, reset=True): - if reset: - self.reset_gradients() - if X2 is None: - X2 = X + def update_gradients_full(self,dL_dK, X, X2=None, reset=True): + if reset: self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) - slices2 = index_to_slices(X2[:,self.index_dim]) - [[[[ self.covariance[i][j]['update_gradients_full'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:], False) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] - - def update_gradients_diag(self, dL_dKdiag, X): - for kern in self.kerns: kern.reset_gradients() + if X2 is not None: + slices2 = index_to_slices(X2[:,self.index_dim]) + [[[[ self.covariance[i][j]['update_gradients_full'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:], False) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + else: + [[[[ self.covariance[i][j]['update_gradients_full'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:] , False) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] + + def update_gradients_diag(self, dL_dKdiag, X, reset=True): + if reset: self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) kerns = itertools.repeat(self.kern) if self.single_kern else self.kern - [[ self.kerns[i].update_gradients_diag(dL_dKdiag[slices[i][k]], X[slices[i][k],:], False) for k in range(len(slices[i]))] for i in range(len(slices))] + [[ self.kern[i].update_gradients_diag(dL_dKdiag[slices[i][k]], X[slices[i][k],:], False) for k in range(len(slices[i]))] for i in range(len(slices))] def gradients_X(self,dL_dK, X, X2=None): - if X2 is None: - X2 = X slices = index_to_slices(X[:,self.index_dim]) - slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X.shape[1]) ) - [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + if X2 is not None: + slices2 = index_to_slices(X2[:,self.index_dim]) + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + else: + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] return target \ No newline at end of file diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index 0b6730d8..479e1357 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -107,10 +107,10 @@ class RBF(Stationary): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:] - def update_gradients_diag(self, dL_dKdiag, X): - super(RBF,self).update_gradients_diag(dL_dKdiag, X) + def update_gradients_diag(self, dL_dKdiag, X, reset=True): + super(RBF,self).update_gradients_diag(dL_dKdiag, X, reset=reset) if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.) - def update_gradients_full(self, dL_dK, X, X2=None): - super(RBF,self).update_gradients_full(dL_dK, X, X2) + def update_gradients_full(self, dL_dK, X, X2=None, reset=True): + super(RBF,self).update_gradients_full(dL_dK, X, X2, reset=reset) if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.) diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 4e8ddb77..cd09a4a2 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -171,7 +171,14 @@ class Stationary(Kern): ret[:] = self.variance return ret - def update_gradients_diag(self, dL_dKdiag, X): + def reset_gradients(self): + self.variance.gradient = 0. + if not self.ARD: + self.lengthscale.gradient = 0. + else: + self.lengthscale.gradient = np.zeros(self.input_dim) + + def update_gradients_diag(self, dL_dKdiag, X, reset=True): """ Given the derivative of the objective with respect to the diagonal of the covariance matrix, compute the derivative wrt the parameters of @@ -179,16 +186,18 @@ class Stationary(Kern): See also update_gradients_full """ - self.variance.gradient = np.sum(dL_dKdiag) - self.lengthscale.gradient = 0. + if reset: self.reset_gradients() + self.variance.gradient += np.sum(dL_dKdiag) + self.lengthscale.gradient += 0. - def update_gradients_full(self, dL_dK, X, X2=None): + def update_gradients_full(self, dL_dK, X, X2=None, reset=True): """ Given the derivative of the objective wrt the covariance matrix (dL_dK), compute the gradient wrt the parameters of this kernel, and store in the parameters object as e.g. self.variance.gradient """ - self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance + if reset: self.reset_gradients() + self.variance.gradient += np.sum(self.K(X, X2)* dL_dK)/self.variance #now the lengthscale gradient(s) dL_dr = self.dK_dr_via_X(X, X2) * dL_dK @@ -197,12 +206,12 @@ class Stationary(Kern): tmp = dL_dr*self._inv_dist(X, X2) if X2 is None: X2 = X if config.getboolean('cython', 'working'): - self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2) + self.lengthscale.gradient += self._lengthscale_grads_cython(tmp, X, X2) else: - self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2) + self.lengthscale.gradient += self._lengthscale_grads_pure(tmp, X, X2) else: r = self._scaled_dist(X, X2) - self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale + self.lengthscale.gradient += -np.sum(dL_dr*r)/self.lengthscale def update_gradients_direct(self, dL_dVar, dL_dLen): @@ -632,7 +641,7 @@ class RatQuad(Stationary): def dK_dr(self, r): r2 = np.square(r) # return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.) - return-self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.)) + return -self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.)) def update_gradients_full(self, dL_dK, X, X2=None): super(RatQuad, self).update_gradients_full(dL_dK, X, X2) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 0d4db63b..f903c9aa 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -484,11 +484,16 @@ class KernelGradientTestsContinuous(unittest.TestCase): self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) def test_MultioutputKern(self): - k1 = GPy.kern.RBF(self.D-1, ARD=True) + k1 = GPy.kern.RBF(self.D, ARD=True) k1.randomize() - k2 = GPy.kern.RBF(self.D-1, ARD=True) + k2 = GPy.kern.RBF(self.D, ARD=True) k2.randomize() - k = GPy.kern.MultioutputKern([k1,k2],) + + k = GPy.kern.MultioutputKern([k1, k2]) + Xt,_,_ = GPy.util.multioutput.build_XY([self.X, self.X]) + X2t,_,_ = GPy.util.multioutput.build_XY([self.X2, self.X2]) + self.assertTrue(check_kernel_gradient_functions(k, X=Xt, X2=X2t, verbose=verbose, fixed_X_dims=-1)) + def test_Precomputed(self): Xall = np.concatenate([self.X, self.X2]) From d7e7ed5987addb2a4869ec12b748180de9494f6b Mon Sep 17 00:00:00 2001 From: Eero Siivola Date: Tue, 23 Jan 2018 15:21:42 +0200 Subject: [PATCH 4/5] Changed the structure of multioutput kernel so that it doesn't change the API of Kernels + documented the class --- GPy/kern/src/kern.py | 3 -- GPy/kern/src/kernel_slice_operations.py | 8 +-- GPy/kern/src/multioutput_kern.py | 66 ++++++++++++++++++++----- GPy/kern/src/rbf.py | 8 +-- GPy/kern/src/stationary.py | 16 +++--- GPy/testing/kernel_tests.py | 1 - 6 files changed, 69 insertions(+), 33 deletions(-) diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index 6a7aea19..c08489e2 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -209,9 +209,6 @@ class Kern(Parameterized): dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0] self.gradient[:] = dtheta - def reset_gradients(self): - raise NotImplementedError - def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, psi0=None, psi1=None, psi2=None): """ diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py index 50a1e2e8..327436e7 100644 --- a/GPy/kern/src/kernel_slice_operations.py +++ b/GPy/kern/src/kernel_slice_operations.py @@ -97,17 +97,17 @@ def _slice_Kdiag(f): def _slice_update_gradients_full(f): @wraps(f) - def wrap(self, dL_dK, X, X2=None, *a, **kw): + def wrap(self, dL_dK, X, X2=None): with _Slice_wrap(self, X, X2) as s: - ret = f(self, dL_dK, s.X, s.X2, *a, **kw) + ret = f(self, dL_dK, s.X, s.X2) return ret return wrap def _slice_update_gradients_diag(f): @wraps(f) - def wrap(self, dL_dKdiag, X, *a, **kw): + def wrap(self, dL_dKdiag, X): with _Slice_wrap(self, X, None) as s: - ret = f(self, dL_dKdiag, s.X, *a, **kw) + ret = f(self, dL_dKdiag, s.X) return ret return wrap diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py index c196e2cd..7c499092 100644 --- a/GPy/kern/src/multioutput_kern.py +++ b/GPy/kern/src/multioutput_kern.py @@ -4,7 +4,39 @@ from functools import reduce, partial from .independent_outputs import index_to_slices from paramz.caching import Cache_this +class ZeroKern(Kern): + def __init__(self): + super(ZeroKern, self).__init__(1, None, name='ZeroKern',useGPU=False) + + def K(self, X ,X2=None): + if X2 is None: + X2 = X + return np.zeros((X.shape[0],X2.shape[0])) + + def update_gradients_full(self,dL_dK, X, X2=None): + return np.zeros(dL_dK.shape) + + def gradients_X(self,dL_dK, X, X2=None): + return np.zeros((X.shape[0],X.shape[1])) + class MultioutputKern(CombinationKernel): + """ + Multioutput kernel is a meta class for combining different kernels for multioutput GPs. + + As an example let us have inputs x1 for output 1 with covariance k1 and x2 for output 2 with covariance k2. + In addition, we need to define the cross covariances k12(x1,x2) and k21(x2,x1). Then the kernel becomes: + k([x1,x2],[x1,x2]) = [k1(x1,x1) k12(x1, x2); k21(x2, x1), k2(x2,x2)] + + For the kernel, the kernels of outputs are given as list in param "kernels" and cross covariances are + given in param "cross_covariances" as a dictionary of tuples (i,j) as keys. If no cross covariance is given, + it defaults to zero, as in k12(x1,x2)=0. + + In the cross covariance dictionary, the value needs to be a struct with elements + -'kernel': a member of Kernel class that stores the hyper parameters to be updated when optimizing the GP + -'K': function defining the cross covariance + -'update_gradients_full': a function to be used for updating gradients + -'gradients_X': gives a gradient of the cross covariance with respect to the first input + """ def __init__(self, kernels, cross_covariances={}, name='MultioutputKern'): #kernels contains a list of kernels as input, if not isinstance(kernels, list): @@ -30,13 +62,14 @@ class MultioutputKern(CombinationKernel): unique=True for j in range(0,nl): if i==j or (kernels[i] is kernels[j]): - covariance[i][j] = {'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X} + covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X} if i>j: unique=False elif cross_covariances.get((i,j)) is not None: #cross covariance is given covariance[i][j] = cross_covariances.get((i,j)) - else: # zero matrix - covariance[i][j] = {'K': lambda x, x2: np.zeros((x.shape[0],x2.shape[0])), 'update_gradients_full': lambda dl_dk, x, x2, reset: np.zeros(dl_dk.shape), 'gradients_X': lambda dl_dk, x, x2: np.zeros((x.shape[0],x.shape[1]))} + else: # zero covariance structure + kern = ZeroKern() + covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X} if unique is True: linked.append(i) self.covariance = covariance @@ -59,24 +92,33 @@ class MultioutputKern(CombinationKernel): target = np.zeros(X.shape[0]) [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target - + + def update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2): + gradient = cov_struct['kern'].gradient.copy() + cov_struct['update_gradients_full'](dL_dK, X, X2) + cov_struct['kern'].gradient += gradient + + def update_gradients_diag_wrapper(self, kern, dL_dKdiag, X): + gradient = kern.gradient.copy() + kern.update_gradients_diag(dL_dKdiag, X) + kern.gradient += gradient + def reset_gradients(self): for kern in self.kern: kern.reset_gradients() - def update_gradients_full(self,dL_dK, X, X2=None, reset=True): - if reset: self.reset_gradients() + def update_gradients_full(self,dL_dK, X, X2=None): + self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) if X2 is not None: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[[ self.covariance[i][j]['update_gradients_full'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:], False) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + [[[[ self.update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] else: - [[[[ self.covariance[i][j]['update_gradients_full'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:] , False) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] + [[[[ self.update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] - def update_gradients_diag(self, dL_dKdiag, X, reset=True): - if reset: self.reset_gradients() + def update_gradients_diag(self, dL_dKdiag, X): + self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) - kerns = itertools.repeat(self.kern) if self.single_kern else self.kern - [[ self.kern[i].update_gradients_diag(dL_dKdiag[slices[i][k]], X[slices[i][k],:], False) for k in range(len(slices[i]))] for i in range(len(slices))] + [[ self.update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] def gradients_X(self,dL_dK, X, X2=None): slices = index_to_slices(X[:,self.index_dim]) diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index 479e1357..0b6730d8 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -107,10 +107,10 @@ class RBF(Stationary): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:] - def update_gradients_diag(self, dL_dKdiag, X, reset=True): - super(RBF,self).update_gradients_diag(dL_dKdiag, X, reset=reset) + def update_gradients_diag(self, dL_dKdiag, X): + super(RBF,self).update_gradients_diag(dL_dKdiag, X) if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.) - def update_gradients_full(self, dL_dK, X, X2=None, reset=True): - super(RBF,self).update_gradients_full(dL_dK, X, X2, reset=reset) + def update_gradients_full(self, dL_dK, X, X2=None): + super(RBF,self).update_gradients_full(dL_dK, X, X2) if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.) diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index cd09a4a2..81129a75 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -178,7 +178,7 @@ class Stationary(Kern): else: self.lengthscale.gradient = np.zeros(self.input_dim) - def update_gradients_diag(self, dL_dKdiag, X, reset=True): + def update_gradients_diag(self, dL_dKdiag, X): """ Given the derivative of the objective with respect to the diagonal of the covariance matrix, compute the derivative wrt the parameters of @@ -186,9 +186,8 @@ class Stationary(Kern): See also update_gradients_full """ - if reset: self.reset_gradients() - self.variance.gradient += np.sum(dL_dKdiag) - self.lengthscale.gradient += 0. + self.variance.gradient = np.sum(dL_dKdiag) + self.lengthscale.gradient = 0. def update_gradients_full(self, dL_dK, X, X2=None, reset=True): """ @@ -196,8 +195,7 @@ class Stationary(Kern): (dL_dK), compute the gradient wrt the parameters of this kernel, and store in the parameters object as e.g. self.variance.gradient """ - if reset: self.reset_gradients() - self.variance.gradient += np.sum(self.K(X, X2)* dL_dK)/self.variance + self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance #now the lengthscale gradient(s) dL_dr = self.dK_dr_via_X(X, X2) * dL_dK @@ -206,12 +204,12 @@ class Stationary(Kern): tmp = dL_dr*self._inv_dist(X, X2) if X2 is None: X2 = X if config.getboolean('cython', 'working'): - self.lengthscale.gradient += self._lengthscale_grads_cython(tmp, X, X2) + self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2) else: - self.lengthscale.gradient += self._lengthscale_grads_pure(tmp, X, X2) + self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2) else: r = self._scaled_dist(X, X2) - self.lengthscale.gradient += -np.sum(dL_dr*r)/self.lengthscale + self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale def update_gradients_direct(self, dL_dVar, dL_dLen): diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index f903c9aa..e1c9d934 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -494,7 +494,6 @@ class KernelGradientTestsContinuous(unittest.TestCase): X2t,_,_ = GPy.util.multioutput.build_XY([self.X2, self.X2]) self.assertTrue(check_kernel_gradient_functions(k, X=Xt, X2=X2t, verbose=verbose, fixed_X_dims=-1)) - def test_Precomputed(self): Xall = np.concatenate([self.X, self.X2]) cov = np.dot(Xall, Xall.T) From 4f532216ad5fd8aa4e00c197f1fd456834b4ab38 Mon Sep 17 00:00:00 2001 From: Siivola Eero Date: Wed, 24 Jan 2018 13:40:34 +0200 Subject: [PATCH 5/5] Changed two function names so that they follow the python naming convention --- GPy/kern/src/multioutput_kern.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py index 7c499092..b7feaadb 100644 --- a/GPy/kern/src/multioutput_kern.py +++ b/GPy/kern/src/multioutput_kern.py @@ -93,12 +93,12 @@ class MultioutputKern(CombinationKernel): [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target - def update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2): + def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2): gradient = cov_struct['kern'].gradient.copy() cov_struct['update_gradients_full'](dL_dK, X, X2) cov_struct['kern'].gradient += gradient - def update_gradients_diag_wrapper(self, kern, dL_dKdiag, X): + def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X): gradient = kern.gradient.copy() kern.update_gradients_diag(dL_dKdiag, X) kern.gradient += gradient @@ -111,14 +111,14 @@ class MultioutputKern(CombinationKernel): slices = index_to_slices(X[:,self.index_dim]) if X2 is not None: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[[ self.update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + [[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] else: - [[[[ self.update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] + [[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] def update_gradients_diag(self, dL_dKdiag, X): self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) - [[ self.update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] + [[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] def gradients_X(self,dL_dK, X, X2=None): slices = index_to_slices(X[:,self.index_dim])