Merge pull request #587 from esiivola/feature-multioutput

Merge the implementation of Multioutput kernel
This commit is contained in:
Zhenwen Dai 2018-01-26 17:53:42 +00:00 committed by GitHub
commit a6c154753d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 158 additions and 4 deletions

View file

@ -42,3 +42,4 @@ from .src.sde_standard_periodic import sde_StdPeriodic
from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian
from .src.multioutput_kern import MultioutputKern

View file

@ -185,6 +185,9 @@ class Kern(Parameterized):
def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def reset_gradients(self):
raise NotImplementedError
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
@ -345,7 +348,7 @@ class CombinationKernel(Kern):
A combination kernel combines (a list of) kernels and works on those.
Examples are the HierarchicalKernel or Add and Prod kernels.
"""
def __init__(self, kernels, name, extra_dims=[]):
def __init__(self, kernels, name, extra_dims=[], link_parameters=True):
"""
Abstract super class for combination kernels.
A combination kernel combines (a list of) kernels and works on those.
@ -369,7 +372,8 @@ class CombinationKernel(Kern):
self._all_dims_active = np.array(np.concatenate((np.arange(effective_input_dim), extra_dims if extra_dims is not None else [])), dtype=int)
self.extra_dims = extra_dims
self.link_parameters(*kernels)
if link_parameters:
self.link_parameters(*kernels)
def _to_dict(self):
input_dict = super(CombinationKernel, self)._to_dict()

View file

@ -0,0 +1,131 @@
from .kern import Kern, CombinationKernel
import numpy as np
from functools import reduce, partial
from .independent_outputs import index_to_slices
from paramz.caching import Cache_this
class ZeroKern(Kern):
def __init__(self):
super(ZeroKern, self).__init__(1, None, name='ZeroKern',useGPU=False)
def K(self, X ,X2=None):
if X2 is None:
X2 = X
return np.zeros((X.shape[0],X2.shape[0]))
def update_gradients_full(self,dL_dK, X, X2=None):
return np.zeros(dL_dK.shape)
def gradients_X(self,dL_dK, X, X2=None):
return np.zeros((X.shape[0],X.shape[1]))
class MultioutputKern(CombinationKernel):
"""
Multioutput kernel is a meta class for combining different kernels for multioutput GPs.
As an example let us have inputs x1 for output 1 with covariance k1 and x2 for output 2 with covariance k2.
In addition, we need to define the cross covariances k12(x1,x2) and k21(x2,x1). Then the kernel becomes:
k([x1,x2],[x1,x2]) = [k1(x1,x1) k12(x1, x2); k21(x2, x1), k2(x2,x2)]
For the kernel, the kernels of outputs are given as list in param "kernels" and cross covariances are
given in param "cross_covariances" as a dictionary of tuples (i,j) as keys. If no cross covariance is given,
it defaults to zero, as in k12(x1,x2)=0.
In the cross covariance dictionary, the value needs to be a struct with elements
-'kernel': a member of Kernel class that stores the hyper parameters to be updated when optimizing the GP
-'K': function defining the cross covariance
-'update_gradients_full': a function to be used for updating gradients
-'gradients_X': gives a gradient of the cross covariance with respect to the first input
"""
def __init__(self, kernels, cross_covariances={}, name='MultioutputKern'):
#kernels contains a list of kernels as input,
if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
# The combination kernel ALLWAYS puts the extra dimension last.
# Thus, the index dimension of this kernel is always the last dimension
# after slicing. This is why the index_dim is just the last column:
self.index_dim = -1
super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False)
nl = len(kernels)
#build covariance structure
covariance = [[None for i in range(nl)] for j in range(nl)]
linked = []
for i in range(0,nl):
unique=True
for j in range(0,nl):
if i==j or (kernels[i] is kernels[j]):
covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X}
if i>j:
unique=False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
covariance[i][j] = cross_covariances.get((i,j))
else: # zero covariance structure
kern = ZeroKern()
covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
if unique is True:
linked.append(i)
self.covariance = covariance
self.link_parameters(*[kernels[i] for i in linked])
@Cache_this(limit=3, ignore_args=())
def K(self, X ,X2=None):
if X2 is None:
X2 = X
slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
return target
@Cache_this(limit=3, ignore_args=())
def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0])
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target
def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2):
gradient = cov_struct['kern'].gradient.copy()
cov_struct['update_gradients_full'](dL_dK, X, X2)
cov_struct['kern'].gradient += gradient
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
gradient = kern.gradient.copy()
kern.update_gradients_diag(dL_dKdiag, X)
kern.gradient += gradient
def reset_gradients(self):
for kern in self.kern: kern.reset_gradients()
def update_gradients_full(self,dL_dK, X, X2=None):
self.reset_gradients()
slices = index_to_slices(X[:,self.index_dim])
if X2 is not None:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else:
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
def update_gradients_diag(self, dL_dKdiag, X):
self.reset_gradients()
slices = index_to_slices(X[:,self.index_dim])
[[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
def gradients_X(self,dL_dK, X, X2=None):
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros((X.shape[0], X.shape[1]) )
if X2 is not None:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else:
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
return target

View file

@ -171,6 +171,13 @@ class Stationary(Kern):
ret[:] = self.variance
return ret
def reset_gradients(self):
self.variance.gradient = 0.
if not self.ARD:
self.lengthscale.gradient = 0.
else:
self.lengthscale.gradient = np.zeros(self.input_dim)
def update_gradients_diag(self, dL_dKdiag, X):
"""
Given the derivative of the objective with respect to the diagonal of
@ -182,7 +189,7 @@ class Stationary(Kern):
self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
def update_gradients_full(self, dL_dK, X, X2=None, reset=True):
"""
Given the derivative of the objective wrt the covariance matrix
(dL_dK), compute the gradient wrt the parameters of this kernel,
@ -632,7 +639,7 @@ class RatQuad(Stationary):
def dK_dr(self, r):
r2 = np.square(r)
# return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.)
return-self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.))
return -self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.))
def update_gradients_full(self, dL_dK, X, X2=None):
super(RatQuad, self).update_gradients_full(dL_dK, X, X2)

View file

@ -482,6 +482,17 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k = GPy.kern.StdPeriodic(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_MultioutputKern(self):
k1 = GPy.kern.RBF(self.D, ARD=True)
k1.randomize()
k2 = GPy.kern.RBF(self.D, ARD=True)
k2.randomize()
k = GPy.kern.MultioutputKern([k1, k2])
Xt,_,_ = GPy.util.multioutput.build_XY([self.X, self.X])
X2t,_,_ = GPy.util.multioutput.build_XY([self.X2, self.X2])
self.assertTrue(check_kernel_gradient_functions(k, X=Xt, X2=X2t, verbose=verbose, fixed_X_dims=-1))
def test_Precomputed(self):
Xall = np.concatenate([self.X, self.X2])