Enhance multioutput grad obs (#995)

* multiplied RBF kernels can now be used with gradient observations

* standard periodic kernels can now be used with gradient observations

* predictive gradients (derivatives of posterior means and variances) can now be calculated when using gradient observations

* simplified and commented RBF & StdP kernel derivatives

* updated kernel slicing and commented prod kernel derivatives

* removed caching from stdp kern, as it breaks optimization for some reason

* fixed hyperparameter optimization for prod kernel

* improved code readability

* added unit tests for gradient observing MultioutputGP models

* added predictions check to unit tests

* bugfix for multioutput_kern

* improved testing coverage

* reduced size of some tests; led to an issue in an unrelated test

* updated testing

* added gradient MultioutputGP prod kernel example

* added keywords and plotting to example
This commit is contained in:
mgranit 2023-09-21 16:53:42 +03:00 committed by GitHub
parent 2c22f1e9c5
commit 9c1db7aa34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 1494 additions and 164 deletions

View file

@ -771,3 +771,117 @@ def multioutput_gp_with_derivative_observations(plot=True):
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))])
return m
def multioutput_gp_with_derivative_observations_2D(optimize=True, plot=False):
'''
This in an example on how to use a MultioutputGP model with gradient
observations and multiple single-dimensional kernels of differing types.
'''
period = 3
w = 2*np.pi/period # angular frequency
bounds = (-period, period)
# latent function and gradient
f = lambda x: (np.exp(-x[:,0]**2) + np.cos(w*x[:,1]))[:,None]
df = lambda x: np.array([-2*np.exp(-x[:,0]**2)*x[:,0], -w*np.sin(w*x[:,1])]).T
# 2D input grid
ppa = 25 # points per axis
x = np.linspace(*bounds, ppa)
xx, yy = np.meshgrid(x, x)
grid = np.array([xx.reshape(-1), yy.reshape(-1)]).T
fgrid = f(grid)
dfgrid = df(grid)
# 10 random training points generated with a space-filling sobol sequence
X = np.array([
[ 0.50421399, 2.1331483 ],
[-2.15717152, -1.70295936],
[-1.46704334, 1.37111521],
[ 2.79064536, -0.9649018 ],
[ 1.60728264, 0.27702713],
[-0.30712366, -0.57372129],
[-2.6140632 , 2.49192488],
[ 0.89078772, -2.85873686],
[ 1.15813136, 0.96910322],
[-2.83307021, -1.38155383]
])
# Note!
# This example uses the same inputs for function and gradient observations.
noise_std = 1e-2
# function observations
Y = f(X) + np.random.normal(scale=noise_std, size=(len(X), 1))
# gradient observations
dY = df(X) + np.random.normal(scale=noise_std, size=(len(X), 2))
# gather inputs and observations into lists
X_list = [X, X, X]
# once for function observations, and once for each partial derivative
# make sure all arrays are of shape (N x dims), where N is # of training points
Y_list = [Y, dY[:,0,None], dY[:,1,None]]
# create a kernel that is the product of two one-dimensional kernels
# the first kernel is an RBF kernel
kern0 = GPy.kern.RBF(input_dim=1, active_dims=[0])
# as the function is periodic in the second dimension, we use a StdP kernel
kern1 = GPy.kern.StdPeriodic(input_dim=1, active_dims=[1], period=period)
kern1.period.constrain_fixed()
# the kernels can be multiplied together into a product kernel
kern = kern0 * kern1
# with gradient observations, we need to define a DiffKern for each dimension
# the DiffKern is given the main kernel as a base kernel
diffkern0 = GPy.kern.DiffKern(kern, 0)
diffkern1 = GPy.kern.DiffKern(kern, 1)
# gather the main kernel and diffkerns into a list
kern_list = [kern, diffkern0, diffkern1]
# define a likelihood and repeat it in a list
likelihood_list = [GPy.likelihoods.Gaussian(variance=noise_std**2)]*3
# create the MultioutputGP model and optimize
model = GPy.models.MultioutputGP(X_list, Y_list, kern_list, likelihood_list)
model.likelihood.constrain_fixed()
if optimize:
model.optimize()
# make function predictions
Xnew, _, ind = GPy.util.multioutput.build_XY([grid], index=[0])
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
mu, var = model.predict(Xnew, Y_metadata=Y_metadata)
# make gradient predictions
Xnew, _, ind = GPy.util.multioutput.build_XY([grid]*2, index=[1, 2])
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
mu_d, var_d = model.predict(Xnew, Y_metadata=Y_metadata)
mu_d = np.array([mu_d[:len(grid)], mu_d[len(grid):]]).T[0]
var_d = np.array([var_d[:len(grid)], var_d[len(grid):]]).T[0]
if plot and MPL_AVAILABLE:
fig, axs = plt.subplots(1, 3)
for ax in axs: ax.set_box_aspect(1)
axs[0].set_title('true f')
axs[0].contourf(xx, yy, fgrid.reshape(ppa, ppa), levels=25)
axs[1].set_title('true df1')
axs[1].contourf(xx, yy, dfgrid[:,0].reshape(ppa, ppa), levels=25)
axs[2].set_title('true df2')
axs[2].contourf(xx, yy, dfgrid[:,1].reshape(ppa, ppa), levels=25)
fig, axs = plt.subplots(1, 3)
for ax in axs: ax.set_box_aspect(1)
axs[0].set_title('pred f')
axs[0].contourf(xx, yy, mu.reshape(ppa, ppa), levels=25)
axs[1].set_title('pred df1')
axs[1].contourf(xx, yy, mu_d[:,0].reshape(ppa, ppa), levels=25)
axs[2].set_title('pred df2')
axs[2].contourf(xx, yy, mu_d[:,1].reshape(ppa, ppa), levels=25)
return model

View file

@ -23,24 +23,42 @@ class DiffKern(Kern):
self.base_kern.parameters_changed()
@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension
def K(self, X, X2=None, dimX2=None): #X in dimension self.dimension
if X2 is None:
X2 = X
if dimX2 is None:
dimX2 = self.dimension
return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2)
return self.base_kern.dK2_dXdX2(X, X2, self.dimension, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX, dimX2=None):
if dimX2 is None:
dimX2 = self.dimension
return self.base_kern.dK3_dXdXdX2(X, X2, dimX, self.dimension, dimX2)
@Cache_this(limit=3, ignore_args=())
def Kdiag(self, X):
return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension))
return self.base_kern.dK2_dXdX2diag(X, self.dimension, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
return self.base_kern.dK3_dXdXdX2diag(X, dimX, self.dimension, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX(X,X2, self.dimension)
return self.base_kern.dK_dX(X, X2, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX2(X,X2, self.dimension)
return self.base_kern.dK_dX2(X, X2, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2_wrap(self, X, X2, dimX):
return self.base_kern.dK2_dXdX2(X, X2, dimX, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX_wrap(self, X, X2, dimX):
return self.base_kern.dK2_dXdX(X, X2, dimX, self.dimension)
def reset_gradients(self):
self.base_kern.reset_gradients()
@ -56,33 +74,33 @@ class DiffKern(Kern):
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
if dimX2 is None:
dimX2 = self.dimension
gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2)
gradients = self.base_kern.dgradients2_dXdX2(X, X2, self.dimension, dimX2)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_diag(self, dL_dK_diag, X):
gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension)
gradients = self.base_kern.dgradients2_dXdX2(X, X, self.dimension, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
def update_gradients_dK_dX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
gradients = self.base_kern.dgradients_dX(X, X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
gradients = self.base_kern.dgradients_dX2(X, X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def gradients_X(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension]
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:,self.dimension]
return np.sum(tmp, axis=1)
def gradients_X2(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,self.dimension,:]
return np.sum(tmp, axis=1)
def _convert_gradients(self, l,g, f = lambda x:x):
def _convert_gradients(self, l, g, f=lambda x:x):
if type(g) is np.ndarray:
return np.sum(f(l)*f(g))
else:
return np.array([np.sum(f(l)*f(gi)) for gi in g])
return np.array([np.sum(f(l)*f(gi)) for gi in g])

View file

@ -22,7 +22,14 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'dK_dX', _slice_dK_dX)
put_clean(dct, 'dK_dX2', _slice_dK_dX)
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
put_clean(dct, 'dK2_dXdX', _slice_dK2_dXdX2)
put_clean(dct, 'dK3_dXdXdX2', _slice_dK3_dXdXdX2)
put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'dK_dXdiag', _slice_dK_dXdiag)
put_clean(dct, 'dK_dX2diag', _slice_dK_dXdiag)
put_clean(dct, 'dK2_dXdX2diag', _slice_dK2_dXdX2diag)
put_clean(dct, 'dK2_dXdXdiag', _slice_dK2_dXdX2diag)
put_clean(dct, 'dK3_dXdXdX2diag', _slice_dK3_dXdXdX2diag)
put_clean(dct, 'phi', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
@ -35,9 +42,10 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X)
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X)
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX)
put_clean(dct, 'dgradients', _slice_partial_gradients_list)
put_clean(dct, 'dgradients_dX', _slice_partial_gradients_list_X)
put_clean(dct, 'dgradients_dX2', _slice_partial_gradients_list_X)
put_clean(dct, 'dgradients2_dXdX2', _slice_partial_gradients_list_XX)
put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi)
@ -155,6 +163,18 @@ def _slice_dK_dX(f):
return ret
return wrap
def _slice_dK_dXdiag(f):
@wraps(f)
def wrap(self, X, dim, *a, **kw):
with _Slice_wrap(self, X, None) as s:
d = s.k._project_dim(dim)
if d is None:
ret = np.zeros(X.shape[0])
else:
ret = f(self, s.X, dim, *a, **kw)
return ret
return wrap
def _slice_dK2_dXdX2(f):
@wraps(f)
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
@ -168,6 +188,59 @@ def _slice_dK2_dXdX2(f):
return ret
return wrap
def _slice_dK2_dXdX2diag(f):
@wraps(f)
def wrap(self, X, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, None) as s:
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (d is None) or (d2 is None):
ret = np.zeros(X.shape[0])
else:
ret = f(self, s.X, d, d2, *a, **kw)
return ret
return wrap
def _slice_dK3_dXdXdX2(f):
@wraps(f)
def wrap(self, X, X2, dim, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
D = s.k._project_dim(dim)
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (D is None) or (d is None) or (d2 is None):
ret = np.zeros((X.shape[0], X2.shape[0]))
else:
ret = f(self, s.X, s.X2, D, d, d2, *a, **kw)
return ret
return wrap
def _slice_dK3_dXdXdX2diag(f):
@wraps(f)
def wrap(self, X, dim, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, None) as s:
D = s.k._project_dim(dim)
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (D is None) or (d is None) or (d2 is None):
ret = np.zeros(X.shape[0])
else:
ret = f(self, s.X, D, d, d2, *a, **kw)
return ret
return wrap
def _slice_partial_gradients_list(f):
@wraps(f)
def wrap(self, X, X2):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
ret = f(self, s.X, s.X2)
return ret
return wrap
def _slice_partial_gradients_X(f):
@wraps(f)
def wrap(self, X, X2, dim):

View file

@ -7,20 +7,24 @@ import numpy as np
from functools import partial
class KernWrapper(Kern):
def __init__(self, fk, fug, fg, base_kern):
def __init__(self, fk, fdk, fug, fg, base_kern):
self.fk = fk
self.fdk = fdk
self.fug = fug
self.fg = fg
self.base_kern = base_kern
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False)
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper', useGPU=False)
def K(self, X, X2=None):
return self.fk(X,X2=X2)
return self.fk(X, X2=X2)
def dK_dX(self, X, X2, dimX):
return self.fdk(X, X2, dimX)
def update_gradients_full(self,dL_dK, X, X2=None):
def update_gradients_full(self, dL_dK, X, X2=None):
return self.fug(dL_dK, X, X2=X2)
def gradients_X(self,dL_dK, X, X2=None):
def gradients_X(self, dL_dK, X, X2=None):
return self.fg(dL_dK, X, X2=X2)
@property
@ -57,28 +61,46 @@ class MultioutputDerivativeKern(MultioutputKern):
#build covariance structure
covariance = [[None for i in range(nl)] for j in range(nl)]
linked = []
for i in range(0,nl):
unique=True
for j in range(0,nl):
if i==j or (kernels[i] is kernels[j]):
for i in range(0, nl):
unique = True
for j in range(0, nl):
if (i == j) or (kernels[i] is kernels[j]):
kern = kernels[i]
if i>j:
unique=False
if i > j:
unique = False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
kern = cross_covariances.get((i,j))
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
elif (kernels[i].name == 'DiffKern') and (kernels[i].base_kern == kernels[j]): # one is derivative of other
kern = KernWrapper(
kernels[i].dK_dX_wrap,
kernels[i].dK2_dXdX_wrap,
kernels[i].update_gradients_dK_dX,
kernels[i].gradients_X,
kernels[j]
)
unique=False
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
if i>j:
unique=False
elif (kernels[j].name == 'DiffKern') and (kernels[j].base_kern == kernels[i]): # one is derivative of other
kern = KernWrapper(
kernels[j].dK_dX2_wrap,
kernels[j].dK2_dXdX2_wrap,
kernels[j].update_gradients_dK_dX2,
kernels[j].gradients_X2,
kernels[i]
)
elif (kernels[i].name == 'DiffKern') and (kernels[j].name == 'DiffKern') and (kernels[i].base_kern == kernels[j].base_kern): #both are partial derivatives
kern = KernWrapper(
partial(kernels[i].K, dimX2=kernels[j].dimension),
partial(kernels[i].dK_dX, dimX2=kernels[j].dimension),
partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),
None,
kernels[i].base_kern
)
if i > j:
unique = False
else:
kern = ZeroKern()
covariance[i][j] = kern
if unique is True:
linked.append(i)
self.covariance = covariance
self.link_parameters(*[kernels[i] for i in linked])
self.link_parameters(*[kernels[i] for i in linked])

View file

@ -85,21 +85,63 @@ class MultioutputKern(CombinationKernel):
self.link_parameters(*[kernels[i] for i in linked])
@Cache_this(limit=3, ignore_args=())
def K(self, X ,X2=None):
def K(self, X, X2=None):
if X2 is None:
X2 = X
slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
for j in range(len(slices2)):
for i in range(len(slices)):
for l in range(len(slices2[j])):
for k in range(len(slices[i])):
cov_K = self.covariance[i][j].K(X[slices[i][k],:], X2[slices2[j][l],:])
target.__setitem__((slices[i][k], slices2[j][l]), cov_K)
return target
@Cache_this(limit=3, ignore_args=())
def Kdiag(self,X):
def Kdiag(self, X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0])
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
for kern, slices_i in zip(kerns, slices):
for s in slices_i:
np.copyto(target[s], kern.Kdiag(X[s]))
return target
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
if X2 is None:
X2 = X
slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
for j in range(len(slices2)):
for i in range(len(slices)):
for l in range(len(slices2[j])):
for k in range(len(slices[i])):
cov_dK_dX = self.covariance[i][j].dK_dX(X[slices[i][k],:], X2[slices2[j][l],:], dimX)
target.__setitem__((slices[i][k], slices2[j][l]), cov_dK_dX)
return target
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0])
for kern, slices_i in zip(kerns, slices):
for s in slices_i:
np.copyto(target[s], kern.dK_dXdiag(X[s], dimX))
return target
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
@ -115,19 +157,35 @@ class MultioutputKern(CombinationKernel):
def reset_gradients(self):
for kern in self.kern: kern.reset_gradients()
def update_gradients_full(self,dL_dK, X, X2=None):
self.reset_gradients()
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
slices = index_to_slices(X[:,self.index_dim])
if X2 is not None:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else:
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
slices2 = index_to_slices(X2[:,self.index_dim])
self.reset_gradients()
for j in range(len(slices2)):
for i in range(len(slices)):
for l in range(len(slices2[j])):
for k in range(len(slices[i])):
self._update_gradients_full_wrapper(
self.covariance[i][j],
dL_dK[slices[i][k],slices2[j][l]],
X[slices[i][k],:],
X2[slices2[j][l],:]
)
def update_gradients_diag(self, dL_dKdiag, X):
self.reset_gradients()
slices = index_to_slices(X[:,self.index_dim])
[[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
self.reset_gradients()
for i in range(len(slices)):
for k in range(len(slices[i])):
self._update_gradients_diag_wrapper(
self.covariance[i][i],
dL_dKdiag[slices[i][k]],
X[slices[i][k],:]
)
def gradients_X(self,dL_dK, X, X2=None):
slices = index_to_slices(X[:,self.index_dim])
@ -137,4 +195,4 @@ class MultioutputKern(CombinationKernel):
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else:
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
return target
return target

View file

@ -70,6 +70,310 @@ class Prod(CombinationKernel):
which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def reset_gradients(self):
for part in self.parts:
part.reset_gradients()
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dX(self, X, X2, dimX, which_parts=None):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
else:
prod = np.ones(prod_sum.shape)
to_update = list(set(which_parts) - set(combination))[0]
prod_sum += prod*to_update.dK_dX(X, X2, dimX)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dXdiag(self, X, dimX, which_parts=None):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
Returns only diagonal elements.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros(X.shape[0])
for combination in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination])
else:
prod = np.ones(prod_sum.shape)
to_update = list(set(which_parts) - set(combination))[0]
prod_sum += prod*to_update.dK_dXdiag(X, dimX)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dX2(self, X, X2, dimX2, which_parts=None):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
else:
prod = np.ones(prod_sum.shape)
to_update = list(set(which_parts) - set(combination))[0]
prod_sum += prod*to_update.dK_dX2(X, X2, dimX2)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK2_dXdX2(self, X, X2, dimX, dimX2, which_parts=None):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK2_dXdX2(X, X2, dimX, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK_dX(X, X2, dimX)*to_update2.dK_dX2(X, X2, dimX2)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK2_dXdX2diag(self, X, dimX, dimX2, which_parts=None):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros(X.shape[0])
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK_dXdiag(X, dimX)*to_update2.dK_dX2diag(X, dimX)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK2_dXdX(self, X, X2, dimX_0, dimX_1, which_parts=None):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK2_dXdX(X, X2, dimX_0, dimX_1)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK_dX(X, X2, dimX_1)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2, which_parts=None):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK3_dXdXdX2(X, X2, dimX_0, dimX_1, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK2_dXdX2(X, X2, dimX_0, dimX2)*to_update2.dK_dX(X, X2, dimX_1)
prod_sum += prod*to_update1.dK2_dXdX(X, X2, dimX_0, dimX_1)*to_update2.dK_dX2(X, X2, dimX2)
prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK2_dXdX2(X, X2, dimX_1, dimX2)
if len(which_parts) > 2:
for combination3 in itertools.combinations(combination2, len(combination2) - 1):
if len(combination3) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination3])
else:
prod = np.ones(prod_sum.shape)
to_update3 = list(set(combination2) - set(combination3))[0]
prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK_dX2(X, X2, dimX2)*to_update3.dK_dX(X, X2, dimX_1)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2, which_parts=None):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements of the covariance matrix.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros(X.shape[0])
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK3_dXdXdX2diag(X, dimX_0, dimX_1, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX_0, dimX2)*to_update2.dK_dXdiag(X, dimX_1)
prod_sum += prod*to_update1.dK2_dXdXdiag(X, dimX_0, dimX_1)*to_update2.dK_dX2diag(X, dimX2)
prod_sum += prod*to_update1.dK_dXdiag(X, dimX_0)*to_update2.dK2_dXdX2diag(X, dimX_1, dimX2)
if len(which_parts) > 2:
for combination3 in itertools.combinations(combination2, len(combination2) - 1):
if len(combination3) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination3])
else:
prod = np.ones(prod_sum.shape)
to_update3 = list(set(combination2) - set(combination3))[0]
prod_sum += prod*to_update1.dK_dXdiag(X, dimX_0)*to_update2.dK_dX2diag(X, dimX2)*to_update3.dK_dXdiag(X, dimX_1)
return prod_sum
def update_gradients_direct(self, *args):
for i, (g,p) in enumerate(zip(args, self.parts)):
p.update_gradients_direct(*g)
def dgradients_dX(self, X, X2, dimX, parts=None):
"""
Compute the hyperparameter gradients of:
the derivative of K with respect to dimension dimX of set X
("dK_dX").
"""
if parts is None:
parts = self.parts
gradients = []
for part in parts:
neq_parts = [p for p in parts if p is not part]
if len(neq_parts) > 0:
K = self.K(X, X2, which_parts=neq_parts)
K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts)
else:
K = np.ones((X.shape[0], X2.shape[0]))
K_dx = np.zeros((X.shape[0], X2.shape[0]))
g = part.dgradients(X, X2)
g_dx = part.dgradients_dX(X, X2, dimX)
gradients += [[(g_i*K_dx + g_dx_i*K) for (g_i, g_dx_i) in zip(g, g_dx)]]
return gradients
def dgradients_dX2(self, X, X2, dimX2, parts=None):
"""
Compute the hyperparameter gradients of:
the derivative of K with respect to dimension dimX2 of set X2
("dK_dX2").
"""
if parts is None:
parts = self.parts
gradients = []
for part in parts:
neq_parts = [p for p in parts if p is not part]
if len(neq_parts) > 0:
K = self.K(X, X2, which_parts=neq_parts)
K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts)
else:
K = np.ones((X.shape[0], X2.shape[0]))
K_dx2 = np.zeros((X.shape[0], X2.shape[0]))
g = part.dgradients(X, X2)
g_dx2 = part.dgradients_dX2(X, X2, dimX2)
gradients += [[(g_i*K_dx2 + g_dx2_i*K) for (g_i, g_dx2_i) in zip(g, g_dx2)]]
return gradients
def dgradients2_dXdX2(self, X, X2, dimX, dimX2, parts=None):
"""
Compute the hyperparameter gradients of:
the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2
("dK2_dXdX2").
"""
if parts is None:
parts = self.parts
gradients = []
for part in parts:
neq_parts = [p for p in parts if p is not part]
K = self.K(X, X2, which_parts=neq_parts)
K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts)
K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts)
K_dxdx2 = self.dK2_dXdX2(X, X2, dimX, dimX2, which_parts=neq_parts)
g = part.dgradients(X, X2)
g_dx = part.dgradients_dX(X, X2, dimX)
g_dx2 = part.dgradients_dX2(X, X2, dimX2)
g_dxdx2 = part.dgradients2_dXdX2(X, X2, dimX, dimX2)
gradients += [[(g_i*K_dxdx2 + g_dx_i*K_dx2 + g_dx2_i*K_dx + g_dxdx2_i*K) for (g_i, g_dx_i, g_dx2_i, g_dxdx2_i) in zip(g, g_dx, g_dx2, g_dxdx2)]]
return gradients
def update_gradients_full(self, dL_dK, X, X2=None):
if len(self.parts)==2:
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)

View file

@ -53,24 +53,126 @@ class RBF(Stationary):
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
dist = X[:,None,dimX]-X2[None,:,dimX]
lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX]
return -1.*K*dist*lengthscale2inv
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
dist = X[:,None,dimX] - X2[None,:,dimX]
return -dist*(lengthscaleinv**2)*self._clean_K(X, X2)
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
@Cache_this(limit=3, ignore_args=())
def dK_dX2(self, X, X2, dimX2):
return -self.dK_dX(X,X2, dimX2)
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
"""
return -self._clean_dK_dX(X, X2, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK_dX2diag(self, X, dimX2):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2)
return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX]
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
term = dist[dimX]*(lengthscaleinv[dimX]**2)
term *= dist[dimX2]*(lengthscaleinv[dimX2]**2)
if dimX == dimX2:
term -= (lengthscaleinv[dimX]**2)
return -term*self._clean_K(X, X2)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2diag(self, X, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
if dimX == dimX2:
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
return np.ones(X.shape[0])*(lengthscaleinv[dimX]**2)*self.variance
else:
return np.zeros(X.shape[0])
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX(self, X, X2, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
"""
return -self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdXdiag(self, X, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
Returns only diagonal elements.
"""
return -self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1)
@Cache_this(limit=3, ignore_args=())
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
term = dist[dimX_0]*(lengthscaleinv[dimX_0]**2)
term *= dist[dimX_1]*(lengthscaleinv[dimX_1]**2)
term *= dist[dimX2]*(lengthscaleinv[dimX2]**2)
if dimX_0 == dimX_1:
term -= dist[dimX2]*(lengthscaleinv[dimX2]**2)*(lengthscaleinv[dimX_0]**2)
if dimX_0 == dimX2:
term -= dist[dimX_1]*(lengthscaleinv[dimX_1]**2)*(lengthscaleinv[dimX_0]**2)
if dimX_1 == dimX2:
term -= dist[dimX_0]*(lengthscaleinv[dimX_0]**2)*(lengthscaleinv[dimX_1]**2)
return term*self._clean_K(X, X2)
@Cache_this(limit=3, ignore_args=())
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements of the covariance matrix.
"""
return np.zeros(X.shape[0])
def dK_dr(self, r):
return -r*self.K_of_r(r)
@ -80,73 +182,132 @@ class RBF(Stationary):
def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros
@Cache_this(limit=3, ignore_args=())
def dK_dvariance(self,X,X2):
return self.K(X,X2)/self.variance
def dK_dvariance(self, X, X2):
"""
Compute the derivative of K with respect to variance.
"""
return self._clean_K(X, X2)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX(self, X, X2, dim):
return self.dK_dX(X,X2, dim)/self.variance
def dK_dlengthscale(self, X, X2):
"""
Compute the derivative(s) of K with respect to lengthscale(s).
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
K = self._clean_K(X, X2)
if self.ARD:
g = []
for diml in range(self.input_dim):
g += [(dist[diml]**2)*(lengthscaleinv[diml]**3)*K]
else:
g = (lengthscaleinv[0]**3)*np.sum(dist**2, axis=0)*K
return g
@Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX2(self, X, X2, dim):
return self.dK_dX2(X,X2, dim)/self.variance
def dK2_dvariancedX(self, X, X2, dimX):
"""
Compute the second derivative of K with respect to:
variance, and
dimension dimX of set X.
"""
return self._clean_dK_dX(X, X2, dimX)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK3_dvariancedXdX2(self, X, X2, dim, dimX2):
return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance
def dK2_dvariancedX2(self, X, X2, dimX2):
"""
Compute the second derivative of K with respect to:
variance, and
dimension dimX2 of set X2.
"""
return -self.dK2_dvariancedX(X, X2, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX(self, X, X2, dimX):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
"""
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
dK_dX = self._clean_dK_dX(X, X2, dimX)
dK_dl = self.dK_dlengthscale(X, X2)
if self.ARD:
g = []
for diml in range(X.shape[1]):
g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)]
for diml in range(self.input_dim):
term = -dist[dimX]*(lengthscaleinv[dimX]**2)*dK_dl[diml]
if diml == dimX:
term -= 2*lengthscaleinv[dimX]*dK_dX
g += [term]
else:
g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K
term = -dist[dimX]*(lengthscaleinv[0]**2)*dK_dl
term -= 2*lengthscaleinv[0]*dK_dX
g = term
return g
@Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX2(self, X, X2, dimX2):
tmp = self.dK2_dlengthscaledX(X, X2, dimX2)
"""
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX2 of set X2.
"""
dK2_dlengthscaledX = self.dK2_dlengthscaledX(X, X2, dimX2)
if self.ARD:
return [-1.*g for g in tmp]
return [-1.*g for g in dK2_dlengthscaledX]
else:
return -1*tmp
return -1*dK2_dlengthscaledX
@Cache_this(limit=3, ignore_args=())
def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative of K with respect to:
variance,
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
return self._clean_dK2_dXdX2(X, X2, dimX, dimX2)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
lengthscale2inv = lengthscaleinv**2
"""
Compute the third derivative(s) of K with respect to:
lengthscale(s),
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
K = self._clean_K(X, X2)
dK_dX = self._clean_dK_dX(X, X2, dimX)
dK_dX2 = self._clean_dK_dX(X, X2, dimX2)
dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
if self.ARD:
g = []
for diml in range(X.shape[1]):
tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3)
if dimX == dimX2:
tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2)
for diml in range(self.input_dim):
term = (dist[diml]**2)*(lengthscaleinv[diml]**3)*dK2_dXdX2
if diml == dimX:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3)
term -= 2*dist[dimX]*(lengthscaleinv[dimX]**3)*dK_dX2
if diml == dimX2:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3)
if dimX == dimX2:
tmp += -2.*K*(lengthscaleinv[dimX]**3)
g += [tmp]
term -= 2*dist[dimX2]*(lengthscaleinv[dimX2]**3)*dK_dX
if diml == dimX == dimX2:
term -= 2*(lengthscaleinv[dimX]**3)*K
g += [term]
else:
g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5)
term = np.sum(dist**2, axis=0)*dK2_dXdX2
term -= 4*dist[dimX2]*dK_dX
if dimX == dimX2:
g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2)
term -= 2*K
g = (lengthscaleinv[0]**3)*term
return g
def __getstate__(self):

View file

@ -122,7 +122,6 @@ class StdPeriodic(Kern):
pass
def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2."""
if X2 is None:
@ -133,13 +132,372 @@ class StdPeriodic(Kern):
return self.variance * exp_dist
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def dK_dX(self, X, X2, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX]
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dist = X[:,None,dimX] - X2[None,:,dimX]
base = np.pi*periodinv*dist
return -F*np.sin(2*base)*self._clean_K(X, X2)
def dK_dXdiag(self, X, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
def dK_dX2(self, X, X2, dimX2):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
"""
return -self._clean_dK_dX(X, X2, dimX2)
def dK_dX2diag(self, X, dimX2):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
def dK2_dXdX2(self, X, X2, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
term = np.sin(2*base)*self._clean_dK_dX(X, X2, dimX)
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_K(X, X2)
return F*term
def dK2_dXdX2diag(self, X, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
if dimX == dimX2:
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
return (np.pi**2)*(lengthscaleinv**2)*(periodinv**2)*self.variance*np.ones(X.shape[0])
else:
return np.zeros(X.shape[0])
def dK2_dXdX(self, X, X2, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
"""
return -self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1)
def dK2_dXdXdiag(self, X, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
Returns only diagonal elements.
"""
return -self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1)
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
term = np.sin(2*base)*self._clean_dK2_dXdX(X, X2, dimX_0, dimX_1)
if dimX_0 == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_dK_dX(X, X2, dimX_1)
if dimX_1 == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_dK_dX(X, X2, dimX_0)
if dimX_0 == dimX_1 == dimX2:
term -= 4*(np.pi**2)*(periodinv**2)*np.sin(2*base)*self._clean_K(X, X2)
return F*term
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements of the covariance matrix.
"""
return np.zeros(X.shape[0])
def dK_dvariance(self, X, X2):
"""
Compute the derivative of K with respect to variance.
"""
return self._clean_K(X, X2)/self.variance
def dK_dlengthscale(self, X, X2):
"""
Compute the derivative(s) of K with respect to lengthscale(s).
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
periodinv = (np.ones(X.shape[1])/(self.period))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
base = np.pi*periodinv[:,None,None]*dist
K = self._clean_K(X, X2)
if self.ARD2:
g = []
for diml in range(self.input_dim):
g += [(lengthscaleinv[diml]**3)*np.square(np.sin(base[diml]))*K]
else:
g = (lengthscaleinv[0]**3)*np.sum(np.square(np.sin(base)), axis=0)*K
return g
def dK_dperiod(self, X, X2):
"""
Compute the derivative(s) of K with respect to period(s).
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
periodinv = (np.ones(X.shape[1])/(self.period))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
base = np.pi*periodinv[:,None,None]*dist
K = self._clean_K(X, X2)
if self.ARD1:
g = []
for diml in range(self.input_dim):
g += [0.5*base[diml]*(lengthscaleinv[diml]**2)*periodinv[diml]*np.sin(2*base[diml])*K]
else:
g = 0.5*periodinv[0]*np.sum(base*(lengthscaleinv**2)[:,None,None]*np.sin(2*base), axis=0)*K
return g
def dK2_dvariancedX(self, X, X2, dimX):
"""
Compute the second derivative of K with respect to:
variance, and
dimension dimX of set X.
"""
return self._clean_dK_dX(X, X2, dimX)/self.variance
def dK2_dvariancedX2(self, X, X2, dimX2):
"""
Compute the second derivative of K with respect to:
variance, and
dimension dimX2 of set X2.
"""
return -self.dK2_dvariancedX(X, X2, dimX2)
def dK2_dlengthscaledX(self, X, X2, dimX):
"""
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX]
dist = X[:,None,dimX] - X2[None,:,dimX]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
K = self._clean_K(X, X2)
dK_dl = self.dK_dlengthscale(X, X2)
if self.ARD2:
g = []
for diml in range(self.input_dim):
term = dK_dl[diml]
if diml == dimX:
term -= 2*lengthscaleinv*K
g += [-F*np.sin(2*base)*term]
else:
g = -F*np.sin(2*base)*(dK_dl - 2*lengthscaleinv*K)
return g
def dK2_dlengthscaledX2(self, X, X2, dimX2):
"""
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX2 of set X2.
"""
dK2_dldX = self.dK2_dlengthscaledX(X, X2, dimX2)
if self.ARD2:
return [-1*g for g in dK2_dldX]
else:
return -1*dK2_dldX
def dK2_dperioddX(self, X, X2, dimX):
"""
Compute the second derivative(s) of K with respect to:
period(s), and
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX]
dist = X[:,None,dimX] - X2[None,:,dimX]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
K = self._clean_K(X, X2)
dK_dT = self.dK_dperiod(X, X2)
if self.ARD1:
g = []
for dimT in range(self.input_dim):
term = np.sin(2*base)*dK_dT[dimT]
if dimT == dimX:
term -= periodinv*(np.sin(2*base)+2*base*np.cos(2*base))*K
g += [-F*term]
else:
term = np.sin(2*base)*dK_dT
term -= periodinv*(np.sin(2*base)+2*base*np.cos(2*base))*K
g = -F*term
return g
def dK2_dperioddX2(self, X, X2, dimX2):
"""
Compute the second derivative(s) of K with respect to:
period(s), and
dimension dimX2 of set X2.
"""
dK2_dperioddX = self.dK2_dperioddX(X, X2, dimX2)
if self.ARD1:
return [-1*g for g in dK2_dperioddX]
else:
return -1*dK2_dperioddX
def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative of K with respect to:
variance,
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
return self._clean_dK2_dXdX2(X, X2, dimX, dimX2)/self.variance
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative(s) of K with respect to:
lengthscale(s),
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
dK_dl = self.dK_dlengthscale(X, X2)
dK2_dldX = self.dK2_dlengthscaledX(X, X2, dimX)
if self.ARD2:
g = []
for diml in range(self.input_dim):
term = np.sin(2*base)*dK2_dldX[diml]
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*dK_dl[diml]
term *= F
if diml == dimX2:
term -= 2*lengthscaleinv*dK2_dXdX2
g += [term]
else:
term = np.sin(2*base)*dK2_dldX
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*dK_dl
term *= F
term -= 2*lengthscaleinv*dK2_dXdX2
g = term
return g
def dK3_dperioddXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative(s) of K with respect to:
period(s),
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
K = self._clean_K(X, X2)
dK_dX = self._clean_dK_dX(X, X2, dimX)
dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
dK_dT = self.dK_dperiod(X, X2)
dK2_dTdX = self.dK2_dperioddX(X, X2, dimX)
if self.ARD1:
g = []
for dimT in range(self.input_dim):
term = np.sin(2*base)*dK2_dTdX[dimT]
if dimT == dimX2:
term -= 2*periodinv*np.cos(2*base)*base*dK_dX
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*dK_dT[dimT]
if dimX == dimX2 == dimT:
term += 2*np.pi*(periodinv**2)*(2*base*np.sin(2*base)-np.cos(2*base))*K
term *= F
if dimT == dimX2:
term -= periodinv*dK2_dXdX2
g += [term]
else:
term = np.sin(2*base)*dK2_dTdX-2*periodinv*base*np.cos(2*base)*dK_dX
if dimX == dimX2:
term += 2*np.pi*periodinv*(np.cos(2*base)*dK_dT+periodinv*(2*base*np.sin(2*base)-np.cos(2*base))*K)
g = F*term-periodinv*dK2_dXdX2
return g
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None:
@ -167,12 +525,52 @@ class StdPeriodic(Kern):
else: # same lengthscales
self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
def update_gradients_direct(self, dL_dVar, dL_dPer, dL_dLen):
self.variance.gradient = dL_dVar
self.period.gradient = dL_dPer
self.lengthscale.gradient = dL_dLen
def reset_gradients(self):
self.variance.gradient = 0.
if not self.ARD1:
self.period.gradient = 0.
else:
self.period.gradient = np.zeros(self.input_dim)
if not self.ARD2:
self.lengthscale.gradient = 0.
else:
self.lengthscale.gradient = np.zeros(self.input_dim)
def update_gradients_diag(self, dL_dKdiag, X):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
self.variance.gradient = np.sum(dL_dKdiag)
self.period.gradient = 0
self.lengthscale.gradient = 0
def dgradients(self, X, X2):
g1 = self.dK_dvariance(X, X2)
g2 = self.dK_dperiod(X, X2)
g3 = self.dK_dlengthscale(X, X2)
return [g1, g2, g3]
def dgradients_dX(self, X, X2, dimX):
g1 = self.dK2_dvariancedX(X, X2, dimX)
g2 = self.dK2_dperioddX(X, X2, dimX)
g3 = self.dK2_dlengthscaledX(X, X2, dimX)
return [g1, g2, g3]
def dgradients_dX2(self, X, X2, dimX2):
g1 = self.dK2_dvariancedX2(X, X2, dimX2)
g2 = self.dK2_dperioddX2(X, X2, dimX2)
g3 = self.dK2_dlengthscaledX2(X, X2, dimX2)
return [g1, g2, g3]
def dgradients2_dXdX2(self, X, X2, dimX, dimX2):
g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2)
g2 = self.dK3_dperioddXdX2(X, X2, dimX, dimX2)
g3 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2)
return [g1, g2, g3]
def gradients_X(self, dL_dK, X, X2=None):
K = self.K(X, X2)
if X2 is None:
@ -185,4 +583,4 @@ class StdPeriodic(Kern):
return np.zeros(X.shape)
def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
return self.variance*np.ones(self.input_dim)/self.lengthscale**2

View file

@ -306,7 +306,12 @@ class Stationary(Kern):
l4 = np.ones(X.shape[1])*self.lengthscale**2
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def dgradients(self, X, X2):
g1 = self.dK_dvariance(X, X2)
g2 = self.dK_dlengthscale(X, X2)
return [g1, g2]
def dgradients_dX(self, X, X2, dimX):
g1 = self.dK2_dvariancedX(X, X2, dimX)
g2 = self.dK2_dlengthscaledX(X, X2, dimX)

View file

@ -9,6 +9,7 @@ from ..core.mapping import Mapping
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from .. import kern
from ..kern import DiffKern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..util.normalizer import Standardize
from .. import util
@ -69,39 +70,80 @@ class MultioutputGP(GP):
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
def predictive_gradients(self, Xnew, kern=None):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
#if Y_metadata is None:
#Y_metadata={'output_index': ind}
return super(MultioutputGP, self).predictive_gradients(Xnew, kern)
def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!!
def predictive_gradients(self, Xnew, kern=None):
"""
Compute the derivatives of the predicted latent function with respect to X*
Compute the derivatives of the predicted latent function with respect
to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP
(usually one).
Note that this is not the same as computing the mean and variance of
the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
"""
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
slices = index_to_slices(Xnew[:,-1])
for i in range(len(slices)):
if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0):
assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)"
if kern is None:
kern = self.kern
if all([(isinstance(k, DiffKern)) for k in self.kern.kern[1:]]):
"""
Compute the gradients of the predicted latent function and predicted
partial derivatives with respect to X*.
This works only for models that observe the gradient of the latent function.
Xnew is given as a list of arrays, where each array X*_i (size [N_i*, Q])
contains points at which to compute gradients for each predicted latent
function or partial derivative.
Resulting arrays are sized [sum_i^D : N_i*, Q]
Passing a list of only one array [X*] returns only gradients of
the predicted latent function and does not compute gradients of
predicted partial derivatives.
In this case the resulting arrays are sized [N*, Q].
:param Xnew: points at which to compute predictive gradients
:type Xnew: list
:type Xnew[i]: np.darray (sum_i^D : N_i*, Q)
:returns: dmu_dX, dv_dX
:rtype: (np.ndarray (sum_i^D : N_i*, Q), np.ndarray (sum_i^D : N_i*, Q))
"""
dims = Xnew.shape[1] - 1
mean_jac = np.empty((Xnew.shape[0], dims))
var_jac = np.empty((Xnew.shape[0], dims))
X = self._predictive_variable
alpha = self.posterior.woodbury_vector
Wi = self.posterior.woodbury_inv
k = kern.K(Xnew, X)
for dimX in range(dims):
dk_dx = kern.dK_dX(Xnew, X, dimX)
dk_dxdiag = kern.dK_dXdiag(Xnew, dimX)
mean_jac[:,dimX] = np.dot(dk_dx, alpha).flatten()
var_jac[:,dimX] = dk_dxdiag - 2*(np.dot(k, Wi)*dk_dx).sum(-1)
return mean_jac, var_jac
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]

View file

@ -5,6 +5,8 @@ from __future__ import division
import unittest
import numpy as np
import GPy
from GPy.models import GradientChecker
from functools import reduce
class MiscTests(unittest.TestCase):
def setUp(self):
@ -1208,40 +1210,6 @@ class GradientTests(np.testing.TestCase):
with self.assertRaises(RuntimeError):
m._raw_posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
def test_multioutput_model_with_derivative_observations(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10
M=10
sigma=0.05
sigmader=0.05
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
xd = np.array([np.linspace(2,8,M)]).T
yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1)))
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss = GPy.likelihoods.Gaussian(variance=0.1)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2)
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
# the likelihoods must follow the same order
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
m.randomize()
self.assertTrue(m.checkgrad())
m.optimize(messages=0, ipython_notebook=False)
self.assertTrue(m.checkgrad())
def test_multioutput_model_with_ep(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
@ -1308,6 +1276,173 @@ class GradientTests(np.testing.TestCase):
c2 = model.predict(x, full_cov=True)[1]
np.testing.assert_allclose(c1,c2)
class GradientMultioutputGPModelTests(np.testing.TestCase):
def setUp(self):
# standard test function
self.period = 3
self.w = 2*np.pi/self.period
self.f = lambda x: np.sum(np.square(np.sin(self.w*x)), axis=1)
self.df = lambda x: self.w*np.sin(2*self.w*x)
self.noise_std = 1e-2
self.bounds = (-self.period, self.period)
self.train_points = 5
self.test_points = 25
def approximate_predictive_gradients(self, model, x_test, D, step=1e-6):
'''
Approximates gradients of predicted posterior means and variances.
This function is used as the frameworks for GradientChecker and
MultioutputGP do not easily combine when checking gradients of predicted
partial derivative posteriors.
'''
dmdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D))
dvdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D))
for d in range(D):
x_over = x_test.copy()
x_over[:,d] += step
x_undr = x_test.copy()
x_undr[:,d] -= step
m_over, v_over = model.predict([x_over]*(D + 1))
m_undr, v_undr = model.predict([x_undr]*(D + 1))
dmdx_aprx[:,d,None] = (m_over - m_undr)/(2*step)
dvdx_aprx[:,d,None] = (v_over - v_undr)/(2*step)
return dmdx_aprx, dvdx_aprx
def check_model(self, kern):
'''
Checks predictions, hyperparameter gradients, and gradients of predicted
posterior means and variances for MultioutputGP models that incorporate
observed latent function gradient information.
'''
D = kern.input_dim
X_list = []
Y_list = []
for i in range(D + 1):
# sample inputs for either latent function or partial derivatives
X_i = np.random.uniform(*self.bounds, size=(self.train_points, D))
# output of latent function or partial derivatives
Y_i = (self.f(X_i) if (i == 0) else self.df(X_i)[:,i - 1])[:,None]
# noisy observations
Y_i += np.random.normal(scale=self.noise_std, size=Y_i.shape)
X_list.append(X_i)
Y_list.append(Y_i)
# the kernel is accompanied with derivative kernels, one for each dimension
kernel_list = [kern] + [GPy.kern.DiffKern(kern, d) for d in range(D)]
# create model and check its hyperparameter gradient
likelihood_list = [GPy.likelihoods.Gaussian(variance=self.noise_std**2)]*(D + 1)
model = GPy.models.MultioutputGP(X_list, Y_list, kernel_list, likelihood_list)
model.likelihood.constrain_fixed()
self.assertTrue(model.checkgrad(step=1e-3))
# optimize the model, and check its hyperparameter gradient again
model.optimize()
self.assertTrue(model.checkgrad(step=1e-3))
# check predictions
np.testing.assert_allclose(model.predict(X_list)[0], model.Y, atol=3*self.noise_std)
# test inputs for checking predictive gradients
x_test = np.random.uniform(*self.bounds, size=(self.test_points, D))
# predictive gradients
dmdx, dvdx = model.predictive_gradients([x_test]*(D + 1))
# approximated predictive gradients
dmdx_aprx, dvdx_aprx = self.approximate_predictive_gradients(model, x_test, D, step=1e-3)
# check predictive gradients
np.testing.assert_allclose(dmdx, dmdx_aprx, atol=3*self.noise_std)
np.testing.assert_allclose(dvdx, dvdx_aprx, atol=3*self.noise_std)
def test_MultioutputGP_gradobs_RBF(self):
'''
Testing gradient observing MultioutputGP model with an RBF kernel.
'''
for D in range(1, 4):
kern = GPy.kern.RBF(input_dim=D)
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_RBF_ARD(self):
'''
Testing gradient observing MultioutputGP model with an RBF (ARD) kernel.
'''
for D in range(1, 4):
kern = GPy.kern.RBF(input_dim=D, ARD=True)
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_StdP(self):
'''
Testing gradient observing MultioutputGP model with a StdP kernel.
'''
for D in range(1, 4):
kern = GPy.kern.StdPeriodic(input_dim=D, period=self.period)
kern.period.constrain_fixed()
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_StdP_ARD(self):
'''
Testing gradient observing MultioutputGP model with a StdP (ARD) kernel.
'''
for D in range(1, 4):
kern = GPy.kern.StdPeriodic(input_dim=D, period=[self.period]*D, ARD1=True, ARD2=True)
kern.period.constrain_fixed()
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_prod_RBF(self):
'''
Testing gradient observing MultioutputGP model with several RBF kernels.
'''
for D in range(2, 4):
kerns = [GPy.kern.RBF(input_dim=1) for d in range(D)]
kern = reduce(lambda k0, k1: k0 * k1, kerns)
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_prod_StdP(self):
'''
Testing gradient observing MultioutputGP model with several StdP kernels.
'''
for D in range(2, 4):
kerns = [GPy.kern.StdPeriodic(input_dim=1, period=self.period) for d in range(D)]
kern = reduce(lambda k0, k1: k0 * k1, kerns)
[k.period.constrain_fixed() for k in kern.parts]
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_prod_mix(self):
'''
Testing gradient observing MultioutputGP model with a mix of kernel types.
'''
for D in range(2, 4):
kerns = []
for d in range(D):
if d % 2 == 0:
k = GPy.kern.RBF(input_dim=1)
else:
k = GPy.kern.StdPeriodic(input_dim=1, period=self.period)
k.period.constrain_fixed()
kerns.append(k)
kern = reduce(lambda k0, k1: k0 * k1, kerns)
kern.randomize()
self.check_model(kern)
def _create_missing_data_model(kernel, Q):
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3