Enhance multioutput grad obs (#995)

* multiplied RBF kernels can now be used with gradient observations

* standard periodic kernels can now be used with gradient observations

* predictive gradients (derivatives of posterior means and variances) can now be calculated when using gradient observations

* simplified and commented RBF & StdP kernel derivatives

* updated kernel slicing and commented prod kernel derivatives

* removed caching from stdp kern, as it breaks optimization for some reason

* fixed hyperparameter optimization for prod kernel

* improved code readability

* added unit tests for gradient observing MultioutputGP models

* added predictions check to unit tests

* bugfix for multioutput_kern

* improved testing coverage

* reduced size of some tests; led to an issue in an unrelated test

* updated testing

* added gradient MultioutputGP prod kernel example

* added keywords and plotting to example
This commit is contained in:
mgranit 2023-09-21 16:53:42 +03:00 committed by GitHub
parent 2c22f1e9c5
commit 9c1db7aa34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 1494 additions and 164 deletions

View file

@ -771,3 +771,117 @@ def multioutput_gp_with_derivative_observations(plot=True):
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))]) mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))])
return m return m
def multioutput_gp_with_derivative_observations_2D(optimize=True, plot=False):
'''
This in an example on how to use a MultioutputGP model with gradient
observations and multiple single-dimensional kernels of differing types.
'''
period = 3
w = 2*np.pi/period # angular frequency
bounds = (-period, period)
# latent function and gradient
f = lambda x: (np.exp(-x[:,0]**2) + np.cos(w*x[:,1]))[:,None]
df = lambda x: np.array([-2*np.exp(-x[:,0]**2)*x[:,0], -w*np.sin(w*x[:,1])]).T
# 2D input grid
ppa = 25 # points per axis
x = np.linspace(*bounds, ppa)
xx, yy = np.meshgrid(x, x)
grid = np.array([xx.reshape(-1), yy.reshape(-1)]).T
fgrid = f(grid)
dfgrid = df(grid)
# 10 random training points generated with a space-filling sobol sequence
X = np.array([
[ 0.50421399, 2.1331483 ],
[-2.15717152, -1.70295936],
[-1.46704334, 1.37111521],
[ 2.79064536, -0.9649018 ],
[ 1.60728264, 0.27702713],
[-0.30712366, -0.57372129],
[-2.6140632 , 2.49192488],
[ 0.89078772, -2.85873686],
[ 1.15813136, 0.96910322],
[-2.83307021, -1.38155383]
])
# Note!
# This example uses the same inputs for function and gradient observations.
noise_std = 1e-2
# function observations
Y = f(X) + np.random.normal(scale=noise_std, size=(len(X), 1))
# gradient observations
dY = df(X) + np.random.normal(scale=noise_std, size=(len(X), 2))
# gather inputs and observations into lists
X_list = [X, X, X]
# once for function observations, and once for each partial derivative
# make sure all arrays are of shape (N x dims), where N is # of training points
Y_list = [Y, dY[:,0,None], dY[:,1,None]]
# create a kernel that is the product of two one-dimensional kernels
# the first kernel is an RBF kernel
kern0 = GPy.kern.RBF(input_dim=1, active_dims=[0])
# as the function is periodic in the second dimension, we use a StdP kernel
kern1 = GPy.kern.StdPeriodic(input_dim=1, active_dims=[1], period=period)
kern1.period.constrain_fixed()
# the kernels can be multiplied together into a product kernel
kern = kern0 * kern1
# with gradient observations, we need to define a DiffKern for each dimension
# the DiffKern is given the main kernel as a base kernel
diffkern0 = GPy.kern.DiffKern(kern, 0)
diffkern1 = GPy.kern.DiffKern(kern, 1)
# gather the main kernel and diffkerns into a list
kern_list = [kern, diffkern0, diffkern1]
# define a likelihood and repeat it in a list
likelihood_list = [GPy.likelihoods.Gaussian(variance=noise_std**2)]*3
# create the MultioutputGP model and optimize
model = GPy.models.MultioutputGP(X_list, Y_list, kern_list, likelihood_list)
model.likelihood.constrain_fixed()
if optimize:
model.optimize()
# make function predictions
Xnew, _, ind = GPy.util.multioutput.build_XY([grid], index=[0])
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
mu, var = model.predict(Xnew, Y_metadata=Y_metadata)
# make gradient predictions
Xnew, _, ind = GPy.util.multioutput.build_XY([grid]*2, index=[1, 2])
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
mu_d, var_d = model.predict(Xnew, Y_metadata=Y_metadata)
mu_d = np.array([mu_d[:len(grid)], mu_d[len(grid):]]).T[0]
var_d = np.array([var_d[:len(grid)], var_d[len(grid):]]).T[0]
if plot and MPL_AVAILABLE:
fig, axs = plt.subplots(1, 3)
for ax in axs: ax.set_box_aspect(1)
axs[0].set_title('true f')
axs[0].contourf(xx, yy, fgrid.reshape(ppa, ppa), levels=25)
axs[1].set_title('true df1')
axs[1].contourf(xx, yy, dfgrid[:,0].reshape(ppa, ppa), levels=25)
axs[2].set_title('true df2')
axs[2].contourf(xx, yy, dfgrid[:,1].reshape(ppa, ppa), levels=25)
fig, axs = plt.subplots(1, 3)
for ax in axs: ax.set_box_aspect(1)
axs[0].set_title('pred f')
axs[0].contourf(xx, yy, mu.reshape(ppa, ppa), levels=25)
axs[1].set_title('pred df1')
axs[1].contourf(xx, yy, mu_d[:,0].reshape(ppa, ppa), levels=25)
axs[2].set_title('pred df2')
axs[2].contourf(xx, yy, mu_d[:,1].reshape(ppa, ppa), levels=25)
return model

View file

@ -23,24 +23,42 @@ class DiffKern(Kern):
self.base_kern.parameters_changed() self.base_kern.parameters_changed()
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension def K(self, X, X2=None, dimX2=None): #X in dimension self.dimension
if X2 is None: if X2 is None:
X2 = X X2 = X
if dimX2 is None: if dimX2 is None:
dimX2 = self.dimension dimX2 = self.dimension
return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2) return self.base_kern.dK2_dXdX2(X, X2, self.dimension, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX, dimX2=None):
if dimX2 is None:
dimX2 = self.dimension
return self.base_kern.dK3_dXdXdX2(X, X2, dimX, self.dimension, dimX2)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def Kdiag(self, X): def Kdiag(self, X):
return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension)) return self.base_kern.dK2_dXdX2diag(X, self.dimension, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
return self.base_kern.dK3_dXdXdX2diag(X, dimX, self.dimension, self.dimension)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX(X,X2, self.dimension) return self.base_kern.dK_dX(X, X2, self.dimension)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX2(X,X2, self.dimension) return self.base_kern.dK_dX2(X, X2, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2_wrap(self, X, X2, dimX):
return self.base_kern.dK2_dXdX2(X, X2, dimX, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX_wrap(self, X, X2, dimX):
return self.base_kern.dK2_dXdX(X, X2, dimX, self.dimension)
def reset_gradients(self): def reset_gradients(self):
self.base_kern.reset_gradients() self.base_kern.reset_gradients()
@ -56,33 +74,33 @@ class DiffKern(Kern):
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None): def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
if dimX2 is None: if dimX2 is None:
dimX2 = self.dimension dimX2 = self.dimension
gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2) gradients = self.base_kern.dgradients2_dXdX2(X, X2, self.dimension, dimX2)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_diag(self, dL_dK_diag, X): def update_gradients_diag(self, dL_dK_diag, X):
gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension) gradients = self.base_kern.dgradients2_dXdX2(X, X, self.dimension, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients]) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
def update_gradients_dK_dX(self, dL_dK, X, X2=None): def update_gradients_dK_dX(self, dL_dK, X, X2=None):
if X2 is None: if X2 is None:
X2 = X X2 = X
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension) gradients = self.base_kern.dgradients_dX(X, X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_dK_dX2(self, dL_dK, X, X2=None): def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension) gradients = self.base_kern.dgradients_dX2(X, X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def gradients_X(self, dL_dK, X, X2): def gradients_X(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension] tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:,self.dimension]
return np.sum(tmp, axis=1) return np.sum(tmp, axis=1)
def gradients_X2(self, dL_dK, X, X2): def gradients_X2(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :] tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,self.dimension,:]
return np.sum(tmp, axis=1) return np.sum(tmp, axis=1)
def _convert_gradients(self, l,g, f = lambda x:x): def _convert_gradients(self, l, g, f=lambda x:x):
if type(g) is np.ndarray: if type(g) is np.ndarray:
return np.sum(f(l)*f(g)) return np.sum(f(l)*f(g))
else: else:
return np.array([np.sum(f(l)*f(gi)) for gi in g]) return np.array([np.sum(f(l)*f(gi)) for gi in g])

View file

@ -22,7 +22,14 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'dK_dX', _slice_dK_dX) put_clean(dct, 'dK_dX', _slice_dK_dX)
put_clean(dct, 'dK_dX2', _slice_dK_dX) put_clean(dct, 'dK_dX2', _slice_dK_dX)
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2) put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
put_clean(dct, 'dK2_dXdX', _slice_dK2_dXdX2)
put_clean(dct, 'dK3_dXdXdX2', _slice_dK3_dXdXdX2)
put_clean(dct, 'Kdiag', _slice_Kdiag) put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'dK_dXdiag', _slice_dK_dXdiag)
put_clean(dct, 'dK_dX2diag', _slice_dK_dXdiag)
put_clean(dct, 'dK2_dXdX2diag', _slice_dK2_dXdX2diag)
put_clean(dct, 'dK2_dXdXdiag', _slice_dK2_dXdX2diag)
put_clean(dct, 'dK3_dXdXdX2diag', _slice_dK3_dXdXdX2diag)
put_clean(dct, 'phi', _slice_Kdiag) put_clean(dct, 'phi', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
@ -35,9 +42,10 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag) put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X) put_clean(dct, 'dgradients', _slice_partial_gradients_list)
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X) put_clean(dct, 'dgradients_dX', _slice_partial_gradients_list_X)
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX) put_clean(dct, 'dgradients_dX2', _slice_partial_gradients_list_X)
put_clean(dct, 'dgradients2_dXdX2', _slice_partial_gradients_list_XX)
put_clean(dct, 'psi0', _slice_psi) put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi) put_clean(dct, 'psi1', _slice_psi)
@ -155,6 +163,18 @@ def _slice_dK_dX(f):
return ret return ret
return wrap return wrap
def _slice_dK_dXdiag(f):
@wraps(f)
def wrap(self, X, dim, *a, **kw):
with _Slice_wrap(self, X, None) as s:
d = s.k._project_dim(dim)
if d is None:
ret = np.zeros(X.shape[0])
else:
ret = f(self, s.X, dim, *a, **kw)
return ret
return wrap
def _slice_dK2_dXdX2(f): def _slice_dK2_dXdX2(f):
@wraps(f) @wraps(f)
def wrap(self, X, X2, dimX, dimX2, *a, **kw): def wrap(self, X, X2, dimX, dimX2, *a, **kw):
@ -168,6 +188,59 @@ def _slice_dK2_dXdX2(f):
return ret return ret
return wrap return wrap
def _slice_dK2_dXdX2diag(f):
@wraps(f)
def wrap(self, X, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, None) as s:
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (d is None) or (d2 is None):
ret = np.zeros(X.shape[0])
else:
ret = f(self, s.X, d, d2, *a, **kw)
return ret
return wrap
def _slice_dK3_dXdXdX2(f):
@wraps(f)
def wrap(self, X, X2, dim, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
D = s.k._project_dim(dim)
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (D is None) or (d is None) or (d2 is None):
ret = np.zeros((X.shape[0], X2.shape[0]))
else:
ret = f(self, s.X, s.X2, D, d, d2, *a, **kw)
return ret
return wrap
def _slice_dK3_dXdXdX2diag(f):
@wraps(f)
def wrap(self, X, dim, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, None) as s:
D = s.k._project_dim(dim)
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (D is None) or (d is None) or (d2 is None):
ret = np.zeros(X.shape[0])
else:
ret = f(self, s.X, D, d, d2, *a, **kw)
return ret
return wrap
def _slice_partial_gradients_list(f):
@wraps(f)
def wrap(self, X, X2):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
ret = f(self, s.X, s.X2)
return ret
return wrap
def _slice_partial_gradients_X(f): def _slice_partial_gradients_X(f):
@wraps(f) @wraps(f)
def wrap(self, X, X2, dim): def wrap(self, X, X2, dim):

View file

@ -7,20 +7,24 @@ import numpy as np
from functools import partial from functools import partial
class KernWrapper(Kern): class KernWrapper(Kern):
def __init__(self, fk, fug, fg, base_kern): def __init__(self, fk, fdk, fug, fg, base_kern):
self.fk = fk self.fk = fk
self.fdk = fdk
self.fug = fug self.fug = fug
self.fg = fg self.fg = fg
self.base_kern = base_kern self.base_kern = base_kern
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False) super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper', useGPU=False)
def K(self, X, X2=None): def K(self, X, X2=None):
return self.fk(X,X2=X2) return self.fk(X, X2=X2)
def dK_dX(self, X, X2, dimX):
return self.fdk(X, X2, dimX)
def update_gradients_full(self,dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
return self.fug(dL_dK, X, X2=X2) return self.fug(dL_dK, X, X2=X2)
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
return self.fg(dL_dK, X, X2=X2) return self.fg(dL_dK, X, X2=X2)
@property @property
@ -57,28 +61,46 @@ class MultioutputDerivativeKern(MultioutputKern):
#build covariance structure #build covariance structure
covariance = [[None for i in range(nl)] for j in range(nl)] covariance = [[None for i in range(nl)] for j in range(nl)]
linked = [] linked = []
for i in range(0,nl): for i in range(0, nl):
unique=True unique = True
for j in range(0,nl): for j in range(0, nl):
if i==j or (kernels[i] is kernels[j]): if (i == j) or (kernels[i] is kernels[j]):
kern = kernels[i] kern = kernels[i]
if i>j: if i > j:
unique=False unique = False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given elif cross_covariances.get((i,j)) is not None: #cross covariance is given
kern = cross_covariances.get((i,j)) kern = cross_covariances.get((i,j))
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other elif (kernels[i].name == 'DiffKern') and (kernels[i].base_kern == kernels[j]): # one is derivative of other
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j]) kern = KernWrapper(
kernels[i].dK_dX_wrap,
kernels[i].dK2_dXdX_wrap,
kernels[i].update_gradients_dK_dX,
kernels[i].gradients_X,
kernels[j]
)
unique=False unique=False
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other elif (kernels[j].name == 'DiffKern') and (kernels[j].base_kern == kernels[i]): # one is derivative of other
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i]) kern = KernWrapper(
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives kernels[j].dK_dX2_wrap,
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern) kernels[j].dK2_dXdX2_wrap,
if i>j: kernels[j].update_gradients_dK_dX2,
unique=False kernels[j].gradients_X2,
kernels[i]
)
elif (kernels[i].name == 'DiffKern') and (kernels[j].name == 'DiffKern') and (kernels[i].base_kern == kernels[j].base_kern): #both are partial derivatives
kern = KernWrapper(
partial(kernels[i].K, dimX2=kernels[j].dimension),
partial(kernels[i].dK_dX, dimX2=kernels[j].dimension),
partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),
None,
kernels[i].base_kern
)
if i > j:
unique = False
else: else:
kern = ZeroKern() kern = ZeroKern()
covariance[i][j] = kern covariance[i][j] = kern
if unique is True: if unique is True:
linked.append(i) linked.append(i)
self.covariance = covariance self.covariance = covariance
self.link_parameters(*[kernels[i] for i in linked]) self.link_parameters(*[kernels[i] for i in linked])

View file

@ -85,21 +85,63 @@ class MultioutputKern(CombinationKernel):
self.link_parameters(*[kernels[i] for i in linked]) self.link_parameters(*[kernels[i] for i in linked])
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def K(self, X ,X2=None): def K(self, X, X2=None):
if X2 is None: if X2 is None:
X2 = X X2 = X
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] for j in range(len(slices2)):
for i in range(len(slices)):
for l in range(len(slices2[j])):
for k in range(len(slices[i])):
cov_K = self.covariance[i][j].K(X[slices[i][k],:], X2[slices2[j][l],:])
target.__setitem__((slices[i][k], slices2[j][l]), cov_K)
return target return target
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def Kdiag(self,X): def Kdiag(self, X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] for kern, slices_i in zip(kerns, slices):
for s in slices_i:
np.copyto(target[s], kern.Kdiag(X[s]))
return target
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
if X2 is None:
X2 = X
slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
for j in range(len(slices2)):
for i in range(len(slices)):
for l in range(len(slices2[j])):
for k in range(len(slices[i])):
cov_dK_dX = self.covariance[i][j].dK_dX(X[slices[i][k],:], X2[slices2[j][l],:], dimX)
target.__setitem__((slices[i][k], slices2[j][l]), cov_dK_dX)
return target
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0])
for kern, slices_i in zip(kerns, slices):
for s in slices_i:
np.copyto(target[s], kern.dK_dXdiag(X[s], dimX))
return target return target
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2): def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
@ -115,19 +157,35 @@ class MultioutputKern(CombinationKernel):
def reset_gradients(self): def reset_gradients(self):
for kern in self.kern: kern.reset_gradients() for kern in self.kern: kern.reset_gradients()
def update_gradients_full(self,dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
self.reset_gradients() if X2 is None:
X2 = X
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
if X2 is not None: slices2 = index_to_slices(X2[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim])
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] self.reset_gradients()
else: for j in range(len(slices2)):
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] for i in range(len(slices)):
for l in range(len(slices2[j])):
for k in range(len(slices[i])):
self._update_gradients_full_wrapper(
self.covariance[i][j],
dL_dK[slices[i][k],slices2[j][l]],
X[slices[i][k],:],
X2[slices2[j][l],:]
)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.reset_gradients()
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
[[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
self.reset_gradients()
for i in range(len(slices)):
for k in range(len(slices[i])):
self._update_gradients_diag_wrapper(
self.covariance[i][i],
dL_dKdiag[slices[i][k]],
X[slices[i][k],:]
)
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
@ -137,4 +195,4 @@ class MultioutputKern(CombinationKernel):
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else: else:
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
return target return target

View file

@ -70,6 +70,310 @@ class Prod(CombinationKernel):
which_parts = self.parts which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def reset_gradients(self):
for part in self.parts:
part.reset_gradients()
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dX(self, X, X2, dimX, which_parts=None):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
else:
prod = np.ones(prod_sum.shape)
to_update = list(set(which_parts) - set(combination))[0]
prod_sum += prod*to_update.dK_dX(X, X2, dimX)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dXdiag(self, X, dimX, which_parts=None):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
Returns only diagonal elements.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros(X.shape[0])
for combination in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination])
else:
prod = np.ones(prod_sum.shape)
to_update = list(set(which_parts) - set(combination))[0]
prod_sum += prod*to_update.dK_dXdiag(X, dimX)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dX2(self, X, X2, dimX2, which_parts=None):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
else:
prod = np.ones(prod_sum.shape)
to_update = list(set(which_parts) - set(combination))[0]
prod_sum += prod*to_update.dK_dX2(X, X2, dimX2)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK2_dXdX2(self, X, X2, dimX, dimX2, which_parts=None):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK2_dXdX2(X, X2, dimX, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK_dX(X, X2, dimX)*to_update2.dK_dX2(X, X2, dimX2)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK2_dXdX2diag(self, X, dimX, dimX2, which_parts=None):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros(X.shape[0])
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK_dXdiag(X, dimX)*to_update2.dK_dX2diag(X, dimX)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK2_dXdX(self, X, X2, dimX_0, dimX_1, which_parts=None):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK2_dXdX(X, X2, dimX_0, dimX_1)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK_dX(X, X2, dimX_1)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2, which_parts=None):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros((X.shape[0], X2.shape[0]))
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK3_dXdXdX2(X, X2, dimX_0, dimX_1, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK2_dXdX2(X, X2, dimX_0, dimX2)*to_update2.dK_dX(X, X2, dimX_1)
prod_sum += prod*to_update1.dK2_dXdX(X, X2, dimX_0, dimX_1)*to_update2.dK_dX2(X, X2, dimX2)
prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK2_dXdX2(X, X2, dimX_1, dimX2)
if len(which_parts) > 2:
for combination3 in itertools.combinations(combination2, len(combination2) - 1):
if len(combination3) > 0:
prod = reduce(np.multiply, [p.K(X, X2) for p in combination3])
else:
prod = np.ones(prod_sum.shape)
to_update3 = list(set(combination2) - set(combination3))[0]
prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK_dX2(X, X2, dimX2)*to_update3.dK_dX(X, X2, dimX_1)
return prod_sum
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2, which_parts=None):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements of the covariance matrix.
"""
if which_parts is None:
which_parts = self.parts
prod_sum = np.zeros(X.shape[0])
for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
if len(combination1) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1])
else:
prod = np.ones(prod_sum.shape)
to_update1 = list(set(which_parts) - set(combination1))[0]
prod_sum += prod*to_update1.dK3_dXdXdX2diag(X, dimX_0, dimX_1, dimX2)
if len(which_parts) > 1:
for combination2 in itertools.combinations(combination1, len(combination1) - 1):
if len(combination2) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2])
else:
prod = np.ones(prod_sum.shape)
to_update2 = list(set(combination1) - set(combination2))[0]
prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX_0, dimX2)*to_update2.dK_dXdiag(X, dimX_1)
prod_sum += prod*to_update1.dK2_dXdXdiag(X, dimX_0, dimX_1)*to_update2.dK_dX2diag(X, dimX2)
prod_sum += prod*to_update1.dK_dXdiag(X, dimX_0)*to_update2.dK2_dXdX2diag(X, dimX_1, dimX2)
if len(which_parts) > 2:
for combination3 in itertools.combinations(combination2, len(combination2) - 1):
if len(combination3) > 0:
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination3])
else:
prod = np.ones(prod_sum.shape)
to_update3 = list(set(combination2) - set(combination3))[0]
prod_sum += prod*to_update1.dK_dXdiag(X, dimX_0)*to_update2.dK_dX2diag(X, dimX2)*to_update3.dK_dXdiag(X, dimX_1)
return prod_sum
def update_gradients_direct(self, *args):
for i, (g,p) in enumerate(zip(args, self.parts)):
p.update_gradients_direct(*g)
def dgradients_dX(self, X, X2, dimX, parts=None):
"""
Compute the hyperparameter gradients of:
the derivative of K with respect to dimension dimX of set X
("dK_dX").
"""
if parts is None:
parts = self.parts
gradients = []
for part in parts:
neq_parts = [p for p in parts if p is not part]
if len(neq_parts) > 0:
K = self.K(X, X2, which_parts=neq_parts)
K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts)
else:
K = np.ones((X.shape[0], X2.shape[0]))
K_dx = np.zeros((X.shape[0], X2.shape[0]))
g = part.dgradients(X, X2)
g_dx = part.dgradients_dX(X, X2, dimX)
gradients += [[(g_i*K_dx + g_dx_i*K) for (g_i, g_dx_i) in zip(g, g_dx)]]
return gradients
def dgradients_dX2(self, X, X2, dimX2, parts=None):
"""
Compute the hyperparameter gradients of:
the derivative of K with respect to dimension dimX2 of set X2
("dK_dX2").
"""
if parts is None:
parts = self.parts
gradients = []
for part in parts:
neq_parts = [p for p in parts if p is not part]
if len(neq_parts) > 0:
K = self.K(X, X2, which_parts=neq_parts)
K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts)
else:
K = np.ones((X.shape[0], X2.shape[0]))
K_dx2 = np.zeros((X.shape[0], X2.shape[0]))
g = part.dgradients(X, X2)
g_dx2 = part.dgradients_dX2(X, X2, dimX2)
gradients += [[(g_i*K_dx2 + g_dx2_i*K) for (g_i, g_dx2_i) in zip(g, g_dx2)]]
return gradients
def dgradients2_dXdX2(self, X, X2, dimX, dimX2, parts=None):
"""
Compute the hyperparameter gradients of:
the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2
("dK2_dXdX2").
"""
if parts is None:
parts = self.parts
gradients = []
for part in parts:
neq_parts = [p for p in parts if p is not part]
K = self.K(X, X2, which_parts=neq_parts)
K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts)
K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts)
K_dxdx2 = self.dK2_dXdX2(X, X2, dimX, dimX2, which_parts=neq_parts)
g = part.dgradients(X, X2)
g_dx = part.dgradients_dX(X, X2, dimX)
g_dx2 = part.dgradients_dX2(X, X2, dimX2)
g_dxdx2 = part.dgradients2_dXdX2(X, X2, dimX, dimX2)
gradients += [[(g_i*K_dxdx2 + g_dx_i*K_dx2 + g_dx2_i*K_dx + g_dxdx2_i*K) for (g_i, g_dx_i, g_dx2_i, g_dxdx2_i) in zip(g, g_dx, g_dx2, g_dxdx2)]]
return gradients
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
if len(self.parts)==2: if len(self.parts)==2:
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2) self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)

View file

@ -53,24 +53,126 @@ class RBF(Stationary):
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX): def dK_dX(self, X, X2, dimX):
r = self._scaled_dist(X, X2) """
K = self.K_of_r(r) Compute the derivative of K with respect to:
dist = X[:,None,dimX]-X2[None,:,dimX] dimension dimX of set X.
lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX] """
return -1.*K*dist*lengthscale2inv lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
dist = X[:,None,dimX] - X2[None,:,dimX]
return -dist*(lengthscaleinv**2)*self._clean_K(X, X2)
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK_dX2(self, X, X2, dimX2): def dK_dX2(self, X, X2, dimX2):
return -self.dK_dX(X,X2, dimX2) """
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
"""
return -self._clean_dK_dX(X, X2, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK_dX2diag(self, X, dimX2):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK2_dXdX2(self, X, X2, dimX, dimX2): def dK2_dXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2) """
K = self.K_of_r(r) Compute the second derivative of K with respect to:
if X2 is None: dimension dimX of set X, and
X2=X dimension dimX2 of set X2.
dist = X[:,None,:]-X2[None,:,:] """
lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2) lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX] dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
term = dist[dimX]*(lengthscaleinv[dimX]**2)
term *= dist[dimX2]*(lengthscaleinv[dimX2]**2)
if dimX == dimX2:
term -= (lengthscaleinv[dimX]**2)
return -term*self._clean_K(X, X2)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2diag(self, X, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
if dimX == dimX2:
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
return np.ones(X.shape[0])*(lengthscaleinv[dimX]**2)*self.variance
else:
return np.zeros(X.shape[0])
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX(self, X, X2, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
"""
return -self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdXdiag(self, X, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
Returns only diagonal elements.
"""
return -self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1)
@Cache_this(limit=3, ignore_args=())
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
term = dist[dimX_0]*(lengthscaleinv[dimX_0]**2)
term *= dist[dimX_1]*(lengthscaleinv[dimX_1]**2)
term *= dist[dimX2]*(lengthscaleinv[dimX2]**2)
if dimX_0 == dimX_1:
term -= dist[dimX2]*(lengthscaleinv[dimX2]**2)*(lengthscaleinv[dimX_0]**2)
if dimX_0 == dimX2:
term -= dist[dimX_1]*(lengthscaleinv[dimX_1]**2)*(lengthscaleinv[dimX_0]**2)
if dimX_1 == dimX2:
term -= dist[dimX_0]*(lengthscaleinv[dimX_0]**2)*(lengthscaleinv[dimX_1]**2)
return term*self._clean_K(X, X2)
@Cache_this(limit=3, ignore_args=())
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements of the covariance matrix.
"""
return np.zeros(X.shape[0])
def dK_dr(self, r): def dK_dr(self, r):
return -r*self.K_of_r(r) return -r*self.K_of_r(r)
@ -80,73 +182,132 @@ class RBF(Stationary):
def dK2_drdr_diag(self): def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros return -self.variance # as the diagonal of r is always filled with zeros
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK_dvariance(self,X,X2): def dK_dvariance(self, X, X2):
return self.K(X,X2)/self.variance """
Compute the derivative of K with respect to variance.
"""
return self._clean_K(X, X2)/self.variance
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX(self, X, X2, dim): def dK_dlengthscale(self, X, X2):
return self.dK_dX(X,X2, dim)/self.variance """
Compute the derivative(s) of K with respect to lengthscale(s).
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
K = self._clean_K(X, X2)
if self.ARD:
g = []
for diml in range(self.input_dim):
g += [(dist[diml]**2)*(lengthscaleinv[diml]**3)*K]
else:
g = (lengthscaleinv[0]**3)*np.sum(dist**2, axis=0)*K
return g
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX2(self, X, X2, dim): def dK2_dvariancedX(self, X, X2, dimX):
return self.dK_dX2(X,X2, dim)/self.variance """
Compute the second derivative of K with respect to:
variance, and
dimension dimX of set X.
"""
return self._clean_dK_dX(X, X2, dimX)/self.variance
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK3_dvariancedXdX2(self, X, X2, dim, dimX2): def dK2_dvariancedX2(self, X, X2, dimX2):
return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance """
Compute the second derivative of K with respect to:
variance, and
dimension dimX2 of set X2.
"""
return -self.dK2_dvariancedX(X, X2, dimX2)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX(self, X, X2, dimX): def dK2_dlengthscaledX(self, X, X2, dimX):
r = self._scaled_dist(X, X2) """
K = self.K_of_r(r) Compute the second derivative(s) of K with respect to:
if X2 is None: lengthscale(s), and
X2=X dimension dimX of set X.
dist = X[:,None,:]-X2[None,:,:] """
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
dK_dX = self._clean_dK_dX(X, X2, dimX)
dK_dl = self.dK_dlengthscale(X, X2)
if self.ARD: if self.ARD:
g = [] g = []
for diml in range(X.shape[1]): for diml in range(self.input_dim):
g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)] term = -dist[dimX]*(lengthscaleinv[dimX]**2)*dK_dl[diml]
if diml == dimX:
term -= 2*lengthscaleinv[dimX]*dK_dX
g += [term]
else: else:
g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K term = -dist[dimX]*(lengthscaleinv[0]**2)*dK_dl
term -= 2*lengthscaleinv[0]*dK_dX
g = term
return g return g
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX2(self, X, X2, dimX2): def dK2_dlengthscaledX2(self, X, X2, dimX2):
tmp = self.dK2_dlengthscaledX(X, X2, dimX2) """
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX2 of set X2.
"""
dK2_dlengthscaledX = self.dK2_dlengthscaledX(X, X2, dimX2)
if self.ARD: if self.ARD:
return [-1.*g for g in tmp] return [-1.*g for g in dK2_dlengthscaledX]
else: else:
return -1*tmp return -1*dK2_dlengthscaledX
@Cache_this(limit=3, ignore_args=())
def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative of K with respect to:
variance,
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
return self._clean_dK2_dXdX2(X, X2, dimX, dimX2)/self.variance
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2): def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2) """
K = self.K_of_r(r) Compute the third derivative(s) of K with respect to:
if X2 is None: lengthscale(s),
X2=X dimension dimX of set X, and
dist = X[:,None,:]-X2[None,:,:] dimension dimX2 of set X2.
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) """
lengthscale2inv = lengthscaleinv**2 lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
K = self._clean_K(X, X2)
dK_dX = self._clean_dK_dX(X, X2, dimX)
dK_dX2 = self._clean_dK_dX(X, X2, dimX2)
dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
if self.ARD: if self.ARD:
g = [] g = []
for diml in range(X.shape[1]): for diml in range(self.input_dim):
tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3) term = (dist[diml]**2)*(lengthscaleinv[diml]**3)*dK2_dXdX2
if dimX == dimX2:
tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2)
if diml == dimX: if diml == dimX:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3) term -= 2*dist[dimX]*(lengthscaleinv[dimX]**3)*dK_dX2
if diml == dimX2: if diml == dimX2:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3) term -= 2*dist[dimX2]*(lengthscaleinv[dimX2]**3)*dK_dX
if dimX == dimX2: if diml == dimX == dimX2:
tmp += -2.*K*(lengthscaleinv[dimX]**3) term -= 2*(lengthscaleinv[dimX]**3)*K
g += [tmp] g += [term]
else: else:
g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5) term = np.sum(dist**2, axis=0)*dK2_dXdX2
term -= 4*dist[dimX2]*dK_dX
if dimX == dimX2: if dimX == dimX2:
g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2) term -= 2*K
g = (lengthscaleinv[0]**3)*term
return g return g
def __getstate__(self): def __getstate__(self):

View file

@ -122,7 +122,6 @@ class StdPeriodic(Kern):
pass pass
def K(self, X, X2=None): def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2.""" """Compute the covariance matrix between X and X2."""
if X2 is None: if X2 is None:
@ -133,13 +132,372 @@ class StdPeriodic(Kern):
return self.variance * exp_dist return self.variance * exp_dist
def Kdiag(self, X): def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
ret = np.empty(X.shape[0]) ret = np.empty(X.shape[0])
ret[:] = self.variance ret[:] = self.variance
return ret return ret
def dK_dX(self, X, X2, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX]
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dist = X[:,None,dimX] - X2[None,:,dimX]
base = np.pi*periodinv*dist
return -F*np.sin(2*base)*self._clean_K(X, X2)
def dK_dXdiag(self, X, dimX):
"""
Compute the derivative of K with respect to:
dimension dimX of set X.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
def dK_dX2(self, X, X2, dimX2):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
"""
return -self._clean_dK_dX(X, X2, dimX2)
def dK_dX2diag(self, X, dimX2):
"""
Compute the derivative of K with respect to:
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
def dK2_dXdX2(self, X, X2, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
term = np.sin(2*base)*self._clean_dK_dX(X, X2, dimX)
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_K(X, X2)
return F*term
def dK2_dXdX2diag(self, X, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
dimension dimX of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements.
"""
if dimX == dimX2:
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
return (np.pi**2)*(lengthscaleinv**2)*(periodinv**2)*self.variance*np.ones(X.shape[0])
else:
return np.zeros(X.shape[0])
def dK2_dXdX(self, X, X2, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
"""
return -self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1)
def dK2_dXdXdiag(self, X, dimX_0, dimX_1):
"""
Compute the second derivative of K with respect to:
dimension dimX_0 of set X, and
dimension dimX_1 of set X.
Returns only diagonal elements.
"""
return -self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1)
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
term = np.sin(2*base)*self._clean_dK2_dXdX(X, X2, dimX_0, dimX_1)
if dimX_0 == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_dK_dX(X, X2, dimX_1)
if dimX_1 == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_dK_dX(X, X2, dimX_0)
if dimX_0 == dimX_1 == dimX2:
term -= 4*(np.pi**2)*(periodinv**2)*np.sin(2*base)*self._clean_K(X, X2)
return F*term
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2):
"""
Compute the third derivative of K with respect to:
dimension dimX_0 of set X,
dimension dimX_1 of set X, and
dimension dimX2 of set X2.
Returns only diagonal elements of the covariance matrix.
"""
return np.zeros(X.shape[0])
def dK_dvariance(self, X, X2):
"""
Compute the derivative of K with respect to variance.
"""
return self._clean_K(X, X2)/self.variance
def dK_dlengthscale(self, X, X2):
"""
Compute the derivative(s) of K with respect to lengthscale(s).
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
periodinv = (np.ones(X.shape[1])/(self.period))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
base = np.pi*periodinv[:,None,None]*dist
K = self._clean_K(X, X2)
if self.ARD2:
g = []
for diml in range(self.input_dim):
g += [(lengthscaleinv[diml]**3)*np.square(np.sin(base[diml]))*K]
else:
g = (lengthscaleinv[0]**3)*np.sum(np.square(np.sin(base)), axis=0)*K
return g
def dK_dperiod(self, X, X2):
"""
Compute the derivative(s) of K with respect to period(s).
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))
periodinv = (np.ones(X.shape[1])/(self.period))
dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0)
base = np.pi*periodinv[:,None,None]*dist
K = self._clean_K(X, X2)
if self.ARD1:
g = []
for diml in range(self.input_dim):
g += [0.5*base[diml]*(lengthscaleinv[diml]**2)*periodinv[diml]*np.sin(2*base[diml])*K]
else:
g = 0.5*periodinv[0]*np.sum(base*(lengthscaleinv**2)[:,None,None]*np.sin(2*base), axis=0)*K
return g
def dK2_dvariancedX(self, X, X2, dimX):
"""
Compute the second derivative of K with respect to:
variance, and
dimension dimX of set X.
"""
return self._clean_dK_dX(X, X2, dimX)/self.variance
def dK2_dvariancedX2(self, X, X2, dimX2):
"""
Compute the second derivative of K with respect to:
variance, and
dimension dimX2 of set X2.
"""
return -self.dK2_dvariancedX(X, X2, dimX2)
def dK2_dlengthscaledX(self, X, X2, dimX):
"""
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX]
dist = X[:,None,dimX] - X2[None,:,dimX]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
K = self._clean_K(X, X2)
dK_dl = self.dK_dlengthscale(X, X2)
if self.ARD2:
g = []
for diml in range(self.input_dim):
term = dK_dl[diml]
if diml == dimX:
term -= 2*lengthscaleinv*K
g += [-F*np.sin(2*base)*term]
else:
g = -F*np.sin(2*base)*(dK_dl - 2*lengthscaleinv*K)
return g
def dK2_dlengthscaledX2(self, X, X2, dimX2):
"""
Compute the second derivative(s) of K with respect to:
lengthscale(s), and
dimension dimX2 of set X2.
"""
dK2_dldX = self.dK2_dlengthscaledX(X, X2, dimX2)
if self.ARD2:
return [-1*g for g in dK2_dldX]
else:
return -1*dK2_dldX
def dK2_dperioddX(self, X, X2, dimX):
"""
Compute the second derivative(s) of K with respect to:
period(s), and
dimension dimX of set X.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX]
dist = X[:,None,dimX] - X2[None,:,dimX]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
K = self._clean_K(X, X2)
dK_dT = self.dK_dperiod(X, X2)
if self.ARD1:
g = []
for dimT in range(self.input_dim):
term = np.sin(2*base)*dK_dT[dimT]
if dimT == dimX:
term -= periodinv*(np.sin(2*base)+2*base*np.cos(2*base))*K
g += [-F*term]
else:
term = np.sin(2*base)*dK_dT
term -= periodinv*(np.sin(2*base)+2*base*np.cos(2*base))*K
g = -F*term
return g
def dK2_dperioddX2(self, X, X2, dimX2):
"""
Compute the second derivative(s) of K with respect to:
period(s), and
dimension dimX2 of set X2.
"""
dK2_dperioddX = self.dK2_dperioddX(X, X2, dimX2)
if self.ARD1:
return [-1*g for g in dK2_dperioddX]
else:
return -1*dK2_dperioddX
def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative of K with respect to:
variance,
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
return self._clean_dK2_dXdX2(X, X2, dimX, dimX2)/self.variance
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative(s) of K with respect to:
lengthscale(s),
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
dK_dl = self.dK_dlengthscale(X, X2)
dK2_dldX = self.dK2_dlengthscaledX(X, X2, dimX)
if self.ARD2:
g = []
for diml in range(self.input_dim):
term = np.sin(2*base)*dK2_dldX[diml]
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*dK_dl[diml]
term *= F
if diml == dimX2:
term -= 2*lengthscaleinv*dK2_dXdX2
g += [term]
else:
term = np.sin(2*base)*dK2_dldX
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*dK_dl
term *= F
term -= 2*lengthscaleinv*dK2_dXdX2
g = term
return g
def dK3_dperioddXdX2(self, X, X2, dimX, dimX2):
"""
Compute the third derivative(s) of K with respect to:
period(s),
dimension dimX of set X, and
dimension dimX2 of set X2.
"""
lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
periodinv = (np.ones(X.shape[1])/(self.period))[dimX2]
dist = X[:,None,dimX2] - X2[None,:,dimX2]
base = np.pi*periodinv*dist
F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor
K = self._clean_K(X, X2)
dK_dX = self._clean_dK_dX(X, X2, dimX)
dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
dK_dT = self.dK_dperiod(X, X2)
dK2_dTdX = self.dK2_dperioddX(X, X2, dimX)
if self.ARD1:
g = []
for dimT in range(self.input_dim):
term = np.sin(2*base)*dK2_dTdX[dimT]
if dimT == dimX2:
term -= 2*periodinv*np.cos(2*base)*base*dK_dX
if dimX == dimX2:
term += 2*np.pi*periodinv*np.cos(2*base)*dK_dT[dimT]
if dimX == dimX2 == dimT:
term += 2*np.pi*(periodinv**2)*(2*base*np.sin(2*base)-np.cos(2*base))*K
term *= F
if dimT == dimX2:
term -= periodinv*dK2_dXdX2
g += [term]
else:
term = np.sin(2*base)*dK2_dTdX-2*periodinv*base*np.cos(2*base)*dK_dX
if dimX == dimX2:
term += 2*np.pi*periodinv*(np.cos(2*base)*dK_dT+periodinv*(2*base*np.sin(2*base)-np.cos(2*base))*K)
g = F*term-periodinv*dK2_dXdX2
return g
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: if X2 is None:
@ -167,12 +525,52 @@ class StdPeriodic(Kern):
else: # same lengthscales else: # same lengthscales
self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK) self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
def update_gradients_direct(self, dL_dVar, dL_dPer, dL_dLen):
self.variance.gradient = dL_dVar
self.period.gradient = dL_dPer
self.lengthscale.gradient = dL_dLen
def reset_gradients(self):
self.variance.gradient = 0.
if not self.ARD1:
self.period.gradient = 0.
else:
self.period.gradient = np.zeros(self.input_dim)
if not self.ARD2:
self.lengthscale.gradient = 0.
else:
self.lengthscale.gradient = np.zeros(self.input_dim)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
self.variance.gradient = np.sum(dL_dKdiag) self.variance.gradient = np.sum(dL_dKdiag)
self.period.gradient = 0 self.period.gradient = 0
self.lengthscale.gradient = 0 self.lengthscale.gradient = 0
def dgradients(self, X, X2):
g1 = self.dK_dvariance(X, X2)
g2 = self.dK_dperiod(X, X2)
g3 = self.dK_dlengthscale(X, X2)
return [g1, g2, g3]
def dgradients_dX(self, X, X2, dimX):
g1 = self.dK2_dvariancedX(X, X2, dimX)
g2 = self.dK2_dperioddX(X, X2, dimX)
g3 = self.dK2_dlengthscaledX(X, X2, dimX)
return [g1, g2, g3]
def dgradients_dX2(self, X, X2, dimX2):
g1 = self.dK2_dvariancedX2(X, X2, dimX2)
g2 = self.dK2_dperioddX2(X, X2, dimX2)
g3 = self.dK2_dlengthscaledX2(X, X2, dimX2)
return [g1, g2, g3]
def dgradients2_dXdX2(self, X, X2, dimX, dimX2):
g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2)
g2 = self.dK3_dperioddXdX2(X, X2, dimX, dimX2)
g3 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2)
return [g1, g2, g3]
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
K = self.K(X, X2) K = self.K(X, X2)
if X2 is None: if X2 is None:
@ -185,4 +583,4 @@ class StdPeriodic(Kern):
return np.zeros(X.shape) return np.zeros(X.shape)
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2 return self.variance*np.ones(self.input_dim)/self.lengthscale**2

View file

@ -306,7 +306,12 @@ class Stationary(Kern):
l4 = np.ones(X.shape[1])*self.lengthscale**2 l4 = np.ones(X.shape[1])*self.lengthscale**2
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],)) return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape) #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def dgradients(self, X, X2):
g1 = self.dK_dvariance(X, X2)
g2 = self.dK_dlengthscale(X, X2)
return [g1, g2]
def dgradients_dX(self, X, X2, dimX): def dgradients_dX(self, X, X2, dimX):
g1 = self.dK2_dvariancedX(X, X2, dimX) g1 = self.dK2_dvariancedX(X, X2, dimX)
g2 = self.dK2_dlengthscaledX(X, X2, dimX) g2 = self.dK2_dlengthscaledX(X, X2, dimX)

View file

@ -9,6 +9,7 @@ from ..core.mapping import Mapping
from .. import likelihoods from .. import likelihoods
from ..likelihoods.gaussian import Gaussian from ..likelihoods.gaussian import Gaussian
from .. import kern from .. import kern
from ..kern import DiffKern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..util.normalizer import Standardize from ..util.normalizer import Standardize
from .. import util from .. import util
@ -69,39 +70,80 @@ class MultioutputGP(GP):
if Y_metadata is None: if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood) return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
def predictive_gradients(self, Xnew, kern=None):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
#if Y_metadata is None:
#Y_metadata={'output_index': ind}
return super(MultioutputGP, self).predictive_gradients(Xnew, kern)
def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!! def predictive_gradients(self, Xnew, kern=None):
""" """
Compute the derivatives of the predicted latent function with respect to X* Compute the derivatives of the predicted latent function with respect
to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized: derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one). dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP
Note that this is not the same as computing the mean and variance of the derivative of the function! (usually one).
Note that this is not the same as computing the mean and variance of
the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance) dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients :param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim) :type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX :returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ] :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
""" """
if isinstance(Xnew, list): if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None) Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
slices = index_to_slices(Xnew[:,-1]) slices = index_to_slices(Xnew[:,-1])
for i in range(len(slices)):
if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0):
assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)"
if kern is None: if kern is None:
kern = self.kern kern = self.kern
if all([(isinstance(k, DiffKern)) for k in self.kern.kern[1:]]):
"""
Compute the gradients of the predicted latent function and predicted
partial derivatives with respect to X*.
This works only for models that observe the gradient of the latent function.
Xnew is given as a list of arrays, where each array X*_i (size [N_i*, Q])
contains points at which to compute gradients for each predicted latent
function or partial derivative.
Resulting arrays are sized [sum_i^D : N_i*, Q]
Passing a list of only one array [X*] returns only gradients of
the predicted latent function and does not compute gradients of
predicted partial derivatives.
In this case the resulting arrays are sized [N*, Q].
:param Xnew: points at which to compute predictive gradients
:type Xnew: list
:type Xnew[i]: np.darray (sum_i^D : N_i*, Q)
:returns: dmu_dX, dv_dX
:rtype: (np.ndarray (sum_i^D : N_i*, Q), np.ndarray (sum_i^D : N_i*, Q))
"""
dims = Xnew.shape[1] - 1
mean_jac = np.empty((Xnew.shape[0], dims))
var_jac = np.empty((Xnew.shape[0], dims))
X = self._predictive_variable
alpha = self.posterior.woodbury_vector
Wi = self.posterior.woodbury_inv
k = kern.K(Xnew, X)
for dimX in range(dims):
dk_dx = kern.dK_dX(Xnew, X, dimX)
dk_dxdiag = kern.dK_dXdiag(Xnew, dimX)
mean_jac[:,dimX] = np.dot(dk_dx, alpha).flatten()
var_jac[:,dimX] = dk_dxdiag - 2*(np.dot(k, Wi)*dk_dx).sum(-1)
return mean_jac, var_jac
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim)) mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
for i in range(self.output_dim): for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1] mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]

View file

@ -5,6 +5,8 @@ from __future__ import division
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
from GPy.models import GradientChecker
from functools import reduce
class MiscTests(unittest.TestCase): class MiscTests(unittest.TestCase):
def setUp(self): def setUp(self):
@ -1208,40 +1210,6 @@ class GradientTests(np.testing.TestCase):
with self.assertRaises(RuntimeError): with self.assertRaises(RuntimeError):
m._raw_posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]])) m._raw_posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
def test_multioutput_model_with_derivative_observations(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10
M=10
sigma=0.05
sigmader=0.05
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
xd = np.array([np.linspace(2,8,M)]).T
yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1)))
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss = GPy.likelihoods.Gaussian(variance=0.1)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2)
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
# the likelihoods must follow the same order
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
m.randomize()
self.assertTrue(m.checkgrad())
m.optimize(messages=0, ipython_notebook=False)
self.assertTrue(m.checkgrad())
def test_multioutput_model_with_ep(self): def test_multioutput_model_with_ep(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3 f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
@ -1308,6 +1276,173 @@ class GradientTests(np.testing.TestCase):
c2 = model.predict(x, full_cov=True)[1] c2 = model.predict(x, full_cov=True)[1]
np.testing.assert_allclose(c1,c2) np.testing.assert_allclose(c1,c2)
class GradientMultioutputGPModelTests(np.testing.TestCase):
def setUp(self):
# standard test function
self.period = 3
self.w = 2*np.pi/self.period
self.f = lambda x: np.sum(np.square(np.sin(self.w*x)), axis=1)
self.df = lambda x: self.w*np.sin(2*self.w*x)
self.noise_std = 1e-2
self.bounds = (-self.period, self.period)
self.train_points = 5
self.test_points = 25
def approximate_predictive_gradients(self, model, x_test, D, step=1e-6):
'''
Approximates gradients of predicted posterior means and variances.
This function is used as the frameworks for GradientChecker and
MultioutputGP do not easily combine when checking gradients of predicted
partial derivative posteriors.
'''
dmdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D))
dvdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D))
for d in range(D):
x_over = x_test.copy()
x_over[:,d] += step
x_undr = x_test.copy()
x_undr[:,d] -= step
m_over, v_over = model.predict([x_over]*(D + 1))
m_undr, v_undr = model.predict([x_undr]*(D + 1))
dmdx_aprx[:,d,None] = (m_over - m_undr)/(2*step)
dvdx_aprx[:,d,None] = (v_over - v_undr)/(2*step)
return dmdx_aprx, dvdx_aprx
def check_model(self, kern):
'''
Checks predictions, hyperparameter gradients, and gradients of predicted
posterior means and variances for MultioutputGP models that incorporate
observed latent function gradient information.
'''
D = kern.input_dim
X_list = []
Y_list = []
for i in range(D + 1):
# sample inputs for either latent function or partial derivatives
X_i = np.random.uniform(*self.bounds, size=(self.train_points, D))
# output of latent function or partial derivatives
Y_i = (self.f(X_i) if (i == 0) else self.df(X_i)[:,i - 1])[:,None]
# noisy observations
Y_i += np.random.normal(scale=self.noise_std, size=Y_i.shape)
X_list.append(X_i)
Y_list.append(Y_i)
# the kernel is accompanied with derivative kernels, one for each dimension
kernel_list = [kern] + [GPy.kern.DiffKern(kern, d) for d in range(D)]
# create model and check its hyperparameter gradient
likelihood_list = [GPy.likelihoods.Gaussian(variance=self.noise_std**2)]*(D + 1)
model = GPy.models.MultioutputGP(X_list, Y_list, kernel_list, likelihood_list)
model.likelihood.constrain_fixed()
self.assertTrue(model.checkgrad(step=1e-3))
# optimize the model, and check its hyperparameter gradient again
model.optimize()
self.assertTrue(model.checkgrad(step=1e-3))
# check predictions
np.testing.assert_allclose(model.predict(X_list)[0], model.Y, atol=3*self.noise_std)
# test inputs for checking predictive gradients
x_test = np.random.uniform(*self.bounds, size=(self.test_points, D))
# predictive gradients
dmdx, dvdx = model.predictive_gradients([x_test]*(D + 1))
# approximated predictive gradients
dmdx_aprx, dvdx_aprx = self.approximate_predictive_gradients(model, x_test, D, step=1e-3)
# check predictive gradients
np.testing.assert_allclose(dmdx, dmdx_aprx, atol=3*self.noise_std)
np.testing.assert_allclose(dvdx, dvdx_aprx, atol=3*self.noise_std)
def test_MultioutputGP_gradobs_RBF(self):
'''
Testing gradient observing MultioutputGP model with an RBF kernel.
'''
for D in range(1, 4):
kern = GPy.kern.RBF(input_dim=D)
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_RBF_ARD(self):
'''
Testing gradient observing MultioutputGP model with an RBF (ARD) kernel.
'''
for D in range(1, 4):
kern = GPy.kern.RBF(input_dim=D, ARD=True)
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_StdP(self):
'''
Testing gradient observing MultioutputGP model with a StdP kernel.
'''
for D in range(1, 4):
kern = GPy.kern.StdPeriodic(input_dim=D, period=self.period)
kern.period.constrain_fixed()
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_StdP_ARD(self):
'''
Testing gradient observing MultioutputGP model with a StdP (ARD) kernel.
'''
for D in range(1, 4):
kern = GPy.kern.StdPeriodic(input_dim=D, period=[self.period]*D, ARD1=True, ARD2=True)
kern.period.constrain_fixed()
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_prod_RBF(self):
'''
Testing gradient observing MultioutputGP model with several RBF kernels.
'''
for D in range(2, 4):
kerns = [GPy.kern.RBF(input_dim=1) for d in range(D)]
kern = reduce(lambda k0, k1: k0 * k1, kerns)
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_prod_StdP(self):
'''
Testing gradient observing MultioutputGP model with several StdP kernels.
'''
for D in range(2, 4):
kerns = [GPy.kern.StdPeriodic(input_dim=1, period=self.period) for d in range(D)]
kern = reduce(lambda k0, k1: k0 * k1, kerns)
[k.period.constrain_fixed() for k in kern.parts]
kern.randomize()
self.check_model(kern)
def test_MultioutputGP_gradobs_prod_mix(self):
'''
Testing gradient observing MultioutputGP model with a mix of kernel types.
'''
for D in range(2, 4):
kerns = []
for d in range(D):
if d % 2 == 0:
k = GPy.kern.RBF(input_dim=1)
else:
k = GPy.kern.StdPeriodic(input_dim=1, period=self.period)
k.period.constrain_fixed()
kerns.append(k)
kern = reduce(lambda k0, k1: k0 * k1, kerns)
kern.randomize()
self.check_model(kern)
def _create_missing_data_model(kernel, Q): def _create_missing_data_model(kernel, Q):
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3 D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3