mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
Enhance multioutput grad obs (#995)
* multiplied RBF kernels can now be used with gradient observations * standard periodic kernels can now be used with gradient observations * predictive gradients (derivatives of posterior means and variances) can now be calculated when using gradient observations * simplified and commented RBF & StdP kernel derivatives * updated kernel slicing and commented prod kernel derivatives * removed caching from stdp kern, as it breaks optimization for some reason * fixed hyperparameter optimization for prod kernel * improved code readability * added unit tests for gradient observing MultioutputGP models * added predictions check to unit tests * bugfix for multioutput_kern * improved testing coverage * reduced size of some tests; led to an issue in an unrelated test * updated testing * added gradient MultioutputGP prod kernel example * added keywords and plotting to example
This commit is contained in:
parent
2c22f1e9c5
commit
9c1db7aa34
11 changed files with 1494 additions and 164 deletions
|
|
@ -771,3 +771,117 @@ def multioutput_gp_with_derivative_observations(plot=True):
|
||||||
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))])
|
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))])
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
def multioutput_gp_with_derivative_observations_2D(optimize=True, plot=False):
    '''
    This is an example of how to use a MultioutputGP model with gradient
    observations and multiple single-dimensional kernels of differing types.

    The latent function is exp(-x0**2) + cos(w*x1). Noisy observations of the
    function value and of both partial derivatives are taken at the same ten
    training inputs, and a product of an RBF kernel (first input dimension)
    and a StdPeriodic kernel (second input dimension) is used as the base
    kernel, with one DiffKern per input dimension for the gradient outputs.

    :param optimize: if True, ``model.optimize()`` is called before predicting
    :param plot: if True and matplotlib is available, contour plots of the
        true and predicted function and partial derivatives are drawn
    :returns: the constructed MultioutputGP model
    '''
    period = 3
    w = 2*np.pi/period  # angular frequency
    bounds = (-period, period)

    # latent function and its gradient (one column per input dimension)
    def f(x):
        return (np.exp(-x[:,0]**2) + np.cos(w*x[:,1]))[:,None]

    def df(x):
        return np.array([-2*np.exp(-x[:,0]**2)*x[:,0], -w*np.sin(w*x[:,1])]).T

    # 2D input grid used for prediction and plotting
    ppa = 25  # points per axis
    axis_points = np.linspace(bounds[0], bounds[1], ppa)
    xx, yy = np.meshgrid(axis_points, axis_points)
    grid = np.stack([xx.reshape(-1), yy.reshape(-1)], axis=1)

    fgrid = f(grid)
    dfgrid = df(grid)

    # 10 random training points generated with a space-filling sobol sequence
    X = np.array([
        [ 0.50421399,  2.1331483 ],
        [-2.15717152, -1.70295936],
        [-1.46704334,  1.37111521],
        [ 2.79064536, -0.9649018 ],
        [ 1.60728264,  0.27702713],
        [-0.30712366, -0.57372129],
        [-2.6140632 ,  2.49192488],
        [ 0.89078772, -2.85873686],
        [ 1.15813136,  0.96910322],
        [-2.83307021, -1.38155383],
    ])
    n_train = len(X)

    # Note!
    # This example uses the same inputs for function and gradient observations.
    noise_std = 1e-2
    # function observations (drawn first, so the random stream is consumed in
    # the order: function noise, then gradient noise)
    Y = f(X) + np.random.normal(scale=noise_std, size=(n_train, 1))
    # gradient observations
    dY = df(X) + np.random.normal(scale=noise_std, size=(n_train, 2))

    # gather inputs and observations into lists:
    # once for function observations, and once for each partial derivative;
    # make sure all arrays are of shape (N x dims), where N is # of training points
    X_list = [X, X, X]
    Y_list = [Y, dY[:,0,None], dY[:,1,None]]

    # the base kernel is the product of two one-dimensional kernels:
    # an RBF kernel on the first dimension and, as the function is periodic
    # in the second dimension, a StdPeriodic kernel with a fixed period there
    kern0 = GPy.kern.RBF(input_dim=1, active_dims=[0])
    kern1 = GPy.kern.StdPeriodic(input_dim=1, active_dims=[1], period=period)
    kern1.period.constrain_fixed()
    kern = kern0 * kern1

    # with gradient observations, we need a DiffKern for each dimension;
    # each DiffKern is given the main kernel as its base kernel
    diffkern0 = GPy.kern.DiffKern(kern, 0)
    diffkern1 = GPy.kern.DiffKern(kern, 1)
    kern_list = [kern, diffkern0, diffkern1]

    # a single likelihood object, repeated once per output
    shared_likelihood = GPy.likelihoods.Gaussian(variance=noise_std**2)
    likelihood_list = [shared_likelihood, shared_likelihood, shared_likelihood]

    # create the MultioutputGP model and optimize
    model = GPy.models.MultioutputGP(X_list, Y_list, kern_list, likelihood_list)
    model.likelihood.constrain_fixed()
    if optimize:
        model.optimize()

    # make function predictions (output index 0)
    Xnew, _, ind = GPy.util.multioutput.build_XY([grid], index=[0])
    Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
    mu, var = model.predict(Xnew, Y_metadata=Y_metadata)

    # make gradient predictions (output indices 1 and 2)
    Xnew, _, ind = GPy.util.multioutput.build_XY([grid, grid], index=[1, 2])
    Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
    mu_d, var_d = model.predict(Xnew, Y_metadata=Y_metadata)

    # predictions come back stacked output after output; put the two
    # partial derivatives side by side as columns
    n_grid = len(grid)
    mu_d = np.hstack([mu_d[:n_grid], mu_d[n_grid:]])
    var_d = np.hstack([var_d[:n_grid], var_d[n_grid:]])

    if plot and MPL_AVAILABLE:
        def contour_row(titles, fields):
            # one figure with three square contour panels
            fig, axs = plt.subplots(1, 3)
            for ax in axs:
                ax.set_box_aspect(1)
            for ax, title, field in zip(axs, titles, fields):
                ax.set_title(title)
                ax.contourf(xx, yy, field.reshape(ppa, ppa), levels=25)

        contour_row(('true f', 'true df1', 'true df2'),
                    (fgrid, dfgrid[:,0], dfgrid[:,1]))
        contour_row(('pred f', 'pred df1', 'pred df2'),
                    (mu, mu_d[:,0], mu_d[:,1]))

    return model
|
||||||
|
|
|
||||||
|
|
@ -23,24 +23,42 @@ class DiffKern(Kern):
|
||||||
self.base_kern.parameters_changed()
|
self.base_kern.parameters_changed()
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension
|
def K(self, X, X2=None, dimX2=None): #X in dimension self.dimension
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
X2 = X
|
X2 = X
|
||||||
if dimX2 is None:
|
if dimX2 is None:
|
||||||
dimX2 = self.dimension
|
dimX2 = self.dimension
|
||||||
return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2)
|
return self.base_kern.dK2_dXdX2(X, X2, self.dimension, dimX2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX, dimX2=None):
    """
    Derivative of this kernel's covariance with respect to dimension
    ``dimX`` of ``X``.

    Delegates to ``base_kern.dK3_dXdXdX2`` with ``self.dimension`` as the
    ``X`` derivative dimension of the DiffKern itself.

    :param dimX2: derivative dimension on the ``X2`` side; defaults to
        ``self.dimension`` when None
    """
    second_dim = self.dimension if dimX2 is None else dimX2
    return self.base_kern.dK3_dXdXdX2(X, X2, dimX, self.dimension, second_dim)
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def Kdiag(self, X):
|
def Kdiag(self, X):
|
||||||
return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension))
|
return self.base_kern.dK2_dXdX2diag(X, self.dimension, self.dimension)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
    """
    Diagonal counterpart of ``dK_dX``: forwards to
    ``base_kern.dK3_dXdXdX2diag`` with ``self.dimension`` used for both
    derivative dimensions of the DiffKern.
    """
    own_dim = self.dimension
    return self.base_kern.dK3_dXdXdX2diag(X, dimX, own_dim, own_dim)
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
|
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
|
||||||
return self.base_kern.dK_dX(X,X2, self.dimension)
|
return self.base_kern.dK_dX(X, X2, self.dimension)
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
|
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
|
||||||
return self.base_kern.dK_dX2(X,X2, self.dimension)
|
return self.base_kern.dK_dX2(X, X2, self.dimension)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2_wrap(self, X, X2, dimX):
    """
    Forward to ``base_kern.dK2_dXdX2`` with ``dimX`` as the first derivative
    dimension and ``self.dimension`` as the second.
    """
    base = self.base_kern
    return base.dK2_dXdX2(X, X2, dimX, self.dimension)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX_wrap(self, X, X2, dimX):
    """
    Forward to ``base_kern.dK2_dXdX`` with ``dimX`` as the first derivative
    dimension and ``self.dimension`` as the second.
    """
    base = self.base_kern
    return base.dK2_dXdX(X, X2, dimX, self.dimension)
|
||||||
|
|
||||||
def reset_gradients(self):
|
def reset_gradients(self):
|
||||||
self.base_kern.reset_gradients()
|
self.base_kern.reset_gradients()
|
||||||
|
|
@ -56,33 +74,33 @@ class DiffKern(Kern):
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
|
||||||
if dimX2 is None:
|
if dimX2 is None:
|
||||||
dimX2 = self.dimension
|
dimX2 = self.dimension
|
||||||
gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2)
|
gradients = self.base_kern.dgradients2_dXdX2(X, X2, self.dimension, dimX2)
|
||||||
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dK_diag, X):
|
def update_gradients_diag(self, dL_dK_diag, X):
|
||||||
gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension)
|
gradients = self.base_kern.dgradients2_dXdX2(X, X, self.dimension, self.dimension)
|
||||||
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
|
||||||
|
|
||||||
def update_gradients_dK_dX(self, dL_dK, X, X2=None):
|
def update_gradients_dK_dX(self, dL_dK, X, X2=None):
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
X2 = X
|
X2 = X
|
||||||
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
|
gradients = self.base_kern.dgradients_dX(X, X2, self.dimension)
|
||||||
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
|
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
|
||||||
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
|
gradients = self.base_kern.dgradients_dX2(X, X2, self.dimension)
|
||||||
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2):
|
def gradients_X(self, dL_dK, X, X2):
|
||||||
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension]
|
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:,self.dimension]
|
||||||
return np.sum(tmp, axis=1)
|
return np.sum(tmp, axis=1)
|
||||||
|
|
||||||
def gradients_X2(self, dL_dK, X, X2):
|
def gradients_X2(self, dL_dK, X, X2):
|
||||||
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
|
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,self.dimension,:]
|
||||||
return np.sum(tmp, axis=1)
|
return np.sum(tmp, axis=1)
|
||||||
|
|
||||||
def _convert_gradients(self, l,g, f = lambda x:x):
|
def _convert_gradients(self, l, g, f=lambda x:x):
|
||||||
if type(g) is np.ndarray:
|
if type(g) is np.ndarray:
|
||||||
return np.sum(f(l)*f(g))
|
return np.sum(f(l)*f(g))
|
||||||
else:
|
else:
|
||||||
return np.array([np.sum(f(l)*f(gi)) for gi in g])
|
return np.array([np.sum(f(l)*f(gi)) for gi in g])
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,14 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
put_clean(dct, 'dK_dX', _slice_dK_dX)
|
put_clean(dct, 'dK_dX', _slice_dK_dX)
|
||||||
put_clean(dct, 'dK_dX2', _slice_dK_dX)
|
put_clean(dct, 'dK_dX2', _slice_dK_dX)
|
||||||
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
|
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
|
||||||
|
put_clean(dct, 'dK2_dXdX', _slice_dK2_dXdX2)
|
||||||
|
put_clean(dct, 'dK3_dXdXdX2', _slice_dK3_dXdXdX2)
|
||||||
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
||||||
|
put_clean(dct, 'dK_dXdiag', _slice_dK_dXdiag)
|
||||||
|
put_clean(dct, 'dK_dX2diag', _slice_dK_dXdiag)
|
||||||
|
put_clean(dct, 'dK2_dXdX2diag', _slice_dK2_dXdX2diag)
|
||||||
|
put_clean(dct, 'dK2_dXdXdiag', _slice_dK2_dXdX2diag)
|
||||||
|
put_clean(dct, 'dK3_dXdXdX2diag', _slice_dK3_dXdXdX2diag)
|
||||||
put_clean(dct, 'phi', _slice_Kdiag)
|
put_clean(dct, 'phi', _slice_Kdiag)
|
||||||
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||||
|
|
@ -35,9 +42,10 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
|
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
|
||||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||||
|
|
||||||
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X)
|
put_clean(dct, 'dgradients', _slice_partial_gradients_list)
|
||||||
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X)
|
put_clean(dct, 'dgradients_dX', _slice_partial_gradients_list_X)
|
||||||
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX)
|
put_clean(dct, 'dgradients_dX2', _slice_partial_gradients_list_X)
|
||||||
|
put_clean(dct, 'dgradients2_dXdX2', _slice_partial_gradients_list_XX)
|
||||||
|
|
||||||
put_clean(dct, 'psi0', _slice_psi)
|
put_clean(dct, 'psi0', _slice_psi)
|
||||||
put_clean(dct, 'psi1', _slice_psi)
|
put_clean(dct, 'psi1', _slice_psi)
|
||||||
|
|
@ -155,6 +163,18 @@ def _slice_dK_dX(f):
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
|
def _slice_dK_dXdiag(f):
    """
    Decorator for diagonal first-derivative methods ``f(self, X, dim, ...)``.

    The inputs are sliced via ``_Slice_wrap`` and ``dim`` is projected with
    ``_project_dim``. When the projection is None, a zero vector with one
    entry per row of ``X`` is returned without calling ``f``.
    """
    @wraps(f)
    def wrap(self, X, dim, *a, **kw):
        with _Slice_wrap(self, X, None) as s:
            d = s.k._project_dim(dim)
            if d is None:
                ret = np.zeros(X.shape[0])
            else:
                # f receives the sliced inputs, so it must also receive the
                # projected dimension index (as the sibling wrappers do),
                # not the caller's original ``dim``.
                ret = f(self, s.X, d, *a, **kw)
        return ret
    return wrap
|
||||||
|
|
||||||
def _slice_dK2_dXdX2(f):
|
def _slice_dK2_dXdX2(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
|
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
|
||||||
|
|
@ -168,6 +188,59 @@ def _slice_dK2_dXdX2(f):
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
|
def _slice_dK2_dXdX2diag(f):
    """
    Decorator for diagonal second-derivative methods
    ``f(self, X, dimX, dimX2, ...)``.

    Slices ``X`` via ``_Slice_wrap`` and projects both dimensions with
    ``_project_dim``; if either projection is None the result is a zero
    vector with one entry per row of ``X``.
    """
    @wraps(f)
    def wrap(self, X, dimX, dimX2, *a, **kw):
        with _Slice_wrap(self, X, None) as s:
            projected = (s.k._project_dim(dimX), s.k._project_dim(dimX2))
            if any(p is None for p in projected):
                ret = np.zeros(X.shape[0])
            else:
                ret = f(self, s.X, projected[0], projected[1], *a, **kw)
        return ret
    return wrap
|
||||||
|
|
||||||
|
def _slice_dK3_dXdXdX2(f):
    """
    Decorator for third-derivative methods
    ``f(self, X, X2, dim, dimX, dimX2, ...)``.

    Slices ``X`` and ``X2`` via ``_Slice_wrap`` and projects all three
    dimensions with ``_project_dim``; if any projection is None the result
    is a zero matrix of shape ``(X.shape[0], X2.shape[0])``.
    """
    @wraps(f)
    def wrap(self, X, X2, dim, dimX, dimX2, *a, **kw):
        with _Slice_wrap(self, X, X2) as s:
            projected = (
                s.k._project_dim(dim),
                s.k._project_dim(dimX),
                s.k._project_dim(dimX2),
            )
            if any(p is None for p in projected):
                ret = np.zeros((X.shape[0], X2.shape[0]))
            else:
                ret = f(self, s.X, s.X2, projected[0], projected[1], projected[2], *a, **kw)
        return ret
    return wrap
|
||||||
|
|
||||||
|
def _slice_dK3_dXdXdX2diag(f):
    """
    Decorator for diagonal third-derivative methods
    ``f(self, X, dim, dimX, dimX2, ...)``.

    Slices ``X`` via ``_Slice_wrap`` and projects all three dimensions with
    ``_project_dim``; if any projection is None the result is a zero vector
    with one entry per row of ``X``.
    """
    @wraps(f)
    def wrap(self, X, dim, dimX, dimX2, *a, **kw):
        with _Slice_wrap(self, X, None) as s:
            projected = (
                s.k._project_dim(dim),
                s.k._project_dim(dimX),
                s.k._project_dim(dimX2),
            )
            if any(p is None for p in projected):
                ret = np.zeros(X.shape[0])
            else:
                ret = f(self, s.X, projected[0], projected[1], projected[2], *a, **kw)
        return ret
    return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_list(f):
    """
    Decorator for methods ``f(self, X, X2)``: runs ``f`` on the inputs as
    sliced by ``_Slice_wrap``, which is told the return shape
    ``(X.shape[0], X2.shape[0])`` (``X`` is used for both when ``X2`` is
    None).
    """
    @wraps(f)
    def wrap(self, X, X2):
        n_rows = X.shape[0]
        n_cols = n_rows if X2 is None else X2.shape[0]
        with _Slice_wrap(self, X, X2, ret_shape=(n_rows, n_cols)) as s:
            ret = f(self, s.X, s.X2)
        return ret
    return wrap
|
||||||
|
|
||||||
def _slice_partial_gradients_X(f):
|
def _slice_partial_gradients_X(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, X, X2, dim):
|
def wrap(self, X, X2, dim):
|
||||||
|
|
|
||||||
|
|
@ -7,20 +7,24 @@ import numpy as np
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
class KernWrapper(Kern):
|
class KernWrapper(Kern):
|
||||||
def __init__(self, fk, fug, fg, base_kern):
|
def __init__(self, fk, fdk, fug, fg, base_kern):
|
||||||
self.fk = fk
|
self.fk = fk
|
||||||
|
self.fdk = fdk
|
||||||
self.fug = fug
|
self.fug = fug
|
||||||
self.fg = fg
|
self.fg = fg
|
||||||
self.base_kern = base_kern
|
self.base_kern = base_kern
|
||||||
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False)
|
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper', useGPU=False)
|
||||||
|
|
||||||
def K(self, X, X2=None):
|
def K(self, X, X2=None):
|
||||||
return self.fk(X,X2=X2)
|
return self.fk(X, X2=X2)
|
||||||
|
|
||||||
|
def dK_dX(self, X, X2, dimX):
    """Return the result of the wrapped ``fdk`` callable for ``(X, X2, dimX)``."""
    derivative_fn = self.fdk
    return derivative_fn(X, X2, dimX)
|
||||||
|
|
||||||
def update_gradients_full(self,dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
return self.fug(dL_dK, X, X2=X2)
|
return self.fug(dL_dK, X, X2=X2)
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
return self.fg(dL_dK, X, X2=X2)
|
return self.fg(dL_dK, X, X2=X2)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -57,28 +61,46 @@ class MultioutputDerivativeKern(MultioutputKern):
|
||||||
#build covariance structure
|
#build covariance structure
|
||||||
covariance = [[None for i in range(nl)] for j in range(nl)]
|
covariance = [[None for i in range(nl)] for j in range(nl)]
|
||||||
linked = []
|
linked = []
|
||||||
for i in range(0,nl):
|
for i in range(0, nl):
|
||||||
unique=True
|
unique = True
|
||||||
for j in range(0,nl):
|
for j in range(0, nl):
|
||||||
if i==j or (kernels[i] is kernels[j]):
|
if (i == j) or (kernels[i] is kernels[j]):
|
||||||
kern = kernels[i]
|
kern = kernels[i]
|
||||||
if i>j:
|
if i > j:
|
||||||
unique=False
|
unique = False
|
||||||
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
||||||
kern = cross_covariances.get((i,j))
|
kern = cross_covariances.get((i,j))
|
||||||
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
|
elif (kernels[i].name == 'DiffKern') and (kernels[i].base_kern == kernels[j]): # one is derivative of other
|
||||||
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
|
kern = KernWrapper(
|
||||||
|
kernels[i].dK_dX_wrap,
|
||||||
|
kernels[i].dK2_dXdX_wrap,
|
||||||
|
kernels[i].update_gradients_dK_dX,
|
||||||
|
kernels[i].gradients_X,
|
||||||
|
kernels[j]
|
||||||
|
)
|
||||||
unique=False
|
unique=False
|
||||||
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
|
elif (kernels[j].name == 'DiffKern') and (kernels[j].base_kern == kernels[i]): # one is derivative of other
|
||||||
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
|
kern = KernWrapper(
|
||||||
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
|
kernels[j].dK_dX2_wrap,
|
||||||
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
|
kernels[j].dK2_dXdX2_wrap,
|
||||||
if i>j:
|
kernels[j].update_gradients_dK_dX2,
|
||||||
unique=False
|
kernels[j].gradients_X2,
|
||||||
|
kernels[i]
|
||||||
|
)
|
||||||
|
elif (kernels[i].name == 'DiffKern') and (kernels[j].name == 'DiffKern') and (kernels[i].base_kern == kernels[j].base_kern): #both are partial derivatives
|
||||||
|
kern = KernWrapper(
|
||||||
|
partial(kernels[i].K, dimX2=kernels[j].dimension),
|
||||||
|
partial(kernels[i].dK_dX, dimX2=kernels[j].dimension),
|
||||||
|
partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),
|
||||||
|
None,
|
||||||
|
kernels[i].base_kern
|
||||||
|
)
|
||||||
|
if i > j:
|
||||||
|
unique = False
|
||||||
else:
|
else:
|
||||||
kern = ZeroKern()
|
kern = ZeroKern()
|
||||||
covariance[i][j] = kern
|
covariance[i][j] = kern
|
||||||
if unique is True:
|
if unique is True:
|
||||||
linked.append(i)
|
linked.append(i)
|
||||||
self.covariance = covariance
|
self.covariance = covariance
|
||||||
self.link_parameters(*[kernels[i] for i in linked])
|
self.link_parameters(*[kernels[i] for i in linked])
|
||||||
|
|
|
||||||
|
|
@ -85,21 +85,63 @@ class MultioutputKern(CombinationKernel):
|
||||||
self.link_parameters(*[kernels[i] for i in linked])
|
self.link_parameters(*[kernels[i] for i in linked])
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def K(self, X ,X2=None):
|
def K(self, X, X2=None):
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
X2 = X
|
X2 = X
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
|
|
||||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
|
for j in range(len(slices2)):
|
||||||
|
for i in range(len(slices)):
|
||||||
|
for l in range(len(slices2[j])):
|
||||||
|
for k in range(len(slices[i])):
|
||||||
|
cov_K = self.covariance[i][j].K(X[slices[i][k],:], X2[slices2[j][l],:])
|
||||||
|
target.__setitem__((slices[i][k], slices2[j][l]), cov_K)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def Kdiag(self,X):
|
def Kdiag(self, X):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||||
target = np.zeros(X.shape[0])
|
target = np.zeros(X.shape[0])
|
||||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
for kern, slices_i in zip(kerns, slices):
|
||||||
|
for s in slices_i:
|
||||||
|
np.copyto(target[s], kern.Kdiag(X[s]))
|
||||||
|
return target
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX):
    """
    Compute the derivative of K with respect to dimension dimX of set X.

    The result is assembled block by block: the rows of ``X`` and ``X2``
    are grouped by the value in column ``self.index_dim``, and each block
    is filled by the ``dK_dX`` of the matching entry of ``self.covariance``.
    ``X2`` defaults to ``X`` when None.
    """
    if X2 is None:
        X2 = X
    row_groups = index_to_slices(X[:,self.index_dim])
    col_groups = index_to_slices(X2[:,self.index_dim])

    target = np.zeros((X.shape[0], X2.shape[0]))
    for j, col_slices in enumerate(col_groups):
        for i, row_slices in enumerate(row_groups):
            block_kern = self.covariance[i][j]
            for col_sl in col_slices:
                for row_sl in row_slices:
                    target[row_sl, col_sl] = block_kern.dK_dX(X[row_sl,:], X2[col_sl,:], dimX)
    return target
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
def dK_dXdiag(self, X, dimX):
    """
    Compute the diagonal of the derivative of K with respect to dimension
    dimX of set X.

    Rows of ``X`` are grouped by the value in column ``self.index_dim``;
    each group is filled by the ``dK_dXdiag`` of its kernel (the single
    shared kernel when ``self.single_kern`` is set).
    """
    row_groups = index_to_slices(X[:,self.index_dim])
    if self.single_kern:
        kerns = itertools.repeat(self.kern)
    else:
        kerns = self.kern
    target = np.zeros(X.shape[0])
    for kern, group in zip(kerns, row_groups):
        for sl in group:
            np.copyto(target[sl], kern.dK_dXdiag(X[sl], dimX))
    return target
|
||||||
|
|
||||||
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
|
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
|
||||||
|
|
@ -115,19 +157,35 @@ class MultioutputKern(CombinationKernel):
|
||||||
def reset_gradients(self):
|
def reset_gradients(self):
|
||||||
for kern in self.kern: kern.reset_gradients()
|
for kern in self.kern: kern.reset_gradients()
|
||||||
|
|
||||||
def update_gradients_full(self,dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
self.reset_gradients()
|
if X2 is None:
|
||||||
|
X2 = X
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
if X2 is not None:
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
|
||||||
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
self.reset_gradients()
|
||||||
else:
|
for j in range(len(slices2)):
|
||||||
[[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
for i in range(len(slices)):
|
||||||
|
for l in range(len(slices2[j])):
|
||||||
|
for k in range(len(slices[i])):
|
||||||
|
self._update_gradients_full_wrapper(
|
||||||
|
self.covariance[i][j],
|
||||||
|
dL_dK[slices[i][k],slices2[j][l]],
|
||||||
|
X[slices[i][k],:],
|
||||||
|
X2[slices2[j][l],:]
|
||||||
|
)
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
self.reset_gradients()
|
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
[[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
|
|
||||||
|
self.reset_gradients()
|
||||||
|
for i in range(len(slices)):
|
||||||
|
for k in range(len(slices[i])):
|
||||||
|
self._update_gradients_diag_wrapper(
|
||||||
|
self.covariance[i][i],
|
||||||
|
dL_dKdiag[slices[i][k]],
|
||||||
|
X[slices[i][k],:]
|
||||||
|
)
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
|
|
@ -137,4 +195,4 @@ class MultioutputKern(CombinationKernel):
|
||||||
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
||||||
else:
|
else:
|
||||||
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
||||||
return target
|
return target
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,310 @@ class Prod(CombinationKernel):
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
||||||
|
|
||||||
|
def reset_gradients(self):
    """Call ``reset_gradients()`` on every kernel in ``self.parts``, in order."""
    for factor in self.parts:
        factor.reset_gradients()
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dX(self, X, X2, dimX, which_parts=None):
    """
    Compute the derivative of K with respect to dimension dimX of set X.

    Product rule: for each part in turn, the ``dK_dX`` of that part is
    multiplied by the product of ``K`` of all remaining parts, and the
    terms are summed. ``which_parts`` defaults to ``self.parts``.
    """
    parts = self.parts if which_parts is None else which_parts
    total = np.zeros((X.shape[0], X2.shape[0]))
    # every (n-1)-subset leaves exactly one part out; that part is the one
    # being differentiated in the current term
    for kept in itertools.combinations(parts, len(parts) - 1):
        if kept:
            others = reduce(np.multiply, [p.K(X, X2) for p in kept])
        else:
            others = np.ones(total.shape)
        differentiated = list(set(parts) - set(kept))[0]
        total += others*differentiated.dK_dX(X, X2, dimX)
    return total
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
def dK_dXdiag(self, X, dimX, which_parts=None):
    """
    Compute the derivative of K with respect to dimension dimX of set X.

    Returns only diagonal elements.

    Product rule: for each part in turn, the ``dK_dXdiag`` of that part is
    multiplied by the product of ``Kdiag`` of all remaining parts, and the
    terms are summed. ``which_parts`` defaults to ``self.parts``.
    """
    parts = self.parts if which_parts is None else which_parts
    total = np.zeros(X.shape[0])
    # every (n-1)-subset leaves exactly one part out; that part is the one
    # being differentiated in the current term
    for kept in itertools.combinations(parts, len(parts) - 1):
        if kept:
            others = reduce(np.multiply, [p.Kdiag(X) for p in kept])
        else:
            others = np.ones(total.shape)
        differentiated = list(set(parts) - set(kept))[0]
        total += others*differentiated.dK_dXdiag(X, dimX)
    return total
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||||
|
def dK_dX2(self, X, X2, dimX2, which_parts=None):
    """
    Compute the derivative of K with respect to:
        dimension dimX2 of set X2.

    Product rule: each part's dK_dX2 is multiplied by the covariances
    K(X, X2) of all the other parts.

    :param which_parts: kernels forming the product (defaults to self.parts)
    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    parts = self.parts if which_parts is None else which_parts
    shape = (X.shape[0], X2.shape[0])
    total = np.zeros(shape)
    for rest in itertools.combinations(parts, len(parts) - 1):
        if rest:
            others = reduce(np.multiply, [p.K(X, X2) for p in rest])
        else:
            others = np.ones(shape)
        # the one part missing from `rest` is the part being differentiated
        active = list(set(parts) - set(rest))[0]
        total += others*active.dK_dX2(X, X2, dimX2)
    return total
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||||
|
def dK2_dXdX2(self, X, X2, dimX, dimX2, which_parts=None):
    """
    Compute the second derivative of K with respect to:
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Two kinds of terms are summed: one part carrying both derivatives,
    and every ordered pair of distinct parts carrying one derivative
    each; the remaining parts contribute their plain K(X, X2).

    :param which_parts: kernels forming the product (defaults to self.parts)
    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    parts = self.parts if which_parts is None else which_parts
    shape = (X.shape[0], X2.shape[0])
    n_parts = len(parts)
    total = np.zeros(shape)

    def joint(kerns):
        # elementwise product of the undifferentiated parts (ones if none)
        if kerns:
            return reduce(np.multiply, [k.K(X, X2) for k in kerns])
        return np.ones(shape)

    for rest1 in itertools.combinations(parts, n_parts - 1):
        scale = joint(rest1)
        first = list(set(parts) - set(rest1))[0]
        total += scale*first.dK2_dXdX2(X, X2, dimX, dimX2)
        if n_parts <= 1:
            # no second part exists to take the other derivative
            continue
        for rest2 in itertools.combinations(rest1, len(rest1) - 1):
            scale = joint(rest2)
            second = list(set(rest1) - set(rest2))[0]
            total += scale*first.dK_dX(X, X2, dimX)*second.dK_dX2(X, X2, dimX2)
    return total
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||||
|
def dK2_dXdX2diag(self, X, dimX, dimX2, which_parts=None):
    """
    Compute the second derivative of K with respect to:
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Returns only diagonal elements.

    Two kinds of terms are summed: one part carrying both derivatives,
    and every ordered pair of distinct parts carrying one derivative
    each; the remaining parts contribute their plain Kdiag(X).

    :param which_parts: kernels forming the product (defaults to self.parts)
    :returns: array of shape (X.shape[0],)
    """
    if which_parts is None:
        which_parts = self.parts
    prod_sum = np.zeros(X.shape[0])
    for combination1 in itertools.combinations(which_parts, len(which_parts) - 1):
        if len(combination1) > 0:
            prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1])
        else:
            prod = np.ones(prod_sum.shape)
        # the part left out of combination1 takes the first derivative(s)
        to_update1 = list(set(which_parts) - set(combination1))[0]
        prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX, dimX2)
        if len(which_parts) > 1:
            for combination2 in itertools.combinations(combination1, len(combination1) - 1):
                if len(combination2) > 0:
                    prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2])
                else:
                    prod = np.ones(prod_sum.shape)
                to_update2 = list(set(combination1) - set(combination2))[0]
                # The X2-derivative must be taken along dimX2 (it was
                # mistakenly taken along dimX), matching dK2_dXdX2.
                prod_sum += prod*to_update1.dK_dXdiag(X, dimX)*to_update2.dK_dX2diag(X, dimX2)
    return prod_sum
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||||
|
def dK2_dXdX(self, X, X2, dimX_0, dimX_1, which_parts=None):
    """
    Compute the second derivative of K with respect to:
        dimension dimX_0 of set X, and
        dimension dimX_1 of set X.

    Two kinds of terms are summed: one part carrying both derivatives,
    and every ordered pair of distinct parts carrying one derivative
    each; the remaining parts contribute their plain K(X, X2).

    :param which_parts: kernels forming the product (defaults to self.parts)
    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    parts = self.parts if which_parts is None else which_parts
    shape = (X.shape[0], X2.shape[0])
    n_parts = len(parts)
    total = np.zeros(shape)

    def joint(kerns):
        # elementwise product of the undifferentiated parts (ones if none)
        if kerns:
            return reduce(np.multiply, [k.K(X, X2) for k in kerns])
        return np.ones(shape)

    for rest1 in itertools.combinations(parts, n_parts - 1):
        scale = joint(rest1)
        first = list(set(parts) - set(rest1))[0]
        total += scale*first.dK2_dXdX(X, X2, dimX_0, dimX_1)
        if n_parts <= 1:
            continue
        for rest2 in itertools.combinations(rest1, len(rest1) - 1):
            scale = joint(rest2)
            second = list(set(rest1) - set(rest2))[0]
            total += scale*first.dK_dX(X, X2, dimX_0)*second.dK_dX(X, X2, dimX_1)
    return total
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||||
|
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2, which_parts=None):
    """
    Compute the third derivative of K with respect to:
        dimension dimX_0 of set X,
        dimension dimX_1 of set X, and
        dimension dimX2 of set X2.

    The three derivatives are distributed over the parts in every
    possible way: all on one part, two on one part and one on another,
    or one each on three distinct parts. Undifferentiated parts
    contribute their plain K(X, X2).

    :param which_parts: kernels forming the product (defaults to self.parts)
    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    parts = self.parts if which_parts is None else which_parts
    shape = (X.shape[0], X2.shape[0])
    n_parts = len(parts)
    total = np.zeros(shape)

    def joint(kerns):
        # elementwise product of the undifferentiated parts (ones if none)
        if kerns:
            return reduce(np.multiply, [k.K(X, X2) for k in kerns])
        return np.ones(shape)

    for rest1 in itertools.combinations(parts, n_parts - 1):
        scale = joint(rest1)
        first = list(set(parts) - set(rest1))[0]
        # all three derivatives on the same part
        total += scale*first.dK3_dXdXdX2(X, X2, dimX_0, dimX_1, dimX2)
        if n_parts <= 1:
            continue
        for rest2 in itertools.combinations(rest1, len(rest1) - 1):
            scale = joint(rest2)
            second = list(set(rest1) - set(rest2))[0]
            # two derivatives on one part, the third on another
            total += scale*first.dK2_dXdX2(X, X2, dimX_0, dimX2)*second.dK_dX(X, X2, dimX_1)
            total += scale*first.dK2_dXdX(X, X2, dimX_0, dimX_1)*second.dK_dX2(X, X2, dimX2)
            total += scale*first.dK_dX(X, X2, dimX_0)*second.dK2_dXdX2(X, X2, dimX_1, dimX2)
            if n_parts <= 2:
                continue
            for rest3 in itertools.combinations(rest2, len(rest2) - 1):
                scale = joint(rest3)
                third = list(set(rest2) - set(rest3))[0]
                # one derivative on each of three distinct parts
                total += scale*first.dK_dX(X, X2, dimX_0)*second.dK_dX2(X, X2, dimX2)*third.dK_dX(X, X2, dimX_1)
    return total
|
||||||
|
|
||||||
|
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||||
|
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2, which_parts=None):
    """
    Compute the third derivative of K with respect to:
        dimension dimX_0 of set X,
        dimension dimX_1 of set X, and
        dimension dimX2 of set X2.

    Returns only diagonal elements of the covariance matrix.

    The three derivatives are distributed over the parts in every
    possible way, using the parts' *diag methods; undifferentiated parts
    contribute their Kdiag(X).

    :param which_parts: kernels forming the product (defaults to self.parts)
    :returns: array of shape (X.shape[0],)
    """
    parts = self.parts if which_parts is None else which_parts
    n_parts = len(parts)
    total = np.zeros(X.shape[0])

    def joint(kerns):
        # elementwise product of the undifferentiated parts (ones if none)
        if kerns:
            return reduce(np.multiply, [k.Kdiag(X) for k in kerns])
        return np.ones(total.shape)

    for rest1 in itertools.combinations(parts, n_parts - 1):
        scale = joint(rest1)
        first = list(set(parts) - set(rest1))[0]
        # all three derivatives on the same part
        total += scale*first.dK3_dXdXdX2diag(X, dimX_0, dimX_1, dimX2)
        if n_parts <= 1:
            continue
        for rest2 in itertools.combinations(rest1, len(rest1) - 1):
            scale = joint(rest2)
            second = list(set(rest1) - set(rest2))[0]
            # two derivatives on one part, the third on another
            total += scale*first.dK2_dXdX2diag(X, dimX_0, dimX2)*second.dK_dXdiag(X, dimX_1)
            total += scale*first.dK2_dXdXdiag(X, dimX_0, dimX_1)*second.dK_dX2diag(X, dimX2)
            total += scale*first.dK_dXdiag(X, dimX_0)*second.dK2_dXdX2diag(X, dimX_1, dimX2)
            if n_parts <= 2:
                continue
            for rest3 in itertools.combinations(rest2, len(rest2) - 1):
                scale = joint(rest3)
                third = list(set(rest2) - set(rest3))[0]
                # one derivative on each of three distinct parts
                total += scale*first.dK_dXdiag(X, dimX_0)*second.dK_dX2diag(X, dimX2)*third.dK_dXdiag(X, dimX_1)
    return total
|
||||||
|
|
||||||
|
def update_gradients_direct(self, *args):
    """
    Forward one group of gradients to each part.

    The i-th positional argument is unpacked into the i-th part's own
    update_gradients_direct; pairing stops at the shorter of args and
    self.parts.
    """
    for grads, kern in zip(args, self.parts):
        kern.update_gradients_direct(*grads)
|
||||||
|
|
||||||
|
def dgradients_dX(self, X, X2, dimX, parts=None):
    """
    Compute the hyperparameter gradients of:
        the derivative of K with respect to dimension dimX of set X
        ("dK_dX").

    For each part, the product of the other parts (and its dK_dX) is
    combined with that part's own dgradients / dgradients_dX through the
    product rule.

    :param parts: kernels forming the product (defaults to self.parts)
    :returns: list with one inner list of gradient arrays per part
    """
    kerns = self.parts if parts is None else parts
    shape = (X.shape[0], X2.shape[0])
    gradients = []
    for kern in kerns:
        others = [p for p in kerns if p is not kern]
        if others:
            K_rest = self.K(X, X2, which_parts=others)
            dK_rest = self.dK_dX(X, X2, dimX, which_parts=others)
        else:
            # lone part: the "rest" is the constant 1, whose derivative is 0
            K_rest = np.ones(shape)
            dK_rest = np.zeros(shape)

        own = kern.dgradients(X, X2)
        own_dx = kern.dgradients_dX(X, X2, dimX)

        gradients.append([g_i*dK_rest + g_dx_i*K_rest
                          for g_i, g_dx_i in zip(own, own_dx)])
    return gradients
|
||||||
|
|
||||||
|
def dgradients_dX2(self, X, X2, dimX2, parts=None):
    """
    Compute the hyperparameter gradients of:
        the derivative of K with respect to dimension dimX2 of set X2
        ("dK_dX2").

    For each part, the product of the other parts (and its dK_dX2) is
    combined with that part's own dgradients / dgradients_dX2 through the
    product rule.

    :param parts: kernels forming the product (defaults to self.parts)
    :returns: list with one inner list of gradient arrays per part
    """
    kerns = self.parts if parts is None else parts
    shape = (X.shape[0], X2.shape[0])
    gradients = []
    for kern in kerns:
        others = [p for p in kerns if p is not kern]
        if others:
            K_rest = self.K(X, X2, which_parts=others)
            dK_rest = self.dK_dX2(X, X2, dimX2, which_parts=others)
        else:
            # lone part: the "rest" is the constant 1, whose derivative is 0
            K_rest = np.ones(shape)
            dK_rest = np.zeros(shape)

        own = kern.dgradients(X, X2)
        own_dx2 = kern.dgradients_dX2(X, X2, dimX2)

        gradients.append([g_i*dK_rest + g_dx2_i*K_rest
                          for g_i, g_dx2_i in zip(own, own_dx2)])
    return gradients
|
||||||
|
|
||||||
|
def dgradients2_dXdX2(self, X, X2, dimX, dimX2, parts=None):
    """
    Compute the hyperparameter gradients of:
        the second derivative of K with respect to:
            dimension dimX of set X, and
            dimension dimX2 of set X2
        ("dK2_dXdX2").

    For each part, the product of the other parts and its derivatives are
    combined with that part's own gradient lists through the product rule.

    :param parts: kernels forming the product (defaults to self.parts)
    :returns: list with one inner list of gradient arrays per part
    """
    if parts is None:
        parts = self.parts
    shape = (X.shape[0], X2.shape[0])
    gradients = []
    for part in parts:
        neq_parts = [p for p in parts if p is not part]

        if len(neq_parts) > 0:
            K = self.K(X, X2, which_parts=neq_parts)
            K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts)
            K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts)
            K_dxdx2 = self.dK2_dXdX2(X, X2, dimX, dimX2, which_parts=neq_parts)
        else:
            # Lone part: the product of "the others" is the constant 1 and
            # all of its derivatives vanish. Same guard as dgradients_dX and
            # dgradients_dX2; avoids evaluating an empty product.
            K = np.ones(shape)
            K_dx = np.zeros(shape)
            K_dx2 = np.zeros(shape)
            K_dxdx2 = np.zeros(shape)

        g = part.dgradients(X, X2)
        g_dx = part.dgradients_dX(X, X2, dimX)
        g_dx2 = part.dgradients_dX2(X, X2, dimX2)
        g_dxdx2 = part.dgradients2_dXdX2(X, X2, dimX, dimX2)

        gradients += [[(g_i*K_dxdx2 + g_dx_i*K_dx2 + g_dx2_i*K_dx + g_dxdx2_i*K)
                       for (g_i, g_dx_i, g_dx2_i, g_dxdx2_i) in zip(g, g_dx, g_dx2, g_dxdx2)]]
    return gradients
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
if len(self.parts)==2:
|
if len(self.parts)==2:
|
||||||
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
|
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
|
||||||
|
|
|
||||||
|
|
@ -53,24 +53,126 @@ class RBF(Stationary):
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dX(self, X, X2, dimX):
    """
    Compute the derivative of K with respect to:
        dimension dimX of set X.

    Evaluates -(x - x2)/lengthscale**2 * K along dimension dimX.

    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    # broadcast the lengthscale(s) to one inverse value per input dimension
    inv_ell = np.ones(X.shape[1])/(self.lengthscale)
    delta = X[:, None, dimX] - X2[None, :, dimX]
    weight = inv_ell[dimX]**2
    return -delta*weight*self._clean_K(X, X2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dXdiag(self, X, dimX):
    """
    Compute the derivative of K with respect to:
        dimension dimX of set X.

    Returns only diagonal elements.

    The result is a vector of zeros with one entry per row of X.
    """
    n_points = X.shape[0]
    return np.zeros(n_points)
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dX2(self, X, X2, dimX2):
    """
    Compute the derivative of K with respect to:
        dimension dimX2 of set X2.

    Obtained as the negative of the derivative with respect to the same
    dimension of X.
    """
    wrt_X = self._clean_dK_dX(X, X2, dimX2)
    return -wrt_X
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dX2diag(self, X, dimX2):
    """
    Compute the derivative of K with respect to:
        dimension dimX2 of set X2.

    Returns only diagonal elements.

    The result is a vector of zeros with one entry per row of X.
    """
    n_points = X.shape[0]
    return np.zeros(n_points)
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK2_dXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the second derivative of K with respect to:
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    inv_ell = np.ones(X.shape[1])/(self.lengthscale)
    # scaled differences (x - x2)/lengthscale**2 along the two dimensions
    u = (X[:, None, dimX] - X2[None, :, dimX])*(inv_ell[dimX]**2)
    w = (X[:, None, dimX2] - X2[None, :, dimX2])*(inv_ell[dimX2]**2)
    core = u*w
    if dimX == dimX2:
        # extra term appearing when both derivatives hit the same dimension
        core = core - (inv_ell[dimX]**2)
    return -core*self._clean_K(X, X2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dXdX2diag(self, X, dimX, dimX2):
    """
    Compute the second derivative of K with respect to:
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Returns only diagonal elements.

    Non-zero only when dimX == dimX2, where every entry equals
    variance / lengthscale[dimX]**2.
    """
    if dimX != dimX2:
        return np.zeros(X.shape[0])
    inv_ell = np.ones((X.shape[1]))/(self.lengthscale)
    return np.ones(X.shape[0])*(inv_ell[dimX]**2)*self.variance
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dXdX(self, X, X2, dimX_0, dimX_1):
    """
    Compute the second derivative of K with respect to:
        dimension dimX_0 of set X, and
        dimension dimX_1 of set X.

    Obtained as the negative of the mixed X/X2 second derivative taken
    along the same two dimensions.
    """
    mixed = self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1)
    return -mixed
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dXdXdiag(self, X, dimX_0, dimX_1):
    """
    Compute the second derivative of K with respect to:
        dimension dimX_0 of set X, and
        dimension dimX_1 of set X.

    Returns only diagonal elements.

    Obtained as the negative of the diagonal of the mixed X/X2 second
    derivative taken along the same two dimensions.
    """
    mixed_diag = self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1)
    return -mixed_diag
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2):
    """
    Compute the third derivative of K with respect to:
        dimension dimX_0 of set X,
        dimension dimX_1 of set X, and
        dimension dimX2 of set X2.

    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    inv_sq = (np.ones(X.shape[1])/(self.lengthscale))**2
    diffs = np.rollaxis(X[:, None, :] - X2[None, :, :], 2, 0)

    # scaled differences (x - x2)/lengthscale**2 along each dimension
    u_0 = diffs[dimX_0]*inv_sq[dimX_0]
    u_1 = diffs[dimX_1]*inv_sq[dimX_1]
    u_2 = diffs[dimX2]*inv_sq[dimX2]

    core = u_0*u_1*u_2
    # one correction for every pair of coinciding dimensions
    if dimX_0 == dimX_1:
        core = core - u_2*inv_sq[dimX_0]
    if dimX_0 == dimX2:
        core = core - u_1*inv_sq[dimX_0]
    if dimX_1 == dimX2:
        core = core - u_0*inv_sq[dimX_1]
    return core*self._clean_K(X, X2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2):
    """
    Compute the third derivative of K with respect to:
        dimension dimX_0 of set X,
        dimension dimX_1 of set X, and
        dimension dimX2 of set X2.

    Returns only diagonal elements of the covariance matrix.

    The result is a vector of zeros with one entry per row of X.
    """
    n_points = X.shape[0]
    return np.zeros(n_points)
|
||||||
|
|
||||||
def dK_dr(self, r):
    """Return -r * K_of_r(r), the derivative of K with respect to r."""
    k_of_r = self.K_of_r(r)
    return -r*k_of_r
|
||||||
|
|
@ -80,73 +182,132 @@ class RBF(Stationary):
|
||||||
|
|
||||||
def dK2_drdr_diag(self):
    """Return the diagonal of the second derivative of K with respect to r."""
    # as the diagonal of r is always filled with zeros
    negated_variance = -self.variance
    return negated_variance
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dvariance(self, X, X2):
    """
    Compute the derivative of K with respect to variance.

    Computed as K divided by the variance.
    """
    cov = self._clean_K(X, X2)
    return cov/self.variance
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dlengthscale(self, X, X2):
    """
    Compute the derivative(s) of K with respect to lengthscale(s).

    :returns: with ARD, a list holding one (X.shape[0], X2.shape[0]) array
        per input dimension; otherwise a single such array
    """
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))
    # per-dimension differences, dimension index first
    diffs = np.rollaxis(X[:, None, :] - X2[None, :, :], 2, 0)

    cov = self._clean_K(X, X2)

    if not self.ARD:
        # shared lengthscale: the squared distances of all dimensions add up
        return (inv_ell[0]**3)*np.sum(diffs**2, axis=0)*cov
    return [(diffs[diml]**2)*(inv_ell[diml]**3)*cov
            for diml in range(self.input_dim)]
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK2_dvariancedX(self, X, X2, dimX):
    """
    Compute the second derivative of K with respect to:
        variance, and
        dimension dimX of set X.

    Computed as dK_dX divided by the variance.
    """
    slope = self._clean_dK_dX(X, X2, dimX)
    return slope/self.variance
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK2_dvariancedX2(self, X, X2, dimX2):
    """
    Compute the second derivative of K with respect to:
        variance, and
        dimension dimX2 of set X2.

    Obtained as the negative of dK2_dvariancedX along the same dimension.
    """
    wrt_X = self.dK2_dvariancedX(X, X2, dimX2)
    return -wrt_X
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK2_dlengthscaledX(self, X, X2, dimX):
    """
    Compute the second derivative(s) of K with respect to:
        lengthscale(s), and
        dimension dimX of set X.

    Built from dK_dX and dK_dlengthscale.

    :returns: with ARD, a list with one array per input dimension;
        otherwise a single array
    """
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))
    diffs = np.rollaxis(X[:, None, :] - X2[None, :, :], 2, 0)

    slope = self._clean_dK_dX(X, X2, dimX)
    dK_dl = self.dK_dlengthscale(X, X2)

    if not self.ARD:
        shared = -diffs[dimX]*(inv_ell[0]**2)*dK_dl
        return shared - 2*inv_ell[0]*slope

    g = []
    for diml in range(self.input_dim):
        piece = -diffs[dimX]*(inv_ell[dimX]**2)*dK_dl[diml]
        if diml == dimX:
            # only dimension dimX's own lengthscale appears in the dK_dX prefactor
            piece = piece - 2*inv_ell[dimX]*slope
        g.append(piece)
    return g
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK2_dlengthscaledX2(self, X, X2, dimX2):
    """
    Compute the second derivative(s) of K with respect to:
        lengthscale(s), and
        dimension dimX2 of set X2.

    Obtained by negating dK2_dlengthscaledX along the same dimension
    (entry by entry when ARD yields a list).
    """
    wrt_X = self.dK2_dlengthscaledX(X, X2, dimX2)
    if not self.ARD:
        return -1*wrt_X
    return [-1.*entry for entry in wrt_X]
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the third derivative of K with respect to:
        variance,
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Computed as dK2_dXdX2 divided by the variance.
    """
    curvature = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
    return curvature/self.variance
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the third derivative(s) of K with respect to:
        lengthscale(s),
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Built from K, dK_dX (along dimX and along dimX2) and dK2_dXdX2.

    :returns: with ARD, a list with one array per input dimension;
        otherwise a single array
    """
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))
    diffs = np.rollaxis(X[:, None, :] - X2[None, :, :], 2, 0)

    cov = self._clean_K(X, X2)
    slope_x = self._clean_dK_dX(X, X2, dimX)
    # NOTE: derivative with respect to X (not X2), taken along dimX2
    slope_x2 = self._clean_dK_dX(X, X2, dimX2)
    curvature = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)

    if not self.ARD:
        shared = np.sum(diffs**2, axis=0)*curvature
        shared = shared - 4*diffs[dimX2]*slope_x
        if dimX == dimX2:
            shared = shared - 2*cov
        return (inv_ell[0]**3)*shared

    g = []
    for diml in range(self.input_dim):
        piece = (diffs[diml]**2)*(inv_ell[diml]**3)*curvature
        if diml == dimX:
            piece = piece - 2*diffs[dimX]*(inv_ell[dimX]**3)*slope_x2
        if diml == dimX2:
            piece = piece - 2*diffs[dimX2]*(inv_ell[dimX2]**3)*slope_x
        if diml == dimX == dimX2:
            piece = piece - 2*(inv_ell[dimX]**3)*cov
        g.append(piece)
    return g
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,6 @@ class StdPeriodic(Kern):
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def K(self, X, X2=None):
|
def K(self, X, X2=None):
|
||||||
"""Compute the covariance matrix between X and X2."""
|
"""Compute the covariance matrix between X and X2."""
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
|
|
@ -133,13 +132,372 @@ class StdPeriodic(Kern):
|
||||||
|
|
||||||
return self.variance * exp_dist
|
return self.variance * exp_dist
|
||||||
|
|
||||||
|
|
||||||
def Kdiag(self, X):
    """Compute the diagonal of the covariance matrix associated to X."""
    # every diagonal entry equals the variance; one entry per row of X
    return np.full(X.shape[0], self.variance, dtype=np.float64)
|
||||||
|
|
||||||
|
def dK_dX(self, X, X2, dimX):
    """
    Compute the derivative of K with respect to:
        dimension dimX of set X.

    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
    inv_period = (np.ones(X.shape[1])/(self.period))[dimX]

    # multiplicative factor pi / (2 * lengthscale**2 * period)
    factor = 0.5*np.pi*(inv_ell**2)*inv_period

    delta = X[:, None, dimX] - X2[None, :, dimX]
    phase = np.pi*inv_period*delta

    return -factor*np.sin(2*phase)*self._clean_K(X, X2)
|
||||||
|
|
||||||
|
def dK_dXdiag(self, X, dimX):
    """
    Compute the derivative of K with respect to:
        dimension dimX of set X.

    Returns only diagonal elements.

    The result is a vector of zeros with one entry per row of X.
    """
    n_points = X.shape[0]
    return np.zeros(n_points)
|
||||||
|
|
||||||
|
def dK_dX2(self, X, X2, dimX2):
    """
    Compute the derivative of K with respect to:
        dimension dimX2 of set X2.

    Obtained as the negative of the derivative with respect to the same
    dimension of X.
    """
    wrt_X = self._clean_dK_dX(X, X2, dimX2)
    return -wrt_X
|
||||||
|
|
||||||
|
def dK_dX2diag(self, X, dimX2):
    """
    Compute the derivative of K with respect to:
        dimension dimX2 of set X2.

    Returns only diagonal elements.

    The result is a vector of zeros with one entry per row of X.
    """
    n_points = X.shape[0]
    return np.zeros(n_points)
|
||||||
|
|
||||||
|
def dK2_dXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the second derivative of K with respect to:
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
    inv_period = (np.ones(X.shape[1])/(self.period))[dimX2]

    # multiplicative factor pi / (2 * lengthscale**2 * period)
    factor = 0.5*np.pi*(inv_ell**2)*inv_period

    delta = X[:, None, dimX2] - X2[None, :, dimX2]
    phase = np.pi*inv_period*delta

    inner = np.sin(2*phase)*self._clean_dK_dX(X, X2, dimX)
    if dimX == dimX2:
        # the sine prefactor itself depends on X only along dimX2
        inner = inner + 2*np.pi*inv_period*np.cos(2*phase)*self._clean_K(X, X2)
    return factor*inner
|
||||||
|
|
||||||
|
def dK2_dXdX2diag(self, X, dimX, dimX2):
    """
    Compute the second derivative of K with respect to:
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Returns only diagonal elements.

    Non-zero only when dimX == dimX2, where every entry equals
    pi**2 * variance / (lengthscale**2 * period**2) for that dimension.
    """
    if dimX != dimX2:
        return np.zeros(X.shape[0])
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
    inv_period = (np.ones(X.shape[1])/(self.period))[dimX2]
    return (np.pi**2)*(inv_ell**2)*(inv_period**2)*self.variance*np.ones(X.shape[0])
|
||||||
|
|
||||||
|
def dK2_dXdX(self, X, X2, dimX_0, dimX_1):
    """
    Compute the second derivative of K with respect to:
        dimension dimX_0 of set X, and
        dimension dimX_1 of set X.

    Obtained as the negative of the mixed X/X2 second derivative taken
    along the same two dimensions.
    """
    mixed = self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1)
    return -mixed
|
||||||
|
|
||||||
|
def dK2_dXdXdiag(self, X, dimX_0, dimX_1):
    """
    Compute the second derivative of K with respect to:
        dimension dimX_0 of set X, and
        dimension dimX_1 of set X.

    Returns only diagonal elements.

    Obtained as the negative of the diagonal of the mixed X/X2 second
    derivative taken along the same two dimensions.
    """
    mixed_diag = self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1)
    return -mixed_diag
|
||||||
|
|
||||||
|
def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2):
    """
    Compute the third derivative of K with respect to:
        dimension dimX_0 of set X,
        dimension dimX_1 of set X, and
        dimension dimX2 of set X2.

    :returns: array of shape (X.shape[0], X2.shape[0])
    """
    inv_ell = (np.ones(X.shape[1])/(self.lengthscale))[dimX2]
    inv_period = (np.ones(X.shape[1])/(self.period))[dimX2]

    # multiplicative factor pi / (2 * lengthscale**2 * period)
    factor = 0.5*np.pi*(inv_ell**2)*inv_period

    delta = X[:, None, dimX2] - X2[None, :, dimX2]
    phase = np.pi*inv_period*delta

    inner = np.sin(2*phase)*self._clean_dK2_dXdX(X, X2, dimX_0, dimX_1)
    # extra terms whenever an X-derivative acts on the sine prefactor,
    # which depends on X only along dimX2
    if dimX_0 == dimX2:
        inner = inner + 2*np.pi*inv_period*np.cos(2*phase)*self._clean_dK_dX(X, X2, dimX_1)
    if dimX_1 == dimX2:
        inner = inner + 2*np.pi*inv_period*np.cos(2*phase)*self._clean_dK_dX(X, X2, dimX_0)
    if dimX_0 == dimX_1 == dimX2:
        inner = inner - 4*(np.pi**2)*(inv_period**2)*np.sin(2*phase)*self._clean_K(X, X2)
    return factor*inner
|
||||||
|
|
||||||
|
def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2):
    """
    Compute the third derivative of K with respect to:
        dimension dimX_0 of set X,
        dimension dimX_1 of set X, and
        dimension dimX2 of set X2.

    Returns only diagonal elements of the covariance matrix.

    The result is a vector of zeros with one entry per row of X.
    """
    n_points = X.shape[0]
    return np.zeros(n_points)
|
||||||
|
|
||||||
|
def dK_dvariance(self, X, X2):
    """
    Compute the derivative of K with respect to variance.

    Computed as K divided by the variance.
    """
    cov = self._clean_K(X, X2)
    return cov/self.variance
|
||||||
|
|
||||||
|
def dK_dlengthscale(self, X, X2):
    """
    Compute the derivative(s) of K with respect to lengthscale(s).

    With self.ARD2 set, a list with one array per input dimension is
    returned; otherwise a single array for the shared lengthscale.
    """
    n_dims = X.shape[1]
    inv_l = np.ones(n_dims)/(self.lengthscale)
    inv_T = np.ones(n_dims)/(self.period)

    # pairwise differences with the input dimension moved to the front
    diffs = (X[:, None, :] - X2[None, :, :]).transpose(2, 0, 1)
    sin_sq = np.square(np.sin(np.pi*inv_T[:, None, None]*diffs))

    cov = self._clean_K(X, X2)

    if not self.ARD2:
        # one shared lengthscale: contributions of all dimensions add up
        return (inv_l[0]**3)*np.sum(sin_sq, axis=0)*cov
    return [(inv_l[d]**3)*sin_sq[d]*cov for d in range(self.input_dim)]
|
||||||
|
|
||||||
|
def dK_dperiod(self, X, X2):
    """
    Compute the derivative(s) of K with respect to period(s).

    With self.ARD1 set, a list with one array per input dimension is
    returned; otherwise a single array for the shared period.
    """
    n_dims = X.shape[1]
    inv_l = np.ones(n_dims)/(self.lengthscale)
    inv_T = np.ones(n_dims)/(self.period)

    # pairwise differences with the input dimension moved to the front
    diffs = (X[:, None, :] - X2[None, :, :]).transpose(2, 0, 1)
    phase = np.pi*inv_T[:, None, None]*diffs

    cov = self._clean_K(X, X2)

    if not self.ARD1:
        # one shared period: contributions of all dimensions add up
        summed = np.sum(phase*(inv_l**2)[:, None, None]*np.sin(2*phase), axis=0)
        return 0.5*inv_T[0]*summed*cov
    return [0.5*phase[d]*(inv_l[d]**2)*inv_T[d]*np.sin(2*phase[d])*cov
            for d in range(self.input_dim)]
|
||||||
|
|
||||||
|
def dK2_dvariancedX(self, X, X2, dimX):
    """
    Compute the second derivative of K with respect to:
        variance, and
        dimension dimX of set X.

    Evaluated as the input derivative from _clean_dK_dX divided by the
    variance.
    """
    grad_x = self._clean_dK_dX(X, X2, dimX)
    return grad_x/self.variance
|
||||||
|
|
||||||
|
def dK2_dvariancedX2(self, X, X2, dimX2):
    """
    Compute the second derivative of K with respect to:
        variance, and
        dimension dimX2 of set X2.

    This is the negated result of dK2_dvariancedX for the same dimension.
    """
    wrt_x = self.dK2_dvariancedX(X, X2, dimX2)
    return -wrt_x
|
||||||
|
|
||||||
|
def dK2_dlengthscaledX(self, X, X2, dimX):
    """
    Compute the second derivative(s) of K with respect to:
        lengthscale(s), and
        dimension dimX of set X.

    With self.ARD2 set, a list with one array per lengthscale is returned;
    otherwise a single array for the shared lengthscale.

    The arrays returned by dK_dlengthscale are never modified, so the
    result stays correct if that method returns cached or shared arrays.
    """
    lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX]
    periodinv = (np.ones(X.shape[1])/(self.period))[dimX]

    dist = X[:, None, dimX] - X2[None, :, dimX]
    base = np.pi*periodinv*dist
    sin2base = np.sin(2*base)

    F = 0.5*np.pi*(lengthscaleinv**2)*periodinv  # multiplicative factor

    K = self._clean_K(X, X2)
    dK_dl = self.dK_dlengthscale(X, X2)

    # extra term that appears when differentiating F with respect to the
    # lengthscale belonging to dimension dimX
    correction = 2*lengthscaleinv*K

    if self.ARD2:
        g = []
        for diml in range(self.input_dim):
            term = dK_dl[diml]
            if diml == dimX:
                # out-of-place on purpose: `term -= ...` would overwrite
                # the array owned by dK_dl
                term = term - correction
            g.append(-F*sin2base*term)
    else:
        g = -F*sin2base*(dK_dl - correction)
    return g
|
||||||
|
|
||||||
|
def dK2_dlengthscaledX2(self, X, X2, dimX2):
    """
    Compute the second derivative(s) of K with respect to:
        lengthscale(s), and
        dimension dimX2 of set X2.

    This is the negated result of dK2_dlengthscaledX: element-wise over
    the list when self.ARD2 is set, on the single array otherwise.
    """
    wrt_x = self.dK2_dlengthscaledX(X, X2, dimX2)
    if not self.ARD2:
        return -1*wrt_x
    return [-1*grad for grad in wrt_x]
|
||||||
|
|
||||||
|
def dK2_dperioddX(self, X, X2, dimX):
    """
    Compute the second derivative(s) of K with respect to:
        period(s), and
        dimension dimX of set X.

    With self.ARD1 set, a list with one array per period is returned;
    otherwise a single array for the shared period.
    """
    n_dims = X.shape[1]
    inv_l = (np.ones(n_dims)/(self.lengthscale))[dimX]
    inv_T = (np.ones(n_dims)/(self.period))[dimX]

    # pairwise differences along dimX, scaled to the argument of the sine
    phase = np.pi*inv_T*(X[:, None, dimX] - X2[None, :, dimX])
    sin2 = np.sin(2*phase)
    cos2 = np.cos(2*phase)

    scale = 0.5*np.pi*(inv_l**2)*inv_T  # multiplicative factor

    cov = self._clean_K(X, X2)
    grad_T = self.dK_dperiod(X, X2)

    def assemble(d_T, own_period):
        # own_period: the period being differentiated belongs to dimX
        acc = sin2*d_T
        if own_period:
            acc -= inv_T*(sin2 + 2*phase*cos2)*cov
        return -scale*acc

    if not self.ARD1:
        return assemble(grad_T, True)
    return [assemble(grad_T[dimT], dimT == dimX)
            for dimT in range(self.input_dim)]
|
||||||
|
|
||||||
|
def dK2_dperioddX2(self, X, X2, dimX2):
    """
    Compute the second derivative(s) of K with respect to:
        period(s), and
        dimension dimX2 of set X2.

    This is the negated result of dK2_dperioddX: element-wise over the
    list when self.ARD1 is set, on the single array otherwise.
    """
    wrt_x = self.dK2_dperioddX(X, X2, dimX2)
    if not self.ARD1:
        return -1*wrt_x
    return [-1*grad for grad in wrt_x]
|
||||||
|
|
||||||
|
def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the third derivative of K with respect to:
        variance,
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    Evaluated as the mixed input derivative from _clean_dK2_dXdX2 divided
    by the variance.
    """
    cross = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
    return cross/self.variance
|
||||||
|
|
||||||
|
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the third derivative(s) of K with respect to:
        lengthscale(s),
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    With self.ARD2 set, a list with one array per lengthscale is returned;
    otherwise a single array for the shared lengthscale.
    """
    n_dims = X.shape[1]
    inv_l = (np.ones(n_dims)/(self.lengthscale))[dimX2]
    inv_T = (np.ones(n_dims)/(self.period))[dimX2]

    # pairwise differences along dimX2, scaled to the argument of the sine
    phase = np.pi*inv_T*(X[:, None, dimX2] - X2[None, :, dimX2])
    sin2 = np.sin(2*phase)
    cos2 = np.cos(2*phase)

    scale = 0.5*np.pi*(inv_l**2)*inv_T  # multiplicative factor

    cross = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
    grad_l = self.dK_dlengthscale(X, X2)
    grad_l_dx = self.dK2_dlengthscaledX(X, X2, dimX)

    same_dim = dimX == dimX2

    def assemble(d_l, d_l_dx, own_lengthscale):
        # own_lengthscale: the lengthscale being differentiated belongs to
        # dimX2, so the derivative of the factor itself contributes too
        acc = sin2*d_l_dx
        if same_dim:
            acc += 2*np.pi*inv_T*cos2*d_l
        acc *= scale
        if own_lengthscale:
            acc -= 2*inv_l*cross
        return acc

    if not self.ARD2:
        return assemble(grad_l, grad_l_dx, True)
    return [assemble(grad_l[diml], grad_l_dx[diml], diml == dimX2)
            for diml in range(self.input_dim)]
|
||||||
|
|
||||||
|
def dK3_dperioddXdX2(self, X, X2, dimX, dimX2):
    """
    Compute the third derivative(s) of K with respect to:
        period(s),
        dimension dimX of set X, and
        dimension dimX2 of set X2.

    With self.ARD1 set, a list with one array per period is returned;
    otherwise a single array for the shared period.
    """
    n_dims = X.shape[1]
    inv_l = (np.ones(n_dims)/(self.lengthscale))[dimX2]
    inv_T = (np.ones(n_dims)/(self.period))[dimX2]

    # pairwise differences along dimX2, scaled to the argument of the sine
    phase = np.pi*inv_T*(X[:, None, dimX2] - X2[None, :, dimX2])
    sin2 = np.sin(2*phase)
    cos2 = np.cos(2*phase)

    scale = 0.5*np.pi*(inv_l**2)*inv_T  # multiplicative factor

    cov = self._clean_K(X, X2)
    d_x = self._clean_dK_dX(X, X2, dimX)
    cross = self._clean_dK2_dXdX2(X, X2, dimX, dimX2)
    grad_T = self.dK_dperiod(X, X2)
    grad_T_dx = self.dK2_dperioddX(X, X2, dimX)

    same_dim = dimX == dimX2

    if not self.ARD1:
        # a single shared period always belongs to dimX2
        acc = sin2*grad_T_dx - 2*inv_T*phase*cos2*d_x
        if same_dim:
            acc += 2*np.pi*inv_T*(cos2*grad_T + inv_T*(2*phase*sin2 - cos2)*cov)
        return scale*acc - inv_T*cross

    g = []
    for dimT in range(self.input_dim):
        own_period = dimT == dimX2
        acc = sin2*grad_T_dx[dimT]
        if own_period:
            acc -= 2*inv_T*cos2*phase*d_x
        if same_dim:
            acc += 2*np.pi*inv_T*cos2*grad_T[dimT]
        if same_dim and own_period:
            acc += 2*np.pi*(inv_T**2)*(2*phase*sin2 - cos2)*cov
        acc *= scale
        if own_period:
            acc -= inv_T*cross
        g.append(acc)
    return g
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
|
|
@ -167,12 +525,52 @@ class StdPeriodic(Kern):
|
||||||
else: # same lengthscales
|
else: # same lengthscales
|
||||||
self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
|
self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
|
||||||
|
|
||||||
|
def update_gradients_direct(self, dL_dVar, dL_dPer, dL_dLen):
    """
    Store externally computed gradients on the kernel parameters.

    dL_dVar, dL_dPer and dL_dLen are assigned unchanged to the `gradient`
    attribute of variance, period and lengthscale respectively.
    """
    assignments = (
        (self.variance, dL_dVar),
        (self.period, dL_dPer),
        (self.lengthscale, dL_dLen),
    )
    for param, grad in assignments:
        param.gradient = grad
|
||||||
|
|
||||||
|
def reset_gradients(self):
    """
    Zero the gradients of variance, period and lengthscale.

    period (self.ARD1) and lengthscale (self.ARD2) receive a zero vector of
    length self.input_dim when their ARD flag is set, a scalar 0. otherwise.
    """
    self.variance.gradient = 0.
    self.period.gradient = np.zeros(self.input_dim) if self.ARD1 else 0.
    self.lengthscale.gradient = np.zeros(self.input_dim) if self.ARD2 else 0.
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
||||||
self.variance.gradient = np.sum(dL_dKdiag)
|
self.variance.gradient = np.sum(dL_dKdiag)
|
||||||
self.period.gradient = 0
|
self.period.gradient = 0
|
||||||
self.lengthscale.gradient = 0
|
self.lengthscale.gradient = 0
|
||||||
|
|
||||||
|
def dgradients(self, X, X2):
    """
    Collect the derivatives of K with respect to every hyperparameter.

    Returns a list ordered as [variance, period, lengthscale].
    """
    return [
        self.dK_dvariance(X, X2),
        self.dK_dperiod(X, X2),
        self.dK_dlengthscale(X, X2),
    ]
|
||||||
|
|
||||||
|
def dgradients_dX(self, X, X2, dimX):
    """
    Collect the hyperparameter derivatives of dK/dX along dimension dimX.

    Returns a list ordered as [variance, period, lengthscale].
    """
    return [
        self.dK2_dvariancedX(X, X2, dimX),
        self.dK2_dperioddX(X, X2, dimX),
        self.dK2_dlengthscaledX(X, X2, dimX),
    ]
|
||||||
|
|
||||||
|
def dgradients_dX2(self, X, X2, dimX2):
    """
    Collect the hyperparameter derivatives of dK/dX2 along dimension dimX2.

    Returns a list ordered as [variance, period, lengthscale].
    """
    return [
        self.dK2_dvariancedX2(X, X2, dimX2),
        self.dK2_dperioddX2(X, X2, dimX2),
        self.dK2_dlengthscaledX2(X, X2, dimX2),
    ]
|
||||||
|
|
||||||
|
def dgradients2_dXdX2(self, X, X2, dimX, dimX2):
    """
    Collect the hyperparameter derivatives of d2K/dXdX2 for the given
    dimensions dimX (of X) and dimX2 (of X2).

    Returns a list ordered as [variance, period, lengthscale].
    """
    return [
        self.dK3_dvariancedXdX2(X, X2, dimX, dimX2),
        self.dK3_dperioddXdX2(X, X2, dimX, dimX2),
        self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2),
    ]
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
K = self.K(X, X2)
|
K = self.K(X, X2)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
|
|
@ -185,4 +583,4 @@ class StdPeriodic(Kern):
|
||||||
return np.zeros(X.shape)
|
return np.zeros(X.shape)
|
||||||
|
|
||||||
def input_sensitivity(self, summarize=True):
|
def input_sensitivity(self, summarize=True):
|
||||||
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
|
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
|
||||||
|
|
|
||||||
|
|
@ -306,7 +306,12 @@ class Stationary(Kern):
|
||||||
l4 = np.ones(X.shape[1])*self.lengthscale**2
|
l4 = np.ones(X.shape[1])*self.lengthscale**2
|
||||||
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
|
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
|
||||||
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
|
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
|
||||||
|
|
||||||
|
def dgradients(self, X, X2):
    """
    Collect the derivatives of K with respect to every hyperparameter.

    Returns a list ordered as [variance, lengthscale].
    """
    return [
        self.dK_dvariance(X, X2),
        self.dK_dlengthscale(X, X2),
    ]
|
||||||
|
|
||||||
def dgradients_dX(self, X, X2, dimX):
|
def dgradients_dX(self, X, X2, dimX):
|
||||||
g1 = self.dK2_dvariancedX(X, X2, dimX)
|
g1 = self.dK2_dvariancedX(X, X2, dimX)
|
||||||
g2 = self.dK2_dlengthscaledX(X, X2, dimX)
|
g2 = self.dK2_dlengthscaledX(X, X2, dimX)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from ..core.mapping import Mapping
|
||||||
from .. import likelihoods
|
from .. import likelihoods
|
||||||
from ..likelihoods.gaussian import Gaussian
|
from ..likelihoods.gaussian import Gaussian
|
||||||
from .. import kern
|
from .. import kern
|
||||||
|
from ..kern import DiffKern
|
||||||
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
|
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
|
||||||
from ..util.normalizer import Standardize
|
from ..util.normalizer import Standardize
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
@ -69,39 +70,80 @@ class MultioutputGP(GP):
|
||||||
if Y_metadata is None:
|
if Y_metadata is None:
|
||||||
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
|
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
|
||||||
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
|
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
|
||||||
|
|
||||||
def predictive_gradients(self, Xnew, kern=None):
|
|
||||||
if isinstance(Xnew, list):
|
|
||||||
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
|
|
||||||
#if Y_metadata is None:
|
|
||||||
#Y_metadata={'output_index': ind}
|
|
||||||
return super(MultioutputGP, self).predictive_gradients(Xnew, kern)
|
|
||||||
|
|
||||||
def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!!
|
def predictive_gradients(self, Xnew, kern=None):
|
||||||
"""
|
"""
|
||||||
Compute the derivatives of the predicted latent function with respect to X*
|
Compute the derivatives of the predicted latent function with respect
|
||||||
|
to X*
|
||||||
|
|
||||||
Given a set of points at which to predict X* (size [N*,Q]), compute the
|
Given a set of points at which to predict X* (size [N*,Q]), compute the
|
||||||
derivatives of the mean and variance. Resulting arrays are sized:
|
derivatives of the mean and variance. Resulting arrays are sized:
|
||||||
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
|
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP
|
||||||
Note that this is not the same as computing the mean and variance of the derivative of the function!
|
(usually one).
|
||||||
|
|
||||||
|
Note that this is not the same as computing the mean and variance of
|
||||||
|
the derivative of the function!
|
||||||
|
|
||||||
dv_dX* -- [N*, Q], (since all outputs have the same variance)
|
dv_dX* -- [N*, Q], (since all outputs have the same variance)
|
||||||
:param X: The points at which to get the predictive gradients
|
:param X: The points at which to get the predictive gradients
|
||||||
:type X: np.ndarray (Xnew x self.input_dim)
|
:type X: np.ndarray (Xnew x self.input_dim)
|
||||||
:returns: dmu_dX, dv_dX
|
:returns: dmu_dX, dv_dX
|
||||||
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
|
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if isinstance(Xnew, list):
|
if isinstance(Xnew, list):
|
||||||
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
|
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
|
||||||
|
|
||||||
slices = index_to_slices(Xnew[:,-1])
|
slices = index_to_slices(Xnew[:,-1])
|
||||||
|
|
||||||
for i in range(len(slices)):
|
|
||||||
if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0):
|
|
||||||
assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)"
|
|
||||||
|
|
||||||
if kern is None:
|
if kern is None:
|
||||||
kern = self.kern
|
kern = self.kern
|
||||||
|
|
||||||
|
if all([(isinstance(k, DiffKern)) for k in self.kern.kern[1:]]):
|
||||||
|
"""
|
||||||
|
Compute the gradients of the predicted latent function and predicted
|
||||||
|
partial derivatives with respect to X*.
|
||||||
|
|
||||||
|
This works only for models that observe the gradient of the latent function.
|
||||||
|
|
||||||
|
Xnew is given as a list of arrays, where each array X*_i (size [N_i*, Q])
|
||||||
|
contains points at which to compute gradients for each predicted latent
|
||||||
|
function or partial derivative.
|
||||||
|
|
||||||
|
Resulting arrays are sized [sum_i^D : N_i*, Q]
|
||||||
|
|
||||||
|
Passing a list of only one array [X*] returns only gradients of
|
||||||
|
the predicted latent function and does not compute gradients of
|
||||||
|
predicted partial derivatives.
|
||||||
|
|
||||||
|
In this case the resulting arrays are sized [N*, Q].
|
||||||
|
|
||||||
|
:param Xnew: points at which to compute predictive gradients
|
||||||
|
:type Xnew: list
|
||||||
|
:type Xnew[i]: np.darray (sum_i^D : N_i*, Q)
|
||||||
|
:returns: dmu_dX, dv_dX
|
||||||
|
:rtype: (np.ndarray (sum_i^D : N_i*, Q), np.ndarray (sum_i^D : N_i*, Q))
|
||||||
|
"""
|
||||||
|
|
||||||
|
dims = Xnew.shape[1] - 1
|
||||||
|
|
||||||
|
mean_jac = np.empty((Xnew.shape[0], dims))
|
||||||
|
var_jac = np.empty((Xnew.shape[0], dims))
|
||||||
|
|
||||||
|
X = self._predictive_variable
|
||||||
|
alpha = self.posterior.woodbury_vector
|
||||||
|
Wi = self.posterior.woodbury_inv
|
||||||
|
|
||||||
|
k = kern.K(Xnew, X)
|
||||||
|
for dimX in range(dims):
|
||||||
|
dk_dx = kern.dK_dX(Xnew, X, dimX)
|
||||||
|
dk_dxdiag = kern.dK_dXdiag(Xnew, dimX)
|
||||||
|
|
||||||
|
mean_jac[:,dimX] = np.dot(dk_dx, alpha).flatten()
|
||||||
|
var_jac[:,dimX] = dk_dxdiag - 2*(np.dot(k, Wi)*dk_dx).sum(-1)
|
||||||
|
return mean_jac, var_jac
|
||||||
|
|
||||||
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
|
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
|
||||||
for i in range(self.output_dim):
|
for i in range(self.output_dim):
|
||||||
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]
|
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ from __future__ import division
|
||||||
import unittest
|
import unittest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import GPy
|
import GPy
|
||||||
|
from GPy.models import GradientChecker
|
||||||
|
from functools import reduce
|
||||||
|
|
||||||
class MiscTests(unittest.TestCase):
|
class MiscTests(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
|
@ -1208,40 +1210,6 @@ class GradientTests(np.testing.TestCase):
|
||||||
|
|
||||||
with self.assertRaises(RuntimeError):
|
with self.assertRaises(RuntimeError):
|
||||||
m._raw_posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
|
m._raw_posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
|
||||||
|
|
||||||
def test_multioutput_model_with_derivative_observations(self):
|
|
||||||
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
|
|
||||||
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
|
|
||||||
N=10
|
|
||||||
M=10
|
|
||||||
sigma=0.05
|
|
||||||
sigmader=0.05
|
|
||||||
x = np.array([np.linspace(1,10,N)]).T
|
|
||||||
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
|
|
||||||
|
|
||||||
xd = np.array([np.linspace(2,8,M)]).T
|
|
||||||
yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1)))
|
|
||||||
|
|
||||||
# squared exponential kernel:
|
|
||||||
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
|
|
||||||
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
|
|
||||||
se_der = GPy.kern.DiffKern(se, 0)
|
|
||||||
|
|
||||||
#Then
|
|
||||||
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
|
|
||||||
gauss = GPy.likelihoods.Gaussian(variance=0.1)
|
|
||||||
gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2)
|
|
||||||
|
|
||||||
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
|
|
||||||
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
|
|
||||||
# the likelihoods must follow the same order
|
|
||||||
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
|
|
||||||
m.randomize()
|
|
||||||
self.assertTrue(m.checkgrad())
|
|
||||||
|
|
||||||
m.optimize(messages=0, ipython_notebook=False)
|
|
||||||
|
|
||||||
self.assertTrue(m.checkgrad())
|
|
||||||
|
|
||||||
def test_multioutput_model_with_ep(self):
|
def test_multioutput_model_with_ep(self):
|
||||||
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
|
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
|
||||||
|
|
@ -1308,6 +1276,173 @@ class GradientTests(np.testing.TestCase):
|
||||||
c2 = model.predict(x, full_cov=True)[1]
|
c2 = model.predict(x, full_cov=True)[1]
|
||||||
np.testing.assert_allclose(c1,c2)
|
np.testing.assert_allclose(c1,c2)
|
||||||
|
|
||||||
|
class GradientMultioutputGPModelTests(np.testing.TestCase):
    '''
    Tests for MultioutputGP models that are trained on observations of a
    latent function together with observations of its partial derivatives.
    '''

    def setUp(self):
        # standard test function: a sum of squared sines and its gradient
        self.period = 3
        self.w = 2*np.pi/self.period
        self.f = lambda x: np.sum(np.square(np.sin(self.w*x)), axis=1)
        self.df = lambda x: self.w*np.sin(2*self.w*x)

        self.noise_std = 1e-2

        self.bounds = (-self.period, self.period)

        self.train_points = 5
        self.test_points = 25

    def approximate_predictive_gradients(self, model, x_test, D, step=1e-6):
        '''
        Approximates gradients of predicted posterior means and variances
        with central finite differences of model.predict.

        This function is used as the frameworks for GradientChecker and
        MultioutputGP do not easily combine when checking gradients of predicted
        partial derivative posteriors.
        '''
        # one row per test point and per output (latent function + D partials)
        dmdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D))
        dvdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D))

        for d in range(D):
            x_over = x_test.copy()
            x_over[:, d] += step
            x_undr = x_test.copy()
            x_undr[:, d] -= step

            m_over, v_over = model.predict([x_over]*(D + 1))
            m_undr, v_undr = model.predict([x_undr]*(D + 1))

            dmdx_aprx[:, d, None] = (m_over - m_undr)/(2*step)
            dvdx_aprx[:, d, None] = (v_over - v_undr)/(2*step)

        return dmdx_aprx, dvdx_aprx

    def check_model(self, kern):
        '''
        Checks predictions, hyperparameter gradients, and gradients of predicted
        posterior means and variances for MultioutputGP models that incorporate
        observed latent function gradient information.
        '''
        D = kern.input_dim

        X_list = []
        Y_list = []
        for i in range(D + 1):
            # sample inputs for either latent function or partial derivatives
            X_i = np.random.uniform(*self.bounds, size=(self.train_points, D))
            # output of latent function or partial derivatives
            Y_i = (self.f(X_i) if (i == 0) else self.df(X_i)[:, i - 1])[:, None]
            # noisy observations
            Y_i += np.random.normal(scale=self.noise_std, size=Y_i.shape)

            X_list.append(X_i)
            Y_list.append(Y_i)

        # the kernel is accompanied with derivative kernels, one for each dimension
        kernel_list = [kern] + [GPy.kern.DiffKern(kern, d) for d in range(D)]

        # create model and check its hyperparameter gradient
        likelihood_list = [GPy.likelihoods.Gaussian(variance=self.noise_std**2)]*(D + 1)
        model = GPy.models.MultioutputGP(X_list, Y_list, kernel_list, likelihood_list)
        model.likelihood.constrain_fixed()
        self.assertTrue(model.checkgrad(step=1e-3))

        # optimize the model, and check its hyperparameter gradient again
        model.optimize()
        self.assertTrue(model.checkgrad(step=1e-3))

        # check predictions
        np.testing.assert_allclose(model.predict(X_list)[0], model.Y, atol=3*self.noise_std)

        # test inputs for checking predictive gradients
        x_test = np.random.uniform(*self.bounds, size=(self.test_points, D))

        # predictive gradients
        dmdx, dvdx = model.predictive_gradients([x_test]*(D + 1))
        # approximated predictive gradients
        dmdx_aprx, dvdx_aprx = self.approximate_predictive_gradients(model, x_test, D, step=1e-3)
        # check predictive gradients
        np.testing.assert_allclose(dmdx, dmdx_aprx, atol=3*self.noise_std)
        np.testing.assert_allclose(dvdx, dvdx_aprx, atol=3*self.noise_std)

    def test_MultioutputGP_gradobs_RBF(self):
        '''
        Testing gradient observing MultioutputGP model with an RBF kernel.
        '''
        for D in range(1, 4):
            kern = GPy.kern.RBF(input_dim=D)
            kern.randomize()
            self.check_model(kern)

    def test_MultioutputGP_gradobs_RBF_ARD(self):
        '''
        Testing gradient observing MultioutputGP model with an RBF (ARD) kernel.
        '''
        for D in range(1, 4):
            kern = GPy.kern.RBF(input_dim=D, ARD=True)
            kern.randomize()
            self.check_model(kern)

    def test_MultioutputGP_gradobs_StdP(self):
        '''
        Testing gradient observing MultioutputGP model with a StdP kernel.
        '''
        for D in range(1, 4):
            kern = GPy.kern.StdPeriodic(input_dim=D, period=self.period)
            kern.period.constrain_fixed()
            kern.randomize()
            self.check_model(kern)

    def test_MultioutputGP_gradobs_StdP_ARD(self):
        '''
        Testing gradient observing MultioutputGP model with a StdP (ARD) kernel.
        '''
        for D in range(1, 4):
            kern = GPy.kern.StdPeriodic(input_dim=D, period=[self.period]*D, ARD1=True, ARD2=True)
            kern.period.constrain_fixed()
            kern.randomize()
            self.check_model(kern)

    def test_MultioutputGP_gradobs_prod_RBF(self):
        '''
        Testing gradient observing MultioutputGP model with several RBF kernels.
        '''
        for D in range(2, 4):
            kerns = [GPy.kern.RBF(input_dim=1) for _ in range(D)]
            kern = reduce(lambda k0, k1: k0 * k1, kerns)
            kern.randomize()
            self.check_model(kern)

    def test_MultioutputGP_gradobs_prod_StdP(self):
        '''
        Testing gradient observing MultioutputGP model with several StdP kernels.
        '''
        for D in range(2, 4):
            kerns = [GPy.kern.StdPeriodic(input_dim=1, period=self.period) for _ in range(D)]
            kern = reduce(lambda k0, k1: k0 * k1, kerns)
            # fix the period of every factor; a plain loop, as the calls are
            # made for their side effect only
            for k in kern.parts:
                k.period.constrain_fixed()
            kern.randomize()
            self.check_model(kern)

    def test_MultioutputGP_gradobs_prod_mix(self):
        '''
        Testing gradient observing MultioutputGP model with a mix of kernel types.
        '''
        for D in range(2, 4):
            kerns = []
            for d in range(D):
                # alternate RBF and (fixed-period) StdPeriodic factors
                if d % 2 == 0:
                    k = GPy.kern.RBF(input_dim=1)
                else:
                    k = GPy.kern.StdPeriodic(input_dim=1, period=self.period)
                    k.period.constrain_fixed()
                kerns.append(k)
            kern = reduce(lambda k0, k1: k0 * k1, kerns)
            kern.randomize()
            self.check_model(kern)
|
||||||
|
|
||||||
def _create_missing_data_model(kernel, Q):
|
def _create_missing_data_model(kernel, Q):
|
||||||
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
|
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue