diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 13146e12..24ff0be8 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -771,3 +771,117 @@ def multioutput_gp_with_derivative_observations(plot=True): mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))]) return m + +def multioutput_gp_with_derivative_observations_2D(optimize=True, plot=False): + ''' + This in an example on how to use a MultioutputGP model with gradient + observations and multiple single-dimensional kernels of differing types. + ''' + + period = 3 + w = 2*np.pi/period # angular frequency + bounds = (-period, period) + + # latent function and gradient + f = lambda x: (np.exp(-x[:,0]**2) + np.cos(w*x[:,1]))[:,None] + df = lambda x: np.array([-2*np.exp(-x[:,0]**2)*x[:,0], -w*np.sin(w*x[:,1])]).T + + # 2D input grid + ppa = 25 # points per axis + x = np.linspace(*bounds, ppa) + xx, yy = np.meshgrid(x, x) + grid = np.array([xx.reshape(-1), yy.reshape(-1)]).T + + fgrid = f(grid) + dfgrid = df(grid) + + # 10 random training points generated with a space-filling sobol sequence + X = np.array([ + [ 0.50421399, 2.1331483 ], + [-2.15717152, -1.70295936], + [-1.46704334, 1.37111521], + [ 2.79064536, -0.9649018 ], + [ 1.60728264, 0.27702713], + [-0.30712366, -0.57372129], + [-2.6140632 , 2.49192488], + [ 0.89078772, -2.85873686], + [ 1.15813136, 0.96910322], + [-2.83307021, -1.38155383] + ]) + + # Note! + # This example uses the same inputs for function and gradient observations. + + noise_std = 1e-2 + # function observations + Y = f(X) + np.random.normal(scale=noise_std, size=(len(X), 1)) + # gradient observations + dY = df(X) + np.random.normal(scale=noise_std, size=(len(X), 2)) + + # gather inputs and observations into lists + X_list = [X, X, X] + # once for function observations, and once for each partial derivative + # make sure all arrays are of shape (N x dims), where N is # of training points + Y_list = [Y, dY[:,0,None], dY[:,1,None]] + + # create a kernel that is the product of two one-dimensional kernels + # the first kernel is an RBF kernel + kern0 = GPy.kern.RBF(input_dim=1, active_dims=[0]) + # as the function is periodic in the second dimension, we use a StdP kernel + kern1 = GPy.kern.StdPeriodic(input_dim=1, active_dims=[1], period=period) + kern1.period.constrain_fixed() + # the kernels can be multiplied together into a product kernel + kern = kern0 * kern1 + + # with gradient observations, we need to define a DiffKern for each dimension + # the DiffKern is given the main kernel as a base kernel + diffkern0 = GPy.kern.DiffKern(kern, 0) + diffkern1 = GPy.kern.DiffKern(kern, 1) + + # gather the main kernel and diffkerns into a list + kern_list = [kern, diffkern0, diffkern1] + + # define a likelihood and repeat it in a list + likelihood_list = [GPy.likelihoods.Gaussian(variance=noise_std**2)]*3 + + # create the MultioutputGP model and optimize + model = GPy.models.MultioutputGP(X_list, Y_list, kern_list, likelihood_list) + model.likelihood.constrain_fixed() + if optimize: + model.optimize() + + # make function predictions + Xnew, _, ind = GPy.util.multioutput.build_XY([grid], index=[0]) + Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} + + mu, var = model.predict(Xnew, Y_metadata=Y_metadata) + + # make gradient predictions + Xnew, _, ind = GPy.util.multioutput.build_XY([grid]*2, index=[1, 2]) + Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} + + mu_d, var_d = model.predict(Xnew, Y_metadata=Y_metadata) + + mu_d = np.array([mu_d[:len(grid)], mu_d[len(grid):]]).T[0] + var_d = np.array([var_d[:len(grid)], var_d[len(grid):]]).T[0] + + if plot and MPL_AVAILABLE: + fig, axs = plt.subplots(1, 3) + for ax in axs: ax.set_box_aspect(1) + axs[0].set_title('true f') + axs[0].contourf(xx, yy, fgrid.reshape(ppa, ppa), levels=25) + axs[1].set_title('true df1') + axs[1].contourf(xx, yy, dfgrid[:,0].reshape(ppa, ppa), levels=25) + axs[2].set_title('true df2') + axs[2].contourf(xx, yy, dfgrid[:,1].reshape(ppa, ppa), levels=25) + + fig, axs = plt.subplots(1, 3) + for ax in axs: ax.set_box_aspect(1) + axs[0].set_title('pred f') + axs[0].contourf(xx, yy, mu.reshape(ppa, ppa), levels=25) + axs[1].set_title('pred df1') + axs[1].contourf(xx, yy, mu_d[:,0].reshape(ppa, ppa), levels=25) + axs[2].set_title('pred df2') + axs[2].contourf(xx, yy, mu_d[:,1].reshape(ppa, ppa), levels=25) + + return model diff --git a/GPy/kern/src/diff_kern.py b/GPy/kern/src/diff_kern.py index 612c0632..a0dd1980 100644 --- a/GPy/kern/src/diff_kern.py +++ b/GPy/kern/src/diff_kern.py @@ -23,24 +23,42 @@ class DiffKern(Kern): self.base_kern.parameters_changed() @Cache_this(limit=3, ignore_args=()) - def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension + def K(self, X, X2=None, dimX2=None): #X in dimension self.dimension if X2 is None: X2 = X if dimX2 is None: dimX2 = self.dimension - return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2) - + return self.base_kern.dK2_dXdX2(X, X2, self.dimension, dimX2) + + @Cache_this(limit=3, ignore_args=()) + def dK_dX(self, X, X2, dimX, dimX2=None): + if dimX2 is None: + dimX2 = self.dimension + return self.base_kern.dK3_dXdXdX2(X, X2, dimX, self.dimension, dimX2) + @Cache_this(limit=3, ignore_args=()) def Kdiag(self, X): - return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension)) - + return self.base_kern.dK2_dXdX2diag(X, self.dimension, self.dimension) + + @Cache_this(limit=3, ignore_args=()) + def dK_dXdiag(self, X, dimX): + return self.base_kern.dK3_dXdXdX2diag(X, dimX, self.dimension, self.dimension) + @Cache_this(limit=3, ignore_args=()) def dK_dX_wrap(self, X, X2): #X in dimension self.dimension - return self.base_kern.dK_dX(X,X2, self.dimension) + return self.base_kern.dK_dX(X, X2, self.dimension) @Cache_this(limit=3, ignore_args=()) def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension - return self.base_kern.dK_dX2(X,X2, self.dimension) + return self.base_kern.dK_dX2(X, X2, self.dimension) + + @Cache_this(limit=3, ignore_args=()) + def dK2_dXdX2_wrap(self, X, X2, dimX): + return self.base_kern.dK2_dXdX2(X, X2, dimX, self.dimension) + + @Cache_this(limit=3, ignore_args=()) + def dK2_dXdX_wrap(self, X, X2, dimX): + return self.base_kern.dK2_dXdX(X, X2, dimX, self.dimension) def reset_gradients(self): self.base_kern.reset_gradients() @@ -56,33 +74,33 @@ class DiffKern(Kern): def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None): if dimX2 is None: dimX2 = self.dimension - gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2) + gradients = self.base_kern.dgradients2_dXdX2(X, X2, self.dimension, dimX2) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) def update_gradients_diag(self, dL_dK_diag, X): - gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension) + gradients = self.base_kern.dgradients2_dXdX2(X, X, self.dimension, self.dimension) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients]) def update_gradients_dK_dX(self, dL_dK, X, X2=None): if X2 is None: X2 = X - gradients = self.base_kern.dgradients_dX(X,X2, self.dimension) + gradients = self.base_kern.dgradients_dX(X, X2, self.dimension) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) def update_gradients_dK_dX2(self, dL_dK, X, X2=None): - gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension) + gradients = self.base_kern.dgradients_dX2(X, X2, self.dimension) self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) def gradients_X(self, dL_dK, X, X2): - tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension] + tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:,self.dimension] return np.sum(tmp, axis=1) def gradients_X2(self, dL_dK, X, X2): - tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :] + tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,self.dimension,:] return np.sum(tmp, axis=1) - def _convert_gradients(self, l,g, f = lambda x:x): + def _convert_gradients(self, l, g, f=lambda x:x): if type(g) is np.ndarray: return np.sum(f(l)*f(g)) else: - return np.array([np.sum(f(l)*f(gi)) for gi in g]) \ No newline at end of file + return np.array([np.sum(f(l)*f(gi)) for gi in g]) diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py index 8a5dcb81..abc073ea 100644 --- a/GPy/kern/src/kernel_slice_operations.py +++ b/GPy/kern/src/kernel_slice_operations.py @@ -22,7 +22,14 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): put_clean(dct, 'dK_dX', _slice_dK_dX) put_clean(dct, 'dK_dX2', _slice_dK_dX) put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2) + put_clean(dct, 'dK2_dXdX', _slice_dK2_dXdX2) + put_clean(dct, 'dK3_dXdXdX2', _slice_dK3_dXdXdX2) put_clean(dct, 'Kdiag', _slice_Kdiag) + put_clean(dct, 'dK_dXdiag', _slice_dK_dXdiag) + put_clean(dct, 'dK_dX2diag', _slice_dK_dXdiag) + put_clean(dct, 'dK2_dXdX2diag', _slice_dK2_dXdX2diag) + put_clean(dct, 'dK2_dXdXdiag', _slice_dK2_dXdX2diag) + put_clean(dct, 'dK3_dXdXdX2diag', _slice_dK3_dXdXdX2diag) put_clean(dct, 'phi', _slice_Kdiag) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) @@ -35,9 +42,10 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) - put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X) - put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X) - put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX) + put_clean(dct, 'dgradients', _slice_partial_gradients_list) + put_clean(dct, 'dgradients_dX', _slice_partial_gradients_list_X) + put_clean(dct, 'dgradients_dX2', _slice_partial_gradients_list_X) + put_clean(dct, 'dgradients2_dXdX2', _slice_partial_gradients_list_XX) put_clean(dct, 'psi0', _slice_psi) put_clean(dct, 'psi1', _slice_psi) @@ -155,6 +163,18 @@ def _slice_dK_dX(f): return ret return wrap +def _slice_dK_dXdiag(f): + @wraps(f) + def wrap(self, X, dim, *a, **kw): + with _Slice_wrap(self, X, None) as s: + d = s.k._project_dim(dim) + if d is None: + ret = np.zeros(X.shape[0]) + else: + ret = f(self, s.X, dim, *a, **kw) + return ret + return wrap + def _slice_dK2_dXdX2(f): @wraps(f) def wrap(self, X, X2, dimX, dimX2, *a, **kw): @@ -168,6 +188,59 @@ def _slice_dK2_dXdX2(f): return ret return wrap +def _slice_dK2_dXdX2diag(f): + @wraps(f) + def wrap(self, X, dimX, dimX2, *a, **kw): + with _Slice_wrap(self, X, None) as s: + d = s.k._project_dim(dimX) + d2 = s.k._project_dim(dimX2) + if (d is None) or (d2 is None): + ret = np.zeros(X.shape[0]) + else: + ret = f(self, s.X, d, d2, *a, **kw) + return ret + return wrap + +def _slice_dK3_dXdXdX2(f): + @wraps(f) + def wrap(self, X, X2, dim, dimX, dimX2, *a, **kw): + with _Slice_wrap(self, X, X2) as s: + D = s.k._project_dim(dim) + d = s.k._project_dim(dimX) + d2 = s.k._project_dim(dimX2) + if (D is None) or (d is None) or (d2 is None): + ret = np.zeros((X.shape[0], X2.shape[0])) + else: + ret = f(self, s.X, s.X2, D, d, d2, *a, **kw) + return ret + return wrap + +def _slice_dK3_dXdXdX2diag(f): + @wraps(f) + def wrap(self, X, dim, dimX, dimX2, *a, **kw): + with _Slice_wrap(self, X, None) as s: + D = s.k._project_dim(dim) + d = s.k._project_dim(dimX) + d2 = s.k._project_dim(dimX2) + if (D is None) or (d is None) or (d2 is None): + ret = np.zeros(X.shape[0]) + else: + ret = f(self, s.X, D, d, d2, *a, **kw) + return ret + return wrap + +def _slice_partial_gradients_list(f): + @wraps(f) + def wrap(self, X, X2): + if X2 is None: + N, M = X.shape[0], X.shape[0] + else: + N, M = X.shape[0], X2.shape[0] + with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s: + ret = f(self, s.X, s.X2) + return ret + return wrap + def _slice_partial_gradients_X(f): @wraps(f) def wrap(self, X, X2, dim): diff --git a/GPy/kern/src/multioutput_derivative_kern.py b/GPy/kern/src/multioutput_derivative_kern.py index 17da4a13..a884af71 100644 --- a/GPy/kern/src/multioutput_derivative_kern.py +++ b/GPy/kern/src/multioutput_derivative_kern.py @@ -7,20 +7,24 @@ import numpy as np from functools import partial class KernWrapper(Kern): - def __init__(self, fk, fug, fg, base_kern): + def __init__(self, fk, fdk, fug, fg, base_kern): self.fk = fk + self.fdk = fdk self.fug = fug self.fg = fg self.base_kern = base_kern - super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False) + super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper', useGPU=False) def K(self, X, X2=None): - return self.fk(X,X2=X2) + return self.fk(X, X2=X2) + + def dK_dX(self, X, X2, dimX): + return self.fdk(X, X2, dimX) - def update_gradients_full(self,dL_dK, X, X2=None): + def update_gradients_full(self, dL_dK, X, X2=None): return self.fug(dL_dK, X, X2=X2) - def gradients_X(self,dL_dK, X, X2=None): + def gradients_X(self, dL_dK, X, X2=None): return self.fg(dL_dK, X, X2=X2) @property @@ -57,28 +61,46 @@ class MultioutputDerivativeKern(MultioutputKern): #build covariance structure covariance = [[None for i in range(nl)] for j in range(nl)] linked = [] - for i in range(0,nl): - unique=True - for j in range(0,nl): - if i==j or (kernels[i] is kernels[j]): + for i in range(0, nl): + unique = True + for j in range(0, nl): + if (i == j) or (kernels[i] is kernels[j]): kern = kernels[i] - if i>j: - unique=False + if i > j: + unique = False elif cross_covariances.get((i,j)) is not None: #cross covariance is given kern = cross_covariances.get((i,j)) - elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other - kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j]) + elif (kernels[i].name == 'DiffKern') and (kernels[i].base_kern == kernels[j]): # one is derivative of other + kern = KernWrapper( + kernels[i].dK_dX_wrap, + kernels[i].dK2_dXdX_wrap, + kernels[i].update_gradients_dK_dX, + kernels[i].gradients_X, + kernels[j] + ) unique=False - elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other - kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i]) - elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives - kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern) - if i>j: - unique=False + elif (kernels[j].name == 'DiffKern') and (kernels[j].base_kern == kernels[i]): # one is derivative of other + kern = KernWrapper( + kernels[j].dK_dX2_wrap, + kernels[j].dK2_dXdX2_wrap, + kernels[j].update_gradients_dK_dX2, + kernels[j].gradients_X2, + kernels[i] + ) + elif (kernels[i].name == 'DiffKern') and (kernels[j].name == 'DiffKern') and (kernels[i].base_kern == kernels[j].base_kern): #both are partial derivatives + kern = KernWrapper( + partial(kernels[i].K, dimX2=kernels[j].dimension), + partial(kernels[i].dK_dX, dimX2=kernels[j].dimension), + partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension), + None, + kernels[i].base_kern + ) + if i > j: + unique = False else: kern = ZeroKern() covariance[i][j] = kern if unique is True: linked.append(i) self.covariance = covariance - self.link_parameters(*[kernels[i] for i in linked]) \ No newline at end of file + self.link_parameters(*[kernels[i] for i in linked]) diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py index 6bedeb59..6cc90ab4 100644 --- a/GPy/kern/src/multioutput_kern.py +++ b/GPy/kern/src/multioutput_kern.py @@ -85,21 +85,63 @@ class MultioutputKern(CombinationKernel): self.link_parameters(*[kernels[i] for i in linked]) @Cache_this(limit=3, ignore_args=()) - def K(self, X ,X2=None): + def K(self, X, X2=None): if X2 is None: X2 = X slices = index_to_slices(X[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim]) + target = np.zeros((X.shape[0], X2.shape[0])) - [[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] + for j in range(len(slices2)): + for i in range(len(slices)): + for l in range(len(slices2[j])): + for k in range(len(slices[i])): + cov_K = self.covariance[i][j].K(X[slices[i][k],:], X2[slices2[j][l],:]) + target.__setitem__((slices[i][k], slices2[j][l]), cov_K) return target @Cache_this(limit=3, ignore_args=()) - def Kdiag(self,X): + def Kdiag(self, X): slices = index_to_slices(X[:,self.index_dim]) kerns = itertools.repeat(self.kern) if self.single_kern else self.kern target = np.zeros(X.shape[0]) - [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] + for kern, slices_i in zip(kerns, slices): + for s in slices_i: + np.copyto(target[s], kern.Kdiag(X[s])) + return target + + @Cache_this(limit=3, ignore_args=()) + def dK_dX(self, X, X2, dimX): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + """ + if X2 is None: + X2 = X + slices = index_to_slices(X[:,self.index_dim]) + slices2 = index_to_slices(X2[:,self.index_dim]) + + target = np.zeros((X.shape[0], X2.shape[0])) + for j in range(len(slices2)): + for i in range(len(slices)): + for l in range(len(slices2[j])): + for k in range(len(slices[i])): + cov_dK_dX = self.covariance[i][j].dK_dX(X[slices[i][k],:], X2[slices2[j][l],:], dimX) + target.__setitem__((slices[i][k], slices2[j][l]), cov_dK_dX) + return target + + @Cache_this(limit=3, ignore_args=()) + def dK_dXdiag(self, X, dimX): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + """ + slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern + target = np.zeros(X.shape[0]) + for kern, slices_i in zip(kerns, slices): + for s in slices_i: + np.copyto(target[s], kern.dK_dXdiag(X[s], dimX)) return target def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2): @@ -115,19 +157,35 @@ class MultioutputKern(CombinationKernel): def reset_gradients(self): for kern in self.kern: kern.reset_gradients() - def update_gradients_full(self,dL_dK, X, X2=None): - self.reset_gradients() + def update_gradients_full(self, dL_dK, X, X2=None): + if X2 is None: + X2 = X slices = index_to_slices(X[:,self.index_dim]) - if X2 is not None: - slices2 = index_to_slices(X2[:,self.index_dim]) - [[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] - else: - [[[[ self._update_gradients_full_wrapper(self.covariance[i][j], dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], X[slices[j][l],:]) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] - + slices2 = index_to_slices(X2[:,self.index_dim]) + + self.reset_gradients() + for j in range(len(slices2)): + for i in range(len(slices)): + for l in range(len(slices2[j])): + for k in range(len(slices[i])): + self._update_gradients_full_wrapper( + self.covariance[i][j], + dL_dK[slices[i][k],slices2[j][l]], + X[slices[i][k],:], + X2[slices2[j][l],:] + ) + def update_gradients_diag(self, dL_dKdiag, X): - self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) - [[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] + + self.reset_gradients() + for i in range(len(slices)): + for k in range(len(slices[i])): + self._update_gradients_diag_wrapper( + self.covariance[i][i], + dL_dKdiag[slices[i][k]], + X[slices[i][k],:] + ) def gradients_X(self,dL_dK, X, X2=None): slices = index_to_slices(X[:,self.index_dim]) @@ -137,4 +195,4 @@ class MultioutputKern(CombinationKernel): [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] else: [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] - return target \ No newline at end of file + return target diff --git a/GPy/kern/src/prod.py b/GPy/kern/src/prod.py index 6bf5f9f3..b9d883de 100644 --- a/GPy/kern/src/prod.py +++ b/GPy/kern/src/prod.py @@ -70,6 +70,310 @@ class Prod(CombinationKernel): which_parts = self.parts return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) + def reset_gradients(self): + for part in self.parts: + part.reset_gradients() + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK_dX(self, X, X2, dimX, which_parts=None): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros((X.shape[0], X2.shape[0])) + for combination in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination]) + else: + prod = np.ones(prod_sum.shape) + to_update = list(set(which_parts) - set(combination))[0] + prod_sum += prod*to_update.dK_dX(X, X2, dimX) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK_dXdiag(self, X, dimX, which_parts=None): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + + Returns only diagonal elements. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros(X.shape[0]) + for combination in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination) > 0: + prod = reduce(np.multiply, [p.Kdiag(X) for p in combination]) + else: + prod = np.ones(prod_sum.shape) + to_update = list(set(which_parts) - set(combination))[0] + prod_sum += prod*to_update.dK_dXdiag(X, dimX) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK_dX2(self, X, X2, dimX2, which_parts=None): + """ + Compute the derivative of K with respect to: + dimension dimX2 of set X2. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros((X.shape[0], X2.shape[0])) + for combination in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination]) + else: + prod = np.ones(prod_sum.shape) + to_update = list(set(which_parts) - set(combination))[0] + prod_sum += prod*to_update.dK_dX2(X, X2, dimX2) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK2_dXdX2(self, X, X2, dimX, dimX2, which_parts=None): + """ + Compute the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros((X.shape[0], X2.shape[0])) + for combination1 in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination1) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination1]) + else: + prod = np.ones(prod_sum.shape) + to_update1 = list(set(which_parts) - set(combination1))[0] + prod_sum += prod*to_update1.dK2_dXdX2(X, X2, dimX, dimX2) + if len(which_parts) > 1: + for combination2 in itertools.combinations(combination1, len(combination1) - 1): + if len(combination2) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination2]) + else: + prod = np.ones(prod_sum.shape) + to_update2 = list(set(combination1) - set(combination2))[0] + prod_sum += prod*to_update1.dK_dX(X, X2, dimX)*to_update2.dK_dX2(X, X2, dimX2) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK2_dXdX2diag(self, X, dimX, dimX2, which_parts=None): + """ + Compute the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2. + + Returns only diagonal elements. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros(X.shape[0]) + for combination1 in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination1) > 0: + prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1]) + else: + prod = np.ones(prod_sum.shape) + to_update1 = list(set(which_parts) - set(combination1))[0] + prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX, dimX2) + if len(which_parts) > 1: + for combination2 in itertools.combinations(combination1, len(combination1) - 1): + if len(combination2) > 0: + prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2]) + else: + prod = np.ones(prod_sum.shape) + to_update2 = list(set(combination1) - set(combination2))[0] + prod_sum += prod*to_update1.dK_dXdiag(X, dimX)*to_update2.dK_dX2diag(X, dimX) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK2_dXdX(self, X, X2, dimX_0, dimX_1, which_parts=None): + """ + Compute the second derivative of K with respect to: + dimension dimX_0 of set X, and + dimension dimX_1 of set X. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros((X.shape[0], X2.shape[0])) + for combination1 in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination1) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination1]) + else: + prod = np.ones(prod_sum.shape) + to_update1 = list(set(which_parts) - set(combination1))[0] + prod_sum += prod*to_update1.dK2_dXdX(X, X2, dimX_0, dimX_1) + if len(which_parts) > 1: + for combination2 in itertools.combinations(combination1, len(combination1) - 1): + if len(combination2) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination2]) + else: + prod = np.ones(prod_sum.shape) + to_update2 = list(set(combination1) - set(combination2))[0] + prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK_dX(X, X2, dimX_1) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2, which_parts=None): + """ + Compute the third derivative of K with respect to: + dimension dimX_0 of set X, + dimension dimX_1 of set X, and + dimension dimX2 of set X2. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros((X.shape[0], X2.shape[0])) + for combination1 in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination1) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination1]) + else: + prod = np.ones(prod_sum.shape) + to_update1 = list(set(which_parts) - set(combination1))[0] + prod_sum += prod*to_update1.dK3_dXdXdX2(X, X2, dimX_0, dimX_1, dimX2) + if len(which_parts) > 1: + for combination2 in itertools.combinations(combination1, len(combination1) - 1): + if len(combination2) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination2]) + else: + prod = np.ones(prod_sum.shape) + to_update2 = list(set(combination1) - set(combination2))[0] + prod_sum += prod*to_update1.dK2_dXdX2(X, X2, dimX_0, dimX2)*to_update2.dK_dX(X, X2, dimX_1) + prod_sum += prod*to_update1.dK2_dXdX(X, X2, dimX_0, dimX_1)*to_update2.dK_dX2(X, X2, dimX2) + prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK2_dXdX2(X, X2, dimX_1, dimX2) + if len(which_parts) > 2: + for combination3 in itertools.combinations(combination2, len(combination2) - 1): + if len(combination3) > 0: + prod = reduce(np.multiply, [p.K(X, X2) for p in combination3]) + else: + prod = np.ones(prod_sum.shape) + to_update3 = list(set(combination2) - set(combination3))[0] + prod_sum += prod*to_update1.dK_dX(X, X2, dimX_0)*to_update2.dK_dX2(X, X2, dimX2)*to_update3.dK_dX(X, X2, dimX_1) + return prod_sum + + @Cache_this(limit=3, force_kwargs=['which_parts']) + def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2, which_parts=None): + """ + Compute the third derivative of K with respect to: + dimension dimX_0 of set X, + dimension dimX_1 of set X, and + dimension dimX2 of set X2. + + Returns only diagonal elements of the covariance matrix. + """ + if which_parts is None: + which_parts = self.parts + prod_sum = np.zeros(X.shape[0]) + for combination1 in itertools.combinations(which_parts, len(which_parts) - 1): + if len(combination1) > 0: + prod = reduce(np.multiply, [p.Kdiag(X) for p in combination1]) + else: + prod = np.ones(prod_sum.shape) + to_update1 = list(set(which_parts) - set(combination1))[0] + prod_sum += prod*to_update1.dK3_dXdXdX2diag(X, dimX_0, dimX_1, dimX2) + if len(which_parts) > 1: + for combination2 in itertools.combinations(combination1, len(combination1) - 1): + if len(combination2) > 0: + prod = reduce(np.multiply, [p.Kdiag(X) for p in combination2]) + else: + prod = np.ones(prod_sum.shape) + to_update2 = list(set(combination1) - set(combination2))[0] + prod_sum += prod*to_update1.dK2_dXdX2diag(X, dimX_0, dimX2)*to_update2.dK_dXdiag(X, dimX_1) + prod_sum += prod*to_update1.dK2_dXdXdiag(X, dimX_0, dimX_1)*to_update2.dK_dX2diag(X, dimX2) + prod_sum += prod*to_update1.dK_dXdiag(X, dimX_0)*to_update2.dK2_dXdX2diag(X, dimX_1, dimX2) + if len(which_parts) > 2: + for combination3 in itertools.combinations(combination2, len(combination2) - 1): + if len(combination3) > 0: + prod = reduce(np.multiply, [p.Kdiag(X) for p in combination3]) + else: + prod = np.ones(prod_sum.shape) + to_update3 = list(set(combination2) - set(combination3))[0] + prod_sum += prod*to_update1.dK_dXdiag(X, dimX_0)*to_update2.dK_dX2diag(X, dimX2)*to_update3.dK_dXdiag(X, dimX_1) + return prod_sum + + def update_gradients_direct(self, *args): + for i, (g,p) in enumerate(zip(args, self.parts)): + p.update_gradients_direct(*g) + + def dgradients_dX(self, X, X2, dimX, parts=None): + """ + Compute the hyperparameter gradients of: + the derivative of K with respect to dimension dimX of set X + ("dK_dX"). + """ + if parts is None: + parts = self.parts + gradients = [] + for part in parts: + neq_parts = [p for p in parts if p is not part] + + if len(neq_parts) > 0: + K = self.K(X, X2, which_parts=neq_parts) + K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts) + else: + K = np.ones((X.shape[0], X2.shape[0])) + K_dx = np.zeros((X.shape[0], X2.shape[0])) + + g = part.dgradients(X, X2) + g_dx = part.dgradients_dX(X, X2, dimX) + + gradients += [[(g_i*K_dx + g_dx_i*K) for (g_i, g_dx_i) in zip(g, g_dx)]] + + return gradients + + def dgradients_dX2(self, X, X2, dimX2, parts=None): + """ + Compute the hyperparameter gradients of: + the derivative of K with respect to dimension dimX2 of set X2 + ("dK_dX2"). + """ + if parts is None: + parts = self.parts + gradients = [] + for part in parts: + neq_parts = [p for p in parts if p is not part] + + if len(neq_parts) > 0: + K = self.K(X, X2, which_parts=neq_parts) + K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts) + else: + K = np.ones((X.shape[0], X2.shape[0])) + K_dx2 = np.zeros((X.shape[0], X2.shape[0])) + + g = part.dgradients(X, X2) + g_dx2 = part.dgradients_dX2(X, X2, dimX2) + + gradients += [[(g_i*K_dx2 + g_dx2_i*K) for (g_i, g_dx2_i) in zip(g, g_dx2)]] + + return gradients + + def dgradients2_dXdX2(self, X, X2, dimX, dimX2, parts=None): + """ + Compute the hyperparameter gradients of: + the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2 + ("dK2_dXdX2"). + """ + if parts is None: + parts = self.parts + gradients = [] + for part in parts: + neq_parts = [p for p in parts if p is not part] + + K = self.K(X, X2, which_parts=neq_parts) + K_dx = self.dK_dX(X, X2, dimX, which_parts=neq_parts) + K_dx2 = self.dK_dX2(X, X2, dimX2, which_parts=neq_parts) + K_dxdx2 = self.dK2_dXdX2(X, X2, dimX, dimX2, which_parts=neq_parts) + + g = part.dgradients(X, X2) + g_dx = part.dgradients_dX(X, X2, dimX) + g_dx2 = part.dgradients_dX2(X, X2, dimX2) + g_dxdx2 = part.dgradients2_dXdX2(X, X2, dimX, dimX2) + + gradients += [[(g_i*K_dxdx2 + g_dx_i*K_dx2 + g_dx2_i*K_dx + g_dxdx2_i*K) for (g_i, g_dx_i, g_dx2_i, g_dxdx2_i) in zip(g, g_dx, g_dx2, g_dxdx2)]] + return gradients + def update_gradients_full(self, dL_dK, X, X2=None): if len(self.parts)==2: self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2) diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index cbabcb99..9ed81b9c 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -53,24 +53,126 @@ class RBF(Stationary): @Cache_this(limit=3, ignore_args=()) def dK_dX(self, X, X2, dimX): - r = self._scaled_dist(X, X2) - K = self.K_of_r(r) - dist = X[:,None,dimX]-X2[None,:,dimX] - lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX] - return -1.*K*dist*lengthscale2inv + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX] + dist = X[:,None,dimX] - X2[None,:,dimX] + return -dist*(lengthscaleinv**2)*self._clean_K(X, X2) + + @Cache_this(limit=3, ignore_args=()) + def dK_dXdiag(self, X, dimX): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + + Returns only diagonal elements. + """ + return np.zeros(X.shape[0]) + @Cache_this(limit=3, ignore_args=()) def dK_dX2(self, X, X2, dimX2): - return -self.dK_dX(X,X2, dimX2) - + """ + Compute the derivative of K with respect to: + dimension dimX2 of set X2. + """ + return -self._clean_dK_dX(X, X2, dimX2) + + @Cache_this(limit=3, ignore_args=()) + def dK_dX2diag(self, X, dimX2): + """ + Compute the derivative of K with respect to: + dimension dimX2 of set X2. + + Returns only diagonal elements. + """ + return np.zeros(X.shape[0]) + @Cache_this(limit=3, ignore_args=()) def dK2_dXdX2(self, X, X2, dimX, dimX2): - r = self._scaled_dist(X, X2) - K = self.K_of_r(r) - if X2 is None: - X2=X - dist = X[:,None,:]-X2[None,:,:] - lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2) - return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX] + """ + Compute the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + + term = dist[dimX]*(lengthscaleinv[dimX]**2) + term *= dist[dimX2]*(lengthscaleinv[dimX2]**2) + if dimX == dimX2: + term -= (lengthscaleinv[dimX]**2) + return -term*self._clean_K(X, X2) + + @Cache_this(limit=3, ignore_args=()) + def dK2_dXdX2diag(self, X, dimX, dimX2): + """ + Compute the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2. + + Returns only diagonal elements. + """ + if dimX == dimX2: + lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) + return np.ones(X.shape[0])*(lengthscaleinv[dimX]**2)*self.variance + else: + return np.zeros(X.shape[0]) + + @Cache_this(limit=3, ignore_args=()) + def dK2_dXdX(self, X, X2, dimX_0, dimX_1): + """ + Compute the second derivative of K with respect to: + dimension dimX_0 of set X, and + dimension dimX_1 of set X. + """ + return -self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1) + + @Cache_this(limit=3, ignore_args=()) + def dK2_dXdXdiag(self, X, dimX_0, dimX_1): + """ + Compute the second derivative of K with respect to: + dimension dimX_0 of set X, and + dimension dimX_1 of set X. + + Returns only diagonal elements. + """ + return -self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1) + + @Cache_this(limit=3, ignore_args=()) + def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2): + """ + Compute the third derivative of K with respect to: + dimension dimX_0 of set X, + dimension dimX_1 of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + + term = dist[dimX_0]*(lengthscaleinv[dimX_0]**2) + term *= dist[dimX_1]*(lengthscaleinv[dimX_1]**2) + term *= dist[dimX2]*(lengthscaleinv[dimX2]**2) + if dimX_0 == dimX_1: + term -= dist[dimX2]*(lengthscaleinv[dimX2]**2)*(lengthscaleinv[dimX_0]**2) + if dimX_0 == dimX2: + term -= dist[dimX_1]*(lengthscaleinv[dimX_1]**2)*(lengthscaleinv[dimX_0]**2) + if dimX_1 == dimX2: + term -= dist[dimX_0]*(lengthscaleinv[dimX_0]**2)*(lengthscaleinv[dimX_1]**2) + return term*self._clean_K(X, X2) + + @Cache_this(limit=3, ignore_args=()) + def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2): + """ + Compute the third derivative of K with respect to: + dimension dimX_0 of set X, + dimension dimX_1 of set X, and + dimension dimX2 of set X2. + + Returns only diagonal elements of the covariance matrix. + """ + return np.zeros(X.shape[0]) def dK_dr(self, r): return -r*self.K_of_r(r) @@ -80,73 +182,132 @@ class RBF(Stationary): def dK2_drdr_diag(self): return -self.variance # as the diagonal of r is always filled with zeros - + @Cache_this(limit=3, ignore_args=()) - def dK_dvariance(self,X,X2): - return self.K(X,X2)/self.variance - + def dK_dvariance(self, X, X2): + """ + Compute the derivative of K with respect to variance. + """ + return self._clean_K(X, X2)/self.variance + @Cache_this(limit=3, ignore_args=()) - def dK2_dvariancedX(self, X, X2, dim): - return self.dK_dX(X,X2, dim)/self.variance - + def dK_dlengthscale(self, X, X2): + """ + Compute the derivative(s) of K with respect to lengthscale(s). + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + + K = self._clean_K(X, X2) + + if self.ARD: + g = [] + for diml in range(self.input_dim): + g += [(dist[diml]**2)*(lengthscaleinv[diml]**3)*K] + else: + g = (lengthscaleinv[0]**3)*np.sum(dist**2, axis=0)*K + return g + @Cache_this(limit=3, ignore_args=()) - def dK2_dvariancedX2(self, X, X2, dim): - return self.dK_dX2(X,X2, dim)/self.variance - + def dK2_dvariancedX(self, X, X2, dimX): + """ + Compute the second derivative of K with respect to: + variance, and + dimension dimX of set X. + """ + return self._clean_dK_dX(X, X2, dimX)/self.variance + @Cache_this(limit=3, ignore_args=()) - def dK3_dvariancedXdX2(self, X, X2, dim, dimX2): - return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance + def dK2_dvariancedX2(self, X, X2, dimX2): + """ + Compute the second derivative of K with respect to: + variance, and + dimension dimX2 of set X2. + """ + return -self.dK2_dvariancedX(X, X2, dimX2) @Cache_this(limit=3, ignore_args=()) def dK2_dlengthscaledX(self, X, X2, dimX): - r = self._scaled_dist(X, X2) - K = self.K_of_r(r) - if X2 is None: - X2=X - dist = X[:,None,:]-X2[None,:,:] - lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) + """ + Compute the second derivative(s) of K with respect to: + lengthscale(s), and + dimension dimX of set X. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + + dK_dX = self._clean_dK_dX(X, X2, dimX) + dK_dl = self.dK_dlengthscale(X, X2) + if self.ARD: g = [] - for diml in range(X.shape[1]): - g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)] + for diml in range(self.input_dim): + term = -dist[dimX]*(lengthscaleinv[dimX]**2)*dK_dl[diml] + if diml == dimX: + term -= 2*lengthscaleinv[dimX]*dK_dX + g += [term] else: - g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K + term = -dist[dimX]*(lengthscaleinv[0]**2)*dK_dl + term -= 2*lengthscaleinv[0]*dK_dX + g = term return g - + @Cache_this(limit=3, ignore_args=()) def dK2_dlengthscaledX2(self, X, X2, dimX2): - tmp = self.dK2_dlengthscaledX(X, X2, dimX2) + """ + Compute the second derivative(s) of K with respect to: + lengthscale(s), and + dimension dimX2 of set X2. + """ + dK2_dlengthscaledX = self.dK2_dlengthscaledX(X, X2, dimX2) if self.ARD: - return [-1.*g for g in tmp] + return [-1.*g for g in dK2_dlengthscaledX] else: - return -1*tmp + return -1*dK2_dlengthscaledX + @Cache_this(limit=3, ignore_args=()) + def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2): + """ + Compute the third derivative of K with respect to: + variance, + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + return self._clean_dK2_dXdX2(X, X2, dimX, dimX2)/self.variance + @Cache_this(limit=3, ignore_args=()) def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2): - r = self._scaled_dist(X, X2) - K = self.K_of_r(r) - if X2 is None: - X2=X - dist = X[:,None,:]-X2[None,:,:] - lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) - lengthscale2inv = lengthscaleinv**2 + """ + Compute the third derivative(s) of K with respect to: + lengthscale(s), + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + + K = self._clean_K(X, X2) + dK_dX = self._clean_dK_dX(X, X2, dimX) + dK_dX2 = self._clean_dK_dX(X, X2, dimX2) + dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2) + if self.ARD: g = [] - for diml in range(X.shape[1]): - tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3) - if dimX == dimX2: - tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2) + for diml in range(self.input_dim): + term = (dist[diml]**2)*(lengthscaleinv[diml]**3)*dK2_dXdX2 if diml == dimX: - tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3) + term -= 2*dist[dimX]*(lengthscaleinv[dimX]**3)*dK_dX2 if diml == dimX2: - tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3) - if dimX == dimX2: - tmp += -2.*K*(lengthscaleinv[dimX]**3) - g += [tmp] + term -= 2*dist[dimX2]*(lengthscaleinv[dimX2]**3)*dK_dX + if diml == dimX == dimX2: + term -= 2*(lengthscaleinv[dimX]**3)*K + g += [term] else: - g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5) + term = np.sum(dist**2, axis=0)*dK2_dXdX2 + term -= 4*dist[dimX2]*dK_dX if dimX == dimX2: - g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2) + term -= 2*K + g = (lengthscaleinv[0]**3)*term return g def __getstate__(self): diff --git a/GPy/kern/src/standard_periodic.py b/GPy/kern/src/standard_periodic.py index e7b67239..c8b91563 100644 --- a/GPy/kern/src/standard_periodic.py +++ b/GPy/kern/src/standard_periodic.py @@ -122,7 +122,6 @@ class StdPeriodic(Kern): pass - def K(self, X, X2=None): """Compute the covariance matrix between X and X2.""" if X2 is None: @@ -133,13 +132,372 @@ class StdPeriodic(Kern): return self.variance * exp_dist - def Kdiag(self, X): """Compute the diagonal of the covariance matrix associated to X.""" ret = np.empty(X.shape[0]) ret[:] = self.variance return ret + def dK_dX(self, X, X2, dimX): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX] + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + dist = X[:,None,dimX] - X2[None,:,dimX] + base = np.pi*periodinv*dist + + return -F*np.sin(2*base)*self._clean_K(X, X2) + + def dK_dXdiag(self, X, dimX): + """ + Compute the derivative of K with respect to: + dimension dimX of set X. + + Returns only diagonal elements. + """ + return np.zeros(X.shape[0]) + + def dK_dX2(self, X, X2, dimX2): + """ + Compute the derivative of K with respect to: + dimension dimX2 of set X2. + """ + return -self._clean_dK_dX(X, X2, dimX2) + + def dK_dX2diag(self, X, dimX2): + """ + Compute the derivative of K with respect to: + dimension dimX2 of set X2. + + Returns only diagonal elements. + """ + return np.zeros(X.shape[0]) + + def dK2_dXdX2(self, X, X2, dimX, dimX2): + """ + Compute the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX2] + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + dist = X[:,None,dimX2] - X2[None,:,dimX2] + base = np.pi*periodinv*dist + + term = np.sin(2*base)*self._clean_dK_dX(X, X2, dimX) + if dimX == dimX2: + term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_K(X, X2) + return F*term + + def dK2_dXdX2diag(self, X, dimX, dimX2): + """ + Compute the second derivative of K with respect to: + dimension dimX of set X, and + dimension dimX2 of set X2. + + Returns only diagonal elements. + """ + if dimX == dimX2: + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX2] + return (np.pi**2)*(lengthscaleinv**2)*(periodinv**2)*self.variance*np.ones(X.shape[0]) + else: + return np.zeros(X.shape[0]) + + def dK2_dXdX(self, X, X2, dimX_0, dimX_1): + """ + Compute the second derivative of K with respect to: + dimension dimX_0 of set X, and + dimension dimX_1 of set X. + """ + return -self._clean_dK2_dXdX2(X, X2, dimX_0, dimX_1) + + def dK2_dXdXdiag(self, X, dimX_0, dimX_1): + """ + Compute the second derivative of K with respect to: + dimension dimX_0 of set X, and + dimension dimX_1 of set X. + + Returns only diagonal elements. + """ + return -self._clean_dK2_dXdX2diag(X, dimX_0, dimX_1) + + def dK3_dXdXdX2(self, X, X2, dimX_0, dimX_1, dimX2): + """ + Compute the third derivative of K with respect to: + dimension dimX_0 of set X, + dimension dimX_1 of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX2] + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + dist = X[:,None,dimX2] - X2[None,:,dimX2] + base = np.pi*periodinv*dist + + term = np.sin(2*base)*self._clean_dK2_dXdX(X, X2, dimX_0, dimX_1) + if dimX_0 == dimX2: + term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_dK_dX(X, X2, dimX_1) + if dimX_1 == dimX2: + term += 2*np.pi*periodinv*np.cos(2*base)*self._clean_dK_dX(X, X2, dimX_0) + if dimX_0 == dimX_1 == dimX2: + term -= 4*(np.pi**2)*(periodinv**2)*np.sin(2*base)*self._clean_K(X, X2) + return F*term + + def dK3_dXdXdX2diag(self, X, dimX_0, dimX_1, dimX2): + """ + Compute the third derivative of K with respect to: + dimension dimX_0 of set X, + dimension dimX_1 of set X, and + dimension dimX2 of set X2. + + Returns only diagonal elements of the covariance matrix. + """ + return np.zeros(X.shape[0]) + + def dK_dvariance(self, X, X2): + """ + Compute the derivative of K with respect to variance. + """ + return self._clean_K(X, X2)/self.variance + + def dK_dlengthscale(self, X, X2): + """ + Compute the derivative(s) of K with respect to lengthscale(s). + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + periodinv = (np.ones(X.shape[1])/(self.period)) + + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + base = np.pi*periodinv[:,None,None]*dist + + K = self._clean_K(X, X2) + + if self.ARD2: + g = [] + for diml in range(self.input_dim): + g += [(lengthscaleinv[diml]**3)*np.square(np.sin(base[diml]))*K] + else: + g = (lengthscaleinv[0]**3)*np.sum(np.square(np.sin(base)), axis=0)*K + return g + + def dK_dperiod(self, X, X2): + """ + Compute the derivative(s) of K with respect to period(s). + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale)) + periodinv = (np.ones(X.shape[1])/(self.period)) + + dist = np.rollaxis(X[:,None,:] - X2[None,:,:], 2, 0) + base = np.pi*periodinv[:,None,None]*dist + + K = self._clean_K(X, X2) + + if self.ARD1: + g = [] + for diml in range(self.input_dim): + g += [0.5*base[diml]*(lengthscaleinv[diml]**2)*periodinv[diml]*np.sin(2*base[diml])*K] + else: + g = 0.5*periodinv[0]*np.sum(base*(lengthscaleinv**2)[:,None,None]*np.sin(2*base), axis=0)*K + return g + + def dK2_dvariancedX(self, X, X2, dimX): + """ + Compute the second derivative of K with respect to: + variance, and + dimension dimX of set X. + """ + return self._clean_dK_dX(X, X2, dimX)/self.variance + + def dK2_dvariancedX2(self, X, X2, dimX2): + """ + Compute the second derivative of K with respect to: + variance, and + dimension dimX2 of set X2. + """ + return -self.dK2_dvariancedX(X, X2, dimX2) + + def dK2_dlengthscaledX(self, X, X2, dimX): + """ + Compute the second derivative(s) of K with respect to: + lengthscale(s), and + dimension dimX of set X. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX] + + dist = X[:,None,dimX] - X2[None,:,dimX] + base = np.pi*periodinv*dist + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + K = self._clean_K(X, X2) + dK_dl = self.dK_dlengthscale(X, X2) + + if self.ARD2: + g = [] + for diml in range(self.input_dim): + term = dK_dl[diml] + if diml == dimX: + term -= 2*lengthscaleinv*K + g += [-F*np.sin(2*base)*term] + else: + g = -F*np.sin(2*base)*(dK_dl - 2*lengthscaleinv*K) + return g + + def dK2_dlengthscaledX2(self, X, X2, dimX2): + """ + Compute the second derivative(s) of K with respect to: + lengthscale(s), and + dimension dimX2 of set X2. + """ + dK2_dldX = self.dK2_dlengthscaledX(X, X2, dimX2) + if self.ARD2: + return [-1*g for g in dK2_dldX] + else: + return -1*dK2_dldX + + def dK2_dperioddX(self, X, X2, dimX): + """ + Compute the second derivative(s) of K with respect to: + period(s), and + dimension dimX of set X. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX] + + dist = X[:,None,dimX] - X2[None,:,dimX] + base = np.pi*periodinv*dist + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + K = self._clean_K(X, X2) + dK_dT = self.dK_dperiod(X, X2) + + if self.ARD1: + g = [] + for dimT in range(self.input_dim): + term = np.sin(2*base)*dK_dT[dimT] + if dimT == dimX: + term -= periodinv*(np.sin(2*base)+2*base*np.cos(2*base))*K + g += [-F*term] + else: + term = np.sin(2*base)*dK_dT + term -= periodinv*(np.sin(2*base)+2*base*np.cos(2*base))*K + g = -F*term + return g + + def dK2_dperioddX2(self, X, X2, dimX2): + """ + Compute the second derivative(s) of K with respect to: + period(s), and + dimension dimX2 of set X2. + """ + dK2_dperioddX = self.dK2_dperioddX(X, X2, dimX2) + if self.ARD1: + return [-1*g for g in dK2_dperioddX] + else: + return -1*dK2_dperioddX + + def dK3_dvariancedXdX2(self, X, X2, dimX, dimX2): + """ + Compute the third derivative of K with respect to: + variance, + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + return self._clean_dK2_dXdX2(X, X2, dimX, dimX2)/self.variance + + def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2): + """ + Compute the third derivative(s) of K with respect to: + lengthscale(s), + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX2] + + dist = X[:,None,dimX2] - X2[None,:,dimX2] + base = np.pi*periodinv*dist + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2) + dK_dl = self.dK_dlengthscale(X, X2) + dK2_dldX = self.dK2_dlengthscaledX(X, X2, dimX) + + if self.ARD2: + g = [] + for diml in range(self.input_dim): + term = np.sin(2*base)*dK2_dldX[diml] + if dimX == dimX2: + term += 2*np.pi*periodinv*np.cos(2*base)*dK_dl[diml] + term *= F + if diml == dimX2: + term -= 2*lengthscaleinv*dK2_dXdX2 + g += [term] + else: + term = np.sin(2*base)*dK2_dldX + if dimX == dimX2: + term += 2*np.pi*periodinv*np.cos(2*base)*dK_dl + term *= F + term -= 2*lengthscaleinv*dK2_dXdX2 + g = term + return g + + def dK3_dperioddXdX2(self, X, X2, dimX, dimX2): + """ + Compute the third derivative(s) of K with respect to: + period(s), + dimension dimX of set X, and + dimension dimX2 of set X2. + """ + lengthscaleinv = (np.ones(X.shape[1])/(self.lengthscale))[dimX2] + periodinv = (np.ones(X.shape[1])/(self.period))[dimX2] + + dist = X[:,None,dimX2] - X2[None,:,dimX2] + base = np.pi*periodinv*dist + + F = 0.5*np.pi*(lengthscaleinv**2)*periodinv # multiplicative factor + + K = self._clean_K(X, X2) + dK_dX = self._clean_dK_dX(X, X2, dimX) + dK2_dXdX2 = self._clean_dK2_dXdX2(X, X2, dimX, dimX2) + dK_dT = self.dK_dperiod(X, X2) + dK2_dTdX = self.dK2_dperioddX(X, X2, dimX) + + if self.ARD1: + g = [] + for dimT in range(self.input_dim): + term = np.sin(2*base)*dK2_dTdX[dimT] + if dimT == dimX2: + term -= 2*periodinv*np.cos(2*base)*base*dK_dX + if dimX == dimX2: + term += 2*np.pi*periodinv*np.cos(2*base)*dK_dT[dimT] + if dimX == dimX2 == dimT: + term += 2*np.pi*(periodinv**2)*(2*base*np.sin(2*base)-np.cos(2*base))*K + term *= F + if dimT == dimX2: + term -= periodinv*dK2_dXdX2 + g += [term] + else: + term = np.sin(2*base)*dK2_dTdX-2*periodinv*base*np.cos(2*base)*dK_dX + if dimX == dimX2: + term += 2*np.pi*periodinv*(np.cos(2*base)*dK_dT+periodinv*(2*base*np.sin(2*base)-np.cos(2*base))*K) + g = F*term-periodinv*dK2_dXdX2 + return g + def update_gradients_full(self, dL_dK, X, X2=None): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: @@ -167,12 +525,52 @@ class StdPeriodic(Kern): else: # same lengthscales self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK) + def update_gradients_direct(self, dL_dVar, dL_dPer, dL_dLen): + self.variance.gradient = dL_dVar + self.period.gradient = dL_dPer + self.lengthscale.gradient = dL_dLen + + def reset_gradients(self): + self.variance.gradient = 0. + if not self.ARD1: + self.period.gradient = 0. + else: + self.period.gradient = np.zeros(self.input_dim) + if not self.ARD2: + self.lengthscale.gradient = 0. + else: + self.lengthscale.gradient = np.zeros(self.input_dim) + def update_gradients_diag(self, dL_dKdiag, X): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" self.variance.gradient = np.sum(dL_dKdiag) self.period.gradient = 0 self.lengthscale.gradient = 0 + def dgradients(self, X, X2): + g1 = self.dK_dvariance(X, X2) + g2 = self.dK_dperiod(X, X2) + g3 = self.dK_dlengthscale(X, X2) + return [g1, g2, g3] + + def dgradients_dX(self, X, X2, dimX): + g1 = self.dK2_dvariancedX(X, X2, dimX) + g2 = self.dK2_dperioddX(X, X2, dimX) + g3 = self.dK2_dlengthscaledX(X, X2, dimX) + return [g1, g2, g3] + + def dgradients_dX2(self, X, X2, dimX2): + g1 = self.dK2_dvariancedX2(X, X2, dimX2) + g2 = self.dK2_dperioddX2(X, X2, dimX2) + g3 = self.dK2_dlengthscaledX2(X, X2, dimX2) + return [g1, g2, g3] + + def dgradients2_dXdX2(self, X, X2, dimX, dimX2): + g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2) + g2 = self.dK3_dperioddXdX2(X, X2, dimX, dimX2) + g3 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2) + return [g1, g2, g3] + def gradients_X(self, dL_dK, X, X2=None): K = self.K(X, X2) if X2 is None: @@ -185,4 +583,4 @@ class StdPeriodic(Kern): return np.zeros(X.shape) def input_sensitivity(self, summarize=True): - return self.variance*np.ones(self.input_dim)/self.lengthscale**2 \ No newline at end of file + return self.variance*np.ones(self.input_dim)/self.lengthscale**2 diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 53bb98c2..e3931008 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -306,7 +306,12 @@ class Stationary(Kern): l4 = np.ones(X.shape[1])*self.lengthscale**2 return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],)) #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape) - + + def dgradients(self, X, X2): + g1 = self.dK_dvariance(X, X2) + g2 = self.dK_dlengthscale(X, X2) + return [g1, g2] + def dgradients_dX(self, X, X2, dimX): g1 = self.dK2_dvariancedX(X, X2, dimX) g2 = self.dK2_dlengthscaledX(X, X2, dimX) diff --git a/GPy/models/multioutput_gp.py b/GPy/models/multioutput_gp.py index 2c45a0c2..98f2c4c0 100644 --- a/GPy/models/multioutput_gp.py +++ b/GPy/models/multioutput_gp.py @@ -9,6 +9,7 @@ from ..core.mapping import Mapping from .. import likelihoods from ..likelihoods.gaussian import Gaussian from .. import kern +from ..kern import DiffKern from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation from ..util.normalizer import Standardize from .. import util @@ -69,39 +70,80 @@ class MultioutputGP(GP): if Y_metadata is None: Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood) - - def predictive_gradients(self, Xnew, kern=None): - if isinstance(Xnew, list): - Xnew, _, ind = util.multioutput.build_XY(Xnew, None) - #if Y_metadata is None: - #Y_metadata={'output_index': ind} - return super(MultioutputGP, self).predictive_gradients(Xnew, kern) - def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!! + def predictive_gradients(self, Xnew, kern=None): """ - Compute the derivatives of the predicted latent function with respect to X* + Compute the derivatives of the predicted latent function with respect + to X* + Given a set of points at which to predict X* (size [N*,Q]), compute the derivatives of the mean and variance. Resulting arrays are sized: - dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one). - Note that this is not the same as computing the mean and variance of the derivative of the function! + dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP + (usually one). + + Note that this is not the same as computing the mean and variance of + the derivative of the function! + dv_dX* -- [N*, Q], (since all outputs have the same variance) :param X: The points at which to get the predictive gradients :type X: np.ndarray (Xnew x self.input_dim) :returns: dmu_dX, dv_dX :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ] + """ if isinstance(Xnew, list): Xnew, _, ind = util.multioutput.build_XY(Xnew, None) slices = index_to_slices(Xnew[:,-1]) - - for i in range(len(slices)): - if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0): - assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)" - + if kern is None: kern = self.kern + + if all([(isinstance(k, DiffKern)) for k in self.kern.kern[1:]]): + """ + Compute the gradients of the predicted latent function and predicted + partial derivatives with respect to X*. + + This works only for models that observe the gradient of the latent function. + + Xnew is given as a list of arrays, where each array X*_i (size [N_i*, Q]) + contains points at which to compute gradients for each predicted latent + function or partial derivative. + + Resulting arrays are sized [sum_i^D : N_i*, Q] + + Passing a list of only one array [X*] returns only gradients of + the predicted latent function and does not compute gradients of + predicted partial derivatives. + + In this case the resulting arrays are sized [N*, Q]. + + :param Xnew: points at which to compute predictive gradients + :type Xnew: list + :type Xnew[i]: np.darray (sum_i^D : N_i*, Q) + :returns: dmu_dX, dv_dX + :rtype: (np.ndarray (sum_i^D : N_i*, Q), np.ndarray (sum_i^D : N_i*, Q)) + """ + + dims = Xnew.shape[1] - 1 + + mean_jac = np.empty((Xnew.shape[0], dims)) + var_jac = np.empty((Xnew.shape[0], dims)) + + X = self._predictive_variable + alpha = self.posterior.woodbury_vector + Wi = self.posterior.woodbury_inv + + k = kern.K(Xnew, X) + for dimX in range(dims): + dk_dx = kern.dK_dX(Xnew, X, dimX) + dk_dxdiag = kern.dK_dXdiag(Xnew, dimX) + + mean_jac[:,dimX] = np.dot(dk_dx, alpha).flatten() + var_jac[:,dimX] = dk_dxdiag - 2*(np.dot(k, Wi)*dk_dx).sum(-1) + return mean_jac, var_jac + mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim)) for i in range(self.output_dim): mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1] diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index a241fc9c..bc2005be 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -5,6 +5,8 @@ from __future__ import division import unittest import numpy as np import GPy +from GPy.models import GradientChecker +from functools import reduce class MiscTests(unittest.TestCase): def setUp(self): @@ -1208,40 +1210,6 @@ class GradientTests(np.testing.TestCase): with self.assertRaises(RuntimeError): m._raw_posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]])) - - def test_multioutput_model_with_derivative_observations(self): - f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3 - fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2 - N=10 - M=10 - sigma=0.05 - sigmader=0.05 - x = np.array([np.linspace(1,10,N)]).T - y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1))) - - xd = np.array([np.linspace(2,8,M)]).T - yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1))) - - # squared exponential kernel: - se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2) - # We need to generate separate kernel for the derivative observations and give the created kernel as an input: - se_der = GPy.kern.DiffKern(se, 0) - - #Then - gauss = GPy.likelihoods.Gaussian(variance=sigma**2) - gauss = GPy.likelihoods.Gaussian(variance=0.1) - gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2) - - # Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs - # Now we have the regular observations first and derivative observations second, meaning that the kernels and - # the likelihoods must follow the same order - m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss]) - m.randomize() - self.assertTrue(m.checkgrad()) - - m.optimize(messages=0, ipython_notebook=False) - - self.assertTrue(m.checkgrad()) def test_multioutput_model_with_ep(self): f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3 @@ -1308,6 +1276,173 @@ class GradientTests(np.testing.TestCase): c2 = model.predict(x, full_cov=True)[1] np.testing.assert_allclose(c1,c2) +class GradientMultioutputGPModelTests(np.testing.TestCase): + def setUp(self): + + # standard test function + self.period = 3 + self.w = 2*np.pi/self.period + self.f = lambda x: np.sum(np.square(np.sin(self.w*x)), axis=1) + self.df = lambda x: self.w*np.sin(2*self.w*x) + + self.noise_std = 1e-2 + + self.bounds = (-self.period, self.period) + + self.train_points = 5 + self.test_points = 25 + + def approximate_predictive_gradients(self, model, x_test, D, step=1e-6): + ''' + Approximates gradients of predicted posterior means and variances. + + This function is used as the frameworks for GradientChecker and + MultioutputGP do not easily combine when checking gradients of predicted + partial derivative posteriors. + ''' + + dmdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D)) + dvdx_aprx = np.zeros((x_test.shape[0]*(D + 1), D)) + + for d in range(D): + + x_over = x_test.copy() + x_over[:,d] += step + x_undr = x_test.copy() + x_undr[:,d] -= step + + m_over, v_over = model.predict([x_over]*(D + 1)) + m_undr, v_undr = model.predict([x_undr]*(D + 1)) + + dmdx_aprx[:,d,None] = (m_over - m_undr)/(2*step) + dvdx_aprx[:,d,None] = (v_over - v_undr)/(2*step) + + return dmdx_aprx, dvdx_aprx + + def check_model(self, kern): + ''' + Checks predictions, hyperparameter gradients, and gradients of predicted + posterior means and variances for MultioutputGP models that incorporate + observed latent function gradient information. + ''' + + D = kern.input_dim + + X_list = [] + Y_list = [] + for i in range(D + 1): + # sample inputs for either latent function or partial derivatives + X_i = np.random.uniform(*self.bounds, size=(self.train_points, D)) + # output of latent function or partial derivatives + Y_i = (self.f(X_i) if (i == 0) else self.df(X_i)[:,i - 1])[:,None] + # noisy observations + Y_i += np.random.normal(scale=self.noise_std, size=Y_i.shape) + + X_list.append(X_i) + Y_list.append(Y_i) + + # the kernel is accompanied with derivative kernels, one for each dimension + kernel_list = [kern] + [GPy.kern.DiffKern(kern, d) for d in range(D)] + + # create model and check its hyperparameter gradient + likelihood_list = [GPy.likelihoods.Gaussian(variance=self.noise_std**2)]*(D + 1) + model = GPy.models.MultioutputGP(X_list, Y_list, kernel_list, likelihood_list) + model.likelihood.constrain_fixed() + self.assertTrue(model.checkgrad(step=1e-3)) + + # optimize the model, and check its hyperparameter gradient again + model.optimize() + self.assertTrue(model.checkgrad(step=1e-3)) + + # check predictions + np.testing.assert_allclose(model.predict(X_list)[0], model.Y, atol=3*self.noise_std) + + # test inputs for checking predictive gradients + x_test = np.random.uniform(*self.bounds, size=(self.test_points, D)) + + # predictive gradients + dmdx, dvdx = model.predictive_gradients([x_test]*(D + 1)) + # approximated predictive gradients + dmdx_aprx, dvdx_aprx = self.approximate_predictive_gradients(model, x_test, D, step=1e-3) + # check predictive gradients + np.testing.assert_allclose(dmdx, dmdx_aprx, atol=3*self.noise_std) + np.testing.assert_allclose(dvdx, dvdx_aprx, atol=3*self.noise_std) + + def test_MultioutputGP_gradobs_RBF(self): + ''' + Testing gradient observing MultioutputGP model with an RBF kernel. + ''' + for D in range(1, 4): + kern = GPy.kern.RBF(input_dim=D) + kern.randomize() + self.check_model(kern) + + def test_MultioutputGP_gradobs_RBF_ARD(self): + ''' + Testing gradient observing MultioutputGP model with an RBF (ARD) kernel. + ''' + for D in range(1, 4): + kern = GPy.kern.RBF(input_dim=D, ARD=True) + kern.randomize() + self.check_model(kern) + + def test_MultioutputGP_gradobs_StdP(self): + ''' + Testing gradient observing MultioutputGP model with a StdP kernel. + ''' + for D in range(1, 4): + kern = GPy.kern.StdPeriodic(input_dim=D, period=self.period) + kern.period.constrain_fixed() + kern.randomize() + self.check_model(kern) + + def test_MultioutputGP_gradobs_StdP_ARD(self): + ''' + Testing gradient observing MultioutputGP model with a StdP (ARD) kernel. + ''' + for D in range(1, 4): + kern = GPy.kern.StdPeriodic(input_dim=D, period=[self.period]*D, ARD1=True, ARD2=True) + kern.period.constrain_fixed() + kern.randomize() + self.check_model(kern) + + def test_MultioutputGP_gradobs_prod_RBF(self): + ''' + Testing gradient observing MultioutputGP model with several RBF kernels. + ''' + for D in range(2, 4): + kerns = [GPy.kern.RBF(input_dim=1) for d in range(D)] + kern = reduce(lambda k0, k1: k0 * k1, kerns) + kern.randomize() + self.check_model(kern) + + def test_MultioutputGP_gradobs_prod_StdP(self): + ''' + Testing gradient observing MultioutputGP model with several StdP kernels. + ''' + for D in range(2, 4): + kerns = [GPy.kern.StdPeriodic(input_dim=1, period=self.period) for d in range(D)] + kern = reduce(lambda k0, k1: k0 * k1, kerns) + [k.period.constrain_fixed() for k in kern.parts] + kern.randomize() + self.check_model(kern) + + def test_MultioutputGP_gradobs_prod_mix(self): + ''' + Testing gradient observing MultioutputGP model with a mix of kernel types. + ''' + for D in range(2, 4): + kerns = [] + for d in range(D): + if d % 2 == 0: + k = GPy.kern.RBF(input_dim=1) + else: + k = GPy.kern.StdPeriodic(input_dim=1, period=self.period) + k.period.constrain_fixed() + kerns.append(k) + kern = reduce(lambda k0, k1: k0 * k1, kerns) + kern.randomize() + self.check_model(kern) def _create_missing_data_model(kernel, Q): D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3