mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-12 05:22:38 +02:00
Update function kern.gradients_XX() to compute cross-covariance terms
This commit is contained in:
parent
1a3e6c3ea3
commit
a9c8ef817a
4 changed files with 69 additions and 42 deletions
|
|
@ -85,13 +85,20 @@ class Add(CombinationKernel):
|
|||
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def gradients_XX(self, dL_dK, X, X2):
    """
    Accumulate the second derivatives of the objective w.r.t. the inputs
    over all summand kernels of this Add kernel.

    :param dL_dK: derivative of the objective w.r.t. K
        (assumed [NxM] — matches the parts' gradients_XX contract, TODO confirm)
    :param X: input observations [NxQ]
    :param X2: second input observations [MxQ], or None to reuse X
    :returns: summed second derivatives, shape [N x M x Q]
    """
    if X2 is None:
        target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
    else:
        target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
    # Plain loop instead of a list comprehension over __iadd__: the
    # comprehension built a throwaway list purely for its side effects.
    for p in self.parts:
        target += p.gradients_XX(dL_dK, X, X2)
    return target
|
||||
# def gradients_XX(self, dL_dK, X, X2, cov=True):
|
||||
# if cov==True: # full covarance
|
||||
# if X2 is None:
|
||||
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
|
||||
# else:
|
||||
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
|
||||
# else: # diagonal covariance
|
||||
# if X2 is None:
|
||||
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
|
||||
# else:
|
||||
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
|
||||
|
||||
# [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=True)) for p in self.parts]
|
||||
# return target
|
||||
|
||||
def gradients_XX_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ class Kern(Parameterized):
|
|||
raise NotImplementedError
|
||||
def gradients_X_X2(self, dL_dK, X, X2):
    """
    Compute the derivatives of the objective w.r.t. both inputs, returned
    as the pair (dL_dX, dL_dX2). Each term reuses gradients_X, with the
    roles of the two inputs (and the transpose of dL_dK) swapped for X2.
    """
    grad_wrt_X = self.gradients_X(dL_dK, X, X2)
    grad_wrt_X2 = self.gradients_X(dL_dK.T, X2, X)
    return grad_wrt_X, grad_wrt_X2
|
||||
def gradients_XX(self, dL_dK, X, X2):
|
||||
def gradients_XX(self, dL_dK, X, X2, cov='False'):
|
||||
"""
|
||||
.. math::
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
|||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
|
||||
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
||||
# put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
||||
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
|
||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||
|
||||
|
|
@ -112,18 +112,23 @@ def _slice_gradients_X(f):
|
|||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_XX(f):
    # Decorator: run the wrapped gradients_XX on the active-dimension
    # slice of the inputs and scatter the result back into an array
    # covering the full input dimensionality.
    @wraps(f)
    def wrap(self, dL_dK, X, X2=None):
        rows = X.shape[0]
        cols = rows if X2 is None else X2.shape[0]
        full_shape = (rows, cols, X.shape[1])
        with _Slice_wrap(self, X, X2, ret_shape=full_shape) as s:
            result = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
        return result
    return wrap
|
||||
# def _slice_gradients_XX(f):
|
||||
# @wraps(f)
|
||||
# def wrap(self, dL_dK, X, X2=None, cov=True):
|
||||
# if X2 is None:
|
||||
# N, M = X.shape[0], X.shape[0]
|
||||
# else:
|
||||
# N, M = X.shape[0], X2.shape[0]
|
||||
# if cov==True: # full covariance
|
||||
# with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s:
|
||||
# #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
|
||||
# ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||
# else: # diagonal covariance
|
||||
# with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
|
||||
# #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
|
||||
# ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||
# return ret
|
||||
# return wrap
|
||||
|
||||
def _slice_gradients_X_diag(f):
|
||||
@wraps(f)
|
||||
|
|
|
|||
|
|
@ -218,45 +218,60 @@ class Stationary(Kern):
|
|||
else:
|
||||
return self._gradients_X_pure(dL_dK, X, X2)
|
||||
|
||||
def gradients_XX(self, dL_dK, X, X2=None, cov=True):
    """
    Given the derivative of the objective w.r.t. K (dL_dK), compute the
    second derivative of K w.r.t. X and X2.

    cov=True:  return the full [QxQ] covariance block of the input
               dimensions for each pair of vectors -> shape [N x M x Q x Q]
    cov=False: return only the diagonal of that block -> shape [N x M x Q]
               (computationally cheaper when the full block is not needed)

    .. math::

        \\frac{\\partial^2 K}{\\partial X \\partial X2}

    :param dL_dK: derivative of the objective w.r.t. K [NxM]
    :param X: input observations [NxQ]
    :param X2: second input observations [MxQ]; X is used when None
    :param cov: whether to compute the full cross-dimension block
    :returns: d2L_dXdX2 of shape [NxMxQxQ] (cov=True) or [NxMxQ]
        (cov=False). Thus, we return the second derivative in X2.
    """
    # By the multivariable chain rule we chain the second derivative
    # through r:  d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr*dr_dX*dr_dX2
    invdist = self._inv_dist(X, X2)
    invdist2 = invdist**2
    # The product with dL_dK is deferred to the per-dimension loops below.
    dL_dr = self.dK_dr_via_X(X, X2)
    tmp1 = dL_dr * invdist
    dL_drdr = self.dK2_drdr_via_X(X, X2)
    tmp2 = dL_drdr * invdist2
    l2 = np.ones(X.shape[1]) * self.lengthscale**2

    if X2 is None:
        X2 = X
        tmp1 -= np.eye(X.shape[0]) * self.variance
    else:
        # Subtract the variance wherever the two points coincide
        # (invdist2 == 0); the former indexing by X == X2.T seemed to
        # have a bug (it was subtracted from the first X1 anyway).
        tmp1[invdist2 == 0.] -= self.variance

    if cov:  # full [QxQ] covariance block per pair of points
        grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
        for q in range(self.input_dim):
            for r in range(self.input_dim):
                # Cross-dimension distance product for dims q and r.
                tmpdist2 = (X[:, [q]] - X2[:, [q]].T) * (X[:, [r]] - X2[:, [r]].T)
                if r == q:
                    grad[:, :, q, r] = dL_dK * (((tmp1 * invdist2 - tmp2) * tmpdist2) / l2[r] - tmp1) / l2[q]
                else:
                    grad[:, :, q, r] = dL_dK * (((tmp1 * invdist2 - tmp2) * tmpdist2) / l2[r]) / l2[q]
    else:  # diagonal of the covariance block only
        grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
        for q in range(self.input_dim):
            tmpdist2 = (X[:, [q]] - X2[:, [q]].T) ** 2
            grad[:, :, q] = dL_dK * (((tmp1 * invdist2 - tmp2) * tmpdist2) / l2[q] - tmp1) / l2[q]
    return grad
|
||||
|
||||
def gradients_XX_diag(self, dL_dK, X):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue