Update function kern.gradients_XX() to compute cross-covariance terms

This commit is contained in:
alessandratosi 2016-04-20 12:09:25 +01:00
parent 1a3e6c3ea3
commit a9c8ef817a
4 changed files with 69 additions and 42 deletions

View file

@ -85,13 +85,20 @@ class Add(CombinationKernel):
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target return target
def gradients_XX(self, dL_dK, X, X2): # def gradients_XX(self, dL_dK, X, X2, cov=True):
if X2 is None: # if cov==True: # full covariance
target = np.zeros((X.shape[0], X.shape[0], X.shape[1])) # if X2 is None:
else: # target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1])) # else:
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts] # target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
return target # else: # diagonal covariance
# if X2 is None:
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
# else:
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
# [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=True)) for p in self.parts]
# return target
def gradients_XX_diag(self, dL_dKdiag, X): def gradients_XX_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)

View file

@ -132,7 +132,7 @@ class Kern(Parameterized):
raise NotImplementedError raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2): def gradients_X_X2(self, dL_dK, X, X2):
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X) return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
def gradients_XX(self, dL_dK, X, X2): def gradients_XX(self, dL_dK, X, X2, cov='False'):
""" """
.. math:: .. math::

View file

@ -24,7 +24,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X) put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X) put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX) # put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
@ -112,18 +112,23 @@ def _slice_gradients_X(f):
return ret return ret
return wrap return wrap
def _slice_gradients_XX(f): # def _slice_gradients_XX(f):
@wraps(f) # @wraps(f)
def wrap(self, dL_dK, X, X2=None): # def wrap(self, dL_dK, X, X2=None, cov=True):
if X2 is None: # if X2 is None:
N, M = X.shape[0], X.shape[0] # N, M = X.shape[0], X.shape[0]
else: # else:
N, M = X.shape[0], X2.shape[0] # N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s: # if cov==True: # full covariance
#with _Slice_wrap(self, X, X2, ret_shape=None) as s: # with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2)) # #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
return ret # ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return wrap # else: # diagonal covariance
# with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
# #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
# ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
# return ret
# return wrap
def _slice_gradients_X_diag(f): def _slice_gradients_X_diag(f):
@wraps(f) @wraps(f)

View file

@ -218,45 +218,60 @@ class Stationary(Kern):
else: else:
return self._gradients_X_pure(dL_dK, X, X2) return self._gradients_X_pure(dL_dK, X, X2)
def gradients_XX(self, dL_dK, X, X2=None): def gradients_XX(self, dL_dK, X, X2=None, cov=True):
""" """
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2: Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
cov = Full: returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors
cov = Diag: returns the diagonal of the covariance matrix [QxQ] of the input dimension for each pair
of vectors (computationally more efficient if the full covariance matrix is not needed)
..math: ..math:
\frac{\partial^2 K}{\partial X\partial X2} \frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
..returns: ..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if X2 is None) dL2_dXdX2: [NxMxQ] in the cov=Diag case, or [NxMxQxQ] in the cov=full case,
Thus, we return the second derivative in X2. for X [NxQ] and X2[MxQ] (X2 is X if X2 is None)
Thus, we return the second derivative in X2.
""" """
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
# According to multivariable chain rule, we can chain the second derivative through r: # According to multivariable chain rule, we can chain the second derivative through r:
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2: # d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
invdist2 = invdist**2 invdist2 = invdist**2
dL_dr = self.dK_dr_via_X(X, X2) # * dL_dK we perform this product later
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp1 = dL_dr * invdist tmp1 = dL_dr * invdist
dL_drdr = self.dK2_drdr_via_X(X, X2) # * dL_dK we perform this product later
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
tmp2 = dL_drdr * invdist2 tmp2 = dL_drdr * invdist2
l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)
l2 = np.ones(X.shape[1]) * self.lengthscale**2 print ['l2',l2]
if X2 is None: if X2 is None:
X2 = X X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance tmp1 -= np.eye(X.shape[0])*self.variance
else: else:
tmp1[X==X2.T] -= self.variance #tmp1[X==X2.T] -= self.variance # Old version, to be removed
# (seems to have a bug: it is subtracted to the first X1 anyway)
tmp1[invdist2==0.] -= self.variance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) if cov==True: # full covariance
#grad = np.empty(X.shape, dtype=np.float64) grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
for q in range(self.input_dim): for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2 for r in range(self.input_dim):
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q] tmpdist2 = (X[:,[q]]-X2[:,[q]].T)*(X[:,[r]]-X2[:,[r]].T) # Introduce temporary distance
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q] if r==q:
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q]) grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r] - tmp1)/l2[q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q]) else:
grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r])/l2[q])
else:
# Diagonal covariance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q])
#grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
return grad return grad
def gradients_XX_diag(self, dL_dK, X): def gradients_XX_diag(self, dL_dK, X):