Update function kern.gradients_XX() to compute cross-covariance terms

alessandratosi 2016-04-20 12:09:25 +01:00
parent 1a3e6c3ea3
commit a9c8ef817a
4 changed files with 69 additions and 42 deletions

GPy/kern/src/add.py

@@ -85,13 +85,20 @@ class Add(CombinationKernel):
         [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
         return target
 
-    def gradients_XX(self, dL_dK, X, X2):
-        if X2 is None:
-            target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
-        else:
-            target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
-        [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
-        return target
+    # def gradients_XX(self, dL_dK, X, X2, cov=True):
+    #     if cov == True:  # full covariance
+    #         if X2 is None:
+    #             target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
+    #         else:
+    #             target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
+    #     else:  # diagonal covariance
+    #         if X2 is None:
+    #             target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
+    #         else:
+    #             target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
+    #     [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts]
+    #     return target
 
     def gradients_XX_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape)

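For reference, a minimal standalone sketch (not part of the commit) of the allocation rule the commented-out version targets: the full cross-covariance keeps a QxQ block per pair of inputs, while the diagonal variant keeps only the Q per-dimension terms.

    import numpy as np

    def gradients_XX_target(X, X2=None, cov=True):
        # Zero-initialised accumulator matching Add.gradients_XX above:
        #   cov=True  -> [N x M x Q x Q] (full second-derivative blocks)
        #   cov=False -> [N x M x Q]     (per-dimension diagonal only)
        N, Q = X.shape
        M = N if X2 is None else X2.shape[0]
        return np.zeros((N, M, Q, Q)) if cov else np.zeros((N, M, Q))

    assert gradients_XX_target(np.ones((5, 3)), cov=True).shape == (5, 5, 3, 3)
    assert gradients_XX_target(np.ones((5, 3)), cov=False).shape == (5, 5, 3)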
GPy/kern/src/kern.py

@@ -132,7 +132,7 @@ class Kern(Parameterized):
         raise NotImplementedError
 
     def gradients_X_X2(self, dL_dK, X, X2):
         return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
 
-    def gradients_XX(self, dL_dK, X, X2):
+    def gradients_XX(self, dL_dK, X, X2, cov=False):
         """
         .. math::

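A hedged usage sketch of the new keyword (assuming this branch of GPy is importable and the kernel implements the extended signature, as Stationary does below): the diagonal variant should agree with the diagonal of the full blocks.

    import numpy as np
    import GPy  # assumption: this branch, where gradients_XX accepts cov

    N, M, Q = 5, 4, 3
    X, X2 = np.random.randn(N, Q), np.random.randn(M, Q)
    dL_dK = np.random.randn(N, M)
    kern = GPy.kern.RBF(Q, ARD=True)

    full = kern.gradients_XX(dL_dK, X, X2, cov=True)   # (N, M, Q, Q) blocks
    diag = kern.gradients_XX(dL_dK, X, X2, cov=False)  # (N, M, Q) per-dimension terms
    assert np.allclose(diag, full[:, :, np.arange(Q), np.arange(Q)])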
GPy/kern/src/kernel_slice_operations.py

@@ -24,7 +24,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
         put_clean(dct, 'gradients_X', _slice_gradients_X)
         put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
-        put_clean(dct, 'gradients_XX', _slice_gradients_XX)
+        # put_clean(dct, 'gradients_XX', _slice_gradients_XX)
         put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
         put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
@@ -112,18 +112,23 @@ def _slice_gradients_X(f):
         return ret
     return wrap
 
-def _slice_gradients_XX(f):
-    @wraps(f)
-    def wrap(self, dL_dK, X, X2=None):
-        if X2 is None:
-            N, M = X.shape[0], X.shape[0]
-        else:
-            N, M = X.shape[0], X2.shape[0]
-        with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
-            ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
-        return ret
-    return wrap
+# def _slice_gradients_XX(f):
+#     @wraps(f)
+#     def wrap(self, dL_dK, X, X2=None, cov=True):
+#         if X2 is None:
+#             N, M = X.shape[0], X.shape[0]
+#         else:
+#             N, M = X.shape[0], X2.shape[0]
+#         if cov == True:  # full covariance
+#             with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1], X.shape[1])) as s:
+#                 ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
+#         else:  # diagonal covariance
+#             with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
+#                 ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
+#         return ret
+#     return wrap
 
 def _slice_gradients_X_diag(f):
     @wraps(f)

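With the slicer hook commented out, gradients_XX no longer receives automatic active_dims slicing, so callers (or implementations) must restrict the inputs themselves. A minimal illustrative helper (hypothetical, not part of the commit; it relies only on the standard kern.active_dims attribute and this branch's extended signature):

    import numpy as np

    def gradients_XX_sliced(kern, dL_dK, X, X2=None, cov=True):
        # Manually apply the slicing the commented-out wrapper used to do:
        # restrict X (and X2) to the dimensions this kernel acts on.
        idx = np.asarray(kern.active_dims, dtype=int)
        Xs = X[:, idx]
        X2s = None if X2 is None else X2[:, idx]
        return kern.gradients_XX(dL_dK, Xs, X2s, cov=cov)

Unlike the wrapper, this returns derivatives only for the active dimensions rather than scattering them back into the full input shape.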
GPy/kern/src/stationary.py

@@ -218,45 +218,60 @@ class Stationary(Kern):
         else:
             return self._gradients_X_pure(dL_dK, X, X2)
 
-    def gradients_XX(self, dL_dK, X, X2=None):
+    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
         """
         Given the derivative of the objective wrt K (dL_dK), compute the second derivative of K wrt X and X2:
 
+        cov=True: returns the full covariance block [QxQ] over the input dimensions for each pair of vectors.
+        cov=False: returns only the diagonal of that [QxQ] block for each pair of vectors
+            (computationally more efficient when the full block is not needed).
+
         .. math::
 
            \frac{\partial^2 K}{\partial X \partial X2}, \qquad
           \frac{\partial^2 K}{\partial X2^2} = -\frac{\partial^2 K}{\partial X \partial X2}
 
-        :returns: dL2_dXdX2 [NxMxQ], for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None).
-            Thus, we return the second derivative in X2.
+        :returns: dL2_dXdX2 [NxMxQ] in the cov=False case, or [NxMxQxQ] in the cov=True case,
+            for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None).
+            Thus, we return the second derivative in X2.
         """
-        # The off-diagonals in Q are always zero; this should also be true for the Linear kernel...
         # By the multivariable chain rule, we can chain the second derivative through r:
         # d2K_dXdX2 = dK_dr * d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2
         invdist = self._inv_dist(X, X2)
         invdist2 = invdist**2
-        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
+        dL_dr = self.dK_dr_via_X(X, X2)  # the product with dL_dK is performed later
         tmp1 = dL_dr * invdist
-        dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
+        dL_drdr = self.dK2_drdr_via_X(X, X2)  # the product with dL_dK is performed later
         tmp2 = dL_drdr * invdist2
         l2 = np.ones(X.shape[1]) * self.lengthscale**2
         if X2 is None:
             X2 = X
             tmp1 -= np.eye(X.shape[0]) * self.variance
         else:
-            tmp1[X==X2.T] -= self.variance
-        grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
-        for q in range(self.input_dim):
-            tmpdist2 = (X[:, [q]] - X2[:, [q]].T)**2
-            grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
+            # tmp1[X==X2.T] -= self.variance  # old version, to be removed
+            # (seems to have a bug: it is subtracted from the first X1 anyway)
+            tmp1[invdist2 == 0.] -= self.variance
+        if cov == True:  # full covariance
+            grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
+            for q in range(self.input_dim):
+                for r in range(self.input_dim):
+                    tmpdist2 = (X[:, [q]] - X2[:, [q]].T) * (X[:, [r]] - X2[:, [r]].T)  # cross-term distance
+                    if r == q:
+                        grad[:, :, q, r] = dL_dK * (((tmp1*invdist2 - tmp2)*tmpdist2/l2[r] - tmp1)/l2[q])
+                    else:
+                        grad[:, :, q, r] = dL_dK * (((tmp1*invdist2 - tmp2)*tmpdist2/l2[r])/l2[q])
+        else:  # diagonal covariance
+            grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
+            for q in range(self.input_dim):
+                tmpdist2 = (X[:, [q]] - X2[:, [q]].T)**2
+                grad[:, :, q] = dL_dK * (((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q])
         return grad
 
     def gradients_XX_diag(self, dL_dK, X):
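The chain rule used above has a closed form worth checking in the RBF case. With dL_dK factored out, k(x, x2) = var * exp(-r^2 / 2) and r^2 = sum_q (x_q - x2_q)^2 / l_q^2 give d2k / dx_q dx2_r = k * (delta_qr / l_q^2 - d_q * d_r), where d_q = (x_q - x2_q) / l_q^2, which matches the q, r entries assembled in the loops. A self-contained sketch (independent of GPy; the closed form is derived by hand here, not quoted from the library) validating it against central finite differences:

    import numpy as np

    var, l = 2.0, np.array([0.7, 1.3])  # illustrative variance and ARD lengthscales

    def k(x, x2):
        # RBF kernel on 1-D arrays x, x2 of length Q
        return var * np.exp(-0.5 * np.sum((x - x2)**2 / l**2))

    def d2k_analytic(x, x2, q, r):
        # Closed form of d^2 k / dx_q dx2_r via the chain rule through r
        d = (x - x2) / l**2
        return k(x, x2) * ((q == r) / l[q]**2 - d[q] * d[r])

    def d2k_numeric(x, x2, q, r, eps=1e-4):
        # Central-difference estimate of the same mixed partial
        eq, er = np.eye(len(x))[q] * eps, np.eye(len(x))[r] * eps
        return (k(x + eq, x2 + er) - k(x + eq, x2 - er)
                - k(x - eq, x2 + er) + k(x - eq, x2 - er)) / (4 * eps**2)

    x, x2 = np.array([0.3, -1.1]), np.array([1.2, 0.4])
    for q in range(2):
        for r in range(2):
            assert np.isclose(d2k_analytic(x, x2, q, r), d2k_numeric(x, x2, q, r))

In particular the off-diagonal (q != r) term -k * d_q * d_r is generally nonzero, which is what the new full-covariance branch computes and the old [NxMxQ] return shape could not represent.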