[gradsxx] putting tests in, not complete yet!

This commit is contained in:
Max Zwiessele 2016-06-07 09:24:38 +01:00
parent 4e833a4f3a
commit a3f458926b
4 changed files with 72 additions and 84 deletions

View file

@ -355,7 +355,7 @@ class GP(Model):
:param X: The points at which to get the predictive gradients. :param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim) :type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for. :param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the cross-covariance terms between :param boolean full_cov: whether to return the cross-covariance terms between
the N* Jacobian vectors the N* Jacobian vectors
:returns: dmu_dX, dv_dX :returns: dmu_dX, dv_dX
@ -377,9 +377,10 @@ class GP(Model):
if full_cov: if full_cov:
dK2_dXdX = kern.gradients_XX(one, Xnew) dK2_dXdX = kern.gradients_XX(one, Xnew)
else: else:
dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1])) dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
for i in range(Xnew.shape[0]): #dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:]) #for i in range(Xnew.shape[0]):
# dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
def compute_cov_inner(wi): def compute_cov_inner(wi):
if full_cov: if full_cov:
@ -424,7 +425,7 @@ class GP(Model):
Sigma = var_jac.sum(-1) Sigma = var_jac.sum(-1)
else: else:
Sigma = self.output_dim*var_jac Sigma = self.output_dim*var_jac
G = 0. G = 0.
if mean: if mean:
G += mumuT G += mumuT

View file

@ -128,34 +128,25 @@ def _slice_gradients_X_diag(f):
def _slice_gradients_XX(f): def _slice_gradients_XX(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dK, X, X2=None, cov=True): def wrap(self, dL_dK, X, X2=None):
if X2 is None: if X2 is None:
N, M = X.shape[0], X.shape[0] N, M = X.shape[0], X.shape[0]
Q1 = Q2 = X.shape[1] Q1 = Q2 = X.shape[1]
else: else:
N, M = X.shape[0], X2.shape[0] N, M = X.shape[0], X2.shape[0]
Q1, Q2 = X.shape[1], X2.shape[1] Q1, Q2 = X.shape[1], X2.shape[1]
if cov: # full covariance #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
#with _Slice_wrap(self, X, X2, ret_shape=None) as s: with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s: ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov))
else: # diagonal covariance
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov))
return ret return ret
return wrap return wrap
def _slice_gradients_XX_diag(f): def _slice_gradients_XX_diag(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dKdiag, X, cov=True): def wrap(self, dL_dKdiag, X):
N, Q = X.shape N, Q = X.shape
if cov: # full covariance with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s: ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov))
else: # diagonal covariance
with _Slice_wrap(self, X, None, ret_shape=(N, Q)) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov))
return ret return ret
return wrap return wrap

View file

@ -218,13 +218,13 @@ class Stationary(Kern):
else: else:
return self._gradients_X_pure(dL_dK, X, X2) return self._gradients_X_pure(dL_dK, X, X2)
def gradients_XX(self, dL_dK, X, X2=None, cov=True): def gradients_XX(self, dL_dK, X, X2=None):
""" """
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2: Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
cov = True: returns the full covariance matrix [QxQ] of the input dimensionfor each pair or vectors returns the full covariance matrix [QxQ] of the input dimensionfor each pair or vectors, thus
cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimensionfor each pair the returned array is of shape [NxNxQxQ].
or vectors (computationally more efficient if the full covariance matrix is not needed)
..math: ..math:
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2} \frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
@ -242,45 +242,36 @@ class Stationary(Kern):
dL_drdr = self.dK2_drdr_via_X(X, X2) #* dL_dK # we perofrm this product later dL_drdr = self.dK2_drdr_via_X(X, X2) #* dL_dK # we perofrm this product later
tmp2 = dL_drdr*invdist2 tmp2 = dL_drdr*invdist2
l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2) l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)
if X2 is None: if X2 is None:
X2 = X X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance tmp1 -= np.eye(X.shape[0])*self.variance
else: else:
tmp1[invdist2==0.] -= self.variance tmp1[invdist2==0.] -= self.variance
if cov: # full covariance #grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64) dist = X[:,None,:] - X2[None,:,:]
dist = X[:,None,:] - X2[None,:,:] dist = (dist[:,:,:,None]*dist[:,:,None,:])
dist = (dist[:,:,:,None]*dist[:,:,None,:]) I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1])) grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
else: # Diagonal covariance, old code
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q])
#grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
return grad return grad
def gradients_XX_diag(self, d2L_dK, X, cov=True): def gradients_XX_diag(self, dL_dK_diag, X):
""" """
Given the derivative of the objective d2L_dK, compute the second derivative of K wrt X: Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
..math: ..math:
\frac{\partial^2 K}{\partial X\partial X} \frac{\partial^2 K}{\partial X\partial X}
..returns: ..returns:
dL2_dXdX: [NxQ], for X [NxQ] if cov is False, [NxQxQ] if cov is True dL2_dXdX: [NxQxQ]
""" """
if cov: dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
tmp = np.ones(X.shape+(X.shape[1],)) assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
return tmp * (d2L_dK * self.variance/self.lengthscale**2)[:,None,None]# np.zeros(X.shape+(X.shape[1],))
return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape) l2 = np.ones(X.shape[1])*self.lengthscale**2
return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def _gradients_X_pure(self, dL_dK, X, X2=None): def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)

View file

@ -104,37 +104,42 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def parameters_changed(self): def parameters_changed(self):
self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
class Kern_check_d2K_dXdX_cov(Kern_check_model): class Kern_check_d2K_dXdX(Kern_check_model):
"""This class allows gradient checks for the secondderivative of a kernel with respect to X. """ """This class allows gradient checks for the secondderivative of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.X = Param('X',X) self.X = Param('X',X.copy())
self.link_parameter(self.X) self.link_parameter(self.X)
self.Xc = X.copy()
def log_likelihood(self): def log_likelihood(self):
if self.X2 is None:
return self.kernel.gradients_X(self.dL_dK, self.X, self.Xc).sum()
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum() return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum()
def parameters_changed(self): def parameters_changed(self):
#if self.kernel.name == 'rbf': #if self.kernel.name == 'rbf':
# import ipdb;ipdb.set_trace() # import ipdb;ipdb.set_trace()
if self.X2 is None: X2 = self.X if self.X2 is None:
else: X2 = self.X2 grads = -self.kernel.gradients_XX(self.dL_dK, self.X).sum(1).sum(1)
grads = self.kernel.gradients_XX(self.dL_dK.T, X2, self.X, cov=True) else:
self.X.gradient[:] = grads.sum(-1).sum(0) grads = -self.kernel.gradients_XX(self.dL_dK.T, self.X2, self.X).sum(0).sum(1)
self.X.gradient[:] = grads
class Kern_check_d2Kdiag_dXdX_cov(Kern_check_model): class Kern_check_d2Kdiag_dXdX(Kern_check_model):
"""This class allows gradient checks for the second derivative of a kernel with respect to X. """ """This class allows gradient checks for the second derivative of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X)
self.X = Param('X',X) self.X = Param('X',X)
self.link_parameter(self.X) self.link_parameter(self.X)
self.Xc = X.copy()
def log_likelihood(self): def log_likelihood(self):
return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(),self.X)) return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))
def parameters_changed(self): def parameters_changed(self):
grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X, cov=True) grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
self.X.gradient[:] = grads.sum(-1) self.X.gradient[:] = grads.sum(-1)
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None): def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
""" """
@ -273,29 +278,9 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
return False return False
if verbose: if verbose:
print("Checking gradients of dK(X, X) wrt X with full cov in dimensions") print("Checking gradients of dK(X, X2) wrt X2 with full cov in dimensions")
try: try:
testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=None) testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
if verbose:
print("Checking gradients of dK(X, X2) wrt X with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=X2)
if fixed_X_dims is not None: if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix() testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose) result = testmodel.checkgrad(verbose=verbose)
@ -312,10 +297,30 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
pass_checks = False pass_checks = False
return False return False
if verbose:
print("Checking gradients of dK(X, X) wrt X with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
if verbose: if verbose:
print("Checking gradients of dKdiag(X, X) wrt X with cov in dimensions") print("Checking gradients of dKdiag(X, X) wrt X with cov in dimensions")
try: try:
testmodel = Kern_check_d2Kdiag_dXdX_cov(kern, X=X, X2=None) testmodel = Kern_check_d2Kdiag_dXdX(kern, X=X)
if fixed_X_dims is not None: if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix() testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose) result = testmodel.checkgrad(verbose=verbose)