[gradsxx] putting tests in, not complete yet!

Max Zwiessele 2016-06-07 09:24:38 +01:00
parent 4e833a4f3a
commit a3f458926b
4 changed files with 72 additions and 84 deletions

View file

@@ -355,7 +355,7 @@ class GP(Model):
:param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the cross-covariance terms between
the N* Jacobian vectors
:returns: dmu_dX, dv_dX
@@ -377,9 +377,10 @@ class GP(Model):
if full_cov:
dK2_dXdX = kern.gradients_XX(one, Xnew)
else:
dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
for i in range(Xnew.shape[0]):
dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
#dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
#for i in range(Xnew.shape[0]):
# dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
def compute_cov_inner(wi):
if full_cov:
@@ -424,7 +425,7 @@ class GP(Model):
Sigma = var_jac.sum(-1)
else:
Sigma = self.output_dim*var_jac
G = 0.
if mean:
G += mumuT
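
As a rough sanity check of the predictive gradients touched in this hunk (an illustrative sketch only, not part of the commit, assuming a standard GPy regression model), the analytic dmu_dX can be compared against central finite differences of the predictive mean:

import numpy as np
import GPy

X = np.random.randn(20, 2)
Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)

Xs = np.random.randn(3, 2)
dmu_dX, dv_dX = m.predictive_gradients(Xs)  # dmu_dX has shape (N*, Q, output_dim)

eps = 1e-6
for q in range(Xs.shape[1]):
    Xp, Xm = Xs.copy(), Xs.copy()
    Xp[:, q] += eps
    Xm[:, q] -= eps
    # numerical derivative of the predictive mean wrt input dimension q
    num = (m.predict(Xp)[0] - m.predict(Xm)[0]) / (2 * eps)
    print(np.allclose(dmu_dX[:, q, 0], num[:, 0], atol=1e-4))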

View file

@@ -128,34 +128,25 @@ def _slice_gradients_X_diag(f):
def _slice_gradients_XX(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None, cov=True):
def wrap(self, dL_dK, X, X2=None):
if X2 is None:
N, M = X.shape[0], X.shape[0]
Q1 = Q2 = X.shape[1]
else:
N, M = X.shape[0], X2.shape[0]
Q1, Q2 = X.shape[1], X2.shape[1]
if cov: # full covariance
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov))
else: # diagonal covariance
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov))
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_XX_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X, cov=True):
def wrap(self, dL_dKdiag, X):
N, Q = X.shape
if cov: # full covariance
with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov))
else: # diagonal covariance
with _Slice_wrap(self, X, None, ret_shape=(N, Q)) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov))
with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap
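
For reference, the slicing behaviour these wrappers rely on can be sketched in a simplified, self-contained form (a hypothetical helper using only active_dims; the real _Slice_wrap handles more cases and the return-shape bookkeeping):

from functools import wraps
import numpy as np

def slice_gradients_XX(f):
    @wraps(f)
    def wrap(self, dL_dK, X, X2=None):
        # evaluate the second derivative only on the kernel's active dimensions,
        # then scatter the [n, m, q, q] result back into the full [N, M, Q1, Q2] array
        idx = np.asarray(self.active_dims)
        Xs = X[:, idx]
        X2s = None if X2 is None else X2[:, idx]
        N, M = X.shape[0], (X.shape[0] if X2 is None else X2.shape[0])
        Q1, Q2 = X.shape[1], (X.shape[1] if X2 is None else X2.shape[1])
        ret = np.zeros((N, M, Q1, Q2))
        ret[np.ix_(np.arange(N), np.arange(M), idx, idx)] = f(self, dL_dK, Xs, X2s)
        return ret
    return wrap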

View file

@@ -218,13 +218,13 @@ class Stationary(Kern):
else:
return self._gradients_X_pure(dL_dK, X, X2)
def gradients_XX(self, dL_dK, X, X2=None, cov=True):
def gradients_XX(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the second derivative of K wrt X and X2:
cov = True: returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors
cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimension for each pair
of vectors (computationally more efficient if the full covariance matrix is not needed)
returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors, thus
the returned array is of shape [NxNxQxQ].
.. math::
\frac{\partial^2 K}{\partial X2^2} = - \frac{\partial^2 K}{\partial X\partial X2}
@@ -242,45 +242,36 @@ class Stationary(Kern):
dL_drdr = self.dK2_drdr_via_X(X, X2) #* dL_dK # we perform this product later
tmp2 = dL_drdr*invdist2
l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)
if X2 is None:
X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance
else:
tmp1[invdist2==0.] -= self.variance
if cov: # full covariance
grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
dist = X[:,None,:] - X2[None,:,:]
dist = (dist[:,:,:,None]*dist[:,:,None,:])
I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
else: # Diagonal covariance, old code
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q])
#grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
#grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
dist = X[:,None,:] - X2[None,:,:]
dist = (dist[:,:,:,None]*dist[:,:,None,:])
I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
return grad
def gradients_XX_diag(self, d2L_dK, X, cov=True):
def gradients_XX_diag(self, dL_dK_diag, X):
"""
Given the derivative of the objective d2L_dK, compute the second derivative of K wrt X:
Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
.. math::
\frac{\partial^2 K}{\partial X\partial X}
:returns:
dL2_dXdX: [NxQ], for X [NxQ] if cov is False, [NxQxQ] if cov is True
dL2_dXdX: [NxQxQ]
"""
if cov:
tmp = np.ones(X.shape+(X.shape[1],))
return tmp * (d2L_dK * self.variance/self.lengthscale**2)[:,None,None]# np.zeros(X.shape+(X.shape[1],))
return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as a row [N] or column [Nx1] vector"
l2 = np.ones(X.shape[1])*self.lengthscale**2
return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
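
Independently of the dL_dK weighting and slicing above, the analytic Hessian of an RBF covariance with respect to one input can be verified against finite differences with plain numpy (a standalone sketch, not part of this file):

import numpy as np

def rbf(x, y, var=1.3, l=0.7):
    # k(x, y) = var * exp(-||x - y||^2 / (2 l^2))
    return var * np.exp(-np.sum((x - y) ** 2) / (2 * l ** 2))

def rbf_hess_x(x, y, var=1.3, l=0.7):
    # analytic d^2 k / dx_i dx_j = k * ((x_i - y_i)(x_j - y_j)/l^4 - delta_ij/l^2)
    d = x - y
    return rbf(x, y, var, l) * (np.outer(d, d) / l ** 4 - np.eye(x.size) / l ** 2)

def shift(a, i, h):
    b = a.copy()
    b[i] += h
    return b

x, y = np.random.randn(3), np.random.randn(3)
h = 1e-5
num = np.zeros((3, 3))
for i in range(3):
    for j in range(3):
        # central finite difference for the mixed second derivative wrt x_i, x_j
        num[i, j] = (rbf(shift(shift(x, i, h), j, h), y)
                     - rbf(shift(shift(x, i, h), j, -h), y)
                     - rbf(shift(shift(x, i, -h), j, h), y)
                     + rbf(shift(shift(x, i, -h), j, -h), y)) / (4 * h ** 2)
print(np.allclose(num, rbf_hess_x(x, y), atol=1e-5))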

View file

@@ -104,37 +104,42 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def parameters_changed(self):
self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
class Kern_check_d2K_dXdX_cov(Kern_check_model):
class Kern_check_d2K_dXdX(Kern_check_model):
"""This class allows gradient checks for the secondderivative of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.X = Param('X',X)
self.X = Param('X',X.copy())
self.link_parameter(self.X)
self.Xc = X.copy()
def log_likelihood(self):
if self.X2 is None:
return self.kernel.gradients_X(self.dL_dK, self.X, self.Xc).sum()
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum()
def parameters_changed(self):
#if self.kernel.name == 'rbf':
# import ipdb;ipdb.set_trace()
if self.X2 is None: X2 = self.X
else: X2 = self.X2
grads = self.kernel.gradients_XX(self.dL_dK.T, X2, self.X, cov=True)
self.X.gradient[:] = grads.sum(-1).sum(0)
if self.X2 is None:
grads = -self.kernel.gradients_XX(self.dL_dK, self.X).sum(1).sum(1)
else:
grads = -self.kernel.gradients_XX(self.dL_dK.T, self.X2, self.X).sum(0).sum(1)
self.X.gradient[:] = grads
class Kern_check_d2Kdiag_dXdX_cov(Kern_check_model):
class Kern_check_d2Kdiag_dXdX(Kern_check_model):
"""This class allows gradient checks for the second derivative of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X)
self.X = Param('X',X)
self.link_parameter(self.X)
self.Xc = X.copy()
def log_likelihood(self):
return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(),self.X))
return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))
def parameters_changed(self):
grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X, cov=True)
self.X.gradient[:] = grads.sum(-1)
grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
self.X.gradient[:] = grads.sum(-1)
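
A minimal usage sketch for the two check models above (assumes GPy is importable; the kernel and shapes are chosen arbitrarily, not taken from the commit):

import numpy as np
import GPy

X = np.random.randn(5, 2)
dL_dK = np.random.randn(5, 5)

m = Kern_check_d2K_dXdX(kernel=GPy.kern.RBF(2), dL_dK=dL_dK, X=X)
print(m.checkgrad(verbose=True))

m_diag = Kern_check_d2Kdiag_dXdX(kernel=GPy.kern.RBF(2), dL_dK=dL_dK, X=X)
print(m_diag.checkgrad(verbose=True))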
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
"""
@@ -273,29 +278,9 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
return False
if verbose:
print("Checking gradients of dK(X, X) wrt X with full cov in dimensions")
print("Checking gradients of dK(X, X2) wrt X2 with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
if verbose:
print("Checking gradients of dK(X, X2) wrt X with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=X2)
testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
@@ -312,10 +297,30 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
pass_checks = False
return False
if verbose:
print("Checking gradients of dK(X, X) wrt X with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
if verbose:
print("Checking gradients of dKdiag(X, X) wrt X with cov in dimensions")
try:
testmodel = Kern_check_d2Kdiag_dXdX_cov(kern, X=X, X2=None)
testmodel = Kern_check_d2Kdiag_dXdX(kern, X=X)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)