Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-12 13:32:39 +02:00)
[gradsxx] putting tests in, not complete yet!
parent 4e833a4f3a
commit a3f458926b

4 changed files with 72 additions and 84 deletions
@@ -355,7 +355,7 @@ class GP(Model):
         :param X: The points at which to get the predictive gradients.
         :type X: np.ndarray (Xnew x self.input_dim)
         :param kern: The kernel to compute the jacobian for.
-        :param boolean full_cov: whether to return the cross-covariance terms between
+        :param boolean full_cov: whether to return the cross-covariance terms between
         the N* Jacobian vectors

         :returns: dmu_dX, dv_dX
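For reference, the documented call reads as below; a minimal usage sketch, assuming a fitted GPRegression model (shape comments reflect released GPy, where `dmu_dX` is [N x Q x D] and `dv_dX` is [N x Q]):

```python
import numpy as np
import GPy

np.random.seed(0)
X = np.random.randn(20, 2)
Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)   # a small 2-input, 1-output GP

Xnew = np.random.randn(5, 2)        # 5 test points
dmu_dX, dv_dX = m.predictive_gradients(Xnew)
print(dmu_dX.shape, dv_dX.shape)    # (5, 2, 1) and (5, 2) in released GPy
```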
@@ -377,9 +377,10 @@ class GP(Model):
         if full_cov:
             dK2_dXdX = kern.gradients_XX(one, Xnew)
         else:
-            dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
-            for i in range(Xnew.shape[0]):
-                dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
+            dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
+            #dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
+            #for i in range(Xnew.shape[0]):
+            #    dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])

         def compute_cov_inner(wi):
             if full_cov:
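The new one-liner leans on `gradients_XX` returning the full [N x N x Q x Q] array (see the stationary-kernel hunk below); summing out the first axis leaves one QxQ block per test point. A minimal shape sketch, assuming a GPy build that includes this change:

```python
import numpy as np
import GPy

np.random.seed(0)
Xnew = np.random.randn(5, 3)                   # N = 5 points, Q = 3 input dims
kern = GPy.kern.RBF(input_dim=3)
one = np.ones((Xnew.shape[0], Xnew.shape[0]))  # placeholder dL_dK of ones

d2K = kern.gradients_XX(one, Xnew)             # (N, N, Q, Q), per the new docstring
dK2_dXdX = -d2K.sum(0)                         # (N, Q, Q): one QxQ block per point
print(d2K.shape, dK2_dXdX.shape)
```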
@@ -424,7 +425,7 @@ class GP(Model):
             Sigma = var_jac.sum(-1)
         else:
             Sigma = self.output_dim*var_jac
-
+
         G = 0.
         if mean:
             G += mumuT
@@ -128,34 +128,25 @@ def _slice_gradients_X_diag(f):

 def _slice_gradients_XX(f):
     @wraps(f)
-    def wrap(self, dL_dK, X, X2=None, cov=True):
+    def wrap(self, dL_dK, X, X2=None):
         if X2 is None:
             N, M = X.shape[0], X.shape[0]
             Q1 = Q2 = X.shape[1]
         else:
             N, M = X.shape[0], X2.shape[0]
             Q1, Q2 = X.shape[1], X2.shape[1]
-        if cov: # full covariance
-            #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
-            with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
-                ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov))
-        else: # diagonal covariance
-            #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
-            with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
-                ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2, cov=cov))
+        #with _Slice_wrap(self, X, X2, ret_shape=None) as s:
+        with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
+            ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
         return ret
     return wrap

 def _slice_gradients_XX_diag(f):
     @wraps(f)
-    def wrap(self, dL_dKdiag, X, cov=True):
+    def wrap(self, dL_dKdiag, X):
         N, Q = X.shape
-        if cov: # full covariance
-            with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
-                ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov))
-        else: # diagonal covariance
-            with _Slice_wrap(self, X, None, ret_shape=(N, Q)) as s:
-                ret = s.handle_return_array(f(self, dL_dKdiag, s.X, cov=cov))
+        with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
+            ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
         return ret
     return wrap

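Both decorators follow the same slice-and-scatter pattern: restrict the inputs to the kernel's active dimensions, call the wrapped function, and let `_Slice_wrap` place the result into the preallocated `ret_shape` array. A hypothetical standalone sketch of that pattern (my own simplification; `_Slice_wrap` itself is not reproduced here):

```python
from functools import wraps
import numpy as np

def slice_gradients_XX(f):
    """Call f on the active input columns only, then scatter the result
    back into a full (N, M, Q, Q) array, leaving zeros elsewhere."""
    @wraps(f)
    def wrap(self, dL_dK, X, X2=None):
        a = np.asarray(self.active_dims)             # columns this kernel acts on
        Xs = X[:, a]
        X2s = X2[:, a] if X2 is not None else None
        N = X.shape[0]
        M = X2.shape[0] if X2 is not None else N
        ret = np.zeros((N, M, X.shape[1], X.shape[1]))
        ret[np.ix_(np.arange(N), np.arange(M), a, a)] = f(self, dL_dK, Xs, X2s)
        return ret
    return wrap
```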
@@ -218,13 +218,13 @@ class Stationary(Kern):
         else:
             return self._gradients_X_pure(dL_dK, X, X2)

-    def gradients_XX(self, dL_dK, X, X2=None, cov=True):
+    def gradients_XX(self, dL_dK, X, X2=None):
         """
         Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:

-        cov = True: returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors
-        cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimension for each pair
-        of vectors (computationally more efficient if the full covariance matrix is not needed)
+        returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors, thus
+        the returned array is of shape [NxNxQxQ].

         ..math:
             \frac{\partial^2 K}{\partial X2^2} = - \frac{\partial^2 K}{\partial X\partial X2}
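The `..math` identity quoted in the docstring follows from stationarity alone: since k(x, x') depends only on x - x', each first derivative in x' is the negative of the one in x. A one-line note (not part of the diff):

```latex
% k(x, x') = \phi(x - x') \implies \partial k/\partial x' = -\,\partial k/\partial x, hence
\frac{\partial^2 K}{\partial X2^{\,2}}
  = \frac{\partial}{\partial X2}\left(-\frac{\partial K}{\partial X}\right)
  = -\frac{\partial^2 K}{\partial X\,\partial X2}.
```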
@@ -242,45 +242,36 @@ class Stationary(Kern):
         dL_drdr = self.dK2_drdr_via_X(X, X2) #* dL_dK # we perform this product later
         tmp2 = dL_drdr*invdist2
         l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)

         if X2 is None:
             X2 = X
             tmp1 -= np.eye(X.shape[0])*self.variance
         else:
             tmp1[invdist2==0.] -= self.variance

-        if cov: # full covariance
-            grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
-            dist = X[:,None,:] - X2[None,:,:]
-            dist = (dist[:,:,:,None]*dist[:,:,None,:])
-            I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
-            grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
-        else: # Diagonal covariance, old code
-            grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
-            #grad = np.empty(X.shape, dtype=np.float64)
-            for q in range(self.input_dim):
-                tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
-                grad[:, :, q] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q])
-                #grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
-                #grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
-                #np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
-                #np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
+        #grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
+        dist = X[:,None,:] - X2[None,:,:]
+        dist = (dist[:,:,:,None]*dist[:,:,None,:])
+        I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
+        grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
         return grad
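After this change `gradients_XX` always returns the weighted full second-derivative array, one QxQ block per (i, j) pair of inputs. A shape sketch, assuming a GPy build with this commit (RBF is a `Stationary` subclass, so it takes the path above):

```python
import numpy as np
import GPy

np.random.seed(1)
X = np.random.randn(4, 3)                    # N = 4
X2 = np.random.randn(6, 3)                   # M = 6
dL_dK = np.ones((4, 6))                      # per-pair weights, multiplied in by the einsum
kern = GPy.kern.RBF(input_dim=3, ARD=True)

G = kern.gradients_XX(dL_dK, X, X2)
print(G.shape)                               # (4, 6, 3, 3)
```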

-    def gradients_XX_diag(self, d2L_dK, X, cov=True):
+    def gradients_XX_diag(self, dL_dK_diag, X):
         """
-        Given the derivative of the objective d2L_dK, compute the second derivative of K wrt X:
+        Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:

         ..math:
             \frac{\partial^2 K}{\partial X\partial X}

         ..returns:
-            dL2_dXdX: [NxQ], for X [NxQ] if cov is False, [NxQxQ] if cov is True
+            dL2_dXdX: [NxQxQ]
         """
-        if cov:
-            tmp = np.ones(X.shape+(X.shape[1],))
-            return tmp * (d2L_dK * self.variance/self.lengthscale**2)[:,None,None]# np.zeros(X.shape+(X.shape[1],))
-        return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
+        dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
+        assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
+
+        l2 = np.ones(X.shape[1])*self.lengthscale**2
+        return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
+        #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)

     def _gradients_X_pure(self, dL_dK, X, X2=None):
         invdist = self._inv_dist(X, X2)
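`gradients_XX_diag` correspondingly drops `cov` and always returns one QxQ block per data point, with `dL_dK_diag` accepted as a length-N vector. A minimal sketch under the same assumption:

```python
import numpy as np
import GPy

np.random.seed(1)
X = np.random.randn(6, 2)             # N = 6, Q = 2
w = np.random.randn(6)                # dL_dK_diag: one weight per diagonal entry
kern = GPy.kern.RBF(input_dim=2, lengthscale=2.0)

H = kern.gradients_XX_diag(w, X)
print(H.shape)                        # (6, 2, 2), per the [NxQxQ] contract
```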
@@ -104,37 +104,42 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
     def parameters_changed(self):
         self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)

-class Kern_check_d2K_dXdX_cov(Kern_check_model):
+class Kern_check_d2K_dXdX(Kern_check_model):
     """This class allows gradient checks for the second derivative of a kernel with respect to X. """
     def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
-        self.X = Param('X',X)
+        self.X = Param('X',X.copy())
         self.link_parameter(self.X)
+        self.Xc = X.copy()

     def log_likelihood(self):
+        if self.X2 is None:
+            return self.kernel.gradients_X(self.dL_dK, self.X, self.Xc).sum()
         return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum()

     def parameters_changed(self):
-        #if self.kernel.name == 'rbf':
-        #    import ipdb;ipdb.set_trace()
-        if self.X2 is None: X2 = self.X
-        else: X2 = self.X2
-        grads = self.kernel.gradients_XX(self.dL_dK.T, X2, self.X, cov=True)
-        self.X.gradient[:] = grads.sum(-1).sum(0)
+        if self.X2 is None:
+            grads = -self.kernel.gradients_XX(self.dL_dK, self.X).sum(1).sum(1)
+        else:
+            grads = -self.kernel.gradients_XX(self.dL_dK.T, self.X2, self.X).sum(0).sum(1)
+        self.X.gradient[:] = grads

-class Kern_check_d2Kdiag_dXdX_cov(Kern_check_model):
+class Kern_check_d2Kdiag_dXdX(Kern_check_model):
     """This class allows gradient checks for the second derivative of a kernel with respect to X. """
-    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
-        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
+    def __init__(self, kernel=None, dL_dK=None, X=None):
+        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X)
         self.X = Param('X',X)
         self.link_parameter(self.X)
+        self.Xc = X.copy()

     def log_likelihood(self):
-        return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(),self.X))
+        return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))

     def parameters_changed(self):
-        grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X, cov=True)
-        self.X.gradient[:] = grads.sum(-1)
+        grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
+        self.X.gradient[:] = grads.sum(-1)

 def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
     """
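These check models are driven by paramz's `checkgrad`: `log_likelihood` sums the weighted first derivatives, `parameters_changed` supplies the analytic second derivative as `X.gradient`, and `checkgrad` compares the latter against numerical differences of the former. A usage sketch with the names defined in this file:

```python
import numpy as np
import GPy

np.random.seed(2)
X = np.random.randn(10, 2)
dL_dK = np.ones((10, 10))
kern = GPy.kern.RBF(input_dim=2)

m = Kern_check_d2K_dXdX(kernel=kern, dL_dK=dL_dK, X=X, X2=None)
assert m.checkgrad(verbose=True)    # analytic vs. numerical gradient in X
```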
@@ -273,29 +278,9 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
         return False

     if verbose:
-        print("Checking gradients of dK(X, X) wrt X with full cov in dimensions")
+        print("Checking gradients of dK(X, X2) wrt X2 with full cov in dimensions")
     try:
-        testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=None)
-        if fixed_X_dims is not None:
-            testmodel.X[:,fixed_X_dims].fix()
-        result = testmodel.checkgrad(verbose=verbose)
-    except NotImplementedError:
-        result=True
-        if verbose:
-            print(("gradients_X not implemented for " + kern.name))
-    if result and verbose:
-        print("Check passed.")
-    if not result:
-        print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
-        testmodel.checkgrad(verbose=True)
-        assert(result)
-        pass_checks = False
-        return False
-
-    if verbose:
-        print("Checking gradients of dK(X, X2) wrt X with full cov in dimensions")
-    try:
-        testmodel = Kern_check_d2K_dXdX_cov(kern, X=X, X2=X2)
+        testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=X2)
         if fixed_X_dims is not None:
             testmodel.X[:,fixed_X_dims].fix()
         result = testmodel.checkgrad(verbose=verbose)
@@ -312,10 +297,30 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
         pass_checks = False
         return False

+    if verbose:
+        print("Checking gradients of dK(X, X) wrt X with full cov in dimensions")
+    try:
+        testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=None)
+        if fixed_X_dims is not None:
+            testmodel.X[:,fixed_X_dims].fix()
+        result = testmodel.checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print(("gradients_X not implemented for " + kern.name))
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
+        testmodel.checkgrad(verbose=True)
+        assert(result)
+        pass_checks = False
+        return False
+
     if verbose:
         print("Checking gradients of dKdiag(X, X) wrt X with cov in dimensions")
     try:
-        testmodel = Kern_check_d2Kdiag_dXdX_cov(kern, X=X, X2=None)
+        testmodel = Kern_check_d2Kdiag_dXdX(kern, X=X)
         if fixed_X_dims is not None:
             testmodel.X[:,fixed_X_dims].fix()
         result = testmodel.checkgrad(verbose=verbose)
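Putting it together, the whole suite runs from a single call; a sketch using the names in this file:

```python
import numpy as np
import GPy

np.random.seed(3)
X = np.random.randn(10, 3)
X2 = np.random.randn(5, 3)
kern = GPy.kern.RBF(input_dim=3, ARD=True)

assert check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=True)
```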