[grads x] diagonal entries fixed and add kernel adjusted

This commit is contained in:
Max Zwiessele 2016-06-08 13:45:32 +01:00
parent 787168a394
commit 0c6e3bc88f
4 changed files with 44 additions and 24 deletions

View file

@ -101,22 +101,40 @@ class Linear(Kern):
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return dL_dK.dot(X2)*self.variances #np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_XX(self, dL_dK, X, X2=None, cov=True):
#if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
def gradients_XX(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the second derivative of K wrt X and X2:
returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors, thus
the returned array is of shape [NxMxQxQ].
.. math::
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
Thus, we return the second derivative in X2.
"""
if X2 is None:
return 2*self.variances
else:
return self.variances
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
#if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
#if X2 is None:
# return np.ones(np.repeat(X.shape, 2)) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
#else:
# return np.ones((X.shape[0], X2.shape[0], X.shape[1], X.shape[1])) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
dims = X.shape
if cov:
dims += (X.shape[1],)
return 2*np.ones(dims)*self.variances
def gradients_XX_diag(self, dL_dKdiag, X):
return np.zeros((X.shape[0], X.shape[1], X.shape[1]))
#dims = X.shape
#if cov:
# dims += (X.shape[1],)
#return 2*np.ones(dims)*self.variances
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim) * self.variances

View file

@ -39,6 +39,8 @@ class RBF(Stationary):
def dK2_drdr(self, r):
return (r**2-1)*self.K_of_r(r)
def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:

View file

@ -25,18 +25,13 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_XX(self, dL_dK, X, X2=None, cov=True):
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
if cov:
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
else:
return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X, cov=False):
if cov:
return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
else:
return np.zeros(X.shape, dtype=np.float64)
return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
@ -195,7 +190,7 @@ class Fixed(Static):
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
@ -259,5 +254,4 @@ class Precomputed(Fixed):
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))

View file

@ -85,6 +85,11 @@ class Stationary(Kern):
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=3, ignore_args=())
def dK2_drdr_diag(self):
"Second order derivative of K in r_{i,i}. The diagonal entries are always zero, so we do not give it here."
raise NotImplementedError("implement second derivative of covariance wrt r_diag to use this method")
@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None):
"""
@ -253,7 +258,8 @@ class Stationary(Kern):
dist = X[:,None,:] - X2[None,:,:]
dist = (dist[:,:,:,None]*dist[:,:,None,:])
I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
grad = (np.einsum('kl,klij->klij',dL_dK*(tmp1*invdist2 - tmp2), dist) /l2[None,None,:,None] - np.einsum('kl,klij->klij',dL_dK*tmp1, I))/l2[None,None,None,:]
grad = (((dL_dK*(tmp1*invdist2 - tmp2))[:,:,None,None] * dist)/l2[None,None,:,None]
- (dL_dK*tmp1)[:,:,None,None] * I)/l2[None,None,None,:]
return grad
def gradients_XX_diag(self, dL_dK_diag, X):
@ -270,7 +276,7 @@ class Stationary(Kern):
assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
l4 = np.ones(X.shape[1])*self.lengthscale**2
return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def _gradients_X_pure(self, dL_dK, X, X2=None):