[gradients xx] getting there

This commit is contained in:
Max Zwiessele 2016-05-04 09:11:04 +01:00
parent 4efb92c554
commit 5d19039d90
3 changed files with 52 additions and 51 deletions

View file

@ -449,7 +449,7 @@ class GP(Model):
:param bool covariance: whether to include the covariance of the wishart embedding. :param bool covariance: whether to include the covariance of the wishart embedding.
:param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]] :param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
""" """
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance) G = self.predict_wishart_embedding(Xnew, kern, mean, covariance)
if dimensions is None: if dimensions is None:
dimensions = self.get_most_significant_input_dimensions()[:2] dimensions = self.get_most_significant_input_dimensions()[:2]
G = G[:, dimensions][:,:,dimensions] G = G[:, dimensions][:,:,dimensions]

View file

@ -70,11 +70,11 @@ class _Slice_wrap(object):
ret[:, self.k._all_dims_active] = return_val ret[:, self.k._all_dims_active] = return_val
elif len(self.shape) == 3: # derivative for X2!=None elif len(self.shape) == 3: # derivative for X2!=None
if self.diag: if self.diag:
ret[:, :, self.k._all_dims_active][:, self.k._all_dims_active] = return_val ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
else: else:
ret[:, :, self.k._all_dims_active] = return_val ret[:, :, self.k._all_dims_active] = return_val
elif len(self.shape) == 4: # second order derivative elif len(self.shape) == 4: # second order derivative
ret[:, :, self.k._all_dims_active][:, :, :, self.k._all_dims_active] = return_val ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
return ret return ret
return return_val return return_val

View file

@ -223,7 +223,7 @@ class Stationary(Kern):
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2: Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
cov = True: returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors cov = True: returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors
cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimension for each pair cov = False: returns the diagonal of the covariance matrix [QxQ] of the input dimension for each pair
or vectors (computationally more efficient if the full covariance matrix is not needed) or vectors (computationally more efficient if the full covariance matrix is not needed)
..math: ..math:
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2} \frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
@ -247,20 +247,21 @@ class Stationary(Kern):
X2 = X X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance tmp1 -= np.eye(X.shape[0])*self.variance
else: else:
#tmp1[X==X2.T] -= self.variance # Old version, to be removed #tmp1[X==X2.T] -= self.variance # Old version, to be removed
# (seems to have a bug: it is subtracted from the first X1 anyway) # (seems to have a bug: it is subtracted from the first X1 anyway)
tmp1[invdist2==0.] -= self.variance tmp1[invdist2==0.] -= self.variance
if cov: # full covariance if cov: # full covariance
grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64) grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
for q in range(self.input_dim): for q in range(self.input_dim):
tmpdist = (X[:,[q]]-X2[:,[q]].T)
for r in range(self.input_dim): for r in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T)*(X[:,[r]]-X2[:,[r]].T) # Introduce temporary distance tmpdist2 = tmpdist*(X[:,[r]]-X2[:,[r]].T) # Introduce temporary distance
if r==q: if r==q:
grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r] - tmp1)/l2[q]) grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r] - tmp1)/l2[q])
else: else:
grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r])/l2[q]) grad[:, :, q, r] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[r])/l2[q])
else: else:
# Diagonal covariance, old code # Diagonal covariance, old code
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64) #grad = np.empty(X.shape, dtype=np.float64)
@ -336,18 +337,18 @@ class Exponential(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return -self.K_of_r(r) return -self.K_of_r(r)
# def sde(self): # def sde(self):
# """ # """
# Return the state space representation of the covariance. # Return the state space representation of the covariance.
# """ # """
# F = np.array([[-1/self.lengthscale]]) # F = np.array([[-1/self.lengthscale]])
# L = np.array([[1]]) # L = np.array([[1]])
# Qc = np.array([[2*self.variance/self.lengthscale]]) # Qc = np.array([[2*self.variance/self.lengthscale]])
# H = np.array([[1]]) # H = np.array([[1]])
# Pinf = np.array([[self.variance]]) # Pinf = np.array([[self.variance]])
# # TODO: return the derivatives as well # # TODO: return the derivatives as well
# #
# return (F, L, Qc, H, Pinf) # return (F, L, Qc, H, Pinf)
@ -416,41 +417,41 @@ class Matern32(Stationary):
F1lower = np.array([f(lower) for f in F1])[:, None] F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
def sde(self): def sde(self):
""" """
Return the state space representation of the covariance. Return the state space representation of the covariance.
""" """
variance = float(self.variance.values) variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values) lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]]) F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]]) L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]]) Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]]) H = np.array([[1, 0]])
Pinf = np.array([[variance, 0], Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]]) [0, 3.*variance/(lengthscale**2)]])
# Allocate space for the derivatives # Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2]) dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2]) dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives # The partial derivatives
dFvariance = np.zeros([2,2]) dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0], dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]]) [6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3]) dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance]) dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]]) dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0], dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]]) [0,-6*variance/lengthscale**3]])
# Combine the derivatives # Combine the derivatives
dF[:,:,0] = dFvariance dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale dPinf[:,:,1] = dPinflengthscale
return (F, L, Qc, H, Pinf, dF, dQc, dPinf) return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
class Matern52(Stationary): class Matern52(Stationary):
""" """