Merge pull request #374 from SheffieldML/gradientsxx

Changes in kern.gradients_XX
This commit is contained in:
Max Zwiessele 2016-06-09 12:56:39 +02:00
commit 67444e22a5
13 changed files with 379 additions and 213 deletions

View file

@ -337,8 +337,7 @@ class GP(Model):
dv_dX += kern.gradients_X(alpha, Xnew, self._predictive_variable)
return mean_jac, dv_dX
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
def predict_jacobian(self, Xnew, kern=None, full_cov=False):
"""
Compute the derivatives of the posterior of the GP.
@ -356,15 +355,11 @@ class GP(Model):
:param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the full covariance of the jacobian.
:param boolean full_cov: whether to return the cross-covariance terms between
the N* Jacobian vectors
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
Note: We always return sum in input_dim gradients, as the off-diagonals
in the input_dim are not needed for further calculations.
This is a compromise for increase in speed. Mathematically the jacobian would
have another dimension in Q.
"""
if kern is None:
kern = self.kern
@ -383,24 +378,26 @@ class GP(Model):
dK2_dXdX = kern.gradients_XX(one, Xnew)
else:
dK2_dXdX = kern.gradients_XX_diag(one, Xnew)
#dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
#for i in range(Xnew.shape[0]):
# dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
def compute_cov_inner(wi):
if full_cov:
# full covariance gradients:
var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
var_jac = dK2_dXdX - np.einsum('qnm,msr->nsqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full) # n,s = Xnew.shape[0], m = pred_var.shape[0]
else:
var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
var_jac = dK2_dXdX - np.einsum('qnm,mnr->nqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
return var_jac
if self.posterior.woodbury_inv.ndim == 3: # Missing data:
if full_cov:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac
@ -424,10 +421,11 @@ class GP(Model):
mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
Sigma = np.zeros(mumuT.shape)
if var_jac.ndim == 3:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
if var_jac.ndim == 4: # Missing data
Sigma = var_jac.sum(-1)
else:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
Sigma = self.output_dim*var_jac
G = 0.
if mean:
G += mumuT
@ -451,7 +449,7 @@ class GP(Model):
:param bool covariance: whether to include the covariance of the wishart embedding.
:param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
"""
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
G = self.predict_wishart_embedding(Xnew, kern, mean, covariance)
if dimensions is None:
dimensions = self.get_most_significant_input_dimensions()[:2]
G = G[:, dimensions][:,:,dimensions]

View file

@ -87,14 +87,19 @@ class Add(CombinationKernel):
def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
#else: # diagonal covariance
# if X2 is None:
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
# else:
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_XX_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
target = np.zeros(X.shape+(X.shape[1],))
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target
@ -182,7 +187,7 @@ class Add(CombinationKernel):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
from .static import White, Bias
for p1 in self.parts:
@ -194,9 +199,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += tmp * p2.variance
eff_dL_dpsi1 += tmp * p2.variance
else:# np.setdiff1d(p1._all_dims_active, ar2, assume_unique): # TODO: Careful, not correct for overlapping _all_dims_active
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -213,7 +218,7 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += tmp * p2.variance
eff_dL_dpsi1 += tmp * p2.variance
else:
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@ -221,7 +226,7 @@ class Add(CombinationKernel):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
from .static import White, Bias
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
@ -234,9 +239,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += tmp * p2.variance
eff_dL_dpsi1 += tmp * p2.variance
else:
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads
@ -249,7 +254,7 @@ class Add(CombinationKernel):
# other.unlink_parameter(p)
# parts.extend(other.parts)
# #self.link_parameters(*other_params)
#
#
# else:
# #self.link_parameter(other)
# parts.append(other)
@ -265,7 +270,7 @@ class Add(CombinationKernel):
else:
return super(Add, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
@ -277,12 +282,12 @@ class Add(CombinationKernel):
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
Support adding kernels for sde representation
"""
import scipy.linalg as la
F = None
@ -306,51 +311,51 @@ class Add(CombinationKernel):
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
'constant', constant_values=0)
dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
else:
dF = dFt
if dQc is not None:
dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
'constant', constant_values=0)
dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
else:
dQc = dQct
if dPinf is not None:
dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
'constant', constant_values=0)
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)

View file

@ -10,10 +10,10 @@ class Integral(Kern): #todo do I need to inherit from Stationary
"""
Integral kernel between...
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Integral, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
@ -22,7 +22,7 @@ class Integral(Kern): #todo do I need to inherit from Stationary
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
@ -36,13 +36,13 @@ class Integral(Kern): #todo do I need to inherit from Stationary
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],self.lengthscale[0]) #TODO Multiple length scales
dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
self.variances.gradient = np.sum(dK_dv * dL_dK)
#print "V%0.5f" % self.variances.gradient
#print "L%0.5f" % self.lengthscale.gradient
else: #we're finding dK_xf/Dtheta
print "NEED TO HANDLE TODO!"
else: #we're finding dK_xf/Dtheta
print("NEED TO HANDLE TODO!")
#useful little function to help calculate the covariances.
def g(self,z):
@ -52,15 +52,15 @@ class Integral(Kern): #todo do I need to inherit from Stationary
def k_xx(self,t,tprime,l):
return 0.5 * (l**2) * ( self.g(t/l) - self.g((t - tprime)/l) + self.g(tprime/l) - 1)
def k_ff(self,t,tprime,l):
def k_ff(self,t,tprime,l):
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
#covariance between the gradient and the actual value
def k_xf(self,t,tprime,l):
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf(tprime/l))
def K(self, X, X2=None):
if X2 is None:
if X2 is None:
K_xx = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
@ -73,7 +73,7 @@ class Integral(Kern): #todo do I need to inherit from Stationary
K_xf[i,j] = self.k_xf(x[0],x2[0],self.lengthscale[0])
#print self.variances[0]
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!

View file

@ -10,10 +10,10 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
"""
Integral kernel, can include limits on each integral value.
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Integral_Limits, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
@ -22,27 +22,27 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
def update_gradients_full(self, dL_dK, X, X2=None):
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl = np.zeros([X.shape[0],X.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
self.variances.gradient = np.sum(dK_dv * dL_dK)
#print "V%0.5f" % self.variances.gradient
#print "L%0.5f" % self.lengthscale.gradient
else: #we're finding dK_xf/Dtheta
print "NEED TO HANDLE TODO!"
else: #we're finding dK_xf/Dtheta
print("NEED TO HANDLE TODO!")
#useful little function to help calculate the covariances.
def g(self,z):
@ -50,28 +50,28 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
def k_xx(self,t,tprime,s,sprime,l):
"""Covariance between observed values.
s and t are one domain of the integral (i.e. the integral between s and t)
sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
We're interested in how correlated these two integrals are.
Note: We've not multiplied by the variance, this is done in K."""
return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
def k_ff(self,t,tprime,l):
def k_ff(self,t,tprime,l):
"""Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
def k_xf(self,t,tprime,s,l):
"""Covariance between the gradient (latent value) and the actual (observed) value.
Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
def K(self, X, X2=None):
if X2 is None:
if X2 is None:
K_xx = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
@ -83,7 +83,7 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
for j,x2 in enumerate(X2):
K_xf[i,j] = self.k_xf(x[0],x2[0],x[1],self.lengthscale[0]) #x2[1] unused, see k_xf docstring for explanation.
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!

View file

@ -15,10 +15,10 @@ class Kern(Parameterized):
# This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations
# __meataclass__ is ignored in Python 3 - needs to be put in the function definiton
#__metaclass__ = KernCallsViaSlicerMeta
#Here, we use the Python module six to support Py3 and Py2 simultaneously
# __metaclass__ = KernCallsViaSlicerMeta
# Here, we use the Python module six to support Py3 and Py2 simultaneously
#===========================================================================
_support_GPU=False
_support_GPU = False
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
"""
The base class for a kernel: a positive definite function
@ -62,7 +62,7 @@ class Kern(Parameterized):
self.psicomp = PSICOMP_GH()
def __setstate__(self, state):
self._all_dims_active = np.arange(0, max(state['active_dims'])+1)
self._all_dims_active = np.arange(0, max(state['active_dims']) + 1)
super(Kern, self).__setstate__(state)
@property
@ -132,18 +132,18 @@ class Kern(Parameterized):
raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2):
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
def gradients_XX(self, dL_dK, X, X2):
def gradients_XX(self, dL_dK, X, X2, cov=True):
"""
.. math::
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
"""
raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X):
raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
"""
The diagonal of the second derivative w.r.t. X and X2
"""
raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
raise NotImplementedError("This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_X_diag(self, dL_dKdiag, X):
"""
The diagonal of the derivative w.r.t. X
@ -292,11 +292,11 @@ class Kern(Parameterized):
"""
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
from .prod import Prod
#kernels = []
#if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other.parameters)
#else: kernels.append(other)
# kernels = []
# if isinstance(self, Prod): kernels.extend(self.parameters)
# else: kernels.append(self)
# if isinstance(other, Prod): kernels.extend(other.parameters)
# else: kernels.append(other)
return Prod([self, other], name)
def _check_input_dim(self, X):

View file

@ -13,7 +13,7 @@ from paramz.parameterized import ParametersChangedMeta
def put_clean(dct, name, func):
if name in dct:
#dct['_clean_{}'.format(name)] = dct[name]
dct['_clean_{}'.format(name)] = dct[name]
dct[name] = func(dct[name])
class KernCallsViaSlicerMeta(ParametersChangedMeta):
@ -25,7 +25,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi)
@ -38,15 +38,16 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object):
def __init__(self, k, X, X2=None, ret_shape=None):
def __init__(self, k, X, X2=None, diag=False, ret_shape=None):
self.k = k
self.diag = diag
if ret_shape is None:
self.shape = X.shape
else:
self.shape = ret_shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
assert X.ndim == 2, "need at least column vectors as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
assert X2.ndim == 2, "need at least column vectors as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
if (self.k._all_dims_active is not None) and (self.k._sliced_X == 0):
self.k._check_active_dims(X)
self.X = self.k._slice_X(X)
@ -67,8 +68,13 @@ class _Slice_wrap(object):
ret = np.zeros(self.shape)
if len(self.shape) == 2:
ret[:, self.k._all_dims_active] = return_val
elif len(self.shape) == 3:
ret[:, :, self.k._all_dims_active] = return_val
elif len(self.shape) == 3: # derivative for X2!=None
if self.diag:
ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
else:
ret[:, :, self.k._all_dims_active] = return_val
elif len(self.shape) == 4: # second order derivative
ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
return ret
return return_val
@ -112,23 +118,34 @@ def _slice_gradients_X(f):
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap
def _slice_gradients_XX(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
if X2 is None:
N, M = X.shape[0], X.shape[0]
Q1 = Q2 = X.shape[1]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
Q1, Q2 = X.shape[1], X2.shape[1]
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
def _slice_gradients_XX_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
N, Q = X.shape
with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap

View file

@ -102,17 +102,39 @@ class Linear(Kern):
return dL_dK.dot(X2)*self.variances #np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
returns the full covariance matrix [QxQ] of the input dimensionfor each pair or vectors, thus
the returned array is of shape [NxNxQxQ].
..math:
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
Thus, we return the second derivative in X2.
"""
if X2 is None:
return 2*np.ones(X.shape)*self.variances
else:
return np.ones(X.shape)*self.variances
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
#if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
#if X2 is None:
# return np.ones(np.repeat(X.shape, 2)) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
#else:
# return np.ones((X.shape[0], X2.shape[0], X.shape[1], X.shape[1])) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
def gradients_XX_diag(self, dL_dKdiag, X):
return 2*np.ones(X.shape)*self.variances
return np.zeros((X.shape[0], X.shape[1], X.shape[1]))
#dims = X.shape
#if cov:
# dims += (X.shape[1],)
#return 2*np.ones(dims)*self.variances
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim) * self.variances

View file

@ -10,10 +10,10 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
"""
Integral kernel, can include limits on each integral value.
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Multidimensional_Integral_Limits, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
@ -22,38 +22,38 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
def update_gradients_full(self, dL_dK, X, X2=None):
def update_gradients_full(self, dL_dK, X, X2=None):
#print self.variances
if X2 is None: #we're finding dK_xx/dTheta
dK_dl_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dl = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dl = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for il,l in enumerate(self.lengthscale):
idx = il*2
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl_term[i,j,il] = self.dk_dl(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
k_term[i,j,il] = self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
for il,l in enumerate(self.lengthscale):
k_term[i,j,il] = self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
for il,l in enumerate(self.lengthscale):
dK_dl = self.variances[0] * dK_dl_term[:,:,il]
for jl, l in enumerate(self.lengthscale):
if jl!=il:
dK_dl *= k_term[:,:,jl]
#dK_dl = np.dot(dK_dl,k_term[:,:,il])
#print k_term[:,:,il]
self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK)
dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx.
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
print "NEED TO HANDLE TODO!"
self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK)
dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx.
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
print("NEED TO HANDLE TODO!")
#print self.variances[0],self.lengthscale[0],self.lengthscale[1] #np.sum(dK_dv*dL_dK)
@ -63,38 +63,38 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
def k_xx(self,t,tprime,s,sprime,l):
"""Covariance between observed values.
s and t are one domain of the integral (i.e. the integral between s and t)
sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
We're interested in how correlated these two integrals are.
Note: We've not multiplied by the variance, this is done in K."""
return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
def k_ff(self,t,tprime,l):
def k_ff(self,t,tprime,l):
"""Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
def k_xf(self,t,tprime,s,l):
"""Covariance between the gradient (latent value) and the actual (observed) value.
Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
def calc_K_xx_wo_variance(self,X):
"""Calculates K_xx without the variance term"""
K_xx = np.ones([X.shape[0],X.shape[0]]) #ones now as a product occurs over each dimension
K_xx = np.ones([X.shape[0],X.shape[0]]) #ones now as a product occurs over each dimension
for i,x in enumerate(X):
for j,x2 in enumerate(X):
for il,l in enumerate(self.lengthscale):
idx = il*2 #each pair of input dimensions describe the limits on one actual dimension in the data
K_xx[i,j] *= self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
return K_xx
def K(self, X, X2=None):
if X2 is None:
if X2 is None:
#print "X x X"
K_xx = self.calc_K_xx_wo_variance(X)
return K_xx * self.variances[0]
@ -107,7 +107,7 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
idx = il*2
K_xf[i,j] *= self.k_xf(x[idx],x2[idx],x[idx+1],l)
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!

View file

@ -39,6 +39,8 @@ class RBF(Stationary):
def dK2_drdr(self, r):
return (r**2-1)*self.K_of_r(r)
def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:

View file

@ -6,6 +6,7 @@ from .kern import Kern
import numpy as np
from ...core.parameterization import Param
from paramz.transformations import Logexp
from paramz.caching import Cache_this
class Static(Kern):
def __init__(self, input_dim, variance, active_dims, name):
@ -24,12 +25,13 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_XX(self, dL_dK, X, X2):
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X, cov=False):
return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
@ -172,13 +174,19 @@ class Fixed(Static):
super(Fixed, self).__init__(input_dim, variance, active_dims, name)
self.fixed_K = covariance_matrix
def K(self, X, X2):
return self.variance * self.fixed_K
if X2 is None:
return self.variance * self.fixed_K
else:
return np.zeros((X.shape[0], X2.shape[0]))
def Kdiag(self, X):
return self.variance * self.fixed_K.diagonal()
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
if X2 is None:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
else:
self.variance.gradient = 0
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))
@ -224,21 +232,26 @@ class Precomputed(Fixed):
:param variance: the variance of the kernel
:type variance: float
"""
assert input_dim==1, "Precomputed only implemented in one dimension. Use multiple Precomputed kernels to have more dimensions by making use of active_dims"
super(Precomputed, self).__init__(input_dim, covariance_matrix, variance, active_dims, name)
def K(self, X, X2=None):
@Cache_this(limit=2)
def _index(self, X, X2):
if X2 is None:
return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')]
i1 = i2 = X.astype('int').flat
else:
return self.variance * self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')]
i1, i2 = X.astype('int').flat, X2.astype('int').flat
return self.fixed_K[i1,:][:,i2]
def K(self, X, X2=None):
return self.variance * self._index(X, X2)
def Kdiag(self, X):
return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')].diagonal()
return self.variance * self._index(X,None).diagonal()
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
else:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')])
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._index(X, X2))
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))

View file

@ -85,6 +85,11 @@ class Stationary(Kern):
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=3, ignore_args=())
def dK2_drdr_diag(self):
"Second order derivative of K in r_{i,i}. The diagonal entries are always zero, so we do not give it here."
raise NotImplementedError("implement second derivative of covariance wrt r_diag to use this method")
@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None):
"""
@ -222,54 +227,57 @@ class Stationary(Kern):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
returns the full covariance matrix [QxQ] of the input dimensionfor each pair or vectors, thus
the returned array is of shape [NxNxQxQ].
..math:
\frac{\partial^2 K}{\partial X\partial X2}
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
Thus, we return the second derivative in X2.
dL2_dXdX2: [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case,
for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
Thus, we return the second derivative in X2.
"""
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
# According to multivariable chain rule, we can chain the second derivative through r:
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
invdist = self._inv_dist(X, X2)
invdist2 = invdist**2
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
dL_dr = self.dK_dr_via_X(X, X2) #* dL_dK # we perform this product later
tmp1 = dL_dr * invdist
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
tmp2 = dL_drdr * invdist2
l2 = np.ones(X.shape[1]) * self.lengthscale**2
dL_drdr = self.dK2_drdr_via_X(X, X2) #* dL_dK # we perofrm this product later
tmp2 = dL_drdr*invdist2
l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)
if X2 is None:
X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance
else:
tmp1[X==X2.T] -= self.variance
tmp1[invdist2==0.] -= self.variance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
#grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
dist = X[:,None,:] - X2[None,:,:]
dist = (dist[:,:,:,None]*dist[:,:,None,:])
I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
grad = (((dL_dK*(tmp1*invdist2 - tmp2))[:,:,None,None] * dist)/l2[None,None,:,None]
- (dL_dK*tmp1)[:,:,None,None] * I)/l2[None,None,None,:]
return grad
def gradients_XX_diag(self, dL_dK, X):
def gradients_XX_diag(self, dL_dK_diag, X):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
\frac{\partial^2 K}{\partial X\partial X}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ]
dL2_dXdX: [NxQxQ]
"""
return np.ones(X.shape) * self.variance/self.lengthscale**2
dL_dK_diag = dL_dK_diag.copy().reshape(-1, 1, 1)
assert (dL_dK_diag.size == X.shape[0]) or (dL_dK_diag.size == 1), "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
l4 = np.ones(X.shape[1])*self.lengthscale**2
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
@ -320,18 +328,18 @@ class Exponential(Stationary):
def dK_dr(self, r):
return -self.K_of_r(r)
# def sde(self):
# """
# Return the state space representation of the covariance.
# """
# F = np.array([[-1/self.lengthscale]])
# L = np.array([[1]])
# Qc = np.array([[2*self.variance/self.lengthscale]])
# H = np.array([[1]])
# Pinf = np.array([[self.variance]])
# # TODO: return the derivatives as well
#
# return (F, L, Qc, H, Pinf)
# def sde(self):
# """
# Return the state space representation of the covariance.
# """
# F = np.array([[-1/self.lengthscale]])
# L = np.array([[1]])
# Qc = np.array([[2*self.variance/self.lengthscale]])
# H = np.array([[1]])
# Pinf = np.array([[self.variance]])
# # TODO: return the derivatives as well
#
# return (F, L, Qc, H, Pinf)
@ -400,41 +408,41 @@ class Matern32(Stationary):
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
def sde(self):
"""
Return the state space representation of the covariance.
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
# Allocate space for the derivatives
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
class Matern52(Stationary):
"""

View file

@ -50,6 +50,8 @@ def inject_plotting():
GP.plot_samples = gpy_plot.gp_plots.plot_samples
GP.plot = gpy_plot.gp_plots.plot
GP.plot_f = gpy_plot.gp_plots.plot_f
GP.plot_latent = gpy_plot.gp_plots.plot_f
GP.plot_noiseless = gpy_plot.gp_plots.plot_f
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
from ..models import StateSpace
@ -62,7 +64,9 @@ def inject_plotting():
StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
StateSpace.plot = gpy_plot.gp_plots.plot
StateSpace.plot_f = gpy_plot.gp_plots.plot_f
StateSpace.plot_latent = gpy_plot.gp_plots.plot_f
StateSpace.plot_noiseless = gpy_plot.gp_plots.plot_f
from ..core import SparseGP
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing

View file

@ -104,7 +104,45 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def parameters_changed(self):
self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
class Kern_check_d2K_dXdX(Kern_check_model):
"""This class allows gradient checks for the secondderivative of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.X = Param('X',X.copy())
self.link_parameter(self.X)
self.Xc = X.copy()
def log_likelihood(self):
if self.X2 is None:
return self.kernel.gradients_X(self.dL_dK, self.X, self.Xc).sum()
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum()
def parameters_changed(self):
#if self.kernel.name == 'rbf':
# import ipdb;ipdb.set_trace()
if self.X2 is None:
grads = -self.kernel.gradients_XX(self.dL_dK, self.X).sum(1).sum(1)
else:
grads = -self.kernel.gradients_XX(self.dL_dK.T, self.X2, self.X).sum(0).sum(1)
self.X.gradient[:] = grads
class Kern_check_d2Kdiag_dXdX(Kern_check_model):
"""This class allows gradient checks for the second derivative of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X)
self.X = Param('X',X)
self.link_parameter(self.X)
self.Xc = X.copy()
def log_likelihood(self):
l = 0.
for i in range(self.X.shape[0]):
l += self.kernel.gradients_X(self.dL_dK[[i],[i]], self.X[[i]], self.Xc[[i]]).sum()
return l
def parameters_changed(self):
grads = -self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
self.X.gradient[:] = grads.sum(-1)
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
"""
@ -242,6 +280,66 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
assert(result)
return False
if verbose:
print("Checking gradients of dK(X, X2) wrt X2 with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dK(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
if verbose:
print("Checking gradients of dK(X, X) wrt X with full cov in dimensions")
try:
testmodel = Kern_check_d2K_dXdX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dK(X, X) wrt X with full cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
if verbose:
print("Checking gradients of dKdiag(X, X) wrt X with cov in dimensions")
try:
testmodel = Kern_check_d2Kdiag_dXdX(kern, X=X)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("gradients_X not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
print(("Gradient of dKdiag(X, X) wrt X with cov in dimensions failed for " + kern.name + " covariance function. Gradient values as follows:"))
testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False
return False
return pass_checks
@ -249,8 +347,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
class KernelGradientTestsContinuous(unittest.TestCase):
def setUp(self):
self.N, self.D = 10, 5
self.X = np.random.randn(self.N,self.D)
self.X2 = np.random.randn(self.N+10,self.D)
self.X = np.random.randn(self.N,self.D+1)
self.X2 = np.random.randn(self.N+10,self.D+1)
continuous_kerns = ['RBF', 'Linear']
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
@ -299,7 +397,7 @@ class KernelGradientTestsContinuous(unittest.TestCase):
def test_Add_dims(self):
k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertRaises(IndexError, k.K, self.X)
self.assertRaises(IndexError, k.K, self.X[:, :self.D])
k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
# assert it runs:
@ -314,7 +412,7 @@ class KernelGradientTestsContinuous(unittest.TestCase):
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_RBF(self):
k = GPy.kern.RBF(self.D, ARD=True)
k = GPy.kern.RBF(self.D-1, ARD=True)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
@ -329,9 +427,8 @@ class KernelGradientTestsContinuous(unittest.TestCase):
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Fixed(self):
Xall = np.concatenate([self.X, self.X])
cov = np.dot(Xall, Xall.T)
X = np.arange(self.N).reshape(1,self.N)
cov = np.dot(self.X, self.X.T)
X = np.arange(self.N).reshape(self.N, 1)
k = GPy.kern.Fixed(1, cov)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=None, verbose=verbose))
@ -354,11 +451,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
def test_Precomputed(self):
Xall = np.concatenate([self.X, self.X2])
cov = np.dot(Xall, Xall.T)
X = np.arange(self.N).reshape(1,self.N)
X2 = np.arange(self.N,2*self.N+10).reshape(1,self.N+10)
X = np.arange(self.N).reshape(self.N, 1)
X2 = np.arange(self.N,2*self.N+10).reshape(self.N+10, 1)
k = GPy.kern.Precomputed(1, cov)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=X2, verbose=verbose))
self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=X2, verbose=verbose, fixed_X_dims=[0]))
class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self):