psi_stat slices for kernels

commit 54239555a1
parent dfb63860ca
Author: Max Zwiessele
Date: 2014-03-12 12:03:37 +00:00
5 changed files with 74 additions and 36 deletions

File 1 of 5 (class Add):

@@ -17,6 +17,11 @@ class Add(CombinationKernel):
     @Cache_this(limit=2, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
+        """
+        Add all kernels together.
+        If a list of parts (of this kernel!) `which_parts` is given, only
+        those parts are used to compute the covariance.
+        """
         assert X.shape[1] == self.input_dim
         if which_parts is None:
             which_parts = self.parts
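A quick sketch of the behaviour documented above (hypothetical session; the kernel constructors, the `+` operator and the `parts` property are assumed from the surrounding GPy API, not shown in this commit):

import numpy as np
import GPy

k = GPy.kern.RBF(1) + GPy.kern.Bias(1)     # an Add kernel with two parts
X = np.random.randn(10, 1)

K_full = k.K(X)                            # sum over all parts
K_rbf = k.K(X, which_parts=k.parts[0])     # the RBF part alone; a single part is wrapped into a list
assert np.allclose(K_full, K_rbf + k.K(X, which_parts=k.parts[1]))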
@@ -25,6 +30,22 @@ class Add(CombinationKernel):
             which_parts = [which_parts]
         return reduce(np.add, (p.K(X, X2) for p in which_parts))

+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def Kdiag(self, X, which_parts=None):
+        assert X.shape[1] == self.input_dim
+        if which_parts is None:
+            which_parts = self.parts
+        elif not isinstance(which_parts, (list, tuple)):
+            # if only one part is given
+            which_parts = [which_parts]
+        return reduce(np.add, (p.Kdiag(X) for p in which_parts))
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+
+    def update_gradients_diag(self, dL_dK, X):
+        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
+
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -36,18 +57,9 @@ class Add(CombinationKernel):
         :type X2: np.ndarray (num_inducing x input_dim)"""
         target = np.zeros(X.shape)
-        for p in self.parts:
-            target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
+        [target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims] + p.gradients_X(dL_dK, X, X2)) for p in self.parts]
         return target

-    @Cache_this(limit=2, force_kwargs=['which_parts'])
-    def Kdiag(self, X, which_parts=None):
-        assert X.shape[1] == self.input_dim
-        if which_parts is None:
-            which_parts = self.parts
-        return sum([p.Kdiag(X) for p in which_parts])
-
     def psi0(self, Z, variational_posterior):
         return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@@ -56,7 +68,7 @@ class Add(CombinationKernel):
     def psi2(self, Z, variational_posterior):
         psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
-        return psi2
+        #return psi2
         # compute the "cross" terms
         from static import White, Bias
         from rbf import RBF
@@ -65,23 +77,24 @@ class Add(CombinationKernel):
         #from fixed import Fixed
         for p1, p2 in itertools.combinations(self.parts, 2):
-            i1, i2 = p1.active_dims, p2.active_dims
+            # i1, i2 = p1.active_dims, p2.active_dims
             # white doesn't combine with anything
             if isinstance(p1, White) or isinstance(p2, White):
                 pass
             # rbf X bias
             #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
             elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p2.psi1(Z[:, i2], variational_posterior[:, i1])
+                tmp = p2.psi1(Z, variational_posterior)
                 psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
             #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
             elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p1.psi1(Z[:, i1], variational_posterior[:, i2])
+                tmp = p1.psi1(Z, variational_posterior)
                 psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
+                assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non-overlapping kernel dimensions allowed so far"
+                tmp1 = p1.psi1(Z, variational_posterior)
+                tmp2 = p2.psi1(Z, variational_posterior)
+                psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return psi2
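Why these cross terms appear: with the per-point statistics \Psi_1[n,m] = \mathbb{E}_{q(x_n)}[k(x_n, z_m)] and \Psi_2[n,m,m'] = \mathbb{E}_{q(x_n)}[k(x_n, z_m)\,k(x_n, z_{m'})], expanding k = \sum_i k_i gives the identity the loop above implements (a sketch of the standard derivation, not text from the commit):

\Psi_2[n,m,m'] = \sum_i \Psi_{2,i}[n,m,m'] + \sum_{i<j} \left( \mathbb{E}\big[k_i(x_n,z_m)\,k_j(x_n,z_{m'})\big] + \mathbb{E}\big[k_j(x_n,z_m)\,k_i(x_n,z_{m'})\big] \right)

When k_i and k_j act on disjoint active_dims, q(x_n) factorises over those dimensions, so \mathbb{E}[k_i k_j] = \Psi_{1,i}[n,m]\,\Psi_{1,j}[n,m']: that is the tmp1[:, :, None] * tmp2[:, None, :] branch, and the reason for the non-overlap assert. A Bias part is the constant \sigma^2, so its cross term collapses to \sigma^2(\Psi_{1,j}[n,m] + \Psi_{1,j}[n,m']): the variance * (tmp[:, :, None] + tmp[:, None, :]) branches. White contributes no cross term, hence the pass.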
@@ -98,7 +111,7 @@ class Add(CombinationKernel):
                 continue
             elif isinstance(p2, Bias):
                 eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-            else:
+            else:  # np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: careful, not correct for overlapping active_dims
                 eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
         p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@@ -114,9 +127,9 @@ class Add(CombinationKernel):
             if isinstance(p2, White):
                 continue
             elif isinstance(p2, Bias):
-                eff_dL_dpsi1 += 0 #dL_dpsi2.sum(1) * p2.variance * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
             else:
-                eff_dL_dpsi1 += 0 #dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
             target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         return target
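The factor of 2 and the .sum(1) in the Bias branches (including the previously zeroed-out lines re-enabled in the last hunk) can be read off the symmetric cross term above: \Psi_2[n,m_1,m_2] picks up \sigma^2(\Psi_1[n,m_1] + \Psi_1[n,m_2]), so, assuming dL_dpsi2 is symmetric in its last two axes (a sketch, not from the commit):

\frac{\partial L}{\partial \Psi_1[n,m]} \;{+}{=}\; \sigma^2 \sum_{m_1,m_2} \frac{\partial L}{\partial \Psi_2[n,m_1,m_2]} \left( \delta_{m_1 m} + \delta_{m_2 m} \right) \;=\; 2\,\sigma^2 \sum_{m'} \frac{\partial L}{\partial \Psi_2[n,m',m]}

which is exactly eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. in code.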

File 2 of 5 (class Kern and class CombinationKernel):

@@ -15,6 +15,7 @@ class Kern(Parameterized):
     # found in kernel_slice_operations
     __metaclass__ = KernCallsViaSlicerMeta
     #===========================================================================
+    _debug = False
     def __init__(self, input_dim, name, *a, **kw):
         """
         The base class for a kernel: a positive definite function
@ -27,12 +28,12 @@ class Kern(Parameterized):
""" """
super(Kern, self).__init__(name=name, *a, **kw) super(Kern, self).__init__(name=name, *a, **kw)
if isinstance(input_dim, int): if isinstance(input_dim, int):
self.active_dims = slice(0, input_dim) self.active_dims = np.r_[0:input_dim]
self.input_dim = input_dim self.input_dim = input_dim
else: else:
self.active_dims = input_dim self.active_dims = np.r_[input_dim]
self.input_dim = len(self.active_dims) self.input_dim = len(self.active_dims)
self._sliced_X = False self._sliced_X = 0
@Cache_this(limit=10)#, ignore_args = (0,)) @Cache_this(limit=10)#, ignore_args = (0,))
def _slice_X(self, X): def _slice_X(self, X):
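The switch from slice(0, input_dim) to np.r_[0:input_dim] replaces a slice object with a concrete integer index array. A minimal sketch of why that matters for the set operations used elsewhere in this commit (np.union1d in CombinationKernel, np.intersect1d in Add.psi2); plain numpy, nothing GPy-specific:

import numpy as np

a1 = np.r_[0:2]      # array([0, 1]): what an int input_dim now produces
a2 = np.r_[[1, 3]]   # array([1, 3]): an explicit active_dims list

# slice objects support neither union nor intersection; index arrays do:
np.union1d(a1, a2)      # array([0, 1, 3]) -> input_dim of a CombinationKernel
np.intersect1d(a1, a2)  # array([1])       -> overlap test in Add.psi2

# fancy indexing still selects the same columns as the old slice:
X = np.arange(20).reshape(5, 4)
assert np.all(X[:, a1] == X[:, 0:2])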
@ -60,14 +61,13 @@ class Kern(Parameterized):
raise NotImplementedError raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError raise NotImplementedError
def update_gradients_full(self, dL_dK, X, X2): def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference.""" """Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
"""Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters.""" """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
raise NotImplementedError raise NotImplementedError
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
""" """
Set the gradients of all parameters when doing inference with Set the gradients of all parameters when doing inference with
@@ -188,7 +188,7 @@ class Kern(Parameterized):
 class CombinationKernel(Kern):
     def __init__(self, kernels, name):
         assert all([isinstance(k, Kern) for k in kernels])
-        input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
+        input_dim = reduce(np.union1d, (x.active_dims for x in kernels))
         super(CombinationKernel, self).__init__(input_dim, name)
         self.add_parameters(*kernels)
@@ -196,12 +196,6 @@ class CombinationKernel(Kern):
     def parts(self):
         return self._parameters_

-    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
-
-    def update_gradients_diag(self, dL_dK, X):
-        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
-
     def input_sensitivity(self):
         in_sen = np.zeros((self.num_params, self.input_dim))
         for i, p in enumerate(self.parts):

File 3 of 5 (class Linear):

@@ -147,7 +147,6 @@ class Linear(Kern):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         mu2S = np.square(mu)+S
         _dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
         grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
                np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)
@@ -175,7 +174,7 @@ class Linear(Kern):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         _, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
         grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma,self.variances,mu) +\
                np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)

File 4 of 5 (class RBF):

@@ -19,7 +19,6 @@ class RBF(Stationary):
        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
     """
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
         self.weave_options = {}
@ -81,6 +80,8 @@ class RBF(Stationary):
#contributions from psi0: #contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0) self.variance.gradient = np.sum(dL_dpsi0)
if self._debug:
num_grad = self.lengthscale.gradient.copy()
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
#from psi1 #from psi1
@@ -100,6 +101,8 @@ class RBF(Stationary):
             else:
                 self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
+            if self._debug:
+                import ipdb; ipdb.set_trace()
             self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
         else:
@@ -150,6 +153,7 @@ class RBF(Stationary):
         grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
         grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
         grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
+
         #psi2
         grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
         grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)

File 5 of 5 (class Bias, new class Fixed):

@@ -89,3 +89,31 @@ class Bias(Static):
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
+
+class Fixed(Static):
+    def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'):
+        """
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param covariance_matrix: the fixed covariance matrix, scaled by variance
+        :type covariance_matrix: np.ndarray
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        super(Fixed, self).__init__(input_dim, variance, name)
+        self.fixed_K = covariance_matrix
+
+    def K(self, X, X2):
+        return self.variance * self.fixed_K
+
+    def Kdiag(self, X):
+        return self.variance * self.fixed_K.diagonal()
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K.diagonal())
+
+    def psi2(self, Z, variational_posterior):
+        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum()
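A minimal usage sketch of the new Fixed kernel (hypothetical session; assumes Fixed is exported alongside Bias and White, and that a Param-valued variance multiplies arrays like a float):

import numpy as np

N = 20
K_pre = np.eye(N)                       # placeholder for any precomputed PSD matrix
k = Fixed(input_dim=1, covariance_matrix=K_pre, variance=2.)

X = np.zeros((N, 1))                    # the inputs are ignored: the matrix is fixed
assert np.allclose(k.K(X, None), 2. * K_pre)      # variance only rescales
assert np.allclose(k.Kdiag(X), 2. * np.ones(N))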