psi_stat slices for kernels

commit 54239555a1 (parent dfb63860ca)
5 changed files with 74 additions and 36 deletions
@@ -17,6 +17,11 @@ class Add(CombinationKernel):
     @Cache_this(limit=2, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
+        """
+        Add all kernels together.
+        If a list of parts (of this kernel!) `which_parts` is given, only
+        the parts of the list are taken to compute the covariance.
+        """
         assert X.shape[1] == self.input_dim
         if which_parts is None:
             which_parts = self.parts
@@ -25,6 +30,22 @@ class Add(CombinationKernel):
             which_parts = [which_parts]
         return reduce(np.add, (p.K(X, X2) for p in which_parts))
 
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def Kdiag(self, X, which_parts=None):
+        assert X.shape[1] == self.input_dim
+        if which_parts is None:
+            which_parts = self.parts
+        elif not isinstance(which_parts, (list, tuple)):
+            # if only one part is given
+            which_parts = [which_parts]
+        return reduce(np.add, (p.Kdiag(X) for p in which_parts))
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+
+    def update_gradients_diag(self, dL_dK, X):
+        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
+
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -36,18 +57,9 @@ class Add(CombinationKernel):
         :type X2: np.ndarray (num_inducing x input_dim)"""
 
         target = np.zeros(X.shape)
-        for p in self.parts:
-            target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
+        [target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts]
         return target
 
-    @Cache_this(limit=2, force_kwargs=['which_parts'])
-    def Kdiag(self, X, which_parts=None):
-        assert X.shape[1] == self.input_dim
-        if which_parts is None:
-            which_parts = self.parts
-        return sum([p.Kdiag(X) for p in which_parts])
-
     def psi0(self, Z, variational_posterior):
         return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
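Background: the psi statistics handled here are kernel expectations under the variational posterior q(X) = prod_n N(x_n | mu_n, S_n): psi0 = sum_n E[k(x_n, x_n)], psi1[n, m] = E[k(x_n, z_m)], psi2[n, m, m'] = E[k(x_n, z_m) k(x_n, z_m')]. A minimal Monte-Carlo sanity check of psi1 for a 1-D RBF with unit variance and lengthscale (a sketch with toy values, not part of this commit):

import numpy as np

rng = np.random.RandomState(0)
mu, S = np.zeros((1, 1)), np.ones((1, 1))     # one latent point, Q=1
Z = np.array([[0.5]])                         # one inducing input
samples = rng.randn(100000, 1) * np.sqrt(S) + mu
k = lambda x, z: np.exp(-0.5 * (x - z) ** 2)  # RBF with variance = lengthscale = 1
psi1_mc = k(samples, Z.T).mean(axis=0)
# closed form for the 1-D unit RBF: sqrt(1/(S+1)) * exp(-(mu-z)^2 / (2(S+1)))
psi1_exact = np.sqrt(1.0 / (S + 1)) * np.exp(-(mu - Z.T) ** 2 / (2 * (S + 1)))
assert np.allclose(psi1_mc, psi1_exact, atol=1e-2)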
@@ -56,7 +68,7 @@ class Add(CombinationKernel):
 
     def psi2(self, Z, variational_posterior):
         psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
-        return psi2
+        #return psi2
         # compute the "cross" terms
         from static import White, Bias
         from rbf import RBF
@@ -65,23 +77,24 @@ class Add(CombinationKernel):
         #from fixed import Fixed
 
         for p1, p2 in itertools.combinations(self.parts, 2):
-            i1, i2 = p1.active_dims, p2.active_dims
+            # i1, i2 = p1.active_dims, p2.active_dims
             # white doesn't combine with anything
             if isinstance(p1, White) or isinstance(p2, White):
                 pass
             # rbf X bias
             #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
             elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p2.psi1(Z[:,i2], variational_posterior[:, i1])
+                tmp = p2.psi1(Z, variational_posterior)
                 psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
             #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
             elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p1.psi1(Z[:,i1], variational_posterior[:, i2])
+                tmp = p1.psi1(Z, variational_posterior)
                 psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
+                assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
+                tmp1 = p1.psi1(Z, variational_posterior)
+                tmp2 = p2.psi1(Z, variational_posterior)
+                psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return psi2
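Background: for a sum kernel, E[k(Z, x) k(x, Z')] expands into each part's psi2 plus pairwise cross terms; when two parts act on disjoint dimensions the cross expectation factorises into a product of their psi1 statistics, which is what the loop above accumulates. A shape-level sketch with toy arrays, not part of this commit:

import numpy as np

N, M = 4, 3                  # data points, inducing points
rng = np.random.RandomState(0)
tmp1 = rng.rand(N, M)        # stands in for p1.psi1(Z, variational_posterior)
tmp2 = rng.rand(N, M)        # stands in for p2.psi1(Z, variational_posterior)

# the two cross terms, symmetrised over the two inducing-input axes:
cross = tmp1[:, :, None] * tmp2[:, None, :] + tmp2[:, :, None] * tmp1[:, None, :]
assert cross.shape == (N, M, M)  # one M x M block per data point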
@@ -98,7 +111,7 @@ class Add(CombinationKernel):
                     continue
                 elif isinstance(p2, Bias):
                     eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-                else:
+                else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
                     eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
             p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@@ -114,9 +127,9 @@ class Add(CombinationKernel):
             if isinstance(p2, White):
                 continue
             elif isinstance(p2, Bias):
-                eff_dL_dpsi1 += 0#dL_dpsi2.sum(1) * p2.variance * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
             else:
-                eff_dL_dpsi1 += 0#dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
             target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         return target
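Background: the factor of 2 in the effective dL_dpsi1 terms comes from psi2's symmetry in its two inducing-input axes; each cross term tmp[:, :, None] + tmp[:, None, :] contributes twice when contracted with a symmetric dL_dpsi2. A toy check, not from the commit:

import numpy as np

N, M = 3, 4
rng = np.random.RandomState(1)
dL_dpsi2 = rng.rand(N, M, M)
dL_dpsi2 = dL_dpsi2 + dL_dpsi2.transpose(0, 2, 1)  # symmetric in the Z axes
# d/dtmp of sum(dL_dpsi2 * (tmp[:,:,None] + tmp[:,None,:])) is
# dL_dpsi2.sum(axis=2) + dL_dpsi2.sum(axis=1), i.e. 2 * sum(1) for symmetric dL_dpsi2:
grad = dL_dpsi2.sum(axis=2) + dL_dpsi2.sum(axis=1)
assert np.allclose(grad, dL_dpsi2.sum(1) * 2.)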
@@ -15,6 +15,7 @@ class Kern(Parameterized):
     # found in kernel_slice_operations
     __metaclass__ = KernCallsViaSlicerMeta
     #===========================================================================
+    _debug=False
     def __init__(self, input_dim, name, *a, **kw):
         """
         The base class for a kernel: a positive definite function

@@ -27,12 +28,12 @@ class Kern(Parameterized):
         """
         super(Kern, self).__init__(name=name, *a, **kw)
         if isinstance(input_dim, int):
-            self.active_dims = slice(0, input_dim)
+            self.active_dims = np.r_[0:input_dim]
             self.input_dim = input_dim
         else:
-            self.active_dims = input_dim
+            self.active_dims = np.r_[input_dim]
             self.input_dim = len(self.active_dims)
-        self._sliced_X = False
+        self._sliced_X = 0
 
     @Cache_this(limit=10)#, ignore_args = (0,))
     def _slice_X(self, X):
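Background on the active_dims change above: an index array built with np.r_ selects the same columns as the old slice object, but unlike a slice it is a real array that supports set operations and reordering (used by CombinationKernel below). A small sketch under those assumptions, not part of this commit:

import numpy as np

X = np.arange(12.).reshape(3, 4)
assert np.array_equal(X[:, slice(0, 2)], X[:, np.r_[0:2]])  # same columns either way
# only the array form can be manipulated as data, e.g. reversed:
assert np.array_equal(np.r_[0:2][::-1], np.array([1, 0]))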
@@ -60,14 +61,13 @@ class Kern(Parameterized):
         raise NotImplementedError
-
     def gradients_X_diag(self, dL_dKdiag, X):
         raise NotImplementedError
 
     def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
     def update_gradients_diag(self, dL_dKdiag, X):
         """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
         raise NotImplementedError
 
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         """
         Set the gradients of all parameters when doing inference with
@@ -188,7 +188,7 @@ class Kern(Parameterized):
 class CombinationKernel(Kern):
     def __init__(self, kernels, name):
         assert all([isinstance(k, Kern) for k in kernels])
-        input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
+        input_dim = reduce(np.union1d, (x.active_dims for x in kernels))
         super(CombinationKernel, self).__init__(input_dim, name)
         self.add_parameters(*kernels)

@@ -196,12 +196,6 @@ class CombinationKernel(Kern):
     def parts(self):
         return self._parameters_
 
-    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
-
-    def update_gradients_diag(self, dL_dK, X):
-        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
-
     def input_sensitivity(self):
         in_sen = np.zeros((self.num_params, self.input_dim))
         for i, p in enumerate(self.parts):
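The simplified reduce above works because Kern.__init__ now normalises active_dims to a plain index array, so the joint input dimensions of a combination kernel are just their union. A sketch, not from the commit:

import numpy as np
from functools import reduce

active_dims = [np.r_[0:2], np.r_[1:4], np.r_[5]]  # three parts' active dimensions
input_dim = reduce(np.union1d, active_dims)
assert np.array_equal(input_dim, np.array([0, 1, 2, 3, 5]))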
@@ -147,7 +147,6 @@ class Linear(Kern):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         mu2S = np.square(mu)+S
-
         _dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
         grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
                np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)

@@ -175,7 +174,7 @@ class Linear(Kern):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         _, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
 
         grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
                np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
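For reference, the einsum signatures above contract over data points n (and inducing points m) to leave one gradient entry per input dimension q. A toy equivalence check of the 'n,nq,nq->q' contraction, a sketch not part of this commit:

import numpy as np

N, Q = 5, 2
rng = np.random.RandomState(2)
dL_dpsi0, gamma, mu2S = rng.rand(N), rng.rand(N, Q), rng.rand(N, Q)

fast = np.einsum('n,nq,nq->q', dL_dpsi0, gamma, mu2S)
slow = (dL_dpsi0[:, None] * gamma * mu2S).sum(axis=0)  # the same contraction, spelled out
assert np.allclose(fast, slow)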
@@ -19,7 +19,6 @@ class RBF(Stationary):
       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
 
     """
-
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
         self.weave_options = {}

@@ -81,6 +80,8 @@ class RBF(Stationary):
 
         #contributions from psi0:
         self.variance.gradient = np.sum(dL_dpsi0)
+        if self._debug:
+            num_grad = self.lengthscale.gradient.copy()
         self.lengthscale.gradient = 0.
 
         #from psi1

@@ -100,6 +101,8 @@ class RBF(Stationary):
             else:
                 self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
 
+            if self._debug:
+                import ipdb;ipdb.set_trace()
             self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
 
         else:

@@ -150,6 +153,7 @@ class RBF(Stationary):
         grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
         grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
         grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
 
         #psi2
         grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
         grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
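The new _debug branches stash the analytic lengthscale gradient so it can be compared interactively (via ipdb) against a numerical estimate. A generic version of that comparison, with toy stand-ins for the objective and its gradient (a sketch, not part of this commit):

import numpy as np

def finite_diff(f, x, eps=1e-6):
    # central-difference approximation of df/dx at a scalar x
    return (f(x + eps) - f(x - eps)) / (2 * eps)

f = lambda l: np.exp(-0.5 / l**2)                      # toy objective in the lengthscale l
analytic_grad = lambda l: np.exp(-0.5 / l**2) / l**3   # its exact derivative

l = 1.7
assert np.isclose(finite_diff(f, l), analytic_grad(l), rtol=1e-4)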
@@ -89,3 +89,31 @@ class Bias(Static):
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
+
+
+class Fixed(Static):
+    def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'):
+        """
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        super(Fixed, self).__init__(input_dim, variance, name)
+        self.fixed_K = covariance_matrix
+    def K(self, X, X2):
+        return self.variance * self.fixed_K
+
+    def Kdiag(self, X):
+        return self.variance * np.diag(self.fixed_K)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diag(self.fixed_K))
+
+    def psi2(self, Z, variational_posterior):
+        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum()
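A hedged usage sketch for the Fixed kernel added above; the constructor call mirrors the diff, everything else (array sizes, the final check) is illustrative only:

import numpy as np

K_pre = np.eye(10)                       # a precomputed 10 x 10 covariance
k = Fixed(1, covariance_matrix=K_pre)    # input_dim is nominal: K ignores X
X = np.random.rand(10, 1)
assert np.allclose(k.K(X, None), k.variance * K_pre)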