everything is broken

This commit is contained in:
James Hensman 2014-02-20 14:04:16 +00:00
parent de51ad638a
commit d636c8c30c
13 changed files with 325 additions and 323 deletions

View file

@ -34,7 +34,7 @@ class Add(Kern):
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which
handles this as X2 == X.
handLes this as X2 == X.
"""
assert X.shape[1] == self.input_dim
if X2 is None:
@ -48,9 +48,6 @@ class Add(Kern):
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
@ -69,123 +66,125 @@ class Add(Kern):
return target
def Kdiag(self, X):
"""Compute the diagonal of the covariance function for inputs X."""
assert X.shape[1] == self.input_dim
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.size)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices))],0)
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.size))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are num_samples,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
def psi2(self, Z, mu, S):
"""
Computer the psi2 statistics for the covariance function.
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
# compute the "cross" terms
# TODO: input_slices needed
crossterms = 0
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#ffrom fixed import Fixed
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear))):
tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#ffrom fixed import Fixed
target += crossterms
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import white
from rbf import rbf
#from rbf_inv import rbfinv
#from bias import bias
from linear import linear
#ffrom fixed import fixed
target = np.zeros(Z.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, white):
continue
elif isinstance(p2, bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
return target
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""
target = np.zeros(self.size)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import white
from rbf import rbf
#from rbf_inv import rbfinv
#from bias import bias
from linear import linear
#ffrom fixed import fixed
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
p1, p2 = self._parameters_[i1], self._parameters_[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
target_mu = np.zeros(mu.shape)
target_S = np.zeros(S.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, white):
continue
elif isinstance(p2, bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# target *= 2
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
return target * 2
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
target_mu += a
target_S += b
return target_mu, target_S
def plot(self, *args, **kwargs):