From 54239555a1a099d423f28458ba8ebc5bb25a33ad Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Wed, 12 Mar 2014 12:03:37 +0000
Subject: [PATCH] psi_stat slices for kernels

---
 GPy/kern/_src/add.py    | 55 +++++++++++++++++++++++++----------------
 GPy/kern/_src/kern.py   | 18 +++-------
 GPy/kern/_src/linear.py |  2 +-
 GPy/kern/_src/rbf.py    |  6 ++++-
 GPy/kern/_src/static.py | 28 +++++++++++++++++++++
 5 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 8a3cefaf..fdebdfac 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -17,6 +17,11 @@ class Add(CombinationKernel):
 
     @Cache_this(limit=2, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
+        """
+        Add all kernels together.
+        If a list `which_parts` of parts (of this kernel!) is given, only
+        those parts are used to compute the covariance.
+        """
         assert X.shape[1] == self.input_dim
         if which_parts is None:
             which_parts = self.parts
@@ -25,6 +30,22 @@ class Add(CombinationKernel):
             which_parts = [which_parts]
         return reduce(np.add, (p.K(X, X2) for p in which_parts))
 
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def Kdiag(self, X, which_parts=None):
+        assert X.shape[1] == self.input_dim
+        if which_parts is None:
+            which_parts = self.parts
+        elif not isinstance(which_parts, (list, tuple)):
+            # if only one part is given
+            which_parts = [which_parts]
+        return reduce(np.add, (p.Kdiag(X) for p in which_parts))
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+
+    def update_gradients_diag(self, dL_dK, X):
+        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
+
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -36,18 +57,9 @@ class Add(CombinationKernel):
         :type X2: np.ndarray (num_inducing x input_dim)"""
         target = np.zeros(X.shape)
-        for p in self.parts:
-            target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
+        [target.__setitem__((Ellipsis, p.active_dims), target[:, p.active_dims] + p.gradients_X(dL_dK, X, X2)) for p in self.parts]
         return target
 
-    @Cache_this(limit=2, force_kwargs=['which_parts'])
-    def Kdiag(self, X, which_parts=None):
-        assert X.shape[1] == self.input_dim
-        if which_parts is None:
-            which_parts = self.parts
-        return sum([p.Kdiag(X) for p in which_parts])
-
-
     def psi0(self, Z, variational_posterior):
         return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
 
 
@@ -56,7 +68,7 @@ class Add(CombinationKernel):
 
     def psi2(self, Z, variational_posterior):
         psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
-        return psi2
+        #return psi2
         # compute the "cross" terms
         from static import White, Bias
         from rbf import RBF
@@ -65,23 +77,24 @@ class Add(CombinationKernel):
         #from fixed import Fixed
 
         for p1, p2 in itertools.combinations(self.parts, 2):
-            i1, i2 = p1.active_dims, p2.active_dims
+            # i1, i2 = p1.active_dims, p2.active_dims
             # White doesn't combine with anything
             if isinstance(p1, White) or isinstance(p2, White):
                 pass
            # rbf X bias
             #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
             elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p2.psi1(Z[:,i2], variational_posterior[:, i1])
+                tmp = p2.psi1(Z, variational_posterior)
                 psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
             #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
             elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p1.psi1(Z[:,i1], variational_posterior[:, i2])
+                tmp = p1.psi1(Z, variational_posterior)
                 psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
+                assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non-overlapping kernel dimensions allowed so far"
+                tmp1 = p1.psi1(Z, variational_posterior)
+                tmp2 = p2.psi1(Z, variational_posterior)
+                psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return psi2
@@ -98,7 +111,7 @@ class Add(CombinationKernel):
                 continue
             elif isinstance(p2, Bias):
                 eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-            else:
+            else: # TODO: careful, not correct for overlapping active_dims
                 eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
         p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
 
@@ -114,9 +127,9 @@ class Add(CombinationKernel):
             if isinstance(p2, White):
                 continue
             elif isinstance(p2, Bias):
-                eff_dL_dpsi1 += 0#dL_dpsi2.sum(1) * p2.variance * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
             else:
-                eff_dL_dpsi1 += 0#dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
             target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         return target
 
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 8feb9a04..8a24e24a 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -15,6 +15,7 @@ class Kern(Parameterized):
     # found in kernel_slice_operations
     __metaclass__ = KernCallsViaSlicerMeta
     #===========================================================================
+    _debug = False
     def __init__(self, input_dim, name, *a, **kw):
         """
         The base class for a kernel: a positive definite function
@@ -27,12 +28,12 @@ class Kern(Parameterized):
         """
         super(Kern, self).__init__(name=name, *a, **kw)
         if isinstance(input_dim, int):
-            self.active_dims = slice(0, input_dim)
+            self.active_dims = np.r_[0:input_dim]
             self.input_dim = input_dim
         else:
-            self.active_dims = input_dim
+            self.active_dims = np.r_[input_dim]
             self.input_dim = len(self.active_dims)
-        self._sliced_X = False
+        self._sliced_X = 0
 
     @Cache_this(limit=10)#, ignore_args = (0,))
     def _slice_X(self, X):
@@ -60,14 +61,13 @@ class Kern(Parameterized):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dKdiag, X):
         raise NotImplementedError
-    
+
     def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
     def update_gradients_diag(self, dL_dKdiag, X):
         """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
         raise NotImplementedError
-
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         """
         Set the gradients of all parameters when doing inference with
@@ -188,7 +188,7 @@ class Kern(Parameterized):
 class CombinationKernel(Kern):
     def __init__(self, kernels, name):
         assert all([isinstance(k, Kern) for k in kernels])
-        input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
+        input_dim = reduce(np.union1d, (x.active_dims for x in kernels))
         super(CombinationKernel, self).__init__(input_dim, name)
         self.add_parameters(*kernels)
 
@@ -196,12 +196,6 @@ class CombinationKernel(Kern):
     def parts(self):
         return self._parameters_
 
-    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
-
-    def update_gradients_diag(self, dL_dK, X):
-        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
-
     def input_sensitivity(self):
         in_sen = np.zeros((self.num_params, self.input_dim))
         for i, p in enumerate(self.parts):
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index 60645d11..f2ac0124 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -175,7 +175,7 @@ class Linear(Kern):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         _, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
-        
+
         grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
                np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
 
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index 7ba1f35d..341d46a7 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -19,7 +19,6 @@ class RBF(Stationary):
 
     k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
 
     """
-
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
         self.weave_options = {}
@@ -81,6 +80,8 @@ class RBF(Stationary):
 
         #contributions from psi0:
         self.variance.gradient = np.sum(dL_dpsi0)
+        if self._debug:
+            num_grad = self.lengthscale.gradient.copy()
         self.lengthscale.gradient = 0.
 
         #from psi1
@@ -100,6 +101,8 @@ class RBF(Stationary):
             else:
                 self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
 
+            if self._debug:
+                import ipdb; ipdb.set_trace()
             self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
 
         else:
@@ -150,6 +153,7 @@ class RBF(Stationary):
         grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
         grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
         grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
+
         #psi2
         grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
         grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py
index f344357c..387c92c6 100644
--- a/GPy/kern/_src/static.py
+++ b/GPy/kern/_src/static.py
@@ -89,3 +89,31 @@ class Bias(Static):
 
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
+class Fixed(Static):
+    def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'):
+        """
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        super(Fixed, self).__init__(input_dim, variance, name)
+        self.fixed_K = covariance_matrix
+    def K(self, X, X2=None):
+        return self.variance * self.fixed_K
+
+    def Kdiag(self, X):
+        return self.variance * np.diag(self.fixed_K)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diag(self.fixed_K))
+
+    def psi2(self, Z, variational_posterior):
+        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum()
+
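-- 
Reviewer note (kept below the `-- ` mail-signature separator so it is not read
as part of the patch): a minimal usage sketch of the new `which_parts`
argument on `Add` and the `Fixed` kernel added above. The public import names
and the `+` operator building an `Add` kernel follow usual GPy conventions but
are assumptions here, as is every variable name; treat this as illustrative
only, not as part of the patch.

    import numpy as np
    import GPy

    X = np.random.randn(10, 2)
    # '+' on kernels is assumed to construct an Add combination kernel
    k = GPy.kern.RBF(2) + GPy.kern.Bias(2) + GPy.kern.White(2)

    K_full = k.K(X)                         # sum over all parts
    K_rbf = k.K(X, which_parts=k.parts[0])  # a single part may be passed bare
    d_rb = k.Kdiag(X, which_parts=[k.parts[0], k.parts[1]])  # or a list of parts

    # Fixed scales a user-supplied covariance by its variance parameter; the
    # inputs are ignored, so the matrix must already match the data size.
    from GPy.kern._src.static import Fixed   # private path, as in this patch
    C = np.dot(X, X.T)                       # any PSD matrix over the 10 points
    kf = Fixed(input_dim=2, covariance_matrix=C)
    assert np.allclose(kf.K(X), 1. * C)      # variance defaults to 1.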