From 54239555a1a099d423f28458ba8ebc5bb25a33ad Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Wed, 12 Mar 2014 12:03:37 +0000
Subject: [PATCH] psi_stat slices for kernels

---
 GPy/kern/_src/add.py    | 55 +++++++++++++++++++++++++----------------
 GPy/kern/_src/kern.py   | 18 +++-------
 GPy/kern/_src/linear.py |  2 +-
 GPy/kern/_src/rbf.py    |  6 ++++-
 GPy/kern/_src/static.py | 28 +++++++++++++++++++++
 5 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 8a3cefaf..fdebdfac 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -17,6 +17,11 @@ class Add(CombinationKernel):
 
     @Cache_this(limit=2, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
+        """
+        Add all kernels together.
+        If a list `which_parts` of parts (of this kernel!) is given, only
+        those parts are used to compute the covariance.
+        """
         assert X.shape[1] == self.input_dim
         if which_parts is None:
             which_parts = self.parts
@@ -25,6 +30,22 @@ class Add(CombinationKernel):
             which_parts = [which_parts]
         return reduce(np.add, (p.K(X, X2) for p in which_parts))
 
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def Kdiag(self, X, which_parts=None):
+        assert X.shape[1] == self.input_dim
+        if which_parts is None:
+            which_parts = self.parts
+        elif not isinstance(which_parts, (list, tuple)):
+            # if only one part is given
+            which_parts = [which_parts]
+        return reduce(np.add, (p.Kdiag(X) for p in which_parts))
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+
+    def update_gradients_diag(self, dL_dK, X):
+        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
+
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -36,18 +57,9 @@ class Add(CombinationKernel):
         :type X2: np.ndarray (num_inducing x input_dim)"""
         target = np.zeros(X.shape)
-        for p in self.parts:
-            target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
+        [target.__setitem__((Ellipsis, p.active_dims), target[:, p.active_dims] + p.gradients_X(dL_dK, X, X2)) for p in self.parts]
         return target
 
-    @Cache_this(limit=2, force_kwargs=['which_parts'])
-    def Kdiag(self, X, which_parts=None):
-        assert X.shape[1] == self.input_dim
-        if which_parts is None:
-            which_parts = self.parts
-        return sum([p.Kdiag(X) for p in which_parts])
-
-
     def psi0(self, Z, variational_posterior):
         return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
 
 
@@ -56,7 +68,7 @@ class Add(CombinationKernel):
 
     def psi2(self, Z, variational_posterior):
         psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
-        return psi2
+        #return psi2
         # compute the "cross" terms
         from static import White, Bias
         from rbf import RBF
@@ -65,23 +77,24 @@ class Add(CombinationKernel):
         #from fixed import Fixed
 
         for p1, p2 in itertools.combinations(self.parts, 2):
-            i1, i2 = p1.active_dims, p2.active_dims
+            # i1, i2 = p1.active_dims, p2.active_dims
             # White doesn't combine with anything
             if isinstance(p1, White) or isinstance(p2, White):
                 pass
            # rbf X bias
             #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
             elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p2.psi1(Z[:,i2], variational_posterior[:, i1])
+                tmp = p2.psi1(Z, variational_posterior)
                 psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
             #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
             elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
-                # manual override for slicing:
-                p2._sliced_X = p1._sliced_X = True
-                tmp = p1.psi1(Z[:,i1], variational_posterior[:, i2])
+                tmp = p1.psi1(Z, variational_posterior)
                 psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
+                assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non-overlapping kernel dimensions allowed so far"
+                tmp1 = p1.psi1(Z, variational_posterior)
+                tmp2 = p2.psi1(Z, variational_posterior)
+                psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return psi2
@@ -98,7 +111,7 @@ class Add(CombinationKernel):
                 continue
             elif isinstance(p2, Bias):
                 eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-            else:
+            else: # TODO: careful, not correct for overlapping active_dims
                 eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
         p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
 
@@ -114,9 +127,9 @@ class Add(CombinationKernel):
             if isinstance(p2, White):
                 continue
             elif isinstance(p2, Bias):
-                eff_dL_dpsi1 += 0#dL_dpsi2.sum(1) * p2.variance * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
             else:
-                eff_dL_dpsi1 += 0#dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
             target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         return target
 
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 8feb9a04..8a24e24a 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -15,6 +15,7 @@ class Kern(Parameterized):
     # found in kernel_slice_operations
     __metaclass__ = KernCallsViaSlicerMeta
     #===========================================================================
+    _debug = False
     def __init__(self, input_dim, name, *a, **kw):
         """
         The base class for a kernel: a positive definite function
@@ -27,12 +28,12 @@ class Kern(Parameterized):
         """
         super(Kern, self).__init__(name=name, *a, **kw)
         if isinstance(input_dim, int):
-            self.active_dims = slice(0, input_dim)
+            self.active_dims = np.r_[0:input_dim]
             self.input_dim = input_dim
         else:
-            self.active_dims = input_dim
+            self.active_dims = np.r_[input_dim]
             self.input_dim = len(self.active_dims)
-        self._sliced_X = False
+        self._sliced_X = 0
 
     @Cache_this(limit=10)#, ignore_args = (0,))
     def _slice_X(self, X):
@@ -60,14 +61,13 @@ class Kern(Parameterized):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dKdiag, X):
         raise NotImplementedError
-    
+
     def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
     def update_gradients_diag(self, dL_dKdiag, X):
         """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
         raise NotImplementedError
-
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         """
         Set the gradients of all parameters when doing inference with
@@ -188,7 +188,7 @@ class Kern(Parameterized):
 class CombinationKernel(Kern):
     def __init__(self, kernels, name):
         assert all([isinstance(k, Kern) for k in kernels])
-        input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
+        input_dim = reduce(np.union1d, (x.active_dims for x in kernels))
         super(CombinationKernel, self).__init__(input_dim, name)
         self.add_parameters(*kernels)
 
@@ -196,12 +196,6 @@ class CombinationKernel(Kern):
     def parts(self):
         return self._parameters_
 
-    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
-
-    def update_gradients_diag(self, dL_dK, X):
-        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
-
     def input_sensitivity(self):
         in_sen = np.zeros((self.num_params, self.input_dim))
         for i, p in enumerate(self.parts):
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index 60645d11..f2ac0124 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -175,7 +175,7 @@ class Linear(Kern):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         _, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
-        
+
         grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
                np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
 
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index 7ba1f35d..341d46a7 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -19,7 +19,6 @@ class RBF(Stationary):
 
     k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
 
     """
-
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
         self.weave_options = {}
@@ -81,6 +80,8 @@ class RBF(Stationary):
 
         #contributions from psi0:
         self.variance.gradient = np.sum(dL_dpsi0)
+        if self._debug:
+            num_grad = self.lengthscale.gradient.copy()
         self.lengthscale.gradient = 0.
 
         #from psi1
@@ -100,6 +101,8 @@ class RBF(Stationary):
             else:
                 self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
 
+            if self._debug:
+                import ipdb; ipdb.set_trace()
             self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
 
         else:
@@ -150,6 +153,7 @@ class RBF(Stationary):
         grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
         grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
         grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
+
         #psi2
         grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
         grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py
index f344357c..387c92c6 100644
--- a/GPy/kern/_src/static.py
+++ b/GPy/kern/_src/static.py
@@ -89,3 +89,31 @@ class Bias(Static):
 
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
+class Fixed(Static):
+    def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'):
+        """
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        super(Fixed, self).__init__(input_dim, variance, name)
+        self.fixed_K = covariance_matrix
+    def K(self, X, X2=None):
+        return self.variance * self.fixed_K
+
+    def Kdiag(self, X):
+        return self.variance * np.diag(self.fixed_K)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diag(self.fixed_K))
+
+    def psi2(self, Z, variational_posterior):
+        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum()
+
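-- 
Reviewer note (kept below the `-- ` mail-signature separator so it is not read
as part of the patch): a minimal usage sketch of the new `which_parts`
argument on `Add` and the `Fixed` kernel added above. The public import names
and the `+` operator building an `Add` kernel follow usual GPy conventions but
are assumptions here, as is every variable name; treat this as illustrative
only, not as part of the patch.

    import numpy as np
    import GPy

    X = np.random.randn(10, 2)
    # '+' on kernels is assumed to construct an Add combination kernel
    k = GPy.kern.RBF(2) + GPy.kern.Bias(2) + GPy.kern.White(2)

    K_full = k.K(X)                         # sum over all parts
    K_rbf = k.K(X, which_parts=k.parts[0])  # a single part may be passed bare
    d_rb = k.Kdiag(X, which_parts=[k.parts[0], k.parts[1]])  # or a list of parts

    # Fixed scales a user-supplied covariance by its variance parameter; the
    # inputs are ignored, so the matrix must already match the data size.
    from GPy.kern._src.static import Fixed   # private path, as in this patch
    C = np.dot(X, X.T)                       # any PSD matrix over the 10 points
    kf = Fixed(input_dim=2, covariance_matrix=C)
    assert np.allclose(kf.K(X), 1. * C)      # variance defaults to 1.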