diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index c72de182..1ae72556 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -58,6 +58,7 @@ class SparseGP(GP):
         if not self.Z.is_fixed:
             if self.X_variance is None:
                 self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
+                print self.Z.gradient
             else:
                 self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)
                 print self.Z.gradient
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index acc69fd4..d5515d98 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -71,7 +71,7 @@ class Add(Kern):
 
 
     def psi0(self, Z, mu, S):
-        return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices))],0)
+        return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], axis=0)
 
     def psi1(self, Z, mu, S):
         return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
@@ -93,7 +93,7 @@ class Add(Kern):
                 pass
             # rbf X bias
             #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
-            elif isinstance(p1,  Bias) and isinstance(p2, (RBF, Linear))):
+            elif isinstance(p1,  Bias) and isinstance(p2, (RBF, Linear)):
                 tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
                 psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
             #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index dd87200e..63c5b458 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -26,33 +26,15 @@ class Kern(Parameterized):
         raise NotImplementedError
     def Kdiag(self, Xa ,target):
         raise NotImplementedError
-    def _param_grad_helper(self, dL_dK,X, X2, target):
-        raise NotImplementedError
     def psi0(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi0_dtheta(self,dL_dpsi0, Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
-        raise NotImplementedError
     def psi1(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi1_dtheta(self,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
-        raise NotImplementedError
     def psi2(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
+    def gradients_X(self, dL_dK, X, X2=None):
         raise NotImplementedError
-    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
-        raise NotImplementedError
-    def gradients_X(self, dL_dK, X, X2, target):
-        raise NotImplementedError
-    def dKdiag_dX(self, dL_dK, X, target):
+    def gradients_X_diag(self, dL_dK, X):
         raise NotImplementedError
     def update_gradients_full(self, dL_dK, X):
         """Set the gradients of all parameters when doing full (N) inference."""
@@ -63,6 +45,16 @@ class Kern(Parameterized):
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
         raise NotImplementedError
+    def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        """Return the gradient with respect to Z when doing sparse inference.
+
+        The result is gradients_X(dL_dKmm, Z) plus gradients_X(dL_dKnm.T, Z, X).
+        dL_dKdiag is accepted but not used by this default implementation.
+        """
+        # NOTE(review): gradients_X is called without X2 here, so the concrete kernel must accept that call form; the sum below is accumulated in place into the array this call returns.
+        grad = self.gradients_X(dL_dKmm, Z)
+        grad += self.gradients_X(dL_dKnm.T, Z, X)
+        return grad
 
     def plot_ARD(self, *args):
         """If an ARD kernel is present, plot a bar representation using matplotlib