diff --git a/GPy/kern/src/basis_funcs.py b/GPy/kern/src/basis_funcs.py
index 569a12f1..809a9d3c 100644
--- a/GPy/kern/src/basis_funcs.py
+++ b/GPy/kern/src/basis_funcs.py
@@ -8,14 +8,15 @@ from paramz.caching import Cache_this
 from ...util.linalg import tdot, mdot
 
 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel', _warn_input_dim=True):
         """
         Abstract superclass for kernels with explicit basis functions for use in GPy.
 
         This class does NOT automatically add an offset to the design matrix phi!
         """
         super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
-        assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
+        if _warn_input_dim:
+            assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
         self.ARD = ARD
         if self.ARD:
             phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
@@ -42,6 +43,27 @@ class BasisFuncKernel(Kern):
     def Kdiag(self, X, X2=None):
         return np.diag(self._K(X, X2))
 
+    def d_K_d_theta_through_K(self, phi1, dL_dK, dphi1_d_theta, phi2=None, dphi2_d_theta=None, ARD=True):
+        """
+        Helper to push inner parameter gradients through to K.
+        
+        Give phi(X) and the gradient of phi(X) wrt the parameter theta.
+        
+        Remember to set the gradient of the respective theta yourself! 
+        """
+        if phi2 is None or phi2 is phi1:
+            if ARD:
+                gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_d_theta)
+            else:
+                gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_d_theta.T)).sum())
+        else:
+            if ARD:
+                gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_d_theta) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_d_theta))
+            else:
+                gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_d_theta.T)).sum() + (dL_dK * phi2.dot(dphi1_d_theta.T)).sum())
+        return np.where(np.isnan(gradient), 0, gradient)
+    
+
     def update_gradients_full(self, dL_dK, X, X2=None):
         if self.ARD:
             phi1 = self.phi(X)
@@ -162,7 +184,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
 
 class DomainKernel(LinearSlopeBasisFuncKernel):
     """
-    Create a constant plateou of correlation between start and stop and zero
+    Create a constant plateau of correlation between start and stop and zero
     elsewhere. This is a constant shift of the outputs along the yaxis
     in the range from start to stop.
     """
@@ -202,25 +224,17 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
 
     def update_gradients_full(self, dL_dK, X, X2=None):
         super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
-        if X2 is None or X is X2:
-            phi1 = self.phi(X)
-            if phi1.ndim != 2:
-                phi1 = phi1[:, None]
-            dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
-            if self.ARD_slope:
-                self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
-            else:
-                self.slope.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum())
-        else:
-            phi1 = self.phi(X)
+        
+        phi1 = self.phi(X)
+        if phi1.ndim != 2:
+            phi1 = phi1[:, None]
+        dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
+        
+        phi2 = dphi2_dl = None
+        if not(X2 is None or X is X2):
             phi2 = self.phi(X2)
-            if phi1.ndim != 2:
-                phi1 = phi1[:, None]
+            if phi2.ndim != 2:
                 phi2 = phi2[:, None]
-            dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
             dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
-            if self.ARD_slope:
-                self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
-            else:
-                self.slope.gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum())
-        self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
+            
+        self.slope.gradient = self.d_K_d_theta_through_K(phi1, dL_dK, dphi1_dl, phi2, dphi2_dl, ARD=self.ARD_slope)