diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index 1b300661..af4a7188 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -5,33 +5,59 @@
 from ...core.parameterization.param import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
 from ...util.caching import Cache_this
-from ...util.linalg import tdot
+from ...util.linalg import tdot, mdot
 
 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
         """
         Abstract superclass for kernels with explicit basis functions for use in GPy.
         This class does NOT automatically add an offset to the design matrix phi!
         """
         super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
+        self.ARD = ARD
+        if self.ARD:
+            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
+            variance = variance * np.ones(phi_test.shape[1])
+        else:
+            variance = np.array(variance)
         self.variance = Param('variance', variance, Logexp())
         self.link_parameter(self.variance)
 
+    def parameters_changed(self):
+        self.alpha = np.sqrt(self.variance)
+        #self.beta = 1./self.variance
+
+    @Cache_this(limit=3, ignore_args=())
     def phi(self, X):
-        raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi')
+        return self._phi(X)
+
+    def _phi(self, X):
+        raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
 
     def K(self, X, X2=None):
-        return self.variance * self._K(X, X2)
+        return self._K(X, X2)
 
     def Kdiag(self, X, X2=None):
-        return self.variance * np.diag(self._K(X, X2))
+        return np.diag(self._K(X, X2))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
+        if self.ARD:
+            phi1 = self.phi(X)
+            if X2 is None or X is X2:
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
+            else:
+                phi2 = self.phi(X2)
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
+        else:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X))
+        if self.ARD:
+            phi1 = self.phi(X)
+            self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
+        else:
+            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X))
 
     def concatenate_offset(self, X):
         return np.c_[np.ones((X.shape[0], 1)), X]
@@ -52,19 +78,19 @@ class BasisFuncKernel(Kern):
             posterior = self._highest_parent_.posterior
         except NameError:
             raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
-        phi = self.phi(X)
-        return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi)))
+        phi_alpha = self.phi(X) * self.variance
+        return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
 
     @Cache_this(limit=3, ignore_args=())
     def _K(self, X, X2):
         if X2 is None or X is X2:
-            phi = self.phi(X)
+            phi = self.phi(X) * self.alpha
             if phi.ndim != 2:
                 phi = phi[:, None]
             return tdot(phi)
         else:
-            phi1 = self.phi(X)
-            phi2 = self.phi(X2)
+            phi1 = self.phi(X) * self.alpha
+            phi2 = self.phi(X2) * self.alpha
             if phi1.ndim != 2:
                 phi1 = phi1[:, None]
                 phi2 = phi2[:, None]
@@ -72,30 +98,43 @@
 
 class LinearSlopeBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'):
-        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
+        """
+        A linear segment transformation. The segments start at start, \
+        are then linear to stop and constant again. The segments are
+        normalized, so that they have exactly as much mass above
+        as below the origin.
+
+        Start and stop can be tuples or lists of starts and stops.
+        Behaviour of start stop is as np.where(X<start) would do.
+
+        """
         self.start = np.array(start)
         self.stop = np.array(stop)
+        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
 
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
         return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
         return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5
 
 
 class ChangePointBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'):
-        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
         self.changepoint = changepoint
+        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
 
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         return self.concatenate_offset(np.where((X < self.changepoint), -1, 1))
 
 
 class DomainKernel(LinearSlopeBasisFuncKernel):
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
+        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
+
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where((X>self.start)*(X<self.stop), 1, 0)
         return self.concatenate_offset(phi)
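Reviewer note on the ARD gradients (not part of the patch): with ARD enabled the kernel is K = Phi diag(v) Phi^T for Phi = phi(X), so dK_ij/dv_q = Phi_iq Phi_jq and hence dL/dv_q = sum_ij dL_dK_ij Phi_iq Phi_jq, which is exactly the einsum('ij,iq,jq->q', ...) introduced in update_gradients_full. A minimal numpy-only finite-difference check of that identity; every name below is an illustrative stand-in, not GPy API:

import numpy as np

rng = np.random.RandomState(0)
N, Q = 5, 3
Phi = rng.randn(N, Q)        # stand-in for the design matrix phi(X)
v = rng.rand(Q) + 0.5        # stand-in for the ARD variance vector
dL_dK = rng.randn(N, N)      # arbitrary upstream gradient

# Analytic gradient, as in update_gradients_full above.
grad = np.einsum('ij,iq,jq->q', dL_dK, Phi, Phi)

# Central finite differences of L(v) = sum(dL_dK * K(v)),
# with K(v) = Phi diag(v) Phi^T; (Phi * v) scales the columns of Phi.
eps = 1e-6
fd = np.empty(Q)
for q in range(Q):
    vp, vm = v.copy(), v.copy()
    vp[q] += eps
    vm[q] -= eps
    Kp = (Phi * vp).dot(Phi.T)
    Km = (Phi * vm).dot(Phi.T)
    fd[q] = np.sum(dL_dK * (Kp - Km)) / (2 * eps)

assert np.allclose(grad, fd)

The same factorization explains the alpha = sqrt(variance) cached in parameters_changed: tdot(phi * alpha) = Phi diag(v) Phi^T, so _K can absorb the variance into the design matrix instead of scaling the kernel afterwards.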
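A usage sketch, assuming this branch is installed and the basis function kernels remain exported under GPy.kern as on devel; the data, change point, and the posterior_inf call at the end are illustrative:

import numpy as np
import GPy

X = np.linspace(0, 10, 200)[:, None]
Y = np.where(X > 5., 1., -1.) + 0.1 * np.random.randn(200, 1)

# ARD=True now gives one variance per basis function (here: the offset
# column and the step column) instead of a single shared variance.
k = GPy.kern.ChangePointBasisFuncKernel(1, changepoint=5., variance=1., ARD=True)
m = GPy.models.GPRegression(X, Y, k)
m.optimize()
print(m.kern.variance)      # two variances, one per column of phi(X)

# Mean and covariance of the basis-function weights from the reworked
# posterior_inf (assumes the kernel lives inside a fitted model, which
# provides the posterior it reads from).
mu_w, cov_w = m.kern.posterior_inf()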