diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 1e386c01..97afd1f0 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -9,7 +9,9 @@ from kern import CombinationKernel class Add(CombinationKernel): """ Add given list of kernels together. - propagates gradients thorugh. + propagates gradients through. + + This kernel will take over the active dims of its subkernels passed in. """ def __init__(self, subkerns, name='add'): super(Add, self).__init__(subkerns, name) diff --git a/GPy/kern/_src/brownian.py b/GPy/kern/_src/brownian.py index 81b57a25..aeb11fa3 100644 --- a/GPy/kern/_src/brownian.py +++ b/GPy/kern/_src/brownian.py @@ -17,9 +17,9 @@ class Brownian(Kern): :param variance: :type variance: float """ - def __init__(self, input_dim=1, variance=1., name='Brownian'): + def __init__(self, input_dim=1, variance=1., active_dims=None, name='Brownian'): assert input_dim==1, "Brownian motion in 1D only" - super(Brownian, self).__init__(input_dim, name) + super(Brownian, self).__init__(input_dim, active_dims, name) self.variance = Param('variance', variance, Logexp()) self.add_parameters(self.variance) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 3503bbd6..7eccff3d 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -34,8 +34,8 @@ class Coregionalize(Kern): .. note: see coregionalization examples in GPy.examples.regression for some usage. 
""" - def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, name='coregion'): - super(Coregionalize, self).__init__(input_dim, name=name) + def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'): + super(Coregionalize, self).__init__(input_dim, active_dims, name=name) self.output_dim = output_dim self.rank = rank if self.rank>output_dim: diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index dc6eceb4..cb38416c 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -16,26 +16,24 @@ class Kern(Parameterized): __metaclass__ = KernCallsViaSlicerMeta #=========================================================================== _debug=False - def __init__(self, input_dim, name, *a, **kw): + def __init__(self, input_dim, active_dims, name, *a, **kw): """ The base class for a kernel: a positive definite function which forms of a covariance function (kernel). - :param input_dim: the number of input dimensions to the function - :type input_dim: int + :param int input_dim: the number of input dimensions to the function + :param array-like|slice active_dims: list of indices on which dimensions this kernel works on Do not instantiate. 
""" super(Kern, self).__init__(name=name, *a, **kw) - if isinstance(input_dim, int): - self.active_dims = np.r_[0:input_dim] - self.input_dim = input_dim - else: - self.active_dims = np.r_[input_dim] - self.input_dim = len(self.active_dims) + self.active_dims = active_dims or slice(0, input_dim) + self.input_dim = input_dim + assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__) + assert self.active_dims.size == self.input_dim, "input_dim {} does not match len(active_dim) {}".format(self.input_dim, self.active_dims.size) self._sliced_X = 0 - @Cache_this(limit=10)#, ignore_args = (0,)) + @Cache_this(limit=10) def _slice_X(self, X): return X[:, self.active_dims] @@ -69,9 +67,7 @@ class Kern(Parameterized): def update_gradients_full(self, dL_dK, X, X2): """Set the gradients of all parameters when doing full (N) inference.""" raise NotImplementedError - def update_gradients_diag(self, dL_dKdiag, X): - """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters.""" - raise NotImplementedError + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): """ Set the gradients of all parameters when doing inference with @@ -193,13 +189,29 @@ class Kern(Parameterized): super(Kern, self)._setstate(state) class CombinationKernel(Kern): - def __init__(self, kernels, name): + """ + Abstract super class for combination kernels. + A combination kernel combines (a list of) kernels and works on those. + Examples are the HierarchicalKernel or Add and Prod kernels. + """ + def __init__(self, kernels, name, extra_dims=[]): + """ + Abstract super class for combination kernels. + A combination kernel combines (a list of) kernels and works on those. + Examples are the HierarchicalKernel or Add and Prod kernels. 
+ + :param list kernels: List of kernels to combine (can be only one element) + :param str name: name of the combination kernel + :param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on + """ assert all([isinstance(k, Kern) for k in kernels]) + import itertools # make sure the active dimensions of all underlying kernels are covered: - ma = reduce(lambda a,b: max(a, max(b)), (x.active_dims for x in kernels), 0) + ma = reduce(lambda a,b: max(a, b.stop if isinstance(b, slice) else max(b)), itertools.chain((x.active_dims for x in kernels), [extra_dims]), 0) input_dim = np.r_[0:ma+1] # initialize the kernel with the full input_dim super(CombinationKernel, self).__init__(input_dim, name) + self.extra_dims = extra_dims self.add_parameters(*kernels) @property diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index f2ac0124..15e23d5c 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -34,8 +34,8 @@ class Linear(Kern): """ - def __init__(self, input_dim, variances=None, ARD=False, name='linear'): - super(Linear, self).__init__(input_dim, name) + def __init__(self, input_dim, variances=None, ARD=False, active_dims=None, name='linear'): + super(Linear, self).__init__(input_dim, active_dims, name) self.ARD = ARD if not ARD: if variances is not None: diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py index ee15d967..0b561d4b 100644 --- a/GPy/kern/_src/mlp.py +++ b/GPy/kern/_src/mlp.py @@ -31,8 +31,8 @@ class MLP(Kern): """ - def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., name='mlp'): - super(MLP, self).__init__(input_dim, name) + def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'): + super(MLP, self).__init__(input_dim, active_dims, name) self.variance = Param('variance', variance, Logexp()) self.weight_variance = Param('weight_variance', weight_variance, Logexp()) self.bias_variance = 
Param('bias_variance', bias_variance, Logexp()) diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py index 6b423a57..a8573a05 100644 --- a/GPy/kern/_src/periodic.py +++ b/GPy/kern/_src/periodic.py @@ -10,7 +10,7 @@ from ...core.parameterization.param import Param from ...core.parameterization.transformations import Logexp class Periodic(Kern): - def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, name): + def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name): """ :type input_dim: int :param variance: the variance of the Matern kernel @@ -25,7 +25,7 @@ class Periodic(Kern): """ assert input_dim==1, "Periodic kernels are only defined for input_dim=1" - super(Periodic, self).__init__(input_dim, name) + super(Periodic, self).__init__(input_dim, active_dims, name) self.input_dim = input_dim self.lower,self.upper = lower, upper self.n_freq = n_freq @@ -77,8 +77,8 @@ class PeriodicExponential(Periodic): Only defined for input_dim=1. """ - def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_exponential'): - super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name) + def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_exponential'): + super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name) def parameters_changed(self): self.a = [1./self.lengthscale, 1.] 
@@ -187,8 +187,8 @@ class PeriodicMatern32(Periodic): """ - def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_Matern32'): - super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name) + def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern32'): + super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name) def parameters_changed(self): self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.] self.b = [1,self.lengthscale**2/3] @@ -300,8 +300,8 @@ class PeriodicMatern52(Periodic): """ - def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_Matern52'): - super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name) + def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern52'): + super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name) def parameters_changed(self): self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.] 
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 341d46a7..c2877d06 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -19,8 +19,8 @@ class RBF(Stationary): k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'): - super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'): + super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) self.weave_options = {} def K_of_r(self, r): diff --git a/GPy/kern/_src/ssrbf.py b/GPy/kern/_src/ssrbf.py index c566c414..bf87bf76 100644 --- a/GPy/kern/_src/ssrbf.py +++ b/GPy/kern/_src/ssrbf.py @@ -33,9 +33,9 @@ class SSRBF(Stationary): .. Note: this object implements both the ARD and 'spherical' version of the function """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=True, name='SSRBF'): + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=True, active_dims=None, name='SSRBF'): assert ARD==True, "Not Implemented!" 
- super(SSRBF, self).__init__(input_dim, variance, lengthscale, ARD, name) + super(SSRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index 387c92c6..4c9d943c 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -9,7 +9,7 @@ from ...core.parameterization.transformations import Logexp import numpy as np class Static(Kern): - def __init__(self, input_dim, variance, name): + def __init__(self, input_dim, variance, active_dims, name): - super(Static, self).__init__(input_dim, name) + super(Static, self).__init__(input_dim, active_dims, name) self.variance = Param('variance', variance, Logexp()) self.add_parameters(self.variance) @@ -43,8 +43,8 @@ class Static(Kern): class White(Static): - def __init__(self, input_dim, variance=1., name='white'): - super(White, self).__init__(input_dim, variance, name) + def __init__(self, input_dim, variance=1., active_dims=None, name='white'): + super(White, self).__init__(input_dim, variance, active_dims, name) def K(self, X, X2=None): if X2 is None: @@ -66,8 +66,8 @@ class White(Static): class Bias(Static): - def __init__(self, input_dim, variance=1., name='bias'): - super(Bias, self).__init__(input_dim, variance, name) + def __init__(self, input_dim, variance=1., active_dims=None, name='bias'): + super(Bias, self).__init__(input_dim, variance, active_dims, name) def K(self, X, X2=None): shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) @@ -90,14 +90,14 @@ class Bias(Static): self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() class Fixed(Static): - def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'): + def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'): """ :param input_dim: the number of input dimensions :type input_dim: int :param variance: the variance of the kernel :type variance: float """ - 
super(Bias, self).__init__(input_dim, variance, name) + super(Fixed, self).__init__(input_dim, variance, active_dims, name) self.fixed_K = covariance_matrix def K(self, X, X2): return self.variance * self.fixed_K diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 725f8660..df7ba058 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -41,8 +41,8 @@ class Stationary(Kern): """ - def __init__(self, input_dim, variance, lengthscale, ARD, name): - super(Stationary, self).__init__(input_dim, name) + def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name): + super(Stationary, self).__init__(input_dim, active_dims, name) self.ARD = ARD if not ARD: if lengthscale is None: @@ -186,8 +186,8 @@ class Stationary(Kern): return np.ones(self.input_dim)/self.lengthscale class Exponential(Stationary): - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'): - super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'): + super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def K_of_r(self, r): return self.variance * np.exp(-0.5 * r) @@ -205,8 +205,8 @@ class Matern32(Stationary): """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'): - super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'): + super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def K_of_r(self, r): return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) 
* r) @@ -249,8 +249,8 @@ class Matern52(Stationary): k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'): - super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'): + super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def K_of_r(self, r): return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r) @@ -291,8 +291,8 @@ class Matern52(Stationary): class ExpQuad(Stationary): - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'): - super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'): + super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -301,8 +301,8 @@ class ExpQuad(Stationary): return -r*self.K_of_r(r) class Cosine(Stationary): - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'): - super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'): + super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def K_of_r(self, r): return self.variance * np.cos(r) @@ -322,8 +322,8 @@ class RatQuad(Stationary): """ - def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, name='ExpQuad'): - super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, name) + def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='ExpQuad'): + super(RatQuad, self).__init__(input_dim, 
variance, lengthscale, ARD, active_dims, name) self.power = Param('power', power, Logexp()) self.add_parameters(self.power) diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/sympykern.py index 3f6b5445..6f066e98 100644 --- a/GPy/kern/_src/sympykern.py +++ b/GPy/kern/_src/sympykern.py @@ -26,13 +26,13 @@ class Sympykern(Kern): - to handle multiple inputs, call them x_1, z_1, etc - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. """ - def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None): + def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None): if name is None: name='sympykern' if k is None: raise ValueError, "You must provide an argument for the covariance function." - super(Sympykern, self).__init__(input_dim, name) + super(Sympykern, self).__init__(input_dim, active_dims, name) self._sp_k = k