diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index c1911e75..8a31968e 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -176,7 +176,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, if plot: y = m.Y fig, (latent_axes, sense_axes) = plt.subplots(1, 2) - m.plot_latent(ax=latent_axes) + m.plot_latent(ax=latent_axes, labels=m.data_labels) data_show = GPy.plotting.matplot_dep.visualize.vector_show(y) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index bacc87e3..252bc095 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -20,7 +20,7 @@ except ImportError: sympy_available=False if sympy_available: - from _src.symbolic2 import Symbolic + from _src.symbolic import Symbolic from _src.eq import Eq from _src.heat_eqinit import Heat_eqinit #from _src.ode1_eq_lfm import Ode1_eq_lfm diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index fb0e114b..88b8e40c 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -167,4 +167,10 @@ class Add(CombinationKernel): else: self.add_parameter(other) self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts) - return self \ No newline at end of file + return self + + def input_sensitivity(self): + in_sen = np.zeros(self.input_dim) + for i, p in enumerate(self.parts): + in_sen[p.active_dims] += p.input_sensitivity() + return in_sen diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 4a9671aa..ce711f6b 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -32,20 +32,21 @@ def index_to_slices(index): [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in 
zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] return ret -class IndependentOutputs(CombinationKernel): +class IndependentOutputs(Kern): """ - A kernel which can represent several independent functions. - this kernel 'switches off' parts of the matrix where the output indexes are different. + A kernel which can represent several independent functions. this kernel + 'switches off' parts of the matrix where the output indexes are different. - The index of the functions is given by the last column in the input X - the rest of the columns of X are passed to the underlying kernel for computation (in blocks). - - :param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels - the indices in the index_dim, index the kernels you gave! + The index of the functions is given by the last column in the input X the + rest of the columns of X are passed to the underlying kernel for + computation (in blocks). + + :param kernels: either a kernel, or list of kernels to work with. If it is + a list of kernels the indices in the index_dim, index the kernels you gave! """ def __init__(self, kernels, index_dim=-1, name='independ'): - assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" - if not isinstance(kernels, list): + assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the index" + if not isinstance(kernels, list): self.single_kern = True self.kern = kernels kernels = [kernels] @@ -142,38 +143,41 @@ class IndependentOutputs(CombinationKernel): if self.single_kern: kern.gradient = target else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] -class Hierarchical(CombinationKernel): +class Hierarchical(Kern): """ - A kernel which can reopresent a simple hierarchical model. + A kernel which can represent a simple hierarchical model. 
See Hensman et al 2013, "Hierarchical Bayesian modelling of gene expression time series across irregularly sampled replicates and clusters" http://www.biomedcentral.com/1471-2105/14/252 - The index of the functions is given by additional columns in the input X. + To construct this kernel, you must pass a list of kernels. The first kernel + will be assumed to be the 'base' kernel, and will be computed everywhere. + For every additional kernel, we assume another layer in the hierarchy, with + a corresponding column of the input matrix which indexes which function the + data are in at that level. + For more, see the ipython notebook documentation on Hierarchical + covariances. """ - def __init__(self, kern, name='hierarchy'): - assert all([k.input_dim==kerns[0].input_dim for k in kerns]) - super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) - kerns = kerns - self.add_parameters(kerns) + def __init__(self, kernels, name='hierarchy'): + assert all([k.input_dim==kernels[0].input_dim for k in kernels]) + assert len(kernels) > 1 + self.levels = len(kernels) -1 + input_max = max([k.input_dim for k in kernels]) + super(Hierarchical, self).__init__(kernels=kernels, extra_dims = range(input_max, input_max + len(kernels)-1), name=name) def K(self,X ,X2=None): - X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)] - K = kerns[0].K(X, X2) + K = self.parts[0].K(X, X2) # compute 'base' kern everywhere + slices = [index_to_slices(X[:,i]) for i in self.extra_dims] if X2 is None: - [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)] + pass + #[[[np.add(K[s,s], k.K(X[s], None), K[s, s]) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.parts[1:], slices)] + #[[[K.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.parts[1:], slices)] 
else: X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) - [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)] - return target - - def Kdiag(self,X): - X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)] - K = kerns[0].K(X, X2) - [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)] - return target + [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(parts[1:], slices, slices2)] + return K def update_gradients_full(self,dL_dK,X,X2=None): X,slices = X[:,:-1],index_to_slices(X[:,-1]) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 6daff739..70bd42b9 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -201,6 +201,13 @@ class Kern(Parameterized): #else: kernels.append(other) return Prod([self, other], name) + def _check_input_dim(self, X): + assert X.shape[1] == self.input_dim, "You did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(X.shape[1], self.input_dim) + + def _check_active_dims(self, X): + assert X.shape[1] >= len(np.r_[self.active_dims]), "At least {} dimensional X needed, X.shape={!s}".format(len(np.r_[self.active_dims]), X.shape) + + class CombinationKernel(Kern): """ Abstract super class for combination kernels. @@ -238,7 +245,11 @@ class CombinationKernel(Kern): return input_dim, active_dims def input_sensitivity(self): - in_sen = np.zeros((self.num_params, self.input_dim)) - for i, p in enumerate(self.parts): - in_sen[i, p.active_dims] = p.input_sensitivity() - return in_sen + raise NotImplementedError("Choose the kernel you want to get the sensitivity for. 
You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...") + + def _check_input_dim(self, X): + return + + def _check_input_dim(self, X): + # As combination kernels cannot always know, what their inner kernels have as input dims, the check will be done inside them, respectively + return diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 10dbacee..c1c8d7f1 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -37,11 +37,12 @@ class _Slice_wrap(object): if X2 is not None: assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape) if (self.k.active_dims is not None) and (self.k._sliced_X == 0): - assert X.shape[1] >= len(np.r_[self.k.active_dims]), "At least {} dimensional X needed, X.shape={!s}".format(len(np.r_[self.k.active_dims]), X.shape) + self.k._check_active_dims(X) self.X = self.k._slice_X(X) self.X2 = self.k._slice_X(X2) if X2 is not None else X2 self.ret = True else: + self.k._check_input_dim(X) self.X = X self.X2 = X2 self.ret = False diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index cf457633..89d36a7d 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -131,10 +131,10 @@ class lvm(matplotlib_show): def modify(self, vals): """When latent values are modified update the latent representation and ulso update the output visualization.""" - self.vals = vals[None,:].copy() + self.vals = vals.copy() y = self.model.predict(self.vals)[0] self.data_visualize.modify(y) - 
self.latent_handle.set_data(self.vals[:,self.latent_index[0]], self.vals[:,self.latent_index[1]]) + self.latent_handle.set_data(self.vals[0,self.latent_index[0]], self.vals[0,self.latent_index[1]]) self.axes.figure.canvas.draw() @@ -153,8 +153,8 @@ class lvm(matplotlib_show): if event.inaxes!=self.latent_axes: return if self.called and self.move_on: # Call modify code on move - self.latent_values[self.latent_index[0]]=event.xdata - self.latent_values[self.latent_index[1]]=event.ydata + self.latent_values[:, self.latent_index[0]]=event.xdata + self.latent_values[:, self.latent_index[1]]=event.ydata self.modify(self.latent_values) def show_sensitivities(self):