diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py
index aa9dca80..6f8b7be1 100644
--- a/GPy/kern/_src/independent_outputs.py
+++ b/GPy/kern/_src/independent_outputs.py
@@ -8,7 +8,7 @@ import itertools
 
 def index_to_slices(index):
     """
-    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index.
 
     e.g.
     >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
@@ -79,10 +79,10 @@ class IndependentOutputs(CombinationKernel):
 
     def update_gradients_full(self,dL_dK,X,X2=None):
         slices = index_to_slices(X[:,self.index_dim])
-        if self.single_kern: 
+        if self.single_kern:
             target = np.zeros(self.kern.size)
             kerns = itertools.repeat(self.kern)
-        else: 
+        else:
             kerns = self.kern
             target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
         def collate_grads(kern, i, dL, X, X2):
@@ -94,7 +94,7 @@ class IndependentOutputs(CombinationKernel):
         else:
             slices2 = index_to_slices(X2[:,self.index_dim])
             [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
-        if self.single_kern: 
+        if self.single_kern:
             self.kern.gradient = target
         else:
             [kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
@@ -104,12 +104,14 @@ class IndependentOutputs(CombinationKernel):
         kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
         if X2 is None:
             # TODO: make use of index_to_slices
+            # FIXME: Broken as X is already sliced out
+            print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
             values = np.unique(X[:,self.index_dim])
             slices = [X[:,self.index_dim]==i for i in values]
             [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
               for kern, s in zip(kerns, slices)]
             #slices = index_to_slices(X[:,self.index_dim])
-            #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) 
+            #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
             #  for s in slices_i] for kern, slices_i in zip(kerns, slices)]
             #import ipdb;ipdb.set_trace()
             #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 7cbd69eb..e0f6c0bc 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None,
                  name='bayesian gplvm', mpi_comm=None, normalizer=None,
-                 missing_data=False, stochastic=False, batchsize=1):
+                 missing_data=False, stochastic=False, batchsize=1, Y_metadata=None):
 
         self.logger = logging.getLogger(self.__class__.__name__)
         if X is None:
@@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI):
                                            name=name, inference_method=inference_method,
                                            normalizer=normalizer, mpi_comm=mpi_comm,
                                            variational_prior=self.variational_prior,
+                                           Y_metadata=Y_metadata
                                            )
         self.link_parameter(self.X, index=0)
 
@@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def parameters_changed(self):
         super(BayesianGPLVM,self).parameters_changed()
         if isinstance(self.inference_method, VarDTC_minibatch):
-            return        
+            return
 
         kl_fctr = 1.
         self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py
index 74026f8e..c2cde834 100644
--- a/GPy/models/gradient_checker.py
+++ b/GPy/models/gradient_checker.py
@@ -5,6 +5,8 @@ from ..core.model import Model
 import itertools
 import numpy
 from ..core.parameterization import Param
+np = numpy
+from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d
 
 def get_shape(x):
     if isinstance(x, numpy.ndarray):
@@ -111,3 +113,261 @@ class GradientChecker(Model):
         #for name, shape in zip(self.names, self.shapes):
             #_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
         #return _param_names
+
+
+class HessianChecker(GradientChecker):
+
+    def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs):
+        """
+        :param f: Function itself (only stored as self._f; checkgrad differentiates df numerically instead)
+        :param df: Gradient of the function, its numerical Jacobian is the reference hessian
+        :param ddf: Analytical hessian function, this is what gets checked
+        :param x0:
+            Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
+            Can be a list of arrays, if the functions take a list of arrays. This list will be passed
+            to f and df in the same order as given here.
+            If only one argument, make sure not to pass a list!!!
+
+        :type x0: [array-like] | array-like | float | int
+        :param names:
+            Names to print, when performing gradcheck. If a list was passed to x0
+            a list of names with the same length is expected.
+        :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
+
+        """
+        super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs)
+        self._f = f
+        self._df = df
+        self._ddf = ddf
+
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
+        """
+        Overwrite checkgrad method to check whole block instead of looping through,
+        shows diagnostics using matshow instead
+
+        :param verbose: If True, print a "full" checking of each parameter
+        :type verbose: bool
+        :param step: The size of the step around which to linearise the objective (accepted but currently unused by checkgrad_block)
+        :type step: float (default 1e-6)
+        :param tolerance: the tolerance allowed (see note)
+        :type tolerance: float (default 1e-3)
+        :returns: True only if the check passed for every parameter
+
+        Note:-
+           The gradient is considered correct if the ratio of the analytical
+           and numerical gradients is within <tolerance> of unity.
+        """
+        try:
+            import numdifftools as nd
+        except ImportError:
+            raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
+
+        if target_param:
+            raise NotImplementedError('Only basic functionality is provided with this gradchecker')
+
+        #Repeat for each parameter, not the nicest but shouldn't be many cases where there are many variables
+        check_passed = True
+        current_index = 0
+        for name, shape in zip(self.names, self.shapes):
+            current_size = numpy.prod(shape)
+            x = self.optimizer_array.copy()
+            #x = self._get_params_transformed().copy()
+            x = x[current_index:current_index + current_size].reshape(shape)
+
+            # Check gradients
+            analytic_hess = self._ddf(x)
+            if analytic_hess.shape[1] == 1:
+                analytic_hess = numpy.diagflat(analytic_hess)
+
+            #From the docs:
+            #x0 : vector location
+            #at which to differentiate fun
+            #If x0 is an N x M array, then fun is assumed to be a function
+            #of N*M variables., thus we must have it flat, not (N,1), but just (N,)
+            #numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
+            numeric_hess_partial = nd.Jacobian(self._df, vectorized=False)
+            #numeric_hess_partial = nd.Derivative(self._df, vectorized=True)
+            numeric_hess = numeric_hess_partial(x)
+            #Accumulate (block is always checked first) so a failure for an earlier parameter is not masked by a later pass
+            check_passed = bool(self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot)) and check_passed
+            current_index += current_size
+        return check_passed
+
+    def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
+        """
+        Check an analytic hessian against a numeric one (either may be in block form); plots only if verbose is also set
+        """
+        if analytic_hess.dtype is np.dtype('object'):
+            #Make numeric hessian also into a block matrix
+            real_size = get_block_shapes(analytic_hess)
+            num_elements = np.sum(real_size)
+            if (num_elements, num_elements) == numeric_hess.shape:
+                #If the sizes are the same we assume they are the same
+                #(we have not fixed any values so the numeric is the whole hessian)
+                numeric_hess = get_blocks(numeric_hess, real_size)
+            else:
+                #Make a fake empty matrix and fill out the correct block
+                tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size)
+                tmp_numeric_hess[block_indices] = numeric_hess.copy()
+                numeric_hess = tmp_numeric_hess
+
+        if block_indices is not None:
+            #Extract the right block
+            analytic_hess = analytic_hess[block_indices]
+            numeric_hess = numeric_hess[block_indices]
+        else:
+            #Unblock them if they are in blocks and you aren't checking a single block (checking whole hessian)
+            if analytic_hess.dtype is np.dtype('object'):
+                analytic_hess = unblock(analytic_hess)
+                numeric_hess = unblock(numeric_hess)
+
+        ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess))
+        difference = numpy.abs(analytic_hess - numeric_hess)
+
+        check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance)
+
+        if verbose:
+            if block_indices is not None:
+                print "\nBlock {}".format(block_indices)
+            else:
+                print "\nAll blocks"
+
+            header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
+            header_string = ' | '.join(header)
+            separator = '-' * len(header_string)
+            print '\n'.join([header_string, separator])
+            min_r = '%.6f' % float(numpy.min(ratio))
+            max_r = '%.6f' % float(numpy.max(ratio))
+            max_d = '%.6f' % float(numpy.max(difference))
+            min_d = '%.6f' % float(numpy.min(difference))
+            cols = [max_r, min_r, min_d, max_d]
+
+            if check_passed:
+                checked = "\033[92m  True \033[0m"
+            else:
+                checked = "\033[91m  False \033[0m"
+
+            grad_string = "{} | {}  | {} |    {}    |   {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
+            print grad_string
+
+            if plot:
+                import pylab as pb
+                fig, axes = pb.subplots(2, 2)
+                max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
+                min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
+                msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim)
+                axes[0,0].set_title('Analytic hessian')
+                axes[0,0].xaxis.set_ticklabels([None])
+                axes[0,0].yaxis.set_ticklabels([None])
+                axes[0,0].xaxis.set_ticks([None])
+                axes[0,0].yaxis.set_ticks([None])
+                msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim)
+                pb.colorbar(msn, ax=axes[0,1])
+                axes[0,1].set_title('Numeric hessian')
+                axes[0,1].xaxis.set_ticklabels([None])
+                axes[0,1].yaxis.set_ticklabels([None])
+                axes[0,1].xaxis.set_ticks([None])
+                axes[0,1].yaxis.set_ticks([None])
+                msr = axes[1,0].matshow(ratio)
+                pb.colorbar(msr, ax=axes[1,0])
+                axes[1,0].set_title('Ratio')
+                axes[1,0].xaxis.set_ticklabels([None])
+                axes[1,0].yaxis.set_ticklabels([None])
+                axes[1,0].xaxis.set_ticks([None])
+                axes[1,0].yaxis.set_ticks([None])
+                msd = axes[1,1].matshow(difference)
+                pb.colorbar(msd, ax=axes[1,1])
+                axes[1,1].set_title('difference')
+                axes[1,1].xaxis.set_ticklabels([None])
+                axes[1,1].yaxis.set_ticklabels([None])
+                axes[1,1].xaxis.set_ticks([None])
+                axes[1,1].yaxis.set_ticks([None])
+                if block_indices is not None:
+                    fig.suptitle("Block: {}".format(block_indices))
+                pb.show()
+
+        return check_passed
+
+class SkewChecker(HessianChecker):
+
+    def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs):
+        """
+        :param df: gradient of function (handed to HessianChecker.__init__ in the position of f)
+        :param ddf: Hessian of function, its numerical Jacobian is the reference third derivative
+        :param dddf: Analytical third derivative function, this is what gets checked
+        :param x0:
+            Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
+            Can be a list of arrays, if takes a list of arrays. This list will be passed
+            to f and df in the same order as given here.
+            If only one argument, make sure not to pass a list!!!
+
+        :type x0: [array-like] | array-like | float | int
+        :param names:
+            Names to print, when performing gradcheck. If a list was passed to x0
+            a list of names with the same length is expected.
+        :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
+
+        """
+        super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs)
+
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False):
+        """
+        Gradient checker that just checks each hessian individually, returns True only if every page of every parameter passes
+
+        super_plot will plot the hessian wrt every parameter, plot will just do the last one
+        """
+        try:
+            import numdifftools as nd
+        except ImportError:
+            raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
+
+        if target_param:
+            raise NotImplementedError('Only basic functionality is provided with this gradchecker')
+
+        #Repeat for each parameter, not the nicest but shouldn't be many cases where there are many variables
+        check_passed = []
+        current_index = 0
+        for name, n_shape in zip(self.names, self.shapes):
+            current_size = numpy.prod(n_shape)
+            x = self.optimizer_array.copy()
+            #x = self._get_params_transformed().copy()
+            x = x[current_index:current_index + current_size].reshape(n_shape)
+
+            # Check gradients
+            #Actually the third derivative
+            analytic_hess = self._ddf(x)
+
+            #Can only calculate jacobian for one variable at a time
+            #From the docs:
+            #x0 : vector location
+            #at which to differentiate fun
+            #If x0 is an N x M array, then fun is assumed to be a function
+            #of N*M variables., thus we must have it flat, not (N,1), but just (N,)
+            #numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
+            #Actually _df is already the hessian
+            numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
+            numeric_hess = numeric_hess_partial(x)
+
+            print "Done making numerical hessian"
+            if analytic_hess.dtype is np.dtype('object'):
+                #Blockify numeric_hess as well
+                blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
+                #HACK
+                real_block_size = np.sum(blocksizes)
+                numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes)
+                #numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes)
+            else:
+                numeric_hess = numeric_hess.reshape(*analytic_hess.shape)
+
+            #Check every block individually (for ease)
+            page_passed = [False]*numeric_hess.shape[2]
+            for block_ind in xrange(numeric_hess.shape[2]):
+                #Unless super_plot is set, just plot the last one
+                p = bool((plot and block_ind == numeric_hess.shape[2]-1) or super_plot)
+                if verbose:
+                    print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
+                page_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
+            check_passed.extend(page_passed) #keep the results of every parameter, not just the last one
+            current_index += current_size
+        return np.all(check_passed)
+
diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py
index a047abc6..e1e04aaa 100644
--- a/GPy/util/block_matrices.py
+++ b/GPy/util/block_matrices.py
@@ -1,9 +1,37 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2014-2015, Alan Saul
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 
+def get_blocks_3d(A, blocksizes, pagesizes=None):
+    """
+    Given a 3d matrix, make a block matrix, where the first and second dimensions are blocked according
+    to blocksizes; pagesizes lists the page indices (third dimension) to block, by default all of them
+    """
+    assert len(A.shape)==3 and (A.shape[0]==A.shape[1]), "can't blockify this non-square matrix, may need to use 2d version"
+    N = np.sum(blocksizes)
+    assert A.shape[0] == N, "bad blocksizes"
+    num_blocks = len(blocksizes)
+    if pagesizes is None:
+        #Assume each page of A should be its own dimension
+        pagesizes = range(A.shape[2])#[0]*A.shape[2]
+    num_pages = len(pagesizes)
+    B = np.empty(shape=(num_blocks, num_blocks, num_pages), dtype=object)
+    #NOTE: each entry of pagesizes is used directly as a page index into both A and B
+    #for Bk, k in enumerate(pagesizes):
+    for Bk in pagesizes:
+        count_i = 0
+        for Bi, i in enumerate(blocksizes):
+            count_j = 0
+            for Bj, j in enumerate(blocksizes):
+                #We want to have it count_k:count_k + k but it's annoying as it makes an NxNx1 array if page sizes are set to 1
+                B[Bi, Bj, Bk] = A[count_i:count_i + i, count_j:count_j + j, Bk]
+                count_j += j
+            count_i += i
+        #count_k += k
+    return B
+
 def get_blocks(A, blocksizes):
-    assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can;t blockify this non-square matrix"
+    assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can't blockify this non-square matrix"
     N = np.sum(blocksizes)
     assert A.shape[0] == N, "bad blocksizes"
     num_blocks = len(blocksizes)
@@ -17,6 +45,11 @@ def get_blocks(A, blocksizes):
         count_i += i
     return B
 
+def get_block_shapes_3d(B):
+    assert B.dtype is np.dtype('object'), "Must be a block matrix"
+    #FIXME: Not general, block shapes are read from page 0 only; second return value is the number of pages
+    return get_block_shapes(B[:,:,0]), B.shape[2]
+
 def get_block_shapes(B):
     assert B.dtype is np.dtype('object'), "Must be a block matrix"
     return [B[b,b].shape[0] for b in range(0, B.shape[0])]
@@ -35,7 +68,7 @@ def unblock(B):
         count_i += i
     return A
 
-def block_dot(A, B):
+def block_dot(A, B, diagonal=False):
     """
     Element wise dot product on block matricies
 
@@ -48,21 +81,30 @@ def block_dot(A, B):
     +-------------+   +------+------+    +-------+-------+
 
     ..Note
+        If diagonal is True, any block of either (A or B) that is stored as a 1d vector
+        (or is otherwise non-square) is assumed to denote a diagonal matrix, and an
+        efficient product using numpy broadcasting will be used, i.e. A11*B11
+
         If either (A or B) of the diagonal matrices are stored as vectors then a more
         efficient dot product using numpy broadcasting will be used, i.e. A11*B11
     """
     #Must have same number of blocks and be a block matrix
     assert A.dtype is np.dtype('object'), "Must be a block matrix"
     assert B.dtype is np.dtype('object'), "Must be a block matrix"
-    Ashape = A.shape
-    Bshape = B.shape
-    assert Ashape == Bshape
-    def f(A,B):
-        if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
-            #FIXME: Careful if one is transpose of other, would make a matrix
-            return A*B
+    assert A.shape == B.shape
+    def f(C,D):
+        """
+        C is an element of A, D is the associated element of B
+        """
+        Cshape = C.shape
+        Dshape = D.shape
+        if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
+                or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
+            print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
+            return C*D
         else:
-            return np.dot(A,B)
+            print "Dotting, C: {} D:{}".format(C.shape, D.shape)
+            return np.dot(C,D)
     dot = np.vectorize(f, otypes = [np.object])
     return dot(A,B)