diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index be60b5f4..15fe9265 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -7,6 +7,7 @@ from matplotlib import pyplot as plt, pyplot
 import GPy
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
+from GPy.util.datasets import simulation_BGPLVM
 
 default_seed = np.random.seed(123344)
 
@@ -129,9 +130,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
     Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
 
-    Y1 += .3 * np.random.randn(*Y1.shape)
-    Y2 += .3 * np.random.randn(*Y2.shape)
-    Y3 += .3 * np.random.randn(*Y3.shape)
+    Y1 += .2 * np.random.randn(*Y1.shape)
+    Y2 += .2 * np.random.randn(*Y2.shape)
+    Y3 += .2 * np.random.randn(*Y3.shape)
 
     Y1 -= Y1.mean(0)
     Y2 -= Y2.mean(0)
@@ -162,11 +163,31 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
 
     return slist, [S1, S2, S3], Ylist
 
 
+def bgplvm_simulation_matlab_compare():
+    sim_data = simulation_BGPLVM()
+    Y = sim_data['Y']
+    S = sim_data['S']
+    mu = sim_data['mu']
+    M, [_, Q] = 20, mu.shape
+
+    from GPy.models import mrd
+    from GPy import kern
+    reload(mrd); reload(kern)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
+                       # X=mu,
+                       # X_variance=S,
+                       _debug=True)
+    m.ensure_default_constraints()
+    m['noise'] = .01 # Y.var() / 100.
+    m['linear_variance'] = .01
+    return m
+
 def bgplvm_simulation(burnin='scg', plot_sim=False,
                       max_burnin=100, true_X=False,
                       do_opt=True, max_f_eval=1000):
-    D1, D2, D3, N, M, Q = 10, 8, 8, 50, 30, 5
+    D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
@@ -176,11 +197,13 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
 
     Y = Ylist[0]
 
-    k = kern.linear(Q, ARD=True) + kern.white(Q, .00001) # + kern.bias(Q)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
     # k = kern.white(Q, .00001) + kern.bias(Q)
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
     # m.set('noise',)
     m.ensure_default_constraints()
+    m['noise'] = Y.var() / 100.
+    m['linear_variance'] = .001
 
     # m.auto_scale_factor = True
     # m.scale_factor = 1.
@@ -207,7 +230,7 @@
 
     # cstr = 'X_variance'
     # m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
-    m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
+    # m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
 
     # cstr = "iip"
     # m.unconstrain(cstr); m.constrain_fixed(cstr)
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 30488dc9..59b6bb15 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -11,6 +11,8 @@ from ..likelihoods import Gaussian
 from .. import kern
 from numpy.linalg.linalg import LinAlgError
 import itertools
+from matplotlib.colors import colorConverter
+from matplotlib.figure import SubplotParams
 
 class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
@@ -31,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             X = self.initialise_latent(init, Q, Y)
 
         if X_variance is None:
-            X_variance = np.ones_like(X) * 0.5
+            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
 
         if Z is None:
             Z = np.random.permutation(X.copy())[:M]
@@ -45,10 +47,13 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         self._debug = _debug
 
         if self._debug:
+            self.fcall = 0
             self._count = itertools.count()
             self._savedklll = []
             self._savedparams = []
-
+            self._savedgradients = []
+            self._savederrors = []
+            self._savedpsiKmm = []
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
 
     @property
@@ -88,6 +93,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             self.oldps = x
         except (LinAlgError, FloatingPointError, ZeroDivisionError):
             print "\rWARNING: Caught LinAlgError, continueing without setting "
+            if self._debug:
+                self._savederrors.append(self.fcall)
             # if save_count > 10:
             #     raise
             # self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
@@ -121,12 +128,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         # kl = 5E4 + np.random.randn()
 
         if self._debug:
-            f_call = self._count.next()
-            self._savedklll.append([f_call, ll, kl])
-            if f_call % 1 == 0:
-                self._savedparams.append([f_call, self._get_params()])
-
-
+            self.f_call = self._count.next()
+            if self.f_call % 1 == 0:
+                self._savedklll.append([self.f_call, ll, kl])
+                self._savedparams.append([self.f_call, self._get_params()])
+                self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
+                self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
 
         # print "\nkl:", kl, "ll:", ll
         return ll - kl
@@ -212,16 +219,27 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         theta = x[start:]
         return X, X_v, Z, theta
 
+
+    def _debug_get_axis(self, figs):
+        if figs[-1].axes:
+            ax1 = figs[-1].axes[0]
+            ax1.cla()
+        else:
+            ax1 = figs[-1].add_subplot(111)
+        return ax1
+
     def _debug_plot(self):
         assert self._debug, "must enable _debug, to debug-plot"
         import pylab
-        from mpl_toolkits.mplot3d import Axes3D
-        fig = pylab.figure('BGPLVM DEBUG', figsize=(12, 10))
-        fig.clf()
+# from mpl_toolkits.mplot3d import Axes3D
+        figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4),
+                             tight_layout=True)]
+# fig.clf()
         # log like
-        splotshape = (6, 4)
-        ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
+# splotshape = (6, 4)
+# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
+        ax1 = self._debug_get_axis(figs)
         ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
                  ha='center', va='center')
         kllls = np.array(self._savedklll)
 
@@ -229,52 +247,141 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
         L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5)
         # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
-        drawn = dict(self._savedparams)
-        iters = np.array(drawn.keys())
+        param_dict = dict(self._savedparams)
+        gradient_dict = dict(self._savedgradients)
+        kmm_dict = dict(self._savedpsiKmm)
+        iters = np.array(param_dict.keys())
         self.showing = 0
 
-        ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
+# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
+        figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
+        ax2 = self._debug_get_axis(figs)
         ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes, ha='center', va='center') - ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2) + figs[-1].canvas.draw() + figs[-1].tight_layout(rect=(0, 0, 1, .9)) +# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2) + figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4))) + ax3 = self._debug_get_axis(figs) ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes, ha='center', va='center') - ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2) + figs[-1].canvas.draw() + figs[-1].tight_layout(rect=(0, 0, 1, .9)) +# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2) + figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4))) + ax4 = self._debug_get_axis(figs) ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes, ha='center', va='center') - ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2) + figs[-1].canvas.draw() + figs[-1].tight_layout(rect=(0, 0, 1, .9)) +# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2) + figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4))) + ax5 = self._debug_get_axis(figs) ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes, ha='center', va='center') + figs[-1].canvas.draw() + figs[-1].tight_layout(rect=(0, 0, 1, .9)) + figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) + fig = figs[-1] + ax6 = fig.add_subplot(121) + ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes, + ha='center', va='center') + ax7 = fig.add_subplot(122) + ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes, + ha='center', va='center') - X, S, Z, theta = self._debug_filter_params(drawn[self.showing]) + X, S, Z, theta = self._debug_filter_params(param_dict[self.showing]) + Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing]) +# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag + + quiver_units = 'xy' + quiver_scale = 1 + quiver_scale_units = 'xy' Xlatentplts = ax2.plot(X, ls="-", marker="x") + colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4) + Ulatent = np.zeros_like(X) + xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1]) + Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors, + units=quiver_units, scale_units=quiver_scale_units, + scale=quiver_scale) + Slatentplts = ax3.plot(S, ls="-", marker="x") + Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors, + units=quiver_units, scale_units=quiver_scale_units, + scale=quiver_scale) + + xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1]) + UZ = np.zeros_like(Z) Zplts = ax4.plot(Z, ls="-", marker="x") - thetaplts = ax5.bar(np.arange(len(theta)) - .4, theta) + Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors, + units=quiver_units, scale_units=quiver_scale_units, + scale=quiver_scale) + + xtheta = np.arange(len(theta)) + Utheta = np.zeros_like(theta) + thetaplts = ax5.bar(xtheta - .4, theta, color=colors) + thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors, + units=quiver_units, scale_units=quiver_scale_units, + scale=quiver_scale, + edgecolors=('k',), linewidths=[1]) + pylab.setp(thetaplts, zorder=0) + pylab.setp(thetagrads, zorder=10) ax5.set_xticks(np.arange(len(theta))) ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17) - Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)], - loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15), + imkmm = ax6.imshow(kmm_dict[self.showing][0]) + from 
+        divider = make_axes_locatable(ax6)
+        caxkmm = divider.append_axes("right", "5%", pad="1%")
+        cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
+
+        imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
+        divider = make_axes_locatable(ax7)
+        caxkmmdl = divider.append_axes("right", "5%", pad="1%")
+        cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
+
+# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+#            loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
+#            borderaxespad=0, mode="expand")
+        ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   borderaxespad=0, mode="expand")
+        ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   borderaxespad=0, mode="expand")
+        ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   borderaxespad=0, mode="expand")
+        ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
                    borderaxespad=0, mode="expand")
         Lleg = ax1.legend()
         Lleg.draggable()
-        ax1.add_artist(Qleg)
+# ax1.add_artist(Qleg)
 
         indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
         indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
         indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
+        for err in self._savederrors:
+            ax1.plot(kllls[err, 0], kllls[err, 2], "*", c=KL.get_color())
+            ax1.plot(kllls[err, 0], kllls[err, 1] - kllls[err, 2], "*", c=LL.get_color())
+            ax1.plot(kllls[err, 0], kllls[err, 1], "*", c=L.get_color())
 
-        try:
-            pylab.draw()
-            pylab.tight_layout(box=(0, .1, 1, .9))
-        except:
-            pass
+# try:
+#     for f in figs:
+#         f.canvas.draw()
+#         f.tight_layout(box=(0, .15, 1, .9))
+# #     pylab.draw()
+# #     pylab.tight_layout(box=(0, .1, 1, .9))
+# except:
+#     pass
 
         # parameter changes
         # ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
-        def onclick(event):
-            if event.inaxes is ax1 and event.button == 1:
+        button_options = [0, 0] # [0]: clicked -- [1]: dragged
+
+        def update_plots(event):
+            if button_options[0] and not button_options[1]:
                 # event.button, event.x, event.y, event.xdata, event.ydata)
                 tmp = np.abs(iters - event.xdata)
                 closest_hit = iters[tmp == tmp.min()][0]
@@ -287,15 +394,37 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                 indicatorKL.set_data(self.showing, kllls[self.showing, 2])
                 indicatorL.set_data(self.showing, kllls[self.showing, 1])
 
-                X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
+                X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
+                Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
+# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
+
                 for i, Xlatent in enumerate(Xlatentplts):
                     Xlatent.set_ydata(X[:, i])
+                Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
+                Xlatentgrads.set_UVC(Ulatent, Xg)
+
                 for i, Slatent in enumerate(Slatentplts):
                     Slatent.set_ydata(S[:, i])
+                Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
+                Slatentgrads.set_UVC(Ulatent, Sg)
+
                 for i, Zlatent in enumerate(Zplts):
                     Zlatent.set_ydata(Z[:, i])
+                Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
+                Zgrads.set_UVC(UZ, Zg)
+
                 for p, t in zip(thetaplts, theta):
                     p.set_height(t)
+                thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
+                thetagrads.set_UVC(Utheta, thetag)
+
+                imkmm.set_data(kmm_dict[self.showing][0])
+                imkmm.autoscale()
+                cbarkmm.update_normal(imkmm)
+
+                imkmmdl.set_data(kmm_dict[self.showing][1])
+                imkmmdl.autoscale()
+                cbarkmmdl.update_normal(imkmmdl)
 
                 ax2.relim()
                 ax3.relim()
@@ -305,8 +434,20 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                 ax3.autoscale()
                 ax4.autoscale()
                 ax5.autoscale()
-                fig.canvas.draw()
-        cid = fig.canvas.mpl_connect('button_press_event', onclick)
+                [fig.canvas.draw() for fig in figs]
+                button_options[0] = 0
+                button_options[1] = 0
 
-        return ax1, ax2, ax3, ax4, ax5
+        def onclick(event):
+            if event.inaxes is ax1 and event.button == 1:
+                button_options[0] = 1
+        def motion(event):
+            if button_options[0]:
+                button_options[1] = 1
+
+        cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
+        cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
+        cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
+
+        return ax1, ax2, ax3, ax4, ax5, ax6, ax7
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 932690ec..0e0929c7 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -4,14 +4,14 @@ import numpy as np
 import GPy
 import scipy.sparse
 import scipy.io
-data_path = os.path.join(os.path.dirname(__file__),'datasets')
-default_seed =10000
+data_path = os.path.join(os.path.dirname(__file__), 'datasets')
+default_seed = 10000
 
 # Some general utilities.
 def sample_class(f):
-    p = 1./(1.+np.exp(-f))
-    c = np.random.binomial(1,p)
-    c = np.where(c,1,-1)
+    p = 1. / (1. + np.exp(-f))
+    c = np.random.binomial(1, p)
+    c = np.where(c, 1, -1)
     return c
 
 def della_gatta_TRP63_gene_expression(gene_number=None):
@@ -25,6 +25,15 @@
     Y = Y[:, None]
     return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."}
 
+def simulation_BGPLVM():
+    mat_data = scipy.io.loadmat(os.path.join(data_path, 'BGPLVMSimulation.mat'))
+    Y = np.array(mat_data['Y'], dtype=float)
+    S = np.array(mat_data['initS'], dtype=float)
+    mu = np.array(mat_data['initMu'], dtype=float)
+    return {'Y': Y, 'S': S,
+            'mu' : mu,
+            'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between Python and MATLAB"}
+
 # The data sets
 
 def oil():
@@ -32,7 +41,7 @@
     X = np.fromfile(fid, sep='\t').reshape((-1, 12))
     fid.close()
     fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt'))
-    Y = np.fromfile(fid, sep='\t').reshape((-1, 3))*2.-1.
+    Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1.
     fid.close()
     return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."}
 
@@ -74,9 +83,9 @@ def silhouette():
     inMean = np.mean(mat_data['Y'])
     inScales = np.sqrt(np.var(mat_data['Y']))
     X = mat_data['Y'] - inMean
-    X = X/inScales
+    X = X / inScales
     Xtest = mat_data['Y_test'] - inMean
-    Xtest = Xtest/inScales
+    Xtest = Xtest / inScales
     Y = mat_data['Z']
     Ytest = mat_data['Z_test']
     return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."}
@@ -102,13 +111,13 @@
     np.random.seed(seed=seed)
     numIn = 1
    N = 500
-    X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn))
+    X = np.random.uniform(low= -1.0, high=1.0, size=(N, numIn))
     X.sort(axis=0)
     rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
     white = GPy.kern.white(numIn, variance=1e-2)
     kernel = rbf + white
     K = kernel.K(X)
-    y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N,1))
+    y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N, 1))
     return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
 
 def toy_rbf_1d_50(seed=default_seed):
@@ -124,15 +133,15 @@ def toy_linear_1d_classification(seed=default_seed):
     np.random.seed(seed=seed)
-    x1 = np.random.normal(-3,5,20)
-    x2 = np.random.normal(3,5,20)
-    X = (np.r_[x1,x2])[:,None]
+    x1 = np.random.normal(-3, 5, 20)
+    x2 = np.random.normal(3, 5, 20)
+    X = (np.r_[x1, x2])[:, None]
     return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X}
 
 def rogers_girolami_olympics():
     olympic_data = scipy.io.loadmat(os.path.join(data_path, 'olympics.mat'))['male100']
     X = olympic_data[:, 0][:, None]
-    Y= olympic_data[:, 1][:, None]
+    Y = olympic_data[:, 1][:, None]
     return {'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}
 
 # def movielens_small(partNo=1,seed=default_seed):
 #     np.random.seed(seed=seed)
@@ -169,7 +178,7 @@
 
 
-def crescent_data(num_data=200,seed=default_seed):
+def crescent_data(num_data=200, seed=default_seed):
     """Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
 
     :param num_data_part: number of data to be sampled (default is 200).
     :type num_data: int
@@ -178,7 +187,7 @@
     np.random.seed(seed=seed)
     sqrt2 = np.sqrt(2)
     # Rotation matrix
-    R = np.array([[sqrt2/2, -sqrt2/2], [sqrt2/2, sqrt2/2]])
+    R = np.array([[sqrt2 / 2, -sqrt2 / 2], [sqrt2 / 2, sqrt2 / 2]])
     # Scaling matrices
     scales = []
     scales.append(np.array([[3, 0], [0, 1]]))
@@ -195,9 +204,9 @@
     num_data_part = []
     num_data_total = 0
     for i in range(0, 4):
-        num_data_part.append(round(((i+1)*num_data)/4.))
+        num_data_part.append(round(((i + 1) * num_data) / 4.))
         num_data_part[i] -= num_data_total
-        #print num_data_part[i]
+        # print num_data_part[i]
         part = np.random.normal(size=(num_data_part[i], 2))
         part = np.dot(np.dot(part, scales[i]), R) + means[i]
         Xparts.append(part)
@@ -205,7 +214,7 @@
 
     X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
 
-    Y = np.vstack((np.ones((num_data_part[0]+num_data_part[1], 1)), -np.ones((num_data_part[2]+num_data_part[3], 1))))
+    Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
 
     return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
 
@@ -214,6 +223,6 @@ def creep_data():
     y = all_data[:, 1:2].copy()
     features = [0]
     features.extend(range(2, 31))
-    X = all_data[:,features].copy()
+    X = all_data[:, features].copy()
     return {'X': X, 'y' : y}