diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 03b041f1..97601b0f 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -176,13 +176,12 @@ def bgplvm_simulation_matlab_compare():
     Y = sim_data['Y']
     S = sim_data['S']
     mu = sim_data['mu']
-    M, [_, Q] = 30, mu.shape
-    Q = 2
+    M, [_, Q] = 20, mu.shape
     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)
-    #k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    # k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
     k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
                        # X=mu,
@@ -191,14 +190,11 @@ def bgplvm_simulation_matlab_compare():
     m.ensure_default_constraints()
     m.auto_scale_factor = True
     m['noise'] = Y.var() / 100.
+    m['linear_variance'] = .01
-    lscstr = '{}'.format(k.parts[0].name)
+#     lscstr = 'X_variance'
 #     m[lscstr] = .01
-    m.unconstrain(lscstr); m.constrain_fixed(lscstr, 10)
-
-    lscstr = 'X_variance'
-#     m[lscstr] = .01
-    m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1)
+#     m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1)
 
 #     cstr = 'white'
 #     m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
diff --git a/GPy/inference/conjugate_gradient_descent.py b/GPy/inference/conjugate_gradient_descent.py
index 93dac6df..c9981fe8 100644
--- a/GPy/inference/conjugate_gradient_descent.py
+++ b/GPy/inference/conjugate_gradient_descent.py
@@ -63,14 +63,15 @@ class _Async_Optimization(Thread):
         return f_w
 
     def callback(self, *a):
-        self.outq.put(a)
+        if self.outq is not None:
+            self.outq.put(a)
 #         self.parent and self.parent.callback(*a, **kw)
         pass
 #         print "callback done"
 
     def callback_return(self, *a):
         self.callback(*a)
-        self.outq.put(self.SENTINEL)
+        self.callback(self.SENTINEL)
         self.runsignal.clear()
 
     def run(self, *args, **kwargs):
@@ -166,25 +167,27 @@ class Async_Optimize(object):
             except Empty:
                 pass
 
-    def fmin_async(self, f, df, x0, callback, update_rule=FletcherReeves,
+    def opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
                  messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                  report_every=10, *args, **kwargs):
         self.runsignal.set()
-        outqueue = Queue()
+        c = None
+        outqueue = None
         if callback:
+            outqueue = Queue()
             self.callback = callback
-        c = Thread(target=self.async_callback_collect, args=(outqueue,))
-        c.start()
+            c = Thread(target=self.async_callback_collect, args=(outqueue,))
+            c.start()
         p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
                       report_every=report_every, messages=messages,
                       maxiter=maxiter, max_f_eval=max_f_eval, gtol=gtol,
                       outqueue=outqueue, *args, **kwargs)
-        p.run()
+        p.start()
         return p, c
 
-    def fmin(self, f, df, x0, callback=None, update_rule=FletcherReeves,
+    def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
              messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
              report_every=10, *args, **kwargs):
-        p, c = self.fmin_async(f, df, x0, callback, update_rule, messages,
+        p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
                                maxiter, max_f_eval, gtol, report_every,
                                *args, **kwargs)
         while self.runsignal.is_set():
@@ -195,7 +198,8 @@ class Async_Optimize(object):
 #             print "^C"
             self.runsignal.clear()
             p.join()
-        if c.is_alive():
+        if c is not None: c.join()
+        if c and c.is_alive():
             print "WARNING: callback still running, optimisation done!"
         return p.result
 
@@ -208,11 +212,11 @@ class CGD(Async_Optimize):
     if df returns tuple (grad, natgrad) it
     will optimize according to natural gradient rules
     '''
-    name = "Conjugate Gradient Descent"
+    opt_name = "Conjugate Gradient Descent"
 
-    def fmin_async(self, *a, **kw):
+    def opt_async(self, *a, **kw):
         """
-        fmin_async(self, f, df, x0, callback, update_rule=FletcherReeves,
+        opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
                  messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                  report_every=10, *args, **kwargs)
 
@@ -240,11 +244,11 @@ class CGD(Async_Optimize):
             at end of optimization!
         """
-        return super(CGD, self).fmin_async(*a, **kw)
+        return super(CGD, self).opt_async(*a, **kw)
 
-    def fmin(self, *a, **kw):
+    def opt(self, *a, **kw):
         """
-        fmin(self, f, df, x0, callback=None, update_rule=FletcherReeves,
+        opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
             messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
             report_every=10, *args, **kwargs)
 
@@ -267,5 +271,5 @@ class CGD(Async_Optimize):
             at end of optimization
         """
-        return super(CGD, self).fmin(*a, **kw)
+        return super(CGD, self).opt(*a, **kw)
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index b824bdfe..fc7e4ba9 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -96,9 +96,9 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             print "\rWARNING: Caught LinAlgError, continueing without setting "
             if self._debug:
                 self._savederrors.append(self.f_call)
-#             if save_count > 10:
-#                 raise
-#             self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
+            if save_count > 10:
+                raise
+            self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
 
     def dKL_dmuS(self):
         dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@@ -259,28 +259,28 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
                  ha='center', va='center')
         figs[-1].canvas.draw()
-        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+        figs[-1].tight_layout(rect=(0, 0, 1, .86))
 
 #         ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
         figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
         ax3 = self._debug_get_axis(figs)
         ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
                  ha='center', va='center')
         figs[-1].canvas.draw()
-        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+        figs[-1].tight_layout(rect=(0, 0, 1, .86))
 
 #         ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
         figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
         ax4 = self._debug_get_axis(figs)
         ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
                  ha='center', va='center')
         figs[-1].canvas.draw()
-        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+        figs[-1].tight_layout(rect=(0, 0, 1, .86))
 
 #         ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
         figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
         ax5 = self._debug_get_axis(figs)
         ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
                  ha='center', va='center')
         figs[-1].canvas.draw()
-        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+        figs[-1].tight_layout(rect=(.15, 0, 1, .86))
 
         figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
         fig = figs[-1]
         ax6 = fig.add_subplot(121)
@@ -345,16 +345,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 #                    loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
 #                    borderaxespad=0, mode="expand")
         ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
-                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
                    borderaxespad=0, mode="expand")
         ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
-                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
                    borderaxespad=0, mode="expand")
         ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
-                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
                    borderaxespad=0, mode="expand")
         ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
-                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
                    borderaxespad=0, mode="expand")
         Lleg = ax1.legend()
         Lleg.draggable()
diff --git a/GPy/testing/cgd_tests.py b/GPy/testing/cgd_tests.py
index ecd6f829..79e5e08b 100644
--- a/GPy/testing/cgd_tests.py
+++ b/GPy/testing/cgd_tests.py
@@ -14,7 +14,7 @@ from scipy.optimize.optimize import rosen, rosen_der
 class Test(unittest.TestCase):
 
     def testMinimizeSquare(self):
-        N = 2
+        N = 100
         A = numpy.random.rand(N) * numpy.eye(N)
         b = numpy.random.rand(N) * 0
         f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
@@ -25,8 +25,8 @@ class Test(unittest.TestCase):
         restarts = 10
         for _ in range(restarts):
             try:
-                x0 = numpy.random.randn(N) * .5
-                res = opt.fmin(f, df, x0, messages=0,
+                x0 = numpy.random.randn(N) * 300
+                res = opt.opt(f, df, x0, messages=0,
                                maxiter=1000, gtol=1e-10)
                 assert numpy.allclose(res[0], 0, atol=1e-3)
                 break
@@ -37,10 +37,9 @@ class Test(unittest.TestCase):
             raise AssertionError("Test failed for {} restarts".format(restarts))
 
     def testRosen(self):
-        N = 2
+        N = 20
         f = rosen
         df = rosen_der
-        x0 = numpy.random.randn(N) * .5
 
         opt = CGD()
 
@@ -48,9 +47,9 @@ class Test(unittest.TestCase):
         for _ in range(restarts):
             try:
                 x0 = numpy.random.randn(N) * .5
-                res = opt.fmin(f, df, x0, messages=0,
-                               maxiter=1000, gtol=1e-2)
-                assert numpy.allclose(res[0], 1, atol=.01)
+                res = opt.opt(f, df, x0, messages=0,
+                              maxiter=5e2, gtol=1e-2)
+                assert numpy.allclose(res[0], 1, atol=.1)
                 break
             except:
                 # RESTART
@@ -103,7 +102,7 @@ if __name__ == "__main__":
             if r[-1] != RUNNING:
                 res[0] = r
 
-    p, c = opt.fmin_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
+    p, c = opt.opt_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
                           report_every=20, gtol=1e-12)
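
---

Usage note: this patch renames CGD.fmin/fmin_async to CGD.opt/opt_async, starts the optimiser on its own thread (p.start() instead of the blocking p.run()), and only creates the callback-collector thread when a callback is supplied (otherwise c is None). Below is a minimal sketch of the renamed API, mirroring how cgd_tests.py drives it; the import path for CGD is assumed from the file touched above, and only res[0] (the minimiser) is relied on, as in the tests:

    import numpy
    from scipy.optimize.optimize import rosen, rosen_der
    from GPy.inference.conjugate_gradient_descent import CGD  # assumed import path

    opt = CGD()

    # Synchronous: opt() (formerly fmin) blocks until convergence and
    # returns p.result; the tests treat res[0] as the found minimiser.
    x0 = numpy.random.randn(20) * .5
    res = opt.opt(rosen, rosen_der, x0, messages=0, maxiter=5e2, gtol=1e-2)
    print res[0]

    # Asynchronous: opt_async() (formerly fmin_async) returns the optimiser
    # thread p and the collector thread c (None when no callback is given).
    def callback(*r):
        print r

    p, c = opt.opt_async(rosen, rosen_der, x0.copy(), callback,
                         messages=True, maxiter=1000, report_every=20, gtol=1e-12)
    p.join()            # wait for the optimiser thread to finish
    if c is not None:
        c.join()        # collector thread exists only if a callback was passed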