diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index b3048f5a..279bc597 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -1,6 +1,7 @@
 import GPy
 import numpy as np
 import matplotlib.pyplot as plt
+np.random.seed(1)
 
 def timing():
     real_var = 0.1
@@ -86,17 +87,67 @@ def v_fail_test():
 
 def student_t_f_check():
     plt.close('all')
-    real_std = 0.1
-    X = np.random.rand(100)[:, None]
+    X = np.linspace(0, 1, 50)[:, None]
+    real_std = 0.001
+    noise = np.random.randn(*X.shape)*real_std
+    Y = np.sin(X*2*np.pi) + noise
+    deg_free = 1000
+
+    kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
+    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp.ensure_default_constraints()
+    mgp.randomize()
+    mgp.optimize()
+    print mgp
+    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+
+    kernelst = kernelgp.copy()
+    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=1e-5)
+    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
+    m = GPy.models.GP(X, stu_t_likelihood, kernelst)
+    m['rbf_v'] = mgp._get_params()[0]
+    m['rbf_l'] = mgp._get_params()[1] + 1
+    m.ensure_default_constraints()
+    m.constrain_positive('t_no')
+    print m
+    plt.figure()
+    plt.subplot(511)
+    m.plot()
+    print m
+    plt.subplot(512)
+    m.optimize(max_f_eval=15)
+    m.plot()
+    print m
+    plt.subplot(513)
+    m.optimize(max_f_eval=15)
+    m.plot()
+    print m
+    plt.subplot(514)
+    m.optimize(max_f_eval=15)
+    m.plot()
+    print m
+    plt.subplot(515)
+    m.optimize()
+    m.plot()
+    print "final optimised student t"
+    print m
+    print "real GP"
+    print mgp
+
+def student_t_fix_optimise_check():
+    plt.close('all')
+    real_var = 0.1
+    real_std = np.sqrt(real_var)
+    X = np.random.rand(200)[:, None]
     noise = np.random.randn(*X.shape)*real_std
     Y = np.sin(X*2*np.pi) + noise
     X_full = X
     Y_full = np.sin(X_full)
     #Y = Y/Y.max()
-    deg_free = 10000
+    deg_free = 1000
 
     #GP
-    kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
+    kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
     mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
     mgp.ensure_default_constraints()
     mgp.randomize()
@@ -113,10 +164,12 @@ def student_t_f_check():
     m = GPy.models.GP(X, stu_t_likelihood, kernelst)
     m.constrain_fixed('rbf_var', mgp._get_params()[0])
     m.constrain_fixed('rbf_len', mgp._get_params()[1])
+    m.constrain_positive('t_noise')
+    #m.ensure_default_constraints()
 
     m.update_likelihood_approximation()
     print "T std2 {} converted from original data, LL: {}".format(real_stu_t_std2, m.log_likelihood())
-    plt.subplot(221)
+    plt.subplot(231)
     m.plot()
     plt.title('Student t original data noise')
 
@@ -125,7 +178,7 @@ def student_t_f_check():
     m['t_noise_std2'] = gp_noise
     m.update_likelihood_approximation()
     print "T std2 {} same as GP noise, LL: {}".format(gp_noise, m.log_likelihood())
-    plt.subplot(222)
+    plt.subplot(232)
     m.plot()
     plt.title('Student t GP noise')
 
@@ -134,29 +187,57 @@ def student_t_f_check():
     m['t_noise_std2'] = real_stu_t_std2gp
     m.update_likelihood_approximation()
     print "T std2 {} converted to student t noise from GP noise, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.log_likelihood())
-    plt.subplot(223)
+    plt.subplot(233)
     m.plot()
     plt.title('Student t GP noise converted')
 
     m.constrain_positive('t_noise_std2')
     m.randomize()
     m.update_likelihood_approximation()
+    plt.subplot(234)
+    m.plot()
+    plt.title('Student t fixed rbf')
     m.optimize()
     print "T std2 {} var {} after optimising, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.likelihood.likelihood_function.variance, m.log_likelihood())
-    plt.subplot(224)
+    plt.subplot(235)
     m.plot()
-    plt.title('Student t optimised')
+    plt.title('Student t fixed rbf optimised')
 
     plt.figure(2)
+    mrbf = m.copy()
+    mrbf.unconstrain('')
+    mrbf.constrain_fixed('t_noise', m.likelihood.likelihood_function.sigma2)
+    gp_var = mgp._get_params()[0]
+    gp_len = mgp._get_params()[1]
+    mrbf.constrain_fixed('rbf_var', gp_var)
+    mrbf.constrain_positive('rbf_len')
+    mrbf.randomize()
+    print "Before optimize"
+    print mrbf
+    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+    mrbf.checkgrad(verbose=1)
+    plt.subplot(121)
+    mrbf.plot()
+    plt.title('Student t fixed noise')
+    #mrbf.optimize()
+    print "After optimize"
+    print mrbf
+    plt.subplot(122)
+    mrbf.plot()
+    plt.title('Student t fixed noise optimized')
+    print mrbf
+
+    plt.figure(3)
     print "GP noise {} after optimising, LL: {}".format(gp_noise, mgp.log_likelihood())
     plt.suptitle('Gaussian likelihood optimised')
     mgp.plot()
     print "Real std: {}".format(real_std)
     print "Real variance {}".format(real_std**2)
 
-    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+    #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
 
-    return m
+    print "Len should be: {}".format(gp_len)
+    return mrbf
 
 def debug_student_t_noise_approx():
     plot = False
diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py
index c5894ed6..5343f5dc 100644
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@@ -89,7 +89,7 @@ class Laplace(likelihood):
         expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
         dL_dthetaK_exp = dK_dthetaK(expl, X)
         dL_dthetaK_imp = dK_dthetaK(impl, X)
-        #print "dL_dthetaK_exp: {}     dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
+        print "dL_dthetaK_exp: {}     dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
         dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
         return dL_dthetaK
 
@@ -290,10 +290,12 @@ class Laplace(likelihood):
         :MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation
         :returns: f_mode
         """
-        if self.old_a is None:
-            old_a = np.zeros((self.N, 1))
-        else:
-            old_a = self.old_a
+        old_a = np.zeros((self.N, 1))
+        #old_a = None
+        #if self.old_a is None:
+            #old_a = np.zeros((self.N, 1))
+        #else:
+            #old_a = self.old_a
 
         f = np.dot(self.K, old_a)
         self.f = f
@@ -308,8 +310,6 @@ class Laplace(likelihood):
         step_size = 1
         rs = 0
         i = 0
-        #if self.likelihood_function.sigma < 0.001:
-            #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
         while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
             W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
             if not self.likelihood_function.log_concave:
@@ -371,8 +371,10 @@ class Laplace(likelihood):
             old_a = self.a #a
             i += 1
 
+        self.old_a = old_a
         #print "Positive difference obj: ", np.float(difference)
-        print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
+        #print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
+        print "Iterations: {}, Final_difference: {}".format(i, difference)
         #self.a = a
         #self.B, self.B_chol, self.W_12 = B, L, W_12
         #self.Bi, _, _, B_det = pdinv(self.B)
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index cd4b7dac..0f56e21c 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -132,7 +132,11 @@ class GP(model):
         model for a new variable Y* = v_tilde/tau_tilde, with a covariance
         matrix K* = K + diag(1./tau_tilde) plus a normalization term.
         """
+        if isinstance(self.likelihood, Laplace):
+            self.likelihood.fit_full(self.kern.K(self.X))
+            self.likelihood._set_params(self.likelihood._get_params())
         l = -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
+        print "K_ldet: {} mft: {} Z: {}".format(self.K_logdet, self._model_fit_term(), self.likelihood.Z)
         return l
 
     def _log_likelihood_gradients(self):
@@ -142,12 +146,12 @@ class GP(model):
         Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
         """
         dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X)
-        #print "dL_dthetaK should be: ", dL_dthetaK
+        print "dL_dthetaK should be: ", dL_dthetaK
         if isinstance(self.likelihood, Laplace):
-            #self.likelihood.fit_full(self.kern.K(self.X))
-            #self.likelihood._set_params(self.likelihood._get_params())
+            self.likelihood.fit_full(self.kern.K(self.X))
+            self.likelihood._set_params(self.likelihood._get_params())
             dK_dthetaK = self.kern.dK_dtheta
-            dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X)
+            dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X.copy())
             dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
         else:
             dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))