diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index e8af74eb..3e24c89f 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -199,7 +199,7 @@ def student_t_fix_optimise_check():
 
     #GP
     kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
-    mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp)
+    mgp = GPy.models.GPRegression(X, Y.copy(), kernel=kernelgp)
     mgp.ensure_default_constraints()
     mgp.randomize()
     mgp.optimize()
@@ -212,7 +212,7 @@ def student_t_fix_optimise_check():
 
     plt.figure(1)
     plt.suptitle('Student likelihood')
-    m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood)
+    m = GPy.models.GPRegression(X, Y.copy(), kernelst, likelihood=stu_t_likelihood)
     m.constrain_fixed('rbf_var', mgp._get_params()[0])
     m.constrain_fixed('rbf_len', mgp._get_params()[1])
     m.constrain_positive('t_noise')
@@ -406,27 +406,29 @@ def student_t_approx():
     """
     real_std = 0.1
     #Start a function, any function
-    X = np.linspace(0.0, 10.0, 100)[:, None]
+    X = np.linspace(0.0, np.pi*2, 100)[:, None]
     Y = np.sin(X) + np.random.randn(*X.shape)*real_std
     Yc = Y.copy()
 
-    X_full = np.linspace(0.0, 10.0, 500)[:, None]
+    X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
     Y_full = np.sin(X_full)
 
     Y = Y/Y.max()
 
-    Yc[10] += 100
-    Yc[25] += 10
-    Yc[23] += 10
-    Yc[26] += 1000
-    Yc[24] += 10
+    Yc[75:80] += 1
+
+    #Yc[10] += 100
+    #Yc[25] += 10
+    #Yc[23] += 10
+    #Yc[26] += 1000
+    #Yc[24] += 10
     #Yc = Yc/Yc.max()
 
     #Add student t random noise to datapoints
     deg_free = 5
     print "Real noise: ", real_std
 
-    initial_var_guess = 0.1
+    initial_var_guess = 0.5
     #t_rv = t(deg_free, loc=0, scale=real_var)
     #noise = t_rvrvs(size=Y.shape)
     #Y += noise
@@ -650,16 +652,78 @@ def gaussian_f_check():
     import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
 
 def boston_example():
+    import sklearn
+    from sklearn.cross_validation import KFold
     data = datasets.boston_housing()
     X = data['X'].copy()
     Y = data['Y'].copy()
-    kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
-    mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp)
-    mgp.ensure_default_constraints()
-    mgp.randomize()
-    mgp.optimize()
-    mgp.plot()
-    import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
+    Y = Y-Y.mean()
+    Y = Y/Y.std()
+    num_folds = 2
+    kf = KFold(len(Y), n_folds=num_folds, indices=True)
+    score_folds = np.zeros((3, num_folds))
+    def rmse(Y, Ystar):
+        return np.sqrt(np.mean((Y-Ystar)**2))
+    #for train, test in kf:
+    for n, (train, test) in enumerate(kf):
+        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
+        print "Fold {}".format(n)
+
+        noise = np.exp(-2)
+
+        #Gaussian GP
+        print "Gauss GP"
+        kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
+        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp)
+        mgp.ensure_default_constraints()
+        mgp['noise'] = noise
+        mgp.optimize(messages=1)
+        Y_test_pred = mgp.predict(X_test)
+        score_folds[0, n] = rmse(Y_test, Y_test_pred[0])
+        plt.figure()
+        plt.scatter(X_test[:, 0], Y_test_pred[0])
+        plt.scatter(X_test[:, 0], Y_test, c='r', marker='x')
+        print score_folds
+        plt.title('GP gauss')
+
+        print "Gaussian Laplace GP"
+        sigma2_start = 1
+        kernelstu = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1], variance=0.01)
+        N, D = Y_train.shape
+        g_distribution = GPy.likelihoods.functions.Gaussian(variance=noise, N=N, D=D)
+        g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution, opt='rasm')
+        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood)
+        mg.ensure_default_constraints()
+        mg.constrain_positive('noise_variance')
+        mg.optimize(messages=1)
+        Y_test_pred = mg.predict(X_test)
+        score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
+        print score_folds
+        plt.figure()
+        plt.scatter(X_test[:, 0], Y_test_pred[0])
+        plt.scatter(X_test[:, 0], Y_test, c='r', marker='x')
+        plt.title('Lap gauss')
+
+        #Student t likelihood
+        print "Student-T GP"
+        deg_free = 5
+        kernelstu = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1], variance=0.01)
+        t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=noise)
+        stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm')
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t.ensure_default_constraints()
+        #mstu_t.constrain_positive('t_noise')
+        mstu_t.constrain_bounded('t_noise', 0.01, 1000)
+        mstu_t.optimize(messages=1)
+        Y_test_pred = mstu_t.predict(X_test)
+        score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
+        print score_folds
+        plt.figure()
+        plt.scatter(X_test[:, 0], Y_test_pred[0])
+        plt.scatter(X_test[:, 0], Y_test, c='r', marker='x')
+        plt.title('Stu t')
+        import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
+
 
 def plot_f_approx(model):
     plt.figure()
diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py
index f8569c52..5c9362ab 100644
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@@ -291,7 +291,7 @@ class Laplace(likelihood):
         f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False)
         return f_hat[:, None]
 
-    def rasm_mode(self, K, MAX_ITER=200, MAX_RESTART=10):
+    def rasm_mode(self, K, MAX_ITER=100, MAX_RESTART=10):
         """
         Rasmussen's numerically stable mode finding
         For nomenclature see Rasmussen & Williams 2006
@@ -320,7 +320,7 @@ class Laplace(likelihood):
             return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data)
 
         difference = np.inf
-        epsilon = 1e-10
+        epsilon = 1e-6
         step_size = 1
         rs = 0
         i = 0
@@ -330,7 +330,7 @@ class Laplace(likelihood):
             #W = np.maximum(W, 0)
             if not self.likelihood_function.log_concave:
                 #print "Under 1e-10: {}".format(np.sum(W < 1e-10))
-                W[W < 1e-10] = 1e-10     # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+                W[W < 1e-6] = 1e-6     # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                     # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                     # To cause the posterior to become less certain than the prior and likelihood,
                                     # This is a property only held by non-log-concave likelihoods
@@ -355,7 +355,7 @@ class Laplace(likelihood):
 
             i_o = partial(inner_obj, old_a=old_a, da=da, K=K)
             #new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=20)
-            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-6, options={'maxiter':20, 'disp':True}).fun
+            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':20}).fun
             f = self.f.copy()
             a = self.a.copy()