diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index ea3a9f8e..2f163583 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -193,6 +193,8 @@ def gaussian_f_check():
 def boston_example():
     import sklearn
     from sklearn.cross_validation import KFold
+    optimizer='bfgs'
+    messages=0
     data = datasets.boston_housing()
     X = data['X'].copy()
     Y = data['Y'].copy()
@@ -200,9 +202,9 @@ def boston_example():
     X = X/X.std(axis=0)
     Y = Y-Y.mean()
     Y = Y/Y.std()
-    num_folds = 10
+    num_folds = 30
     kf = KFold(len(Y), n_folds=num_folds, indices=True)
-    score_folds = np.zeros((6, num_folds))
+    score_folds = np.zeros((7, num_folds))
     def rmse(Y, Ystar):
         return np.sqrt(np.mean((Y-Ystar)**2))
     for n, (train, test) in enumerate(kf):
@@ -212,18 +214,19 @@ def boston_example():
         noise = 1e-1 #np.exp(-2)
         rbf_len = 0.5
         data_axis_plot = 4
-        plot = True
+        plot = False
+        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
+        kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
 
         #Gaussian GP
         print "Gauss GP"
-        kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp)
+        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
         mgp.ensure_default_constraints()
         mgp.constrain_fixed('white', 1e-5)
         mgp['rbf_len'] = rbf_len
         mgp['noise'] = noise
         print mgp
-        mgp.optimize(messages=1)
+        mgp.optimize(optimizer=optimizer,messages=messages)
         Y_test_pred = mgp.predict(X_test)
         score_folds[0, n] = rmse(Y_test, Y_test_pred[0])
         print mgp
@@ -235,11 +238,10 @@ def boston_example():
             plt.title('GP gauss')
 
         print "Gaussian Laplace GP"
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         N, D = Y_train.shape
         g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
         g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
-        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood)
+        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
         mg.ensure_default_constraints()
         mg.constrain_positive('noise_variance')
         mg.constrain_fixed('white', 1e-5)
@@ -247,7 +249,7 @@ def boston_example():
         mg['noise'] = noise
         print mg
         try:
-            mg.optimize(messages=1)
+            mg.optimize(optimizer=optimizer, messages=messages)
         except Exception:
             print "Blew up"
         Y_test_pred = mg.predict(X_test)
@@ -263,10 +265,9 @@ def boston_example():
         #Student T
         deg_free = 1
         print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
         stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
         mstu_t.ensure_default_constraints()
         mstu_t.constrain_fixed('white', 1e-5)
         mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@@ -274,7 +275,7 @@ def boston_example():
         mstu_t['t_noise'] = noise
         print mstu_t
         try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
         except Exception:
             print "Blew up"
         Y_test_pred = mstu_t.predict(X_test)
@@ -287,12 +288,11 @@ def boston_example():
             plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
             plt.title('Stu t {}df'.format(deg_free))
 
-        deg_free = 2
+        deg_free = 8
         print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
         stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
         mstu_t.ensure_default_constraints()
         mstu_t.constrain_fixed('white', 1e-5)
         mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@@ -300,7 +300,7 @@ def boston_example():
         mstu_t['t_noise'] = noise
         print mstu_t
         try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
         except Exception:
             print "Blew up"
         Y_test_pred = mstu_t.predict(X_test)
@@ -316,10 +316,9 @@ def boston_example():
         #Student t likelihood
         deg_free = 3
         print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
         stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
         mstu_t.ensure_default_constraints()
         mstu_t.constrain_fixed('white', 1e-5)
         mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@@ -327,7 +326,7 @@ def boston_example():
         mstu_t['t_noise'] = noise
         print mstu_t
         try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
         except Exception:
             print "Blew up"
         Y_test_pred = mstu_t.predict(X_test)
@@ -342,10 +341,9 @@ def boston_example():
 
         deg_free = 5
         print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
         stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
         mstu_t.ensure_default_constraints()
         mstu_t.constrain_fixed('white', 1e-5)
         mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@@ -353,7 +351,7 @@ def boston_example():
         mstu_t['t_noise'] = noise
         print mstu_t
         try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
         except Exception:
             print "Blew up"
         Y_test_pred = mstu_t.predict(X_test)
@@ -366,9 +364,10 @@ def boston_example():
             plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
             plt.title('Stu t {}df'.format(deg_free))
 
+        score_folds[6, n] = rmse(Y_test, np.mean(Y_train))
 
 
-
+    print "Average scores: {}".format(np.mean(score_folds, 1))
     import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
     return score_folds
 
diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py
index e6ffd78c..05b4ff02 100644
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@@ -301,9 +301,9 @@ class Laplace(likelihood):
             return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
 
         difference = np.inf
-        epsilon = 1e-6
-        step_size = 1
-        rs = 0
+        epsilon = 1e-5
+        #step_size = 1
+        #rs = 0
         i = 0
 
         while difference > epsilon and i < MAX_ITER:
@@ -330,7 +330,9 @@ class Laplace(likelihood):
 
             i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
             #Find the stepsize that minimizes the objective function using a brent line search
-            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':30}).fun
+            #The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
+            #steps than get this exact then make a step, if B was bigger it might be the other way around though
+            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
             f = self.f.copy()
             Ki_f = self.Ki_f.copy()
 
diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py
index 9a3dfd16..fff5dcac 100644
--- a/GPy/testing/likelihoods_tests.py
+++ b/GPy/testing/likelihoods_tests.py
@@ -30,9 +30,9 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
     checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
     However if we are holding other parameters fixed and moving something else
     We need to check the gradient of each of the fixed parameters
-    (f and y for example) seperately.
-    Whilst moving another parameter. otherwise f: gives back R^N and
-    df: gives back R^NxM where M is
+    (f and y for example) seperately,  whilst moving another parameter.
+    Otherwise f: gives back R^N and
+              df: gives back R^NxM where M is
     The number of parameters and N is the number of data
     Need to take a slice out from f and a slice out of df
     """
@@ -48,6 +48,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
             #dlik and dlik_dvar gives back 1 value for each
             f_ind = min(fnum, fixed_val+1) - 1
             print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
+            #Make grad checker with this param moving, note that set_params is NOT being called
+            #The parameter is being set directly with __setattr__
             grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
                                    lambda x : np.atleast_1d(partial_df(x))[fixed_val],
                                    param, 'p')
@@ -57,8 +59,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
                     constraint('p', grad)
             if randomize:
                 grad.randomize()
-            print grad
             if verbose:
+                print grad
                 grad.checkgrad(verbose=1)
             if not grad.checkgrad():
                 gradchecking = False
@@ -122,6 +124,7 @@ class TestNoiseModels(object):
                     "constrain": [constraint_wrappers, listed_here]
                     },
                 "laplace": boolean_of_whether_model_should_work_for_laplace,
+                "ep": boolean_of_whether_model_should_work_for_laplace,
                 "link_f_constraints": [constraint_wrappers, listed_here]
                 }
         """
@@ -177,7 +180,8 @@ class TestNoiseModels(object):
                                 "vals": [self.var],
                                 "constraints": [constrain_positive]
                                 },
-                            "laplace": True
+                            "laplace": True,
+                            "ep": True
                             },
                         "Gaussian_log": {
                             "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
@@ -211,6 +215,7 @@ class TestNoiseModels(object):
                             "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
                             "laplace": True,
                             "Y": self.binary_Y,
+                            "ep": True
                         }
                     }
 
@@ -238,7 +243,14 @@ class TestNoiseModels(object):
                 f = attributes["f"].copy()
             else:
                 f = self.f.copy()
-            laplace = attributes["laplace"]
+            if "laplace" in attributes:
+                laplace = attributes["laplace"]
+            else:
+                laplace = False
+            if "ep" in attributes:
+                ep = attributes["ep"]
+            else:
+                ep = False
 
             if len(param_vals) > 1:
                 raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@@ -266,6 +278,10 @@ class TestNoiseModels(object):
 
                 #laplace likelihood gradcheck
                 yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
+            if ep:
+                #ep likelihood gradcheck
+                yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
+
 
         self.tearDown()
 
@@ -321,7 +337,6 @@ class TestNoiseModels(object):
     def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
         print "\n{}".format(inspect.stack()[0][3])
         print model
-        print param_constraints
         assert (
                 dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
                     params, args=(f, Y), constraints=param_constraints,
@@ -332,7 +347,6 @@ class TestNoiseModels(object):
     def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
         print "\n{}".format(inspect.stack()[0][3])
         print model
-        print param_constraints
         assert (
                 dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
                     params, args=(f, Y), constraints=param_constraints,
@@ -343,7 +357,6 @@ class TestNoiseModels(object):
     def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
         print "\n{}".format(inspect.stack()[0][3])
         print model
-        #print param_constraints
         assert (
                 dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
                     params, args=(f, Y), constraints=param_constraints,
@@ -459,6 +472,31 @@ class TestNoiseModels(object):
         print m
         assert m.checkgrad(step=step)
 
+    ###########
+    # EP test #
+    ###########
+    @with_setup(setUp, tearDown)
+    def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
+        print "\n{}".format(inspect.stack()[0][3])
+        #Normalize
+        Y = Y/Y.max()
+        white_var = 0.001
+        kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+        ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
+        m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
+        m.ensure_default_constraints()
+        m.constrain_fixed('white', white_var)
+
+        for param_num in range(len(param_names)):
+            name = param_names[param_num]
+            m[name] = param_vals[param_num]
+            constraints[param_num](name, m)
+
+        m.randomize()
+        m.checkgrad(verbose=1, step=step)
+        print m
+        assert m.checkgrad(step=step)
+
 
 class LaplaceTests(unittest.TestCase):
     """