diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index f9aaddd1..8637cc35 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -87,18 +87,22 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
 
-    bern_noise_model = GPy.likelihoods.bernoulli()
-    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
+    likelihood = GPy.likelihoods.Bernoulli()
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
+    kernel = GPy.kern.rbf(1)
 
     # Model definition
-    m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
-    print m
+    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
 
     # Optimize
     if optimize:
         #m.update_likelihood_approximation()
         # Parameters optimization:
-        m.optimize('bfgs', messages=1)
+        try:
+            m.optimize('scg', messages=1)
+        except Exception as e:
+            return m
+
         #m.pseudo_EM()
 
     # Plot
diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py
index c89f771b..b114b8b9 100644
--- a/GPy/inference/latent_function_inference/__init__.py
+++ b/GPy/inference/latent_function_inference/__init__.py
@@ -9,12 +9,12 @@ prior over a finite set of points f. This prior is
 where K is the kernel matrix.
 
 We also have a likelihood (see GPy.likelihoods) which defines how the data are
-related to the latent function: p(y | f).  If the likelihood is also a Gaussian, 
-the inference over f is tractable (see exact_gaussian_inference.py). 
+related to the latent function: p(y | f).  If the likelihood is also a Gaussian,
+the inference over f is tractable (see exact_gaussian_inference.py).
 
 If the likelihood object is something other than Gaussian, then exact inference
 is not tractable. We then resort to a Laplace approximation (laplace.py) or
-expectation propagation (ep.py). 
+expectation propagation (ep.py).
 
 The inference methods return a "Posterior" instance, which is a simple
 structure which contains a summary of the posterior. The model classes can then
@@ -24,7 +24,7 @@ etc.
 """
 
 from exact_gaussian_inference import ExactGaussianInference
-from laplace import LaplaceInference
+from laplace import Laplace
 expectation_propagation = 'foo' # TODO
 from dtc import DTC
 from fitc import FITC
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index 4edb9a1d..1b7bdad8 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -17,7 +17,7 @@ from posterior import Posterior
 import warnings
 from scipy import optimize
 
-class LaplaceInference(object):
+class Laplace(object):
 
     def __init__(self):
         """
@@ -52,6 +52,7 @@ class LaplaceInference(object):
 
         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
 
+        self.f_hat = f_hat
         #Compute hessian and other variables at mode
         log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
 
diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py
index 00626cd3..388ce173 100644
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@@ -116,7 +116,8 @@ class Bernoulli(Likelihood):
             Each y_i must be in {0, 1}
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        #objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        objective = np.where(y, link_f, 1.-link_f)
         return np.exp(np.sum(np.log(objective)))
 
     def logpdf_link(self, link_f, y, extra_data=None):
@@ -136,7 +137,9 @@ class Bernoulli(Likelihood):
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         #objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
+        state = np.seterr(divide='ignore')
         objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
+        np.seterr(**state)
         return np.sum(objective)
 
     def dlogpdf_dlink(self, link_f, y, extra_data=None):
@@ -155,7 +158,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        grad = (y/link_f) - (1.-y)/(1-link_f)
+        #grad = (y/link_f) - (1.-y)/(1-link_f)
+        state = np.seterr(divide='ignore')
+        grad = np.where(y, 1./link_f, -1./(1-link_f))
+        np.seterr(**state)
         return grad
 
     def d2logpdf_dlink2(self, link_f, y, extra_data=None):
@@ -180,7 +186,10 @@ class Bernoulli(Likelihood):
             (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        state = np.seterr(divide='ignore')
+        d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
+        np.seterr(**state)
         return d2logpdf_dlink2
 
     def d3logpdf_dlink3(self, link_f, y, extra_data=None):
@@ -199,7 +208,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        state = np.seterr(divide='ignore')
+        d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
+        np.seterr(**state)
         return d3logpdf_dlink3
 
     def samples(self, gp):
diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py
index 9920d648..458831a0 100644
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@@ -516,9 +516,8 @@ class TestNoiseModels(object):
         Y = Y/Y.max()
         white_var = 1e-6
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)
 
         #Set constraints
@@ -555,7 +554,6 @@ class TestNoiseModels(object):
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         ep_inf = GPy.inference.latent_function_inference.EP()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)
 
         for param_num in range(len(param_names)):
@@ -644,13 +642,11 @@ class LaplaceTests(unittest.TestCase):
         m1['variance'] = initial_var_guess
         m1['variance'].constrain_bounded(1e-4, 10)
         m1['rbf'].constrain_bounded(1e-4, 10)
-        m1.ensure_default_constraints()
         m1.randomize()
 
         gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
-        laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_inf = GPy.inference.latent_function_inference.Laplace()
         m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
-        m2.ensure_default_constraints()
         m2['white'].constrain_fixed(1e-6)
         m2['rbf'].constrain_bounded(1e-4, 10)
         m2['variance'].constrain_bounded(1e-4, 10)