diff --git a/python/examples/laplace_approximations.py b/python/examples/laplace_approximations.py
index 6f2b19aa..5fb39e08 100644
--- a/python/examples/laplace_approximations.py
+++ b/python/examples/laplace_approximations.py
@@ -11,15 +11,22 @@ def student_t_approx():
     Example of regressing with a student t likelihood
     """
     #Start a function, any function
-    X = np.sort(np.random.uniform(0, 15, 100))[:, None]
-    Y = np.sin(X)
+    X = np.linspace(0.0, 10.0, 100)[:, None]
+    Y = np.sin(X) + np.random.randn(*X.shape)*0.1
+    Yc = Y.copy()
+
+    Y = Y/Y.max()
+
+    Yc[10] += 5
+    Yc[15] += 20
+    Yc = Yc/Yc.max()
     #Add student t random noise to datapoints
-    deg_free = 100000.5
-    real_var = 4
-    t_rv = t(deg_free, loc=0, scale=real_var)
-    noise = t_rv.rvs(size=Y.shape)
-    Y += noise
+    deg_free = 1000000 #100000.5
+    real_var = 0.1
+    #t_rv = t(deg_free, loc=0, scale=real_var)
+    #noise = t_rv.rvs(size=Y.shape)
+    #Y += noise
 
     #Add some extreme value noise to some of the datapoints
     #percent_corrupted = 0.15
@@ -30,64 +37,83 @@
     #print corrupted_indices
     #noise = t_rv.rvs(size=(len(corrupted_indices), 1))
     #Y[corrupted_indices] += noise
-
+    plt.figure(1)
     # Kernel object
-    print X.shape
-    kernel = GPy.kern.rbf(X.shape[1])
+    kernel1 = GPy.kern.rbf(X.shape[1])
+    kernel2 = kernel1.copy()
+    kernel3 = kernel1.copy()
+    kernel4 = kernel1.copy()
 
-    #A GP should completely break down due to the points as they get a lot of weight
-    # create simple GP model
-    #m = GPy.models.GP_regression(X, Y, kernel=kernel)
-
-    ## optimize
+    #print "Clean Gaussian"
+    ##A GP should completely break down due to the points as they get a lot of weight
+    ## create simple GP model
+    #m = GPy.models.GP_regression(X, Y, kernel=kernel1)
+    ### optimize
     #m.ensure_default_constraints()
+    ##m.unconstrain('noise')
+    ##m.constrain_fixed('noise', 0.1)
     #m.optimize()
     ## plot
-    ##m.plot()
+    #plt.subplot(221)
+    #m.plot()
     #print m
 
-    #with a student t distribution, since it has heavy tails it should work well
-    likelihood_function = student_t(deg_free, sigma=real_var)
-    lap = Laplace(Y, likelihood_function)
-    cov = kernel.K(X)
-    lap.fit_full(cov)
+    ##Corrupt
+    #print "Corrupt Gaussian"
+    #m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
+    #m.ensure_default_constraints()
+    ##m.unconstrain('noise')
+    ##m.constrain_fixed('noise', 0.1)
+    #m.optimize()
+    #plt.subplot(222)
+    #m.plot()
+    #print m
 
-    test_range = np.arange(0, 10, 0.1)
-    plt.plot(test_range, t_rv.pdf(test_range))
-    for i in xrange(X.shape[0]):
-        mode = lap.f_hat[i]
-        covariance = lap.hess_hat_i[i,i]
-        scaling = np.exp(lap.ln_z_hat)
-        normalised_approx = norm(loc=mode, scale=covariance)
-        print "Normal with mode %f, and variance %f" % (mode, covariance)
-        plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
-    plt.show()
-    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+    ##with a student t distribution, since it has heavy tails it should work well
+    ##likelihood_function = student_t(deg_free, sigma=real_var)
+    ##lap = Laplace(Y, likelihood_function)
+    ##cov = kernel.K(X)
+    ##lap.fit_full(cov)
+
+    ##test_range = np.arange(0, 10, 0.1)
+    ##plt.plot(test_range, t_rv.pdf(test_range))
+    ##for i in xrange(X.shape[0]):
+        ##mode = lap.f_hat[i]
+        ##covariance = lap.hess_hat_i[i,i]
+        ##scaling = np.exp(lap.ln_z_hat)
+        ##normalised_approx = norm(loc=mode, scale=covariance)
+        ##print "Normal with mode %f, and variance %f" % (mode, covariance)
+        ##plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
+    ##plt.show()
 
     # Likelihood object
-    t_distribution = student_t(deg_free, sigma=real_var)
+    t_distribution = student_t(deg_free, sigma=np.sqrt(real_var))
     stu_t_likelihood = Laplace(Y, t_distribution)
-    kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.bias(X.shape[1])
-    m = GPy.models.GP(X, stu_t_likelihood, kernel)
+    print "Clean student t"
+    m = GPy.models.GP(X, stu_t_likelihood, kernel3)
     m.ensure_default_constraints()
-    m.update_likelihood_approximation()
-    print "NEW MODEL"
-    print(m)
-    # optimize
-    #m.optimize()
-    #print(m)
-
-    # plot
-    m.plot()
-    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
-
     m.optimize()
     print(m)
+    # plot
+    plt.subplot(211)
+    m.plot_f()
+
+    print "Corrupt student t"
+    t_distribution = student_t(deg_free, sigma=np.sqrt(real_var))
+    corrupt_stu_t_likelihood = Laplace(Yc, t_distribution)
+    m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
+    m.ensure_default_constraints()
+    m.update_likelihood_approximation()
+    m.optimize()
+    print(m)
+    plt.subplot(212)
+    m.plot_f()
 
     import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+    return m
diff --git a/python/likelihoods/Laplace.py b/python/likelihoods/Laplace.py
index b002034d..d86523d8 100644
--- a/python/likelihoods/Laplace.py
+++ b/python/likelihoods/Laplace.py
@@ -33,13 +33,15 @@ class Laplace(likelihood):
 
         #Initial values
         self.N, self.D = self.data.shape
+        self.is_heteroscedastic = True
+        self.Nparams = 0
         self.NORMAL_CONST = -((0.5 * self.N) * np.log(2 * np.pi))
 
         #Initial values for the GP variables
-        self.Y = np.zeros((self.N,1))
+        self.Y = np.zeros((self.N, 1))
         self.covariance_matrix = np.eye(self.N)
-        self.precision = np.ones(self.N)[:,None]
+        self.precision = np.ones(self.N)[:, None]
         self.Z = 0
         self.YYT = None
@@ -58,6 +60,7 @@ class Laplace(likelihood):
         pass # TODO: Laplace likelihood might want to take some parameters...
 
     def _gradients(self, partial):
+        #return np.zeros(0) # TODO: Laplace likelihood might want to take some parameters...
         return np.zeros(0) # TODO: Laplace likelihood might want to take some parameters...
         raise NotImplementedError
@@ -88,10 +91,8 @@ class Laplace(likelihood):
         self.Sigma_tilde_i = self.W #self.hess_hat_i
         #Check it isn't singular!
         epsilon = 1e-2
-        """
         if np.abs(det(self.Sigma_tilde_i)) < epsilon:
             raise ValueError("inverse covariance must be non-singular to inverse!")
-        """
         #Do we really need to invert Sigma_tilde_i? :(
         if self.likelihood_function.log_concave:
             (self.Sigma_tilde, _, _, _) = pdinv(self.Sigma_tilde_i)
@@ -99,21 +100,17 @@
         else:
             self.Sigma_tilde = inv(self.Sigma_tilde_i)
 
         #f_hat? should be f but we must have optimized for them I guess?
         Y_tilde = mdot(self.Sigma_tilde, self.hess_hat, self.f_hat)
-        #Z_tilde = (self.ln_z_hat - self.NORMAL_CONST
-                   #- 0.5*mdot(self.f_hat, self.hess_hat, self.f_hat)
-                   #+ 0.5*mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde))
-                   #)
         Z_tilde = (self.ln_z_hat - self.NORMAL_CONST
-                   + 0.5*self.log_hess_hat_det
-                   + 0.5*mdot(self.f_hat, self.Ki , self.f_hat)
-                   + 0.5*mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde))
+                   + 0.5*mdot(self.f_hat, self.hess_hat, self.f_hat)
+                   + 0.5*mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde))
+                   - mdot(Y_tilde.T, (self.Sigma_tilde_i, self.f_hat))
                   )
 
         self.Z = Z_tilde
-        self.Y = Y_tilde
+        self.Y = Y_tilde[:, None]
+        self.YYT = np.dot(self.Y, self.Y.T)
         self.covariance_matrix = self.Sigma_tilde
         self.precision = 1 / np.diag(self.Sigma_tilde)[:, None]
-        self.YYT = np.dot(self.Y, self.Y.T)
         import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
@@ -122,6 +119,7 @@ def fit_full(self, K):
         For nomenclature see Rasmussen & Williams 2006
         :K: Covariance matrix
         """
+        self.K = K.copy()
         f = np.zeros((self.N, 1))
         (self.Ki, _, _, self.log_Kdet) = pdinv(K)
         LOG_K_CONST = -(0.5 * self.log_Kdet)
@@ -148,6 +146,11 @@
 
         #At this point get the hessian matrix
         self.W = -np.diag(self.likelihood_function.link_hess(self.data[:, 0], self.f_hat))
+        if not self.likelihood_function.log_concave:
+            self.W[self.W < 0] = 1e-6 #FIXME-HACK: This is a hack since GPy can't handle the negative
+            #variances which can occur when the likelihood is not log-concave. Ideally we would keep
+            #the negative variance, letting the posterior become less certain than the prior and
+            #likelihood; this is a property held only by non-log-concave likelihoods.
         self.hess_hat = self.Ki + self.W
         (self.hess_hat_i, _, _, self.log_hess_hat_det) = pdinv(self.hess_hat)
@@ -166,10 +169,10 @@
         #the area of p(f)p(y|f) we do this by matching the height of the distributions at the mode
         #z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat + \sum_{n} ln p(y_n|f_n)
         #Unsure whether its log_hess or log_hess_i
-        self.ln_z_hat = (-0.5*self.log_hess_hat_det
-                         - 0.5*self.log_Kdet
-                         -1*self.likelihood_function.link_function(self.data[:,0], self.f_hat)
-                         - mdot(self.f_hat.T, (self.Ki, self.f_hat))
+        self.ln_z_hat = (- 0.5*self.log_hess_hat_det
+                         + 0.5*self.log_Kdet
+                         + self.likelihood_function.link_function(self.data[:,0], self.f_hat)
+                         - 0.5*mdot(self.f_hat.T, (self.Ki, self.f_hat))
                         )
 
         return self._compute_GP_variables()
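
Note on fit_full: it computes the mode f_hat and curvature W that the hunks above manipulate. The standard way to obtain f_hat, following the Rasmussen & Williams 2006 reference already cited in the docstring (section 3.4), is the Newton iteration

    f_new = (K^{-1} + W)^{-1} (W f + d/df log p(y|f)),    with W = -d^2/df^2 log p(y|f),

repeated until f converges to f_hat; the posterior over f is then approximated by a Gaussian centred at f_hat with covariance (K^{-1} + W)^{-1}. A minimal standalone sketch of that loop, assuming a diagonal likelihood Hessian as in the code above (laplace_mode, grad_loglik and hess_diag_loglik are illustrative names, not GPy API):

import numpy as np

def laplace_mode(K, grad_loglik, hess_diag_loglik, max_iter=100, tol=1e-6):
    """Reference sketch of Laplace mode finding (Rasmussen & Williams 2006, section 3.4).

    K                -- (N, N) prior covariance matrix
    grad_loglik      -- callable, f -> (N,) gradient of log p(y|f)
    hess_diag_loglik -- callable, f -> (N,) diagonal of the Hessian of log p(y|f)
    """
    f = np.zeros(K.shape[0])
    Ki = np.linalg.inv(K)  # the module uses pdinv; plain inv keeps the sketch short
    for _ in range(max_iter):
        W = -hess_diag_loglik(f)            # W = -(Hessian of log p(y|f)), diagonal
        b = W * f + grad_loglik(f)          # Newton step rewritten as a linear solve
        f_new = np.linalg.solve(Ki + np.diag(W), b)
        if np.max(np.abs(f_new - f)) < tol:
            break
        f = f_new
    return f  # the mode f_hat around which the Gaussian approximation is centred

For a non-log-concave likelihood such as the Student-t used here, W can contain negative entries, which is exactly the case the FIXME-HACK clamp in fit_full works around.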