From ab6a3a571e4ef0aec66776f56921326166f09d40 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Tue, 2 Jul 2013 11:14:48 +0100
Subject: [PATCH] Playing around, trying to find what makes it want to go so low

---
 GPy/core/model.py                       |  2 +-
 GPy/examples/laplace_approximations.py  | 21 ++++++++++++++-------
 GPy/likelihoods/Laplace.py              | 18 +++++++++---------
 GPy/likelihoods/likelihood_functions.py |  4 ++--
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 83a4a428..f97938a4 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -246,7 +246,7 @@ class model(parameterised):
         obj_grads = -LL_gradients - prior_gradients
         print self
         #self.checkgrad(verbose=1)
-        import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+        #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
         return obj_f, obj_grads
 
     def optimize(self, optimizer=None, start=None, **kwargs):
diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index bb621424..14400a08 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -88,9 +88,12 @@ def debug_student_t_noise_approx():
     plot = False
     real_var = 0.1
     #Start a function, any function
-    X = np.linspace(0.0, 10.0, 50)[:, None]
+    #X = np.linspace(0.0, 10.0, 50)[:, None]
+    X = np.random.rand(100)[:, None]
+    #X = np.random.rand(100)[:, None]
     #X = np.array([0.5, 1])[:, None]
-    Y = np.sin(X) + np.random.randn(*X.shape)*real_var
+    Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var
+    #Y = X + np.random.randn(*X.shape)*real_var
     #ty = np.array([1., 9.97733584, 4.17841363])[:, None]
     #Y = ty
 
@@ -112,7 +115,8 @@ def debug_student_t_noise_approx():
 
     plt.close('all')
     # Kernel object
-    kernel1 = GPy.kern.rbf(X.shape[1])# + GPy.kern.white(X.shape[1])
+    kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    #kernel1 = GPy.kern.linear(X.shape[1]) + GPy.kern.white(X.shape[1])
     kernel2 = kernel1.copy()
     kernel3 = kernel1.copy()
     kernel4 = kernel1.copy()
@@ -136,7 +140,7 @@ def debug_student_t_noise_approx():
     #print m
 
     real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free)))
-    edited_real_sd = real_stu_t_std#initial_var_guess #real_sd
+    edited_real_sd = real_stu_t_std + 1#initial_var_guess #real_sd
     #edited_real_sd = real_sd
 
     print "Clean student t, rasm"
@@ -149,13 +153,16 @@ def debug_student_t_noise_approx():
     #m.constrain_fixed('rbf_l', 1.8651)
     #m.constrain_fixed('t_noise_std', edited_real_sd)
     #m.constrain_positive('rbf')
-    m.constrain_positive('t_noise_std')
+    #m.constrain_positive('t_noise_std')
     #m.constrain_positive('')
-    m.ensure_default_constraints()
-    m.constrain_bounded('t_noi', 0.001, 10)
+    #m.constrain_bounded('t_noi', 0.001, 10)
+    #m.constrain_fixed('t_noi', real_stu_t_std)
+    m.constrain_fixed('white', 0.01)
+    #m.constrain_fixed('t_no', 0.01)
     #m['rbf_var'] = 0.20446332
     #m['rbf_leng'] = 0.85776241
     #m['t_noise'] = 0.667083294421005
+    m.ensure_default_constraints()
     m.update_likelihood_approximation()
     #m.optimize(messages=True)
     print(m)
diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py
index 4c9c67df..2ae68613 100644
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@@ -156,15 +156,15 @@ class Laplace(likelihood):
         Y_tilde = Wi*self.Ki_f + self.f_hat
 
         self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R
-        ln_det_K_Wi__Bi = self.ln_I_KW_det + pddet(self.Sigma_tilde + self.K)
-        l = self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
+        self.ln_det_K_Wi__Bi = self.ln_I_KW_det + pddet(self.Sigma_tilde + self.K)
+        self.lik = self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
 
-        y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
-        Z_tilde = (+ self.NORMAL_CONST
-                   + l
-                   + 0.5*ln_det_K_Wi__Bi
+        self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
+        Z_tilde = (#+ self.NORMAL_CONST
+                   + self.lik
+                   + 0.5*self.ln_det_K_Wi__Bi
                    - 0.5*self.f_Ki_f
-                   + 0.5*y_Wi_Ki_i_y
+                   + 0.5*self.y_Wi_Ki_i_y
                   )
         #print "Ztilde: {}".format(Z_tilde)
 
@@ -198,7 +198,7 @@ class Laplace(likelihood):
         self.W = -self.likelihood_function.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data)
 
         if not self.likelihood_function.log_concave:
-            self.W[self.W < 0] = 1e-6  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+            self.W[self.W < 0] = 1e-10  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                        #If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                        #To cause the posterior to become less certain than the prior and likelihood,
                                        #This is a property only held by non-log-concave likelihoods
@@ -311,7 +311,7 @@ class Laplace(likelihood):
         while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
             W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
             if not self.likelihood_function.log_concave:
-                W[W < 0] = 1e-6     # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+                W[W < 0] = 1e-10     # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                     # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                     # To cause the posterior to become less certain than the prior and likelihood,
                                     # This is a property only held by non-log-concave likelihoods
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 57627198..fd64dbe6 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -196,8 +196,8 @@ class student_t(likelihood_function):
         objective = (+ gammaln((self.v + 1) * 0.5)
                      - gammaln(self.v * 0.5)
                      - 0.5*np.log((self.sigma**2) * self.v * np.pi)
-                     - (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v))
-                     #- (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2)))
+                     #- (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v))
+                     - (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2)))
                     )
         return np.sum(objective)