From ee980227ac34262b192565cafb5e195cefee46d0 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Tue, 9 Jul 2013 11:35:42 +0100
Subject: [PATCH] Fixed 2*variance plotting instead of 2*std plotting, tidied
 up

---
 GPy/examples/laplace_approximations.py  | 93 ++++++++++++++++++++-----
 GPy/likelihoods/Laplace.py              |  2 +-
 GPy/likelihoods/likelihood_functions.py | 28 +-------
 GPy/models/GP.py                        |  2 +-
 4 files changed, 78 insertions(+), 47 deletions(-)

diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index 78b4e986..b3048f5a 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -85,24 +85,78 @@ def v_fail_test():
     print(m)
 
 def student_t_f_check():
-    real_var = 0.1
+    plt.close('all')
+    real_std = 0.1
     X = np.random.rand(100)[:, None]
-    Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var
+    noise = np.random.randn(*X.shape)*real_std
+    Y = np.sin(X*2*np.pi) + noise
     X_full = X
     Y_full = np.sin(X_full)
-    Y = Y/Y.max()
-    deg_free = 1000
-    real_sd = np.sqrt(real_var)
+    #Y = Y/Y.max()
+    deg_free = 10000
 
-    kernel = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
-    real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free)))
+    #GP
+    kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
+    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp.ensure_default_constraints()
+    mgp.randomize()
+    mgp.optimize()
 
-    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=real_stu_t_std**2)
+    kernelst = kernelgp.copy()
+    real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free))
+
+    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=real_stu_t_std2)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, stu_t_likelihood, kernel)
-    m.constrain_positive('t_noise_std2')
-    m.ensure_default_constraints()
+
+    plt.figure(1)
+    plt.suptitle('Student likelihood')
+    m = GPy.models.GP(X, stu_t_likelihood, kernelst)
+    m.constrain_fixed('rbf_var', mgp._get_params()[0])
+    m.constrain_fixed('rbf_len', mgp._get_params()[1])
+
     m.update_likelihood_approximation()
+    print "T std2 {} converted from original data, LL: {}".format(real_stu_t_std2, m.log_likelihood())
+    plt.subplot(221)
+    m.plot()
+    plt.title('Student t original data noise')
+
+    #Fix student t noise variance to same a GP
+    gp_noise = mgp._get_params()[2]
+    m['t_noise_std2'] = gp_noise
+    m.update_likelihood_approximation()
+    print "T std2 {} same as GP noise, LL: {}".format(gp_noise, m.log_likelihood())
+    plt.subplot(222)
+    m.plot()
+    plt.title('Student t GP noise')
+
+    #Fix student t noise to variance converted from the GP
+    real_stu_t_std2gp = (gp_noise)*((deg_free - 2)/float(deg_free))
+    m['t_noise_std2'] = real_stu_t_std2gp
+    m.update_likelihood_approximation()
+    print "T std2 {} converted to student t noise from GP noise, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.log_likelihood())
+    plt.subplot(223)
+    m.plot()
+    plt.title('Student t GP noise converted')
+
+    m.constrain_positive('t_noise_std2')
+    m.randomize()
+    m.update_likelihood_approximation()
+    m.optimize()
+    print "T std2 {} var {} after optimising, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.likelihood.likelihood_function.variance, m.log_likelihood())
+    plt.subplot(224)
+    m.plot()
+    plt.title('Student t optimised')
+
+    plt.figure(2)
+    print "GP noise {} after optimising, LL: {}".format(gp_noise, mgp.log_likelihood())
+    plt.suptitle('Gaussian likelihood optimised')
+    mgp.plot()
+    print "Real std: {}".format(real_std)
+    print "Real variance {}".format(real_std**2)
+
+    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+
+    return m
 
 def debug_student_t_noise_approx():
     plot = False
@@ -218,16 +272,16 @@ def student_t_approx():
     """
     Example of regressing with a student t likelihood
     """
-    real_var = 0.2
+    real_std = 0.1
     #Start a function, any function
-    X = np.linspace(0.0, 10.0, 30)[:, None]
-    Y = np.sin(X) + np.random.randn(*X.shape)*real_var
+    X = np.linspace(0.0, 10.0, 50)[:, None]
+    Y = np.sin(X) + np.random.randn(*X.shape)*real_std
     Yc = Y.copy()
 
     X_full = np.linspace(0.0, 10.0, 500)[:, None]
     Y_full = np.sin(X_full)
 
-    #Y = Y/Y.max()
+    Y = Y/Y.max()
 
     Yc[10] += 100
     Yc[25] += 10
@@ -238,10 +292,9 @@ def student_t_approx():
 
     #Add student t random noise to datapoints
     deg_free = 8
-    real_sd = np.sqrt(real_var)
-    print "Real noise: ", real_sd
+    print "Real noise: ", real_std
 
-    initial_var_guess = 0.01
+    initial_var_guess = 0.1
     #t_rv = t(deg_free, loc=0, scale=real_var)
     #noise = t_rvrvs(size=Y.shape)
     #Y += noise
@@ -293,7 +346,7 @@ def student_t_approx():
 
     plt.figure(2)
     plt.suptitle('Student-t likelihood')
-    edited_real_sd = real_sd #initial_var_guess
+    edited_real_sd = real_std #initial_var_guess
 
     print "Clean student t, rasm"
     t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
@@ -301,6 +354,7 @@ def student_t_approx():
     m = GPy.models.GP(X, stu_t_likelihood, kernel6)
     m.ensure_default_constraints()
     m.constrain_positive('t_noise')
+    m.randomize()
     m.update_likelihood_approximation()
     m.optimize()
     print(m)
@@ -316,6 +370,7 @@ def student_t_approx():
     m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
     m.ensure_default_constraints()
     m.constrain_positive('t_noise')
+    m.randomize()
     m.update_likelihood_approximation()
     m.optimize()
     print(m)
diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py
index 984112a5..c5894ed6 100644
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@@ -89,7 +89,7 @@ class Laplace(likelihood):
         expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
         dL_dthetaK_exp = dK_dthetaK(expl, X)
         dL_dthetaK_imp = dK_dthetaK(impl, X)
-        print "dL_dthetaK_exp: {}     dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
+        #print "dL_dthetaK_exp: {}     dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
         dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
         return dL_dthetaK
 
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index bfc759d7..595fa63c 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -193,16 +193,11 @@ class student_t(likelihood_function):
         """
         assert y.shape == f.shape
         e = y - f
-        A = gammaln((self.v + 1) * 0.5)
-        B = -gammaln(self.v * 0.5)
-        C = - 0.5*np.log(self.sigma2 * self.v * np.pi)
-        D = (-(self.v + 1)*0.5)*np.log(1 + (e**2)/(self.v*self.sigma2))
         objective = (+ gammaln((self.v + 1) * 0.5)
                      - gammaln(self.v * 0.5)
                      - 0.5*np.log(self.sigma2 * self.v * np.pi)
                      + (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v))
                     )
-        #print "A: {} B: {} C: {} D: {} obj: {}".format(A,B,C,D.sum(),objective.sum())
         return np.sum(objective)
 
     def dlik_df(self, y, f, extra_data=None):
@@ -266,15 +261,6 @@ class student_t(likelihood_function):
         """
         assert y.shape == f.shape
         e = y - f
-        #sigma = np.sqrt(self.sigma2)
-        #dlik_dsigma = ( - (1/sigma) +
-                        #((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
-                      #)
-        #dlik_dsigma = ( - 1 +
-                        #((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
-                      #)
-        #dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
-        #dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
         dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
         return dlik_dsigma
 
@@ -286,10 +272,6 @@ class student_t(likelihood_function):
         """
         assert y.shape == f.shape
         e = y - f
-        #sigma = np.sqrt(self.sigma2)
-        #dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
-                            #/ ((self.v*self.sigma2 + e**2)**2)
-                           #)
         dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2)
         return dlik_grad_dsigma
 
@@ -301,12 +283,6 @@ class student_t(likelihood_function):
         """
         assert y.shape == f.shape
         e = y - f
-        #sigma = np.sqrt(self.sigma2)
-        #dlik_hess_dsigma = (  (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
-                              #((e**2 + self.sigma2*self.v)**3)
-                           #)
-        #dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
-                             #/ ((e**2 + (self.sigma**2)*self.v)**3) )
         dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
                               / (self.sigma2*self.v + (e**2))**3
                            )
@@ -344,8 +320,8 @@ class student_t(likelihood_function):
         #Now we have an analytical solution for the variances of the distribution p(y*|f*)p(f*) around our test points but we now
         #need the 95 and 5 percentiles.
         #FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles
-        p_025 = mu - 2.*true_var
-        p_975 = mu + 2.*true_var
+        p_025 = mu - 2.*np.sqrt(true_var)
+        p_975 = mu + 2.*np.sqrt(true_var)
 
         return mu, np.nan*mu, p_025, p_975
 
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 20337ef5..cd4b7dac 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -152,7 +152,7 @@ class GP(model):
         else:
             dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
         #print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL))
-        print "dL_dthetaK: {}   dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL)
+        #print "dL_dthetaK: {}   dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL)
 
         return np.hstack((dL_dthetaK, dL_dthetaL))
         #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))