Mirror of https://github.com/SheffieldML/GPy.git
commit f3b8dfb222 (parent 6c29750795)
about to input new derivations for Z's...
3 changed files with 37 additions and 23 deletions
@@ -143,11 +143,12 @@ def student_t_approx():
Yc[10] += 100
Yc[25] += 10
Yc[23] += 10
Yc[26] += 1000
Yc[24] += 10
#Yc = Yc/Yc.max()

#Add student t random noise to datapoints
deg_free = 1000000000000
deg_free = 10
real_sd = np.sqrt(real_var)
print "Real noise: ", real_sd
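This hunk switches deg_free from an effectively Gaussian setting (10^12) to a genuinely heavy-tailed one (10) before the noise is added. As a rough sketch of what "add student t random noise" amounts to in numpy (the sizes and the noise variance below are illustrative only, not the script's actual values):

import numpy as np

deg_free = 10                  # small df -> heavy tails; very large df -> effectively Gaussian
real_var = 0.01                # hypothetical noise variance, for illustration only
real_sd = np.sqrt(real_var)
N = 50

# standard_t draws a unit-scale Student-t (variance df/(df-2) for df > 2);
# multiplying by real_sd sets the scale of the added noise
noise = real_sd * np.random.standard_t(deg_free, size=N)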
@@ -187,21 +188,25 @@ def student_t_approx():
plt.subplot(211)
m.plot()
plt.plot(X_full, Y_full)
plt.title('Gaussian clean')
print m

#Corrupt
print "Corrupt Gaussian"
m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
m.ensure_default_constraints()
m.optimize()
#m.optimize()
plt.subplot(212)
m.plot()
plt.plot(X_full, Y_full)
plt.title('Gaussian corrupt')
print m

import ipdb; ipdb.set_trace() ### XXX BREAKPOINT

plt.figure(2)
plt.suptitle('Student-t likelihood')
edited_real_sd = initial_var_guess #real_sd
edited_real_sd = real_sd #initial_var_guess

print "Clean student t, rasm"
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -215,6 +220,7 @@ def student_t_approx():
m.plot()
plt.plot(X_full, Y_full)
plt.ylim(-2.5, 2.5)
plt.title('Student-t rasm clean')

print "Corrupt student t, rasm"
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -228,6 +234,7 @@ def student_t_approx():
m.plot()
plt.plot(X_full, Y_full)
plt.ylim(-2.5, 2.5)
plt.title('Student-t rasm corrupt')

print "Clean student t, ncg"
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -241,6 +248,7 @@ def student_t_approx():
m.plot()
plt.plot(X_full, Y_full)
plt.ylim(-2.5, 2.5)
plt.title('Student-t ncg clean')

print "Corrupt student t, ncg"
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -254,6 +262,7 @@ def student_t_approx():
m.plot()
plt.plot(X_full, Y_full)
plt.ylim(-2.5, 2.5)
plt.title('Student-t ncg corrupt')

###with a student t distribution, since it has heavy tails it should work well
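The closing comment is the point of the whole experiment: a few grossly corrupted targets (Yc[26] += 1000 and friends) should have only bounded influence under a Student-t likelihood, unlike under a Gaussian one. For reference, the per-point log-likelihood that student_t(deg_free, sigma=...) wraps is, up to the exact parameterisation used in likelihood_functions,

\ln p(y \mid f) = \ln\Gamma\!\left(\tfrac{\nu+1}{2}\right) - \ln\Gamma\!\left(\tfrac{\nu}{2}\right) - \tfrac{1}{2}\ln(\nu\pi\sigma^2) - \tfrac{\nu+1}{2}\ln\!\left(1 + \tfrac{(y-f)^2}{\nu\sigma^2}\right),

which penalises large residuals only logarithmically in (y - f)^2 and recovers the Gaussian log-likelihood as \nu \to \infty.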
@@ -6,7 +6,10 @@ from numpy.linalg import cond
from likelihood import likelihood
from ..util.linalg import pdinv, mdot, jitchol, chol_inv, det_ln_diag, pddet
from scipy.linalg.lapack import dtrtrs
import random
#import pylab as plt
np.random.seed(50)
random.seed(50)


class Laplace(likelihood):
@@ -156,6 +159,7 @@ class Laplace(likelihood):
Lt_W_i_Li = dtrtrs(Lt_W, Li, lower=False)[0]
self.Wi__Ki_W = Lt_W_i_Li + np.eye(self.N)

Y_tilde = np.dot(self.Wi__Ki_W, self.f_hat)

#f.T(Ki + W)f
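dtrtrs, imported above from scipy.linalg.lapack, is a raw LAPACK triangular solve that returns a (solution, info) pair, which is why the call above takes [0]. A minimal, self-contained sketch of the same calling pattern (the matrix and right-hand side here are purely illustrative):

import numpy as np
from scipy.linalg.lapack import dtrtrs

U = np.array([[2.0, 1.0],
              [0.0, 3.0]])          # upper triangular, matching the lower=False call above
b = np.array([[4.0],
              [6.0]])

x, info = dtrtrs(U, b, lower=False)  # solves U x = b; info == 0 on success
# x is [[1.0], [2.0]]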
@@ -239,15 +243,15 @@ class Laplace(likelihood):
self.ln_Ki_W_i_det = np.linalg.det(self.Ki_W_i)

#Do the computation again at f to get Ki_f which is useful
b = self.W*self.f_hat + self.likelihood_function.dlik_df(self.data, self.f_hat, extra_data=self.extra_data)

solve_chol = cho_solve((self.B_chol, True), np.dot(self.W_12*self.K, b))

a = b - self.W_12*solve_chol

self.Ki_f = a

self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
self.ln_K_det = pddet(self.K)
#_, _, _, self.ln_K_det = pdinv(self.K)

self.ln_z_hat = (- 0.5*self.f_Ki_f
- 0.5*self.ln_K_det
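The hunk truncates ln_z_hat after its first terms, but what is being assembled looks like the standard Laplace approximation to the log marginal likelihood (Rasmussen and Williams, GPML, Eq. 3.32). With \hat f the mode, W = -\nabla\nabla \ln p(y \mid \hat f) and B = I + W^{1/2} K W^{1/2},

\ln Z \approx -\tfrac{1}{2}\,\hat f^{\top} K^{-1} \hat f + \ln p(y \mid \hat f) - \tfrac{1}{2}\ln|B|,

and \ln|B| = \ln|K| + \ln|K^{-1} + W|, which is presumably why both ln_K_det and a determinant of Ki_W_i (read as (K^{-1} + W)^{-1}) are computed here.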
@@ -296,7 +300,7 @@ class Laplace(likelihood):
res = -1 * (--np.diag(self.likelihood_function.d2lik_d2f(self.data[:, 0], f, extra_data=self.extra_data)) - self.Ki)
return np.squeeze(res)

f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False)
return f_hat[:, None]

def rasm_mode(self, K, MAX_ITER=500000, MAX_RESTART=50):
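The change in this hunk appears to be passing disp=False to silence fmin_ncg's convergence printout. For reference, the same calling pattern (objective, gradient, Hessian) on a toy quadratic rather than the Laplace objective:

import numpy as np
import scipy.optimize

A = np.array([[3.0, 1.0],
              [1.0, 2.0]])

obj = lambda x: 0.5 * np.dot(x, np.dot(A, x)) - np.sum(x)
obj_grad = lambda x: np.dot(A, x) - 1.0   # gradient of the quadratic: A x - 1
obj_hess = lambda x: A                    # constant Hessian

x_min = scipy.optimize.fmin_ncg(obj, np.zeros(2), fprime=obj_grad, fhess=obj_hess, disp=False)
# x_min approximately solves A x = 1, i.e. [0.2, 0.4]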
@@ -336,17 +340,19 @@ class Laplace(likelihood):
grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
#Find K_i_f
b = W_f + grad
b = step_size*b

#a should be equal to Ki*f now so should be able to use it
c = np.dot(K, W_f) + f*(1-step_size) + step_size*np.dot(K, grad)

solve_L = cho_solve((L, True), W_12*c)

f = c - np.dot(K, W_12*solve_L)
#Need this to find the f we have a stepsize which we need to move in, rather than a full unit movement
#c = np.dot(K, W_f) + f*(1-step_size) + step_size*np.dot(K, grad)
#solve_L = cho_solve((L, True), W_12*c)
#f = c - np.dot(K, W_12*solve_L)

#FIXME: Can't we get rid of this? Don't we want to evaluate obj(c,f) and this is our new_obj?
#Why did I choose to evaluate the objective function at the new f with the old hessian? I'm sure there was a good reason,
#Document it!
solve_L = cho_solve((L, True), W_12*np.dot(K, b))

a = b - W_12*solve_L
f = np.dot(K, a)

tmp_old_obj = old_obj
old_obj = new_obj
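The uncommented path through this hunk (b = W_f + grad, then solve_L = cho_solve((L, True), W_12*np.dot(K, b)), a = b - W_12*solve_L, f = np.dot(K, a)) appears to follow the numerically stable Newton step for locating the Laplace mode, as in Rasmussen and Williams' Algorithm 3.1, with W = -\nabla\nabla \ln p(y \mid f), B = I + W^{1/2} K W^{1/2} and L the Cholesky factor of B:

b = W f + \nabla \ln p(y \mid f)
a = b - W^{1/2} B^{-1} W^{1/2} K b
f_{\text{new}} = K a

Formulated this way the iteration never forms (K + W^{-1})^{-1} explicitly, so it stays well behaved when entries of W are close to zero; the b = step_size*b line looks like this code's own damping on top of that update.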
@@ -142,23 +142,22 @@ class GP(model):
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
"""
dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X)
print "dL_dthetaK before: ",dL_dthetaK
if isinstance(self.likelihood, Laplace):
#Reapproximate incase it hasnt been done...
if isinstance(self.likelihood, Laplace):
self.likelihood.fit_full(self.kern.K(self.X))
self.likelihood._set_params(self.likelihood._get_params())
self.likelihood.fit_full(self.kern.K(self.X))
self.likelihood._set_params(self.likelihood._get_params())
print self.kern._get_params()

#Need to pass in a matrix of ones to get access to raw dK_dthetaK values without being chained
fake_dL_dKs = np.ones(self.dL_dK.shape) #FIXME: Check this is right...
#fake_dL_dKs = np.ones(self.dL_dK.shape) #FIXME: Check this is right...
#fake_dL_dKs = np.eye(self.dL_dK.shape[0]) #FIXME: Check this is right...

#BUG: THIS SHOULD NOT BE (1,num_k_params) matrix it should be (N,N,num_k_params)
dK_dthetaK = self.kern.dK_dtheta(dL_dK=fake_dL_dKs, X=self.X)
#dK_dthetaK = self.kern.dK_dtheta(dL_dK=fake_dL_dKs, X=self.X)

dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK=dK_dthetaK)
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
print "dL_dthetaK after: ",dL_dthetaK
#dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK=dK_dthetaK)
dL_dthetaL = 0 #self.likelihood._gradients(partial=np.diag(self.dL_dK))
#print "dL_dthetaK after: ",dL_dthetaK
#print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL))
else:
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
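The docstring's chain rule, written out elementwise, is the trace identity

\frac{\partial L}{\partial \theta} = \sum_{i,j} \frac{\partial L}{\partial K_{ij}} \frac{\partial K_{ij}}{\partial \theta} = \operatorname{tr}\!\left( \left(\frac{\partial L}{\partial K}\right)^{\!\top} \frac{\partial K}{\partial \theta} \right),

which is what kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X) is being used to evaluate in one shot. The Laplace branch instead wants the raw \partial K / \partial\theta values (hence the all-ones fake_dL_dKs), presumably because its extra gradient terms cannot be folded into a single dL_dK matrix; the FIXME and BUG comments above record that this part is still unresolved.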