Changed some parameters of the laplace, tidied up examples

2026-06-08 15:05:15 +02:00 · 2013-11-28 15:23:39 +00:00 · 2013-11-28 15:23:39 +00:00 · 0a43329150
commit 0a43329150
parent 50e9034a6d
2 changed files with 105 additions and 96 deletions
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -2,22 +2,21 @@ import GPy
 import numpy as np
 import matplotlib.pyplot as plt
 from GPy.util import datasets
 #np.random.seed(1)
-def student_t_approx():
+def student_t_approx(optimize=True, plot=True):
    """
-    Example of regressing with a student t likelihood
+    Example of regressing with a student t likelihood using Laplace
    """
    real_std = 0.1
    #Start a function, any function
    X = np.linspace(0.0, np.pi*2, 100)[:, None]
    Y = np.sin(X) + np.random.randn(*X.shape)*real_std
    Y = Y/Y.max()
    Yc = Y.copy()
    X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
    Y_full = np.sin(X_full)
-
+    Y_full = Y_full/Y_full.max()
    Y = Y/Y.max()
    #Slightly noisy data
    Yc[75:80] += 1
@ -34,94 +33,93 @@ def student_t_approx():
    deg_free = 5
    print "Real noise: ", real_std
    initial_var_guess = 0.5
    edited_real_sd = initial_var_guess
    #t_rv = t(deg_free, loc=0, scale=real_var)
    #noise = t_rvrvs(size=Y.shape)
    #Y += noise
    plt.figure(1)
    plt.suptitle('Gaussian likelihood')
    # Kernel object
    kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
    kernel2 = kernel1.copy()
    kernel3 = kernel1.copy()
    kernel4 = kernel1.copy()
    kernel5 = kernel1.copy()
    kernel6 = kernel1.copy()
-    print "Clean Gaussian"
+    #Gaussian GP model on clean data
-    #A GP should completely break down due to the points as they get a lot of weight
+    m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
    # create simple GP model
    m = GPy.models.GPRegression(X, Y, kernel=kernel1)
    # optimize
-    m.ensure_default_constraints()
+    m1.ensure_default_constraints()
-    m.constrain_fixed('white', 1e-4)
+    m1.constrain_fixed('white', 1e-5)
-    m.randomize()
+    m1.randomize()
    m.optimize()
    # plot
    ax = plt.subplot(211)
    m.plot(ax=ax)
    plt.plot(X_full, Y_full)
    plt.ylim(-1.5, 1.5)
    plt.title('Gaussian clean')
    print m
-    #Corrupt
+    #Gaussian GP model on corrupt data
-    print "Corrupt Gaussian"
+    m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
-    m = GPy.models.GPRegression(X, Yc, kernel=kernel2)
+    m2.ensure_default_constraints()
-    m.ensure_default_constraints()
+    m2.constrain_fixed('white', 1e-5)
-    m.constrain_fixed('white', 1e-4)
+    m2.randomize()
    m.randomize()
    m.optimize()
    ax = plt.subplot(212)
    m.plot(ax=ax)
    plt.plot(X_full, Y_full)
    plt.ylim(-1.5, 1.5)
    plt.title('Gaussian corrupt')
    print m
-    plt.figure(2)
+    #Student t GP model on clean data
    plt.suptitle('Student-t likelihood')
    edited_real_sd = initial_var_guess
    print "Clean student t, rasm"
    t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
-    m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
+    m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood)
-    m.ensure_default_constraints()
+    m3.ensure_default_constraints()
-    m.constrain_positive('t_noise')
+    m3.constrain_bounded('t_noise', 1e-6, 10.)
-    m.constrain_fixed('white', 1e-4)
+    m3.constrain_fixed('white', 1e-5)
-    m.randomize()
+    m3.randomize()
    #m.update_likelihood_approximation()
    m.optimize()
    print(m)
    ax = plt.subplot(211)
    m.plot(ax=ax)
    plt.plot(X_full, Y_full)
    plt.ylim(-1.5, 1.5)
    plt.title('Student-t rasm clean')
-    print "Corrupt student t, rasm"
+    #Student t GP model on corrupt data
    t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
    corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
-    m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
+    m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
-    m.ensure_default_constraints()
+    m4.ensure_default_constraints()
-    m.constrain_bounded('t_noise', 1e-6, 10.)
+    m4.constrain_bounded('t_noise', 1e-6, 10.)
-    m.constrain_fixed('white', 1e-4)
+    m4.constrain_fixed('white', 1e-5)
-    m.randomize()
+    m4.randomize()
    for a in range(1):
        m.randomize()
        m_start = m.copy()
        print m
        m.optimize('scg', messages=1)
    print(m)
    ax = plt.subplot(212)
    m.plot(ax=ax)
    plt.plot(X_full, Y_full)
    plt.ylim(-1.5, 1.5)
    plt.title('Student-t rasm corrupt')
-    return m
+    if optimize:
        optimizer='scg'
        print "Clean Gaussian"
        m1.optimize(optimizer, messages=1)
        print "Corrupt Gaussian"
        m2.optimize(optimizer, messages=1)
        print "Clean student t"
        m3.optimize(optimizer, messages=1)
        print "Corrupt student t"
        m4.optimize(optimizer, messages=1)
    if False:
        print m1
        print m3
        plt.figure(3)
        plt.scatter(X, m1.likelihood.Y, c='g')
        plt.scatter(X, m3.likelihood.Y, c='r')
    if plot:
        plt.figure(1)
        plt.suptitle('Gaussian likelihood')
        ax = plt.subplot(211)
        m1.plot(ax=ax)
        plt.plot(X_full, Y_full)
        plt.ylim(-1.5, 1.5)
        plt.title('Gaussian clean')
        ax = plt.subplot(212)
        m2.plot(ax=ax)
        plt.plot(X_full, Y_full)
        plt.ylim(-1.5, 1.5)
        plt.title('Gaussian corrupt')
        plt.figure(2)
        plt.suptitle('Student-t likelihood')
        ax = plt.subplot(211)
        m3.plot(ax=ax)
        plt.plot(X_full, Y_full)
        plt.ylim(-1.5, 1.5)
        plt.title('Student-t rasm clean')
        ax = plt.subplot(212)
        m4.plot(ax=ax)
        plt.plot(X_full, Y_full)
        plt.ylim(-1.5, 1.5)
        plt.title('Student-t rasm corrupt')
    return m1, m2, m3, m4
 def boston_example():
    import sklearn
@ -294,3 +292,4 @@ def precipitation_example():
    for n, (train, test) in enumerate(kf):
        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
        print "Fold {}".format(n)
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@ -15,6 +15,7 @@ import scipy as sp
 from likelihood import likelihood
 from ..util.linalg import mdot, jitchol, pddet, dpotrs
 from functools import partial as partial_func
 import warnings
 class Laplace(likelihood):
    """Laplace approximation to a posterior"""
@ -64,6 +65,7 @@ class Laplace(likelihood):
        self.YYT = None
        self.old_Ki_f = None
        self.bad_fhat = False
    def predictive_values(self,mu,var,full_cov,**noise_args):
        if full_cov:
@ -198,18 +200,16 @@ class Laplace(likelihood):
        Y_tilde = Wi*self.Ki_f + self.f_hat
        self.Wi_K_i = self.W12BiW12
-        self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
+        ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
-        self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
+        lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
-        self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
+        y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
-        Z_tilde = (+ self.lik
+        Z_tilde = (+ lik
                   - 0.5*self.ln_B_det
-                   + 0.5*self.ln_det_Wi_K
+                   + 0.5*ln_det_Wi_K
                   - 0.5*self.f_Ki_f
-                   + 0.5*self.y_Wi_Ki_i_y
+                   + 0.5*y_Wi_K_i_y
                  )
        #print "Term, {}, {}, {}, {}, {}".format(self.lik, - 0.5*self.ln_B_det, + 0.5*self.ln_det_Wi_K, - 0.5*self.f_Ki_f, + 0.5*self.y_Wi_Ki_i_y)
        #Convert to float as its (1, 1) and Z must be a scalar
        self.Z = np.float64(Z_tilde)
        self.Y = Y_tilde
@ -247,7 +247,10 @@ class Laplace(likelihood):
        #At this point get the hessian matrix (or vector as W is diagonal)
        self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
-        #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
+        if not self.noise_model.log_concave:
            #print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
            self.W[self.W < 1e-6] = 1e-6  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
        self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
        self.Ki_f = self.Ki_f
@ -283,11 +286,11 @@ class Laplace(likelihood):
        except:
            import ipdb; ipdb.set_trace()
-        W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
+        W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
        ln_B_det = 2*np.sum(np.log(np.diag(L)))
-        return W12BiW12, ln_B_det
+        return W12BiW12a, ln_B_det
-    def rasm_mode(self, K, MAX_ITER=30):
+    def rasm_mode(self, K, MAX_ITER=40):
        """
        Rasmussen's numerically stable mode finding
        For nomenclature see Rasmussen & Williams 2006
@ -302,9 +305,10 @@ class Laplace(likelihood):
        """
        #old_Ki_f = np.zeros((self.N, 1))
-        #Start f's at zero originally
+        #Start f's at zero originally of if we have gone off track, try restarting
-        if self.old_Ki_f is None:
+        if self.old_Ki_f is None or self.bad_fhat:
-            old_Ki_f = np.zeros((self.N, 1))
+            old_Ki_f = np.random.rand(self.N, 1)/50.0
            #old_Ki_f = self.Y
            f = np.dot(K, old_Ki_f)
        else:
            #Start at the old best point
@ -318,7 +322,7 @@ class Laplace(likelihood):
            return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
        difference = np.inf
-        epsilon = 1e-5
+        epsilon = 1e-7
        #step_size = 1
        #rs = 0
        i = 0
@ -381,14 +385,20 @@ class Laplace(likelihood):
            #difference = abs(new_obj - old_obj)
            #old_obj = new_obj.copy()
-            difference = np.abs(np.sum(f - f_old))
+            difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
-            #difference = np.abs(np.sum(Ki_f - old_Ki_f))
+            #difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
            old_Ki_f = Ki_f.copy()
            i += 1
        self.old_Ki_f = old_Ki_f.copy()
        #Warn of bad fits
        if difference > epsilon:
-            print "Not perfect f_hat fit difference: {}".format(difference)
+            self.bad_fhat = True
            warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
        elif self.bad_fhat:
            self.bad_fhat = False
            warnings.warn("f_hat now perfect again")
        self.Ki_f = Ki_f
        return f