Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-04-28 22:36:24 +02:00
Trying to fix the optimisation problem: fixed a few bugs, but it still fails at very low noise.
This commit is contained in:
parent d4bfd99c21, commit e80fad197c

2 changed files with 49 additions and 34 deletions
First changed file (hunks in def debug_student_t_noise_approx()):

```diff
@@ -90,7 +90,7 @@ def debug_student_t_noise_approx():
     real_var = 0.1
     #Start a function, any function
     X = np.linspace(0.0, 10.0, 50)[:, None]
     #X = np.array([0.5])[:, None]
     #X = np.array([0.5, 1])[:, None]
     Y = np.sin(X) + np.random.randn(*X.shape)*real_var
-
+    X_full = np.linspace(0.0, 10.0, 50)[:, None]
```
```diff
@@ -99,7 +99,7 @@ def debug_student_t_noise_approx():
     Y = Y/Y.max()

     #Add student t random noise to datapoints
-    deg_free = 10
+    deg_free = 100000
     real_sd = np.sqrt(real_var)
     print "Real noise std: ", real_sd

```
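Pulling these two hunks together, the debug setup is a normalised sine curve observed under a Student-t noise model. A freestanding sketch of that setup follows; the `standard_t` sampling line is an assumption, since the script's own noise-adding code sits outside these hunks:

```python
import numpy as np

np.random.seed(0)
real_var = 0.1
real_sd = np.sqrt(real_var)
deg_free = 100000  # large nu behaves like a Gaussian; small nu gives heavy tails

X = np.linspace(0.0, 10.0, 50)[:, None]
# note: the script scales the Gaussian noise by real_var (a variance), not real_sd
Y = np.sin(X) + np.random.randn(*X.shape)*real_var
Y = Y/Y.max()

# assumed: draw Student-t observation noise around the latent curve
t_noise = real_sd*np.random.standard_t(deg_free, size=X.shape)
Y_obs = Y + t_noise
print("Real noise std:", real_sd)
```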
Second changed file (hunks in class Laplace(likelihood)):

```diff
@@ -51,6 +51,8 @@ class Laplace(likelihood):
         self.Z = 0
         self.YYT = None

+        self.old_a = None
+
     def predictive_values(self, mu, var, full_cov):
         if full_cov:
             raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood")
```
```diff
@@ -83,7 +85,7 @@ class Laplace(likelihood):
         impl = mdot(dlp, dL_dfhat, I_KW_i)
         expl_a = mdot(self.Ki_f, self.Ki_f.T)
         expl_b = self.Wi_K_i
-        expl = 0.5*expl_a - 0.5*expl_b # Might need to be -?
+        expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
         dL_dthetaK_exp = dK_dthetaK(expl, X)
         dL_dthetaK_imp = dK_dthetaK(impl, X)
         #print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
```
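For reference on the sign question flagged in that changed line: the explicit part of the Laplace-approximate marginal likelihood gradient in Rasmussen & Williams (2006), eq. 5.22, is

\[
\left.\frac{\partial \log q(\mathbf{y} \mid X, \theta)}{\partial \theta_j}\right|_{\text{explicit}}
= \frac{1}{2}\,\hat{\mathbf{f}}^{\top} K^{-1}\,\frac{\partial K}{\partial \theta_j}\,K^{-1}\hat{\mathbf{f}}
\;-\; \frac{1}{2}\,\operatorname{tr}\!\left[\bigl(K + W^{-1}\bigr)^{-1}\frac{\partial K}{\partial \theta_j}\right].
\]

Assuming `Ki_f` holds \(K^{-1}\hat{\mathbf{f}}\) and `Wi_K_i` holds \((K + W^{-1})^{-1}\) (an inference from the names, not confirmed by this diff), the textbook sign on the second term is the minus of the old line; the new plus is only consistent if `Wi_K_i` is stored negated.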
```diff
@@ -265,7 +267,7 @@ class Laplace(likelihood):
         f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False)
         return f_hat[:, None]

-    def rasm_mode(self, K, MAX_ITER=500000, MAX_RESTART=50):
+    def rasm_mode(self, K, MAX_ITER=500, MAX_RESTART=40):
         """
         Rasmussens numerically stable mode finding
         For nomenclature see Rasmussen & Williams 2006
```
```diff
@@ -275,7 +277,12 @@ class Laplace(likelihood):
         :MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation
         :returns: f_mode
         """
-        f = np.zeros((self.N, 1))
+        if self.old_a is None:
+            old_a = np.zeros((self.N, 1))
+        else:
+            old_a = self.old_a
+
+        f = np.dot(self.K, old_a)
         new_obj = -np.inf
         old_obj = np.inf
```
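The docstring points at Rasmussen & Williams (2006); the update inside rasm_mode follows their Algorithm 3.1 (b = Wf + ∇log p(y|f), a = b − W^{1/2}B^{−1}W^{1/2}Kb, f = Ka, with B = I + W^{1/2}KW^{1/2}). A minimal self-contained sketch of that iteration, with illustrative names rather than GPy's API, plus a toy Gaussian check where the mode has a closed form:

```python
import numpy as np
from scipy.linalg import cholesky, cho_solve

def newton_mode(K, loglik, grad, hess_diag, max_iter=100, tol=1e-9):
    """Numerically stable Newton iteration for the Laplace mode (GPML Alg. 3.1)."""
    N = K.shape[0]
    a = np.zeros(N)              # f = K a, so a = 0 starts the search at f = 0
    f = K.dot(a)
    obj_old = -np.inf
    for _ in range(max_iter):
        W = -hess_diag(f)        # W = -d2 log p(y|f) / df2, elementwise
        W12 = np.sqrt(W)
        B = np.eye(N) + W12[:, None] * K * W12[None, :]  # B = I + W^1/2 K W^1/2
        L = cholesky(B, lower=True)
        b = W * f + grad(f)
        a = b - W12 * cho_solve((L, True), W12 * K.dot(b))
        f = K.dot(a)
        obj = -0.5 * a.dot(f) + loglik(f)  # psi(f), up to terms constant in f
        if abs(obj - obj_old) < tol:
            break
        obj_old = obj
    return f, a

# Toy check with a Gaussian likelihood, where the mode has a closed form:
rng = np.random.RandomState(0)
X = np.linspace(0, 10, 30)
K = np.exp(-0.5 * (X[:, None] - X[None, :]) ** 2) + 1e-8 * np.eye(30)
y = np.sin(X) + 0.1 * rng.randn(30)
s2 = 0.01
f_hat, a_hat = newton_mode(
    K,
    loglik=lambda f: -0.5 * np.sum((y - f) ** 2) / s2,
    grad=lambda f: (y - f) / s2,
    hess_diag=lambda f: -np.ones_like(f) / s2,
)
# for a Gaussian likelihood, f_hat should equal K (K + s2 I)^{-1} y
assert np.allclose(f_hat, K.dot(np.linalg.solve(K + s2 * np.eye(30), y)), atol=1e-6)
```

Because f = Ka, caching the final a (the self.old_a added above) lets the next call warm-start from the previous mode instead of from f = 0.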
```diff
@@ -292,7 +299,7 @@ class Laplace(likelihood):
             #f_old = f.copy()
             W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
             if not self.likelihood_function.log_concave:
-                W[W < 0] = 1e-5 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+                W[W < 0] = 1e-8 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                 # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                 # To cause the posterior to become less certain than the prior and likelihood,
                 # This is a property only held by non-log-concave likelihoods
```
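The clipped negative entries in W are real, not numerical noise: the Student-t log-likelihood is not log-concave, so its second derivative turns positive in the tails, and W = −d²log p/df² goes negative there. A quick check with the second derivative of the Student-t log-density (derived by hand from the density; names are illustrative):

```python
def d2logp_df2(r, deg_free=3.0, sigma2=1.0):
    # d2/df2 log p(y|f) for a Student-t likelihood, with residual r = y - f:
    # (nu+1) * (r^2 - nu*sigma2) / (nu*sigma2 + r^2)^2
    return (deg_free + 1) * (r ** 2 - deg_free * sigma2) / (deg_free * sigma2 + r ** 2) ** 2

print(d2logp_df2(0.5))  # < 0 near the datum, so W = -d2logp > 0
print(d2logp_df2(5.0))  # > 0 in the tails, so W < 0, hence the clipping hack
```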
```diff
@@ -300,38 +307,46 @@ class Laplace(likelihood):

             W_f = W*f
             grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
-            #Find K_i_f

             b = W_f + grad
-            b = step_size*b
-
-            #Need this to find the f we have a stepsize which we need to move in, rather than a full unit movement
-            #c = np.dot(K, W_f) + f*(1-step_size) + step_size*np.dot(K, grad)
-            #solve_L = cho_solve((L, True), W_12*c)
-            #f = c - np.dot(K, W_12*solve_L)
-
-            #FIXME: Can't we get rid of this? Don't we want to evaluate obj(c,f) and this is our new_obj?
-            #Why did I choose to evaluate the objective function at the new f with the old hessian? I'm sure there was a good reason,
-            #Document it!
             solve_L = cho_solve((L, True), W_12*np.dot(K, b))
-            a = b - W_12*solve_L
-            f = np.dot(K, a)
-
-            tmp_old_obj = old_obj
-            old_obj = new_obj
-            new_obj = obj(a, f)
-            difference = new_obj - old_obj
-            if difference < 0:
-                #print "Objective function rose", difference
-                #If the objective function isn't rising, restart optimization
-                step_size *= 0.9
-                #print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
-                #objective function isn't increasing, try reducing step size
-                #f = f_old #it's actually faster not to go back to old location and just zigzag across the mode
-                old_obj = tmp_old_obj
-                rs += 1
+            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
+            full_step_a = b - W_12*solve_L
+            da = full_step_a - old_a
+
+            update_passed = False
+            while not update_passed:
+                a = old_a + step_size*da
+                f = np.dot(K, a)
+
+                old_obj = new_obj
+                new_obj = np.float(obj(a, f))
+                difference = new_obj - old_obj
+                #print "difference: ",difference
+                if difference < 0:
+                    #print grad
+                    print "Objective function rose", np.float(difference)
+                    #If the objective function isn't rising, restart optimization
+                    step_size *= 0.8
+                    print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
+                    #objective function isn't increasing, try reducing step size
+                    #f = f_old #it's actually faster not to go back to old location and just zigzag across the mode
+                    #old_obj = tmp_old_obj
+                    old_obj = new_obj
+                    rs += 1
+                else:
+                    update_passed = True

             #print "Iter difference: ", difference
             #print "F: ", f
             #print "A: ", a
+            old_a = a
             #print "Positive difference obj: ", np.float(difference)
-            difference = abs(difference)
+            difference = np.float(abs(difference))
             i += 1

         self.i = i
         #print "Positive difference obj: ", np.float(difference)
+        print "Iterations: ",i
+        print "Step size reductions", rs
+        print "Final difference: ", difference
         return f
```
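The new inner while-loop is a backtracking line search in a-space along the Newton direction da = full_step_a − old_a: take the full step, and shrink step_size until the objective stops decreasing. A generic sketch of that pattern (a hypothetical helper, not GPy code):

```python
def backtracking_step(obj, K, old_a, full_step_a, step_size=1.0, shrink=0.8, max_shrinks=40):
    """Shrink the step along da until the objective stops decreasing."""
    da = full_step_a - old_a
    prev = obj(old_a, K.dot(old_a))
    for _ in range(max_shrinks):
        a = old_a + step_size * da
        f = K.dot(a)
        if obj(a, f) >= prev:      # accept the first non-decreasing step
            return a, f, step_size
        step_size *= shrink        # objective fell: shrink and retry
    return old_a, K.dot(old_a), step_size  # give up: stay at the old point
```

The commit's version differs in two ways: it compares each trial against the previous iterate's objective rather than a fixed baseline, and it keeps the shrunken step_size for subsequent outer iterations instead of resetting it, which matches the commit message's note that the optimisation still fails at very low noise.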