diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index d300806f..7b9f10b1 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -90,7 +90,7 @@ def debug_student_t_noise_approx():
     real_var = 0.1
     #Start a function, any function
     X = np.linspace(0.0, 10.0, 50)[:, None]
-    #X = np.array([0.5])[:, None]
+    #X = np.array([0.5, 1])[:, None]
     Y = np.sin(X) + np.random.randn(*X.shape)*real_var
 
     X_full = np.linspace(0.0, 10.0, 50)[:, None]
@@ -99,7 +99,7 @@ def debug_student_t_noise_approx():
     Y = Y/Y.max()
 
     #Add student t random noise to datapoints
-    deg_free = 10
+    deg_free = 100000
     real_sd = np.sqrt(real_var)
     print "Real noise std: ", real_sd
 
diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py
index ed3229a9..b5362839 100644
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@@ -51,6 +51,8 @@ class Laplace(likelihood):
         self.Z = 0
         self.YYT = None
 
+        self.old_a = None
+
     def predictive_values(self, mu, var, full_cov):
         if full_cov:
             raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood")
@@ -83,7 +85,7 @@ class Laplace(likelihood):
         impl = mdot(dlp, dL_dfhat, I_KW_i)
         expl_a = mdot(self.Ki_f, self.Ki_f.T)
         expl_b = self.Wi_K_i
-        expl = 0.5*expl_a - 0.5*expl_b # Might need to be -?
+        expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
         dL_dthetaK_exp = dK_dthetaK(expl, X)
         dL_dthetaK_imp = dK_dthetaK(impl, X)
         #print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
@@ -265,7 +267,7 @@ class Laplace(likelihood):
         f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False)
         return f_hat[:, None]
 
-    def rasm_mode(self, K, MAX_ITER=500000, MAX_RESTART=50):
+    def rasm_mode(self, K, MAX_ITER=500, MAX_RESTART=40):
         """
         Rasmussens numerically stable mode finding
         For nomenclature see Rasmussen & Williams 2006
@@ -275,7 +277,12 @@ class Laplace(likelihood):
         :MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation
         :returns: f_mode
         """
-        f = np.zeros((self.N, 1))
+        if self.old_a is None:
+            old_a = np.zeros((self.N, 1))
+        else:
+            old_a = self.old_a
+
+        f = np.dot(self.K, old_a)
 
         new_obj = -np.inf
         old_obj = np.inf
@@ -292,7 +299,7 @@ class Laplace(likelihood):
             #f_old = f.copy()
             W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
             if not self.likelihood_function.log_concave:
-                W[W < 0] = 1e-5 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+                W[W < 0] = 1e-8 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                 # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                 # To cause the posterior to become less certain than the prior and likelihood,
                                 # This is a property only held by non-log-concave likelihoods
@@ -300,38 +307,46 @@ class Laplace(likelihood):
 
             W_f = W*f
             grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
-            #Find K_i_f
+
             b = W_f + grad
-            b = step_size*b
-
-            #Need this to find the f we have a stepsize which we need to move in, rather than a full unit movement
-            #c = np.dot(K, W_f) + f*(1-step_size) + step_size*np.dot(K, grad)
-            #solve_L = cho_solve((L, True), W_12*c)
-            #f = c - np.dot(K, W_12*solve_L)
-
-            #FIXME: Can't we get rid of this? Don't we want to evaluate obj(c,f) and this is our new_obj?
-            #Why did I choose to evaluate the objective function at the new f with the old hessian? I'm sure there was a good reason,
-            #Document it!
             solve_L = cho_solve((L, True), W_12*np.dot(K, b))
-            a = b - W_12*solve_L
-            f = np.dot(K, a)
+            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
+            full_step_a = b - W_12*solve_L
+            da = full_step_a - old_a
 
-            tmp_old_obj = old_obj
-            old_obj = new_obj
-            new_obj = obj(a, f)
-            difference = new_obj - old_obj
-            if difference < 0:
-                #print "Objective function rose", difference
-                #If the objective function isn't rising, restart optimization
-                step_size *= 0.9
-                #print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
-                #objective function isn't increasing, try reducing step size
-                #f = f_old #it's actually faster not to go back to old location and just zigzag across the mode
-                old_obj = tmp_old_obj
-                rs += 1
+            update_passed = False
+            while not update_passed:
+                a = old_a + step_size*da
+                f = np.dot(K, a)
 
-            difference = abs(difference)
+                old_obj = new_obj
+                new_obj = np.float(obj(a, f))
+                difference = new_obj - old_obj
+                #print "difference: ",difference
+                if difference < 0:
+                    #print grad
+                    print "Objective function rose", np.float(difference)
+                    #If the objective function isn't rising, restart optimization
+                    step_size *= 0.8
+                    print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
+                    #objective function isn't increasing, try reducing step size
+                    #f = f_old #it's actually faster not to go back to old location and just zigzag across the mode
+                    #old_obj = tmp_old_obj
+                    old_obj = new_obj
+                    rs += 1
+                else:
+                    update_passed = True
+
+            #print "Iter difference: ", difference
+            #print "F: ", f
+            #print "A: ", a
+            old_a = a
+            #print "Positive difference obj: ", np.float(difference)
+            difference = np.float(abs(difference))
             i += 1
 
-        self.i = i
+        #print "Positive difference obj: ", np.float(difference)
+        print "Iterations: ",i
+        print "Step size reductions", rs
+        print "Final difference: ", difference
         return f
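The last hunk replaces the fixed damped update with a warm start (reusing the previous a via self.old_a) plus a backtracking step size: the full Newton step from Rasmussen & Williams (2006), Algorithm 3.1, is treated only as a direction da, and step_size is shrunk by 0.8 until the objective stops falling. The standalone sketch below illustrates that scheme; it is not GPy code, and the function name rasm_mode_sketch, the Gaussian likelihood with variance lik_var, and the tolerance handling are assumptions made so the example is self-contained and runnable.

import numpy as np
from scipy.linalg import cholesky, cho_solve

def rasm_mode_sketch(K, y, lik_var=0.1, a0=None, max_iter=100, tol=1e-6):
    """Warm-started Laplace mode finding with a backtracking step size (illustrative only)."""
    N = y.shape[0]
    a = np.zeros((N, 1)) if a0 is None else a0.copy()  # warm start, playing the role of self.old_a
    f = K.dot(a)

    # Psi(a) = log p(y|f) - 0.5 a^T f, using a Gaussian likelihood purely for illustration
    def obj(a, f):
        return (-0.5*np.sum((f - y)**2)/lik_var - 0.5*a.T.dot(f)).item()

    old_obj = obj(a, f)
    step_size = 1.0
    for _ in range(max_iter):
        W = np.full((N, 1), 1.0/lik_var)       # -d^2 log p(y|f) / df^2 (Gaussian case)
        grad = -(f - y)/lik_var                # d log p(y|f) / df
        W_12 = np.sqrt(W)
        B = np.eye(N) + W_12*K*W_12.T          # B = I + W^{1/2} K W^{1/2}
        L = cholesky(B, lower=True)
        b = W*f + grad
        full_step_a = b - W_12*cho_solve((L, True), W_12*K.dot(b))
        da = full_step_a - a                   # Newton direction in a-space, not yet a step

        # shrink the step until the objective stops decreasing, as in the patch
        while True:
            a_new = a + step_size*da
            f_new = K.dot(a_new)
            new_obj = obj(a_new, f_new)
            if new_obj >= old_obj or step_size < 1e-8:
                break
            step_size *= 0.8

        converged = abs(new_obj - old_obj) < tol
        a, f, old_obj = a_new, f_new, new_obj
        if converged:
            break
    return f, a   # returning a lets the caller warm-start the next call

With a kernel matrix K and targets y of shape (N, 1), calling rasm_mode_sketch(K, y, a0=a_prev) reuses the previous solution in the same way the patch reuses self.old_a between calls to rasm_mode.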