diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index abb5f4ce..24f2d88c 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -91,6 +91,8 @@ def debug_student_t_noise_approx(): X = np.linspace(0.0, 10.0, 50)[:, None] #X = np.array([0.5, 1])[:, None] Y = np.sin(X) + np.random.randn(*X.shape)*real_var + #ty = np.array([1., 9.97733584, 4.17841363])[:, None] + #Y = ty X_full = X Y_full = np.sin(X_full) @@ -98,7 +100,7 @@ def debug_student_t_noise_approx(): Y = Y/Y.max() #Add student t random noise to datapoints - deg_free = 100 + deg_free = 10000 real_sd = np.sqrt(real_var) print "Real noise std: ", real_sd @@ -151,6 +153,9 @@ def debug_student_t_noise_approx(): #m.constrain_positive('') m.ensure_default_constraints() #m.constrain_fixed('t_noi', real_sd) + #m['rbf_var'] = 0.20446332 + #m['rbf_leng'] = 0.85776241 + #m['t_noise'] = 0.667083294421005 m.update_likelihood_approximation() #m.optimize(messages=True) print(m) diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index e096c5f4..e4652f27 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -153,7 +153,7 @@ class Laplace(likelihood): Wi = 1.0/self.W self.Sigma_tilde = np.diagflat(Wi) - Y_tilde = Wi*(self.Ki_f + self.W*self.f_hat) + Y_tilde = Wi*self.Ki_f + self.f_hat self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R ln_det_K_Wi__Bi = self.ln_I_KW_det + pddet(self.Sigma_tilde + self.K) @@ -199,7 +199,7 @@ class Laplace(likelihood): self.W = -self.likelihood_function.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data) if not self.likelihood_function.log_concave: - self.W[self.W < 0] = 1e-5 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur + self.W[self.W < 0] = 1e-8 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur #If the likelihood is non-log-concave. We wan't to say that there is a negative variance #To cause the posterior to become less certain than the prior and likelihood, #This is a property only held by non-log-concave likelihoods @@ -312,7 +312,7 @@ class Laplace(likelihood): while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART: W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data) if not self.likelihood_function.log_concave: - W[W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur + W[W < 0] = 0#1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur # If the likelihood is non-log-concave. We wan't to say that there is a negative variance # To cause the posterior to become less certain than the prior and likelihood, # This is a property only held by non-log-concave likelihoods @@ -329,8 +329,9 @@ class Laplace(likelihood): full_step_a = b - W_12*solve_L da = full_step_a - old_a - f_old = self.f.copy() + f_old = f.copy() + f_old = self.f.copy() def inner_obj(step_size, old_a, da, K): a = old_a + step_size*da f = np.dot(K, a) @@ -340,7 +341,6 @@ class Laplace(likelihood): from functools import partial i_o = partial(inner_obj, old_a=old_a, da=da, K=self.K) - old_obj = new_obj new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10) #update_passed = False @@ -354,10 +354,10 @@ class Laplace(likelihood): #print "difference: ",difference #if difference < 0: ##print grad - #print "Objective function rose", np.float(difference) + ##print "Objective function rose", np.float(difference) ##If the objective function isn't rising, restart optimization #step_size *= 0.8 - #print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size) + ##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size) ##objective function isn't increasing, try reducing step size ##f = f_old #it's actually faster not to go back to old location and just zigzag across the mode ##old_obj = tmp_old_obj @@ -368,12 +368,12 @@ class Laplace(likelihood): f = self.f difference = new_obj - old_obj - difference = np.abs(np.sum(f - f_old)) + abs(difference) + difference = np.abs(np.sum(f - f_old)) #+ abs(difference) old_a = self.a #a i += 1 #print "Positive difference obj: ", np.float(difference) - print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size) + #print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size) #self.a = a self.B, self.B_chol, self.W_12 = B, L, W_12 self.Bi, _, _, B_det = pdinv(self.B) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 4d298122..ebc87f56 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -274,7 +274,7 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - dlik_grad_dsigma = ((-2*self.sigma*self.v*(self.v + 1)*e) + dlik_grad_dsigma = ((-2*self.sigma*self.v*(self.v + 1)*e) #2 might not want to be here? / ((self.v*(self.sigma**2) + e**2)**2) ) return dlik_grad_dsigma diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 1d57ed38..20337ef5 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -152,8 +152,7 @@ class GP(model): else: dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK)) #print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL)) - print "dL_dthetaK is: ", dL_dthetaK - print "dL_dthetaL is: ", dL_dthetaL + print "dL_dthetaK: {} dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL) return np.hstack((dL_dthetaK, dL_dthetaL)) #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))