From f3b8dfb2225c8a25a0b753ec0e2f63b28cdec827 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Mon, 3 Jun 2013 14:51:09 +0100
Subject: [PATCH] about to input new derivations for Z's...

---
 GPy/examples/laplace_approximations.py | 15 +++++++++++---
 GPy/likelihoods/Laplace.py             | 28 ++++++++++++++++----------
 GPy/models/GP.py                       | 17 ++++++++--------
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index 14ff44a0..ee71a950 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -143,11 +143,12 @@ def student_t_approx():
     Yc[10] += 100
     Yc[25] += 10
     Yc[23] += 10
+    Yc[26] += 1000
     Yc[24] += 10
     #Yc = Yc/Yc.max()
 
     #Add student t random noise to datapoints
-    deg_free = 1000000000000
+    deg_free = 10
     real_sd = np.sqrt(real_var)
     print "Real noise: ", real_sd
 
@@ -187,21 +188,25 @@ def student_t_approx():
     plt.subplot(211)
     m.plot()
     plt.plot(X_full, Y_full)
+    plt.title('Gaussian clean')
     print m
 
     #Corrupt
     print "Corrupt Gaussian"
     m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
     m.ensure_default_constraints()
-    m.optimize()
+    #m.optimize()
     plt.subplot(212)
     m.plot()
     plt.plot(X_full, Y_full)
+    plt.title('Gaussian corrupt')
     print m
 
+    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
+
     plt.figure(2)
     plt.suptitle('Student-t likelihood')
-    edited_real_sd = initial_var_guess #real_sd
+    edited_real_sd = real_sd #initial_var_guess
 
     print "Clean student t, rasm"
     t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -215,6 +220,7 @@ def student_t_approx():
     m.plot()
     plt.plot(X_full, Y_full)
     plt.ylim(-2.5, 2.5)
+    plt.title('Student-t rasm clean')
 
     print "Corrupt student t, rasm"
     t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -228,6 +234,7 @@ def student_t_approx():
     m.plot()
     plt.plot(X_full, Y_full)
     plt.ylim(-2.5, 2.5)
+    plt.title('Student-t rasm corrupt')
 
     print "Clean student t, ncg"
     t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -241,6 +248,7 @@ def student_t_approx():
     m.plot()
     plt.plot(X_full, Y_full)
     plt.ylim(-2.5, 2.5)
+    plt.title('Student-t ncg clean')
 
     print "Corrupt student t, ncg"
     t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
@@ -254,6 +262,7 @@ def student_t_approx():
     m.plot()
     plt.plot(X_full, Y_full)
     plt.ylim(-2.5, 2.5)
+    plt.title('Student-t ncg corrupt')
 
     ###with a student t distribution, since it has heavy tails it should work well
diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py
index 74d37d48..45fddeaa 100644
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@@ -6,7 +6,10 @@ from numpy.linalg import cond
 from likelihood import likelihood
 from ..util.linalg import pdinv, mdot, jitchol, chol_inv, det_ln_diag, pddet
 from scipy.linalg.lapack import dtrtrs
+import random
 #import pylab as plt
+np.random.seed(50)
+random.seed(50)
 
 
 class Laplace(likelihood):
@@ -156,6 +159,7 @@ class Laplace(likelihood):
         Lt_W_i_Li = dtrtrs(Lt_W, Li, lower=False)[0]
         self.Wi__Ki_W = Lt_W_i_Li + np.eye(self.N)
 
+
         Y_tilde = np.dot(self.Wi__Ki_W, self.f_hat)
 
         #f.T(Ki + W)f
@@ -239,15 +243,15 @@
         self.ln_Ki_W_i_det = np.linalg.det(self.Ki_W_i)
 
+        #Do the computation again at f to get Ki_f which is useful
         b = self.W*self.f_hat + self.likelihood_function.dlik_df(self.data, self.f_hat, extra_data=self.extra_data)
-        solve_chol = cho_solve((self.B_chol, True), np.dot(self.W_12*self.K, b))
-        a = b - self.W_12*solve_chol
-        self.Ki_f = a
+
         self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
 
         self.ln_K_det = pddet(self.K)
+        #_, _, _, self.ln_K_det = pdinv(self.K)
 
         self.ln_z_hat = (- 0.5*self.f_Ki_f
                          - 0.5*self.ln_K_det
@@ -296,7 +300,7 @@ class Laplace(likelihood):
             res = -1 * (--np.diag(self.likelihood_function.d2lik_d2f(self.data[:, 0], f, extra_data=self.extra_data)) - self.Ki)
             return np.squeeze(res)
 
-        f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
+        f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False)
         return f_hat[:, None]
 
     def rasm_mode(self, K, MAX_ITER=500000, MAX_RESTART=50):
@@ -336,17 +340,19 @@
             grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
             #Find K_i_f
             b = W_f + grad
+            b = step_size*b
 
-            #a should be equal to Ki*f now so should be able to use it
-            c = np.dot(K, W_f) + f*(1-step_size) + step_size*np.dot(K, grad)
-
-            solve_L = cho_solve((L, True), W_12*c)
-
-            f = c - np.dot(K, W_12*solve_L)
+            #Need this to find the f we have a stepsize which we need to move in, rather than a full unit movement
+            #c = np.dot(K, W_f) + f*(1-step_size) + step_size*np.dot(K, grad)
+            #solve_L = cho_solve((L, True), W_12*c)
+            #f = c - np.dot(K, W_12*solve_L)
 
+            #FIXME: Can't we get rid of this? Don't we want to evaluate obj(c,f) and this is our new_obj?
+            #Why did I choose to evaluate the objective function at the new f with the old hessian? I'm sure there was a good reason,
+            #Document it!
             solve_L = cho_solve((L, True), W_12*np.dot(K, b))
-
             a = b - W_12*solve_L
+            f = np.dot(K, a)
 
             tmp_old_obj = old_obj
             old_obj = new_obj
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 0ba20d7b..e4ed52ef 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -142,23 +142,22 @@ class GP(model):
         Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
         """
         dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X)
-        print "dL_dthetaK before: ",dL_dthetaK
         if isinstance(self.likelihood, Laplace):
             #Reapproximate incase it hasnt been done...
-            if isinstance(self.likelihood, Laplace):
-                self.likelihood.fit_full(self.kern.K(self.X))
-                self.likelihood._set_params(self.likelihood._get_params())
+            self.likelihood.fit_full(self.kern.K(self.X))
+            self.likelihood._set_params(self.likelihood._get_params())
+            print self.kern._get_params()
 
             #Need to pass in a matrix of ones to get access to raw dK_dthetaK values without being chained
-            fake_dL_dKs = np.ones(self.dL_dK.shape) #FIXME: Check this is right...
+            #fake_dL_dKs = np.ones(self.dL_dK.shape) #FIXME: Check this is right...
             #fake_dL_dKs = np.eye(self.dL_dK.shape[0]) #FIXME: Check this is right...
             #BUG: THIS SHOULD NOT BE (1,num_k_params) matrix it should be (N,N,num_k_params)
-            dK_dthetaK = self.kern.dK_dtheta(dL_dK=fake_dL_dKs, X=self.X)
+            #dK_dthetaK = self.kern.dK_dtheta(dL_dK=fake_dL_dKs, X=self.X)
 
-            dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK=dK_dthetaK)
-            dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
-            print "dL_dthetaK after: ",dL_dthetaK
+            #dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK=dK_dthetaK)
+            dL_dthetaL = 0 #self.likelihood._gradients(partial=np.diag(self.dL_dK))
+            #print "dL_dthetaK after: ",dL_dthetaK
             #print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL))
         else:
             dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
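
For reference: the rasm_mode hunk above replaces the old damped update with the numerically stable Newton step for the Laplace approximation's mode (Algorithm 3.1 in Rasmussen and Williams, "Gaussian Processes for Machine Learning"), with the Newton move scaled by step_size. The sketch below is a minimal standalone version of that step under stated assumptions, not GPy code: it mirrors the patch's names (K, W, W_12, b, a, f, step_size), while dlik_df and d2lik_d2f are assumed illustrative callables returning the first and second derivatives of log p(y|f) as (N, 1) arrays.

import numpy as np
from scipy.linalg import cholesky, cho_solve

def laplace_newton_step(K, f, dlik_df, d2lik_d2f, step_size=1.0):
    #One damped Newton step towards f_hat = argmax_f [log p(y|f) - 0.5 f^T K^{-1} f].
    #With W = -d2 log p(y|f)/df^2 (diagonal, stored as a column vector):
    #    b = W f + d log p(y|f)/df
    #    a = b - W^{1/2} B^{-1} (W^{1/2} K b),  where B = I + W^{1/2} K W^{1/2}
    #    f_new = K a
    #so K is never inverted directly.
    N = f.shape[0]
    W = -d2lik_d2f(f)                  #(N, 1) negative curvature of the log-likelihood
    W_12 = np.sqrt(W)
    B = np.eye(N) + W_12 * K * W_12.T  #B = I + W^{1/2} K W^{1/2}, row- and column-scaled
    L = cholesky(B, lower=True)
    b = W * f + dlik_df(f)
    b = step_size * b                  #the patch damps the full Newton move this way
    a = b - W_12 * cho_solve((L, True), W_12 * np.dot(K, b))
    f_new = np.dot(K, a)               #at convergence, a approximates K^{-1} f_hat
    return f_new, a

Working through B = I + W^{1/2} K W^{1/2} keeps the solve well conditioned even when K is nearly singular, which is presumably why the patch keeps the b/a/f form and comments out the earlier c-based update.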