diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index b3048f5a..279bc597 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -1,6 +1,7 @@ import GPy import numpy as np import matplotlib.pyplot as plt +np.random.seed(1) def timing(): real_var = 0.1 @@ -86,17 +87,67 @@ def v_fail_test(): def student_t_f_check(): plt.close('all') - real_std = 0.1 - X = np.random.rand(100)[:, None] + X = np.linspace(0, 1, 50)[:, None] + real_std = 0.001 + noise = np.random.randn(*X.shape)*real_std + Y = np.sin(X*2*np.pi) + noise + deg_free = 1000 + + kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1]) + mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) + mgp.ensure_default_constraints() + mgp.randomize() + mgp.optimize() + print mgp + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + + kernelst = kernelgp.copy() + t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=1e-5) + stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') + m = GPy.models.GP(X, stu_t_likelihood, kernelst) + m['rbf_v'] = mgp._get_params()[0] + m['rbf_l'] = mgp._get_params()[1] + 1 + m.ensure_default_constraints() + m.constrain_positive('t_no') + print m + plt.figure() + plt.subplot(511) + m.plot() + print m + plt.subplot(512) + m.optimize(max_f_eval=15) + m.plot() + print m + plt.subplot(513) + m.optimize(max_f_eval=15) + m.plot() + print m + plt.subplot(514) + m.optimize(max_f_eval=15) + m.plot() + print m + plt.subplot(515) + m.optimize() + m.plot() + print "final optimised student t" + print m + print "real GP" + print mgp + +def student_t_fix_optimise_check(): + plt.close('all') + real_var = 0.1 + real_std = np.sqrt(real_var) + X = np.random.rand(200)[:, None] noise = np.random.randn(*X.shape)*real_std Y = np.sin(X*2*np.pi) + noise X_full = X Y_full = np.sin(X_full) #Y = Y/Y.max() - deg_free = 10000 + deg_free = 1000 #GP - kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1]) + kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1]) mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) mgp.ensure_default_constraints() mgp.randomize() @@ -113,10 +164,12 @@ def student_t_f_check(): m = GPy.models.GP(X, stu_t_likelihood, kernelst) m.constrain_fixed('rbf_var', mgp._get_params()[0]) m.constrain_fixed('rbf_len', mgp._get_params()[1]) + m.constrain_positive('t_noise') + #m.ensure_default_constraints() m.update_likelihood_approximation() print "T std2 {} converted from original data, LL: {}".format(real_stu_t_std2, m.log_likelihood()) - plt.subplot(221) + plt.subplot(231) m.plot() plt.title('Student t original data noise') @@ -125,7 +178,7 @@ def student_t_f_check(): m['t_noise_std2'] = gp_noise m.update_likelihood_approximation() print "T std2 {} same as GP noise, LL: {}".format(gp_noise, m.log_likelihood()) - plt.subplot(222) + plt.subplot(232) m.plot() plt.title('Student t GP noise') @@ -134,29 +187,57 @@ def student_t_f_check(): m['t_noise_std2'] = real_stu_t_std2gp m.update_likelihood_approximation() print "T std2 {} converted to student t noise from GP noise, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.log_likelihood()) - plt.subplot(223) + plt.subplot(233) m.plot() plt.title('Student t GP noise converted') m.constrain_positive('t_noise_std2') m.randomize() m.update_likelihood_approximation() + plt.subplot(234) + m.plot() + plt.title('Student t fixed rbf') m.optimize() print "T std2 {} var {} after optimising, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.likelihood.likelihood_function.variance, m.log_likelihood()) - plt.subplot(224) + plt.subplot(235) m.plot() - plt.title('Student t optimised') + plt.title('Student t fixed rbf optimised') plt.figure(2) + mrbf = m.copy() + mrbf.unconstrain('') + mrbf.constrain_fixed('t_noise', m.likelihood.likelihood_function.sigma2) + gp_var = mgp._get_params()[0] + gp_len = mgp._get_params()[1] + mrbf.constrain_fixed('rbf_var', gp_var) + mrbf.constrain_positive('rbf_len') + mrbf.randomize() + print "Before optimize" + print mrbf + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + mrbf.checkgrad(verbose=1) + plt.subplot(121) + mrbf.plot() + plt.title('Student t fixed noise') + #mrbf.optimize() + print "After optimize" + print mrbf + plt.subplot(122) + mrbf.plot() + plt.title('Student t fixed noise optimized') + print mrbf + + plt.figure(3) print "GP noise {} after optimising, LL: {}".format(gp_noise, mgp.log_likelihood()) plt.suptitle('Gaussian likelihood optimised') mgp.plot() print "Real std: {}".format(real_std) print "Real variance {}".format(real_std**2) - import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT - return m + print "Len should be: {}".format(gp_len) + return mrbf def debug_student_t_noise_approx(): plot = False diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index c5894ed6..5343f5dc 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -89,7 +89,7 @@ class Laplace(likelihood): expl = 0.5*expl_a + 0.5*expl_b # Might need to be -? dL_dthetaK_exp = dK_dthetaK(expl, X) dL_dthetaK_imp = dK_dthetaK(impl, X) - #print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) + print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp return dL_dthetaK @@ -290,10 +290,12 @@ class Laplace(likelihood): :MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation :returns: f_mode """ - if self.old_a is None: - old_a = np.zeros((self.N, 1)) - else: - old_a = self.old_a + old_a = np.zeros((self.N, 1)) + #old_a = None + #if self.old_a is None: + #old_a = np.zeros((self.N, 1)) + #else: + #old_a = self.old_a f = np.dot(self.K, old_a) self.f = f @@ -308,8 +310,6 @@ class Laplace(likelihood): step_size = 1 rs = 0 i = 0 - #if self.likelihood_function.sigma < 0.001: - #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART: W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data) if not self.likelihood_function.log_concave: @@ -371,8 +371,10 @@ class Laplace(likelihood): old_a = self.a #a i += 1 + self.old_a = old_a #print "Positive difference obj: ", np.float(difference) - print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size) + #print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size) + print "Iterations: {}, Final_difference: {}".format(i, difference) #self.a = a #self.B, self.B_chol, self.W_12 = B, L, W_12 #self.Bi, _, _, B_det = pdinv(self.B) diff --git a/GPy/models/GP.py b/GPy/models/GP.py index cd4b7dac..0f56e21c 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -132,7 +132,11 @@ class GP(model): model for a new variable Y* = v_tilde/tau_tilde, with a covariance matrix K* = K + diag(1./tau_tilde) plus a normalization term. """ + if isinstance(self.likelihood, Laplace): + self.likelihood.fit_full(self.kern.K(self.X)) + self.likelihood._set_params(self.likelihood._get_params()) l = -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z + print "K_ldet: {} mft: {} Z: {}".format(self.K_logdet, self._model_fit_term(), self.likelihood.Z) return l def _log_likelihood_gradients(self): @@ -142,12 +146,12 @@ class GP(model): Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta """ dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X) - #print "dL_dthetaK should be: ", dL_dthetaK + print "dL_dthetaK should be: ", dL_dthetaK if isinstance(self.likelihood, Laplace): - #self.likelihood.fit_full(self.kern.K(self.X)) - #self.likelihood._set_params(self.likelihood._get_params()) + self.likelihood.fit_full(self.kern.K(self.X)) + self.likelihood._set_params(self.likelihood._get_params()) dK_dthetaK = self.kern.dK_dtheta - dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X) + dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X.copy()) dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK)) else: dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))