From ee980227ac34262b192565cafb5e195cefee46d0 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 9 Jul 2013 11:35:42 +0100 Subject: [PATCH] Fixed 2*variance plotting instead of 2*std plotting, tidied up --- GPy/examples/laplace_approximations.py | 93 ++++++++++++++++++++----- GPy/likelihoods/Laplace.py | 2 +- GPy/likelihoods/likelihood_functions.py | 28 +------- GPy/models/GP.py | 2 +- 4 files changed, 78 insertions(+), 47 deletions(-) diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index 78b4e986..b3048f5a 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -85,24 +85,78 @@ def v_fail_test(): print(m) def student_t_f_check(): - real_var = 0.1 + plt.close('all') + real_std = 0.1 X = np.random.rand(100)[:, None] - Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var + noise = np.random.randn(*X.shape)*real_std + Y = np.sin(X*2*np.pi) + noise X_full = X Y_full = np.sin(X_full) - Y = Y/Y.max() - deg_free = 1000 - real_sd = np.sqrt(real_var) + #Y = Y/Y.max() + deg_free = 10000 - kernel = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1]) - real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free))) + #GP + kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1]) + mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) + mgp.ensure_default_constraints() + mgp.randomize() + mgp.optimize() - t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=real_stu_t_std**2) + kernelst = kernelgp.copy() + real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free)) + + t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=real_stu_t_std2) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, stu_t_likelihood, kernel) - m.constrain_positive('t_noise_std2') - m.ensure_default_constraints() + + plt.figure(1) + plt.suptitle('Student likelihood') + m = GPy.models.GP(X, stu_t_likelihood, kernelst) + m.constrain_fixed('rbf_var', mgp._get_params()[0]) + m.constrain_fixed('rbf_len', mgp._get_params()[1]) + m.update_likelihood_approximation() + print "T std2 {} converted from original data, LL: {}".format(real_stu_t_std2, m.log_likelihood()) + plt.subplot(221) + m.plot() + plt.title('Student t original data noise') + + #Fix student t noise variance to same a GP + gp_noise = mgp._get_params()[2] + m['t_noise_std2'] = gp_noise + m.update_likelihood_approximation() + print "T std2 {} same as GP noise, LL: {}".format(gp_noise, m.log_likelihood()) + plt.subplot(222) + m.plot() + plt.title('Student t GP noise') + + #Fix student t noise to variance converted from the GP + real_stu_t_std2gp = (gp_noise)*((deg_free - 2)/float(deg_free)) + m['t_noise_std2'] = real_stu_t_std2gp + m.update_likelihood_approximation() + print "T std2 {} converted to student t noise from GP noise, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.log_likelihood()) + plt.subplot(223) + m.plot() + plt.title('Student t GP noise converted') + + m.constrain_positive('t_noise_std2') + m.randomize() + m.update_likelihood_approximation() + m.optimize() + print "T std2 {} var {} after optimising, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.likelihood.likelihood_function.variance, m.log_likelihood()) + plt.subplot(224) + m.plot() + plt.title('Student t optimised') + + plt.figure(2) + print "GP noise {} after optimising, LL: {}".format(gp_noise, mgp.log_likelihood()) + plt.suptitle('Gaussian likelihood optimised') + mgp.plot() + print "Real std: {}".format(real_std) + print "Real variance {}".format(real_std**2) + + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + + return m def debug_student_t_noise_approx(): plot = False @@ -218,16 +272,16 @@ def student_t_approx(): """ Example of regressing with a student t likelihood """ - real_var = 0.2 + real_std = 0.1 #Start a function, any function - X = np.linspace(0.0, 10.0, 30)[:, None] - Y = np.sin(X) + np.random.randn(*X.shape)*real_var + X = np.linspace(0.0, 10.0, 50)[:, None] + Y = np.sin(X) + np.random.randn(*X.shape)*real_std Yc = Y.copy() X_full = np.linspace(0.0, 10.0, 500)[:, None] Y_full = np.sin(X_full) - #Y = Y/Y.max() + Y = Y/Y.max() Yc[10] += 100 Yc[25] += 10 @@ -238,10 +292,9 @@ def student_t_approx(): #Add student t random noise to datapoints deg_free = 8 - real_sd = np.sqrt(real_var) - print "Real noise: ", real_sd + print "Real noise: ", real_std - initial_var_guess = 0.01 + initial_var_guess = 0.1 #t_rv = t(deg_free, loc=0, scale=real_var) #noise = t_rvrvs(size=Y.shape) #Y += noise @@ -293,7 +346,7 @@ def student_t_approx(): plt.figure(2) plt.suptitle('Student-t likelihood') - edited_real_sd = real_sd #initial_var_guess + edited_real_sd = real_std #initial_var_guess print "Clean student t, rasm" t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd) @@ -301,6 +354,7 @@ def student_t_approx(): m = GPy.models.GP(X, stu_t_likelihood, kernel6) m.ensure_default_constraints() m.constrain_positive('t_noise') + m.randomize() m.update_likelihood_approximation() m.optimize() print(m) @@ -316,6 +370,7 @@ def student_t_approx(): m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4) m.ensure_default_constraints() m.constrain_positive('t_noise') + m.randomize() m.update_likelihood_approximation() m.optimize() print(m) diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index 984112a5..c5894ed6 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -89,7 +89,7 @@ class Laplace(likelihood): expl = 0.5*expl_a + 0.5*expl_b # Might need to be -? dL_dthetaK_exp = dK_dthetaK(expl, X) dL_dthetaK_imp = dK_dthetaK(impl, X) - print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) + #print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp return dL_dthetaK diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index bfc759d7..595fa63c 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -193,16 +193,11 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - A = gammaln((self.v + 1) * 0.5) - B = -gammaln(self.v * 0.5) - C = - 0.5*np.log(self.sigma2 * self.v * np.pi) - D = (-(self.v + 1)*0.5)*np.log(1 + (e**2)/(self.v*self.sigma2)) objective = (+ gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5) - 0.5*np.log(self.sigma2 * self.v * np.pi) + (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v)) ) - #print "A: {} B: {} C: {} D: {} obj: {}".format(A,B,C,D.sum(),objective.sum()) return np.sum(objective) def dlik_df(self, y, f, extra_data=None): @@ -266,15 +261,6 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - #sigma = np.sqrt(self.sigma2) - #dlik_dsigma = ( - (1/sigma) + - #((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) ) - #) - #dlik_dsigma = ( - 1 + - #((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) ) - #) - #dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1 - #dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v)) dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) return dlik_dsigma @@ -286,10 +272,6 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - #sigma = np.sqrt(self.sigma2) - #dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here? - #/ ((self.v*self.sigma2 + e**2)**2) - #) dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2) return dlik_grad_dsigma @@ -301,12 +283,6 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - #sigma = np.sqrt(self.sigma2) - #dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) / - #((e**2 + self.sigma2*self.v)**3) - #) - #dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2)) - #/ ((e**2 + (self.sigma**2)*self.v)**3) ) dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / (self.sigma2*self.v + (e**2))**3 ) @@ -344,8 +320,8 @@ class student_t(likelihood_function): #Now we have an analytical solution for the variances of the distribution p(y*|f*)p(f*) around our test points but we now #need the 95 and 5 percentiles. #FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles - p_025 = mu - 2.*true_var - p_975 = mu + 2.*true_var + p_025 = mu - 2.*np.sqrt(true_var) + p_975 = mu + 2.*np.sqrt(true_var) return mu, np.nan*mu, p_025, p_975 diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 20337ef5..cd4b7dac 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -152,7 +152,7 @@ class GP(model): else: dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK)) #print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL)) - print "dL_dthetaK: {} dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL) + #print "dL_dthetaK: {} dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL) return np.hstack((dL_dthetaK, dL_dthetaL)) #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))