diff --git a/python/examples/laplace_approximations.py b/python/examples/laplace_approximations.py index aa8cdcb4..73c8f67f 100644 --- a/python/examples/laplace_approximations.py +++ b/python/examples/laplace_approximations.py @@ -16,47 +16,75 @@ def student_t_approx(): #Add student t random noise to datapoints deg_free = 2.5 - t_rv = t(deg_free, loc=5, scale=1) + t_rv = t(deg_free, loc=0, scale=1) noise = t_rv.rvs(size=Y.shape) Y += noise + #Add some extreme value noise to some of the datapoints + #percent_corrupted = 0.05 + #corrupted_datums = int(np.round(Y.shape[0] * percent_corrupted)) + #indices = np.arange(Y.shape[0]) + #np.random.shuffle(indices) + #corrupted_indices = indices[:corrupted_datums] + #print corrupted_indices + #noise = t_rv.rvs(size=(len(corrupted_indices), 1)) + #Y[corrupted_indices] += noise + # Kernel object - print X.shape - kernel = GPy.kern.rbf(X.shape[1]) + #print X.shape + #kernel = GPy.kern.rbf(X.shape[1]) - #A GP should completely break down due to the points as they get a lot of weight - # create simple GP model - m = GPy.models.GP_regression(X, Y, kernel=kernel) + ##A GP should completely break down due to the points as they get a lot of weight + ## create simple GP model + #m = GPy.models.GP_regression(X, Y, kernel=kernel) - # optimize - m.ensure_default_constraints() - m.optimize() - # plot - #m.plot() - print m + ## optimize + #m.ensure_default_constraints() + #m.optimize() + ## plot + ##m.plot() + #print m #with a student t distribution, since it has heavy tails it should work well - likelihood_function = student_t(deg_free, sigma=1) - lap = Laplace(Y, likelihood_function) - cov = kernel.K(X) - lap.fit_full(cov) - #Get one sample (just look at a single Y - #mode = float(lap.f_hat[0]) - #variance = float((deg_free/(deg_free-2))) #BUG: Not convinced this is giving reasonable variables - #variance = float((deg_free/(deg_free-2)) + np.diagonal(lap.hess_hat)[0]) #BUG: Not convinced this is giving reasonable variables + 
#likelihood_function = student_t(deg_free, sigma=1) + #lap = Laplace(Y, likelihood_function) + #cov = kernel.K(X) + #lap.fit_full(cov) - test_range = np.arange(0, 10, 0.1) - plt.plot(test_range, t_rv.pdf(test_range)) - for i in xrange(X.shape[0]): - mode = lap.f_hat[i] - covariance = lap.hess_hat_i[i,i] - scaling = np.exp(lap.ln_z_hat) - normalised_approx = norm(loc=mode, scale=covariance) - print "Normal with mode %f, and variance %f" % (mode, covariance) - plt.plot(test_range, normalised_approx.pdf(test_range)) - plt.show() + #test_range = np.arange(0, 10, 0.1) + #plt.plot(test_range, t_rv.pdf(test_range)) + #for i in xrange(X.shape[0]): + #mode = lap.f_hat[i] + #covariance = lap.hess_hat_i[i,i] + #scaling = np.exp(lap.ln_z_hat) + #normalised_approx = norm(loc=mode, scale=covariance) + #print "Normal with mode %f, and variance %f" % (mode, covariance) + #plt.plot(test_range, scaling*normalised_approx.pdf(test_range)) + #plt.show() + #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + + # Likelihood object + t_distribution = student_t(deg_free, sigma=1) + stu_t_likelihood = Laplace(Y, t_distribution) + kernel = GPy.kern.rbf(X.shape[1]) + + m = GPy.models.GP(X, stu_t_likelihood, kernel) + m.ensure_default_constraints() + + m.update_likelihood_approximation() + print "NEW MODEL" + print(m) + + # optimize + #m.optimize() + print(m) + + # plot + m.plot() import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + return m + def noisy_laplace_approx(): """ diff --git a/python/likelihoods/Laplace.py b/python/likelihoods/Laplace.py index 9d622b0d..23db6abd 100644 --- a/python/likelihoods/Laplace.py +++ b/python/likelihoods/Laplace.py @@ -5,6 +5,7 @@ import GPy from functools import partial from GPy.likelihoods.likelihood import likelihood from GPy.util.linalg import pdinv,mdot +import numpy.testing.assert_array_equal class Laplace(likelihood): """Laplace approximation to a posterior""" @@ -35,6 +36,29 @@ class Laplace(likelihood): self.NORMAL_CONST = -((0.5 * self.N) * np.log(2 * 
np.pi)) + #Initial values for the GP variables + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + + def predictive_values(self,mu,var): + return self.likelihood_function.predictive_values(mu,var) + + def _get_params(self): + return np.zeros(0) + + def _get_param_names(self): + return [] + + def _set_params(self,p): + pass # TODO: Laplace likelihood might want to take some parameters... + + def _gradients(self,partial): + raise NotImplementedError + #return np.zeros(0) # TODO: Laplace likelihood might want to take some parameters... + def _compute_GP_variables(self): """ Generates data Y which would give the normal distribution identical to the laplace approximation @@ -63,11 +87,14 @@ class Laplace(likelihood): #Do we really need to inverse Sigma_tilde_i? :( (self.Sigma_tilde, _, _, self.log_Sig_i_det) = pdinv(self.Sigma_tilde_i) Y_tilde = mdot(self.Sigma_tilde, self.hess_hat, self.f_hat) #f_hat? should be f but we must have optimized for them I guess? 
- self.Z_tilde = np.exp(self.ln_z_hat - self.NORMAL_CONST + (0.5 * mdot(Y_tilde, (self.Sigma_tilde_i, Y_tilde)))) + self.Z_tilde = np.exp(self.ln_z_hat - self.NORMAL_CONST + (0.5 * mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde)))) + + self.Z = self.Z_tilde self.Y = Y_tilde self.covariance_matrix = self.Sigma_tilde - self.precision = np.diag(self.Sigma_tilde)[:, None] - self.YYT = np.dot(self.Y, self.Y) + self.precision = 1/np.diag(self.Sigma_tilde)[:, None] + self.YYT = np.dot(self.Y, self.Y.T) + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT def fit_full(self, K): """ @@ -76,7 +103,6 @@ class Laplace(likelihood): :K: Covariance matrix """ f = np.zeros((self.N, 1)) - #K = np.diag(np.ones(self.N)) (self.Ki, _, _, self.log_Kdet) = pdinv(K) LOG_K_CONST = -(0.5 * self.log_Kdet) OBJ_CONST = self.NORMAL_CONST + LOG_K_CONST @@ -95,23 +121,25 @@ class Laplace(likelihood): return np.squeeze(res) def obj_hess(f): - res = -1 * (np.diag(self.likelihood_function.link_hess(self.data[:,0], f)) - self.Ki) + res = -1 * (-np.diag(self.likelihood_function.link_hess(self.data[:,0], f)) - self.Ki) return np.squeeze(res) self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess) - print self.f_hat #At this point get the hessian matrix - self.hess_hat = -1*np.diag(self.likelihood_function.link_hess(self.data[:,0], self.f_hat)) #-1*obj_hess(self.f_hat) + self.Ki - #self.hess_hat = -1*obj_hess(self.f_hat) + self.Ki - (self.hess_hat_i, _, _, self.log_hess_hat_det) = pdinv(self.hess_hat + self.Ki) + self.hess_hat = np.diag(self.likelihood_function.link_hess(self.data[:,0], self.f_hat)) + self.Ki + (self.hess_hat_i, _, _, self.log_hess_hat_det) = pdinv(self.hess_hat) + (self.hess_hat, _, _, self.log_hess_hat_i_det) = pdinv(self.hess_hat_i) + + np.testing.assert_array_equal(self.hess_hat, hess_hat_new) #Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...) 
- self.height_unnormalised = -1*obj(self.f_hat) #FIXME: Is it - obj constant and *-1? + #self.height_unnormalised = -1*obj(self.f_hat) #FIXME: Is it - obj constant and *-1? #z_hat is how much we need to scale the normal distribution by to get the area of our approximation close to #the area of p(f)p(y|f) we do this by matching the height of the distributions at the mode #z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat \sum_{n} ln p(y_n|f_n) - self.ln_z_hat = -0.5*np.log(self.log_hess_hat_det) + self.height_unnormalised - self.NORMAL_CONST #Unsure whether its log_hess or log_hess_i - + #Unsure whether its log_hess or log_hess_i + self.ln_z_hat = -0.5*np.log(self.log_hess_hat_det) - 0.5*self.log_Kdet + self.likelihood_function.link_function(self.data[:,0], self.f_hat) - mdot(f.T, (self.Ki, f)) + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT return self._compute_GP_variables() diff --git a/python/likelihoods/likelihood_function.py b/python/likelihoods/likelihood_function.py index 8adbf86c..e70cdc8d 100644 --- a/python/likelihoods/likelihood_function.py +++ b/python/likelihoods/likelihood_function.py @@ -1,7 +1,7 @@ from scipy.special import gammaln import numpy as np from GPy.likelihoods.likelihood_functions import likelihood_function - +from scipy import stats class student_t(likelihood_function): """Student t likelihood distribution @@ -72,3 +72,17 @@ class student_t(likelihood_function): #hess = ((self.v + 1) * e) / ((((self.sigma**2) * self.v) + e**2)**2) hess = ((self.v + 1) * (e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2) * self.v) + e**2)**2) return hess + + def predictive_values(self, mu, var): + """ + Compute mean, and confidence interval (percentiles 2.5 and 97.5) of the prediction + """ + mean = np.exp(mu) + p_025 = stats.t.ppf(025,mean) + p_975 = stats.t.ppf(975,mean) + + #p_025 = tmp[:,0] + #p_975 = tmp[:,1] + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + return mean,p_025,p_975 +