diff --git a/python/examples/laplace_approximations.py b/python/examples/laplace_approximations.py index 5fb39e08..37681849 100644 --- a/python/examples/laplace_approximations.py +++ b/python/examples/laplace_approximations.py @@ -10,20 +10,23 @@ def student_t_approx(): """ Example of regressing with a student t likelihood """ + real_var = 0.1 #Start a function, any function - X = np.linspace(0.0, 10.0, 100)[:, None] - Y = np.sin(X) + np.random.randn(*X.shape)*0.1 + X = np.linspace(0.0, 10.0, 30)[:, None] + Y = np.sin(X) + np.random.randn(*X.shape)*real_var Yc = Y.copy() - Y = Y/Y.max() + #Y = Y/Y.max() - Yc[10] += 5 - Yc[15] += 20 - Yc = Yc/Yc.max() + #Yc[10] += 100 + Yc[25] += 10 + Yc[23] += 10 + Yc[24] += 10 + #Yc = Yc/Yc.max() #Add student t random noise to datapoints - deg_free = 1000000 #100000.5 - real_var = 0.1 + deg_free = 20 #100000.5 + real_sd = np.sqrt(real_var) #t_rv = t(deg_free, loc=0, scale=real_var) #noise = t_rvrvs(size=Y.shape) #Y += noise @@ -38,36 +41,37 @@ def student_t_approx(): #noise = t_rv.rvs(size=(len(corrupted_indices), 1)) #Y[corrupted_indices] += noise plt.figure(1) + plt.suptitle('Gaussian likelihood') # Kernel object kernel1 = GPy.kern.rbf(X.shape[1]) kernel2 = kernel1.copy() kernel3 = kernel1.copy() kernel4 = kernel1.copy() - #print "Clean Gaussian" - ##A GP should completely break down due to the points as they get a lot of weight - ## create simple GP model - #m = GPy.models.GP_regression(X, Y, kernel=kernel1) - ### optimize - #m.ensure_default_constraints() - ##m.unconstrain('noise') - ##m.constrain_fixed('noise', 0.1) - #m.optimize() - ## plot - #plt.subplot(221) - #m.plot() - #print m + print "Clean Gaussian" + #A GP should completely break down due to the points as they get a lot of weight + # create simple GP model + m = GPy.models.GP_regression(X, Y, kernel=kernel1) + ## optimize + m.ensure_default_constraints() + #m.unconstrain('noise') + #m.constrain_fixed('noise', 0.1) + m.optimize() + # plot + plt.subplot(211) + m.plot() + print m ##Corrupt - #print "Corrupt Gaussian" - #m = GPy.models.GP_regression(X, Yc, kernel=kernel2) - #m.ensure_default_constraints() - ##m.unconstrain('noise') - ##m.constrain_fixed('noise', 0.1) - #m.optimize() - #plt.subplot(222) - #m.plot() - #print m + print "Corrupt Gaussian" + m = GPy.models.GP_regression(X, Yc, kernel=kernel2) + m.ensure_default_constraints() + #m.unconstrain('noise') + #m.constrain_fixed('noise', 0.1) + m.optimize() + plt.subplot(212) + m.plot() + print m ##with a student t distribution, since it has heavy tails it should work well ##likelihood_function = student_t(deg_free, sigma=real_var) @@ -86,9 +90,13 @@ def student_t_approx(): ##plt.plot(test_range, scaling*normalised_approx.pdf(test_range)) ##plt.show() + plt.figure(2) + plt.suptitle('Student-t likelihood') + edited_real_sd = real_sd + # Likelihood object - t_distribution = student_t(deg_free, sigma=np.sqrt(real_var)) - stu_t_likelihood = Laplace(Y, t_distribution) + t_distribution = student_t(deg_free, sigma=edited_real_sd) + stu_t_likelihood = Laplace(Yc, t_distribution) print "Clean student t" m = GPy.models.GP(X, stu_t_likelihood, kernel3) @@ -100,9 +108,11 @@ def student_t_approx(): # plot plt.subplot(211) m.plot_f() + plt.ylim(-2.5,2.5) + #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT print "Corrupt student t" - t_distribution = student_t(deg_free, sigma=np.sqrt(real_var)) + t_distribution = student_t(deg_free, sigma=edited_real_sd) corrupt_stu_t_likelihood = Laplace(Yc, t_distribution) m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4) m.ensure_default_constraints() @@ -110,8 +120,8 @@ def student_t_approx(): m.optimize() print(m) plt.subplot(212) - m.plot_f() - + m.plot() + plt.ylim(-2.5,2.5) import ipdb; ipdb.set_trace() ### XXX BREAKPOINT return m diff --git a/python/likelihoods/Laplace.py b/python/likelihoods/Laplace.py index d86523d8..1411c22b 100644 --- a/python/likelihoods/Laplace.py +++ b/python/likelihoods/Laplace.py @@ -88,11 +88,12 @@ class Laplace(likelihood): and $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}\tilde{\Sigma}^{-1}\tilde{Y}$$ """ - self.Sigma_tilde_i = self.W #self.hess_hat_i + self.Sigma_tilde_i = self.W #Check it isn't singular! - epsilon = 1e-2 + epsilon = 1e-6 if np.abs(det(self.Sigma_tilde_i)) < epsilon: - raise ValueError("inverse covariance must be non-singular to inverse!") + print "WARNING: Transformed covariance matrix is signular!" + #raise ValueError("inverse covariance must be non-singular to invert!") #Do we really need to inverse Sigma_tilde_i? :( if self.likelihood_function.log_concave: (self.Sigma_tilde, _, _, _) = pdinv(self.Sigma_tilde_i) @@ -110,8 +111,12 @@ class Laplace(likelihood): self.Y = Y_tilde[:, None] self.YYT = np.dot(self.Y, self.Y.T) self.covariance_matrix = self.Sigma_tilde - self.precision = 1 / np.diag(self.Sigma_tilde)[:, None] - import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + #if not self.likelihood_function.log_concave: + #self.covariance_matrix[self.covariance_matrix < 0] = 1e+6 #FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur + ##If the likelihood is non-log-concave. We wan't to say that there is a negative variance + ##To cause the posterior to become less certain than the prior and likelihood, + ##This is a property only held by non-log-concave likelihoods + self.precision = 1 / np.diag(self.covariance_matrix)[:, None] def fit_full(self, K): """ diff --git a/python/likelihoods/likelihood_function.py b/python/likelihoods/likelihood_function.py index a299fe3a..7ac9c661 100644 --- a/python/likelihoods/likelihood_function.py +++ b/python/likelihoods/likelihood_function.py @@ -1,4 +1,5 @@ -from scipy.special import gammaln +from scipy.special import gammaln, gamma +from scipy import integrate import numpy as np from GPy.likelihoods.likelihood_functions import likelihood_function from scipy import stats @@ -79,9 +80,68 @@ class student_t(likelihood_function): def predictive_values(self, mu, var): """ Compute mean, and conficence interval (percentiles 5 and 95) of the prediction - """ - mean = np.exp(mu) - p_025 = stats.t.ppf(.025, mean) - p_975 = stats.t.ppf(.975, mean) - return mean, np.nan*mean, p_025, p_975 + Need to find what the variance is at the latent points for a student t*normal + (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2))) + +(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2)) +*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2))) + """ + #p_025 = stats.t.ppf(.025, mu) + #p_975 = stats.t.ppf(.975, mu) + + num_test_points = mu.shape[0] + #Each mu is the latent point f* at the test point x*, + #and the var is the gaussian variance at this point + #Take lots of samples from this, so we have lots of possible values + #for latent point f* for each test point x* weighted by how likely we were to pick it + print "Taking %d samples of f*".format(num_test_points) + num_f_samples = 10 + num_y_samples = 10 + student_t_means = np.random.normal(loc=mu, scale=np.sqrt(var), size=(num_test_points, num_f_samples)) + print "Student t means shape: ", student_t_means.shape + + #Now we have lots of f*, lets work out the likelihood of getting this by sampling + #from a student t centred on this point, sample many points from this distribution + #centred on f* + #for test_point, f in enumerate(student_t_means): + #print test_point + #print f.shape + #student_t_samples = stats.t.rvs(self.v, loc=f[:,None], + #scale=self.sigma, + #size=(num_f_samples, num_y_samples)) + #print student_t_samples.shape + + student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:,None], + scale=self.sigma, + size=(num_test_points, num_y_samples, num_f_samples)) + student_t_samples = np.reshape(student_t_samples, + (num_test_points, num_y_samples*num_f_samples)) + + #Now take the 97.5 and 0.25 percentile of these points + p_025 = stats.scoreatpercentile(student_t_samples, .025, axis=1)[:, None] + p_975 = stats.scoreatpercentile(student_t_samples, .975, axis=1)[:, None] + + p_025 = 1+p_025 + p_975 = 1+p_975 + + ##Alernenately we could sample from int p(y|f*)p(f*|x*) df* + def t_gaussian(f, mu, var): + return (((gamma((self.v+1)*0.5)) / (gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5)) + * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2))) + ) + + def t_gauss_int(mu, var): + print "Mu: ", mu + print "var: ", var + result = integrate.quad(t_gaussian, -np.inf, 0.975, args=(mu, var)) + print "Result: ", result + return result[0] + + vec_t_gauss_int = np.vectorize(t_gauss_int) + + p_025 = vec_t_gauss_int(mu, var) + p_975 = vec_t_gauss_int(mu, var) + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + + return mu, np.nan*mu, p_025, p_975