following naming convention better, lots of inverses which should be able to get rid of one or two, unsure if it works

This commit is contained in:
Alan Saul 2013-03-18 15:59:12 +00:00
parent 34ae852eea
commit 2bf1cf0eb6
3 changed files with 39 additions and 30 deletions

View file

@ -41,18 +41,21 @@ def student_t_approx():
cov = kernel.K(X) cov = kernel.K(X)
lap.fit_full(cov) lap.fit_full(cov)
#Get one sample (just look at a single Y #Get one sample (just look at a single Y
mode = float(lap.f_hat[0]) #mode = float(lap.f_hat[0])
variance = float((deg_free/(deg_free-2))) #BUG: Not convinced this is giving reasonable variables #variance = float((deg_free/(deg_free-2))) #BUG: Not convinced this is giving reasonable variables
#variance = float((deg_free/(deg_free-2)) + np.diagonal(lap.hess_hat)[0]) #BUG: Not convinced this is giving reasonable variables #variance = float((deg_free/(deg_free-2)) + np.diagonal(lap.hess_hat)[0]) #BUG: Not convinced this is giving reasonable variables
normalised_approx = norm(loc=mode, scale=variance)
print "Normal with mode %f, and variance %f" % (mode, variance)
print lap.height_unnormalised
test_range = np.arange(0, 10, 0.1) test_range = np.arange(0, 10, 0.1)
print np.diagonal(lap.hess_hat)
plt.plot(test_range, t_rv.pdf(test_range)) plt.plot(test_range, t_rv.pdf(test_range))
for i in xrange(X.shape[0]):
mode = lap.f_hat[i]
covariance = lap.hess_hat_i[i,i]
scaling = np.exp(lap.ln_z_hat)
normalised_approx = norm(loc=mode, scale=covariance)
print "Normal with mode %f, and variance %f" % (mode, covariance)
plt.plot(test_range, normalised_approx.pdf(test_range)) plt.plot(test_range, normalised_approx.pdf(test_range))
plt.show() plt.show()
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
def noisy_laplace_approx(): def noisy_laplace_approx():

View file

@ -1,12 +1,10 @@
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import GPy import GPy
from GPy.util.linalg import jitchol #from GPy.util.linalg import jitchol
from functools import partial from functools import partial
from GPy.likelihoods.likelihood import likelihood from GPy.likelihoods.likelihood import likelihood
from GPy.util.linalg import pdinv,mdot from GPy.util.linalg import pdinv,mdot
from scipy.stats import norm
class Laplace(likelihood): class Laplace(likelihood):
"""Laplace approximation to a posterior""" """Laplace approximation to a posterior"""
@ -35,6 +33,8 @@ class Laplace(likelihood):
#Inital values #Inital values
self.N, self.D = self.data.shape self.N, self.D = self.data.shape
self.NORMAL_CONST = -((0.5 * self.N) * np.log(2 * np.pi))
def _compute_GP_variables(self): def _compute_GP_variables(self):
""" """
Generates data Y which would give the normal distribution identical to the laplace approximation Generates data Y which would give the normal distribution identical to the laplace approximation
@ -59,12 +59,15 @@ class Laplace(likelihood):
and $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}\tilde{\Sigma}^{-1}\tilde{Y}$$ and $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}\tilde{\Sigma}^{-1}\tilde{Y}$$
""" """
self.Sigma_tilde = self.hess_hat - self.Sigma_tilde_i = self.hess_hat + self.Ki
self.Z = #Do we really need to inverse Sigma_tilde_i? :(
#self.Y = (self.Sigma_tilde, _, _, self.log_Sig_i_det) = pdinv(self.Sigma_tilde_i)
#self.YYT = Y_tilde = mdot(self.Sigma_tilde, self.hess_hat, self.f_hat) #f_hat? should be f but we must have optimized for them I guess?
#self.covariance_matrix = self.Z_tilde = np.exp(self.ln_z_hat - self.NORMAL_CONST + (0.5 * mdot(Y_tilde, (self.Sigma_tilde_i, Y_tilde))))
#self.precision = self.Y = Y_tilde
self.covariance_matrix = self.Sigma_tilde
self.precision = np.diag(self.Sigma_tilde)[:, None]
self.YYT = np.dot(self.Y, self.Y)
def fit_full(self, K): def fit_full(self, K):
""" """
@ -75,38 +78,40 @@ class Laplace(likelihood):
f = np.zeros((self.N, 1)) f = np.zeros((self.N, 1))
#K = np.diag(np.ones(self.N)) #K = np.diag(np.ones(self.N))
(self.Ki, _, _, self.log_Kdet) = pdinv(K) (self.Ki, _, _, self.log_Kdet) = pdinv(K)
obj_constant = (0.5 * self.log_Kdet) - ((0.5 * self.N) * np.log(2 * np.pi)) LOG_K_CONST = -(0.5 * self.log_Kdet)
OBJ_CONST = self.NORMAL_CONST + LOG_K_CONST
#Find \hat(f) using a newton raphson optimizer for example #Find \hat(f) using a newton raphson optimizer for example
#TODO: Add newton-raphson as subclass of optimizer class #TODO: Add newton-raphson as subclass of optimizer class
#FIXME: Can we get rid of this horrible reshaping? #FIXME: Can we get rid of this horrible reshaping?
def obj(f): def obj(f):
f = f[:, None] #f = f[:, None]
res = -1 * (self.likelihood_function.link_function(self.data, f) - 0.5 * mdot(f.T, (self.Ki, f)) + obj_constant) res = -1 * (self.likelihood_function.link_function(self.data[:,0], f) - 0.5 * mdot(f.T, (self.Ki, f)) + OBJ_CONST)
return float(res) return float(res)
def obj_grad(f): def obj_grad(f):
f = f[:, None] #f = f[:, None]
res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(self.Ki, f)) res = -1 * (self.likelihood_function.link_grad(self.data[:,0], f) - mdot(self.Ki, f))
return np.squeeze(res) return np.squeeze(res)
def obj_hess(f): def obj_hess(f):
f = f[:, None] res = -1 * (np.diag(self.likelihood_function.link_hess(self.data[:,0], f)) - self.Ki)
res = -1 * (np.diag(self.likelihood_function.link_hess(self.data, f)) - self.Ki)
return np.squeeze(res) return np.squeeze(res)
self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess) self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
print self.f_hat print self.f_hat
#At this point get the hessian matrix #At this point get the hessian matrix
self.hess_hat = obj_hess(self.f_hat) self.hess_hat = -1*np.diag(self.likelihood_function.link_hess(self.data[:,0], self.f_hat)) #-1*obj_hess(self.f_hat) + self.Ki
#self.hess_hat = -1*obj_hess(self.f_hat) + self.Ki
(self.hess_hat_i, _, _, self.log_hess_hat_det) = pdinv(self.hess_hat + self.Ki)
#Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...) #Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...)
self.height_unnormalised = -1*obj(self.f_hat) #FIXME: Is it - obj constant and *-1? self.height_unnormalised = -1*obj(self.f_hat) #FIXME: Is it - obj constant and *-1?
#z_hat is how much we need to scale the normal distribution by to get the area of our approximation close to #z_hat is how much we need to scale the normal distribution by to get the area of our approximation close to
#the area of p(f)p(y|f) we do this by matching the height of the distributions at the mode #the area of p(f)p(y|f) we do this by matching the height of the distributions at the mode
#z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat \sum_{n} ln p(y_n|f_n) #z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat \sum_{n} ln p(y_n|f_n)
self.z_hat = np.exp(-0.5*np.log(np.linalg.det(hess_hat)) + self.height_unnormalised) self.ln_z_hat = -0.5*np.log(self.log_hess_hat_det) + self.height_unnormalised - self.NORMAL_CONST #Unsure whether its log_hess or log_hess_i
return self._compute_GP_variables() return self._compute_GP_variables()

View file

@ -28,7 +28,7 @@ class student_t(likelihood_function):
:returns: float(likelihood evaluated for this point) :returns: float(likelihood evaluated for this point)
""" """
assert y.shape[0] == f.shape[0] assert y.shape == f.shape
e = y - f e = y - f
objective = (gammaln((self.v + 1) * 0.5) objective = (gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
@ -49,7 +49,7 @@ class student_t(likelihood_function):
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
""" """
assert y.shape[0] == f.shape[0] assert y.shape == f.shape
e = y - f e = y - f
grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2)) grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
return grad return grad
@ -67,7 +67,8 @@ class student_t(likelihood_function):
:f: latent variables f :f: latent variables f
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
""" """
assert y.shape[0] == f.shape[0] assert y.shape == f.shape
e = y - f e = y - f
hess = ((self.v + 1) * e) / ((((self.sigma**2) * self.v) + e**2)**2) #hess = ((self.v + 1) * e) / ((((self.sigma**2) * self.v) + e**2)**2)
hess = ((self.v + 1) * (e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2) * self.v) + e**2)**2)
return hess return hess