Got most of laplace approximation working

This commit is contained in:
Alan Saul 2013-03-13 17:55:41 +00:00
parent ad2c266c65
commit 3f114aa020
9 changed files with 124 additions and 45 deletions

0
python/__init__.py Normal file
View file

View file

View file

@ -1,8 +1,9 @@
import GPy
import numpy as np
import scipy as sp
import scipy.stats
import matplotlib.pyplot as plt
from scipy.stats import t
from coxGP.python.likelihoods.Laplace import Laplace
from coxGP.python.likelihoods.likelihood_function import student_t
def student_t_approx():
@ -13,6 +14,41 @@ def student_t_approx():
X = np.sort(np.random.uniform(0, 15, 70))[:, None]
Y = np.sin(X)
#Add student t random noise to datapoints
deg_free = 1
noise = t.rvs(deg_free, loc=1.8, scale=1, size=Y.shape)
Y += noise
# Kernel object
print X.shape
kernel = GPy.kern.rbf(X.shape[1])
#A GP should completely break down due to the points as they get a lot of weight
# create simple GP model
m = GPy.models.GP_regression(X, Y, kernel=kernel)
# optimize
m.ensure_default_constraints()
m.optimize()
# plot
#m.plot()
print m
#with a student t distribution, since it has heavy tails it should work well
likelihood_function = student_t(deg_free, sigma=1)
lap = Laplace(Y, likelihood_function)
cov = kernel.K(X)
lap.fit_full(cov)
def noisy_laplace_approx():
"""
Example of regressing with a student t likelihood
"""
#Start a function, any function
X = np.sort(np.random.uniform(0, 15, 70))[:, None]
Y = np.sin(X)
#Add some extreme value noise to some of the datapoints
percent_corrupted = 0.05
corrupted_datums = int(np.round(Y.shape[0] * percent_corrupted))
@ -20,12 +56,12 @@ def student_t_approx():
np.random.shuffle(indices)
corrupted_indices = indices[:corrupted_datums]
print corrupted_indices
noise = np.random.uniform(-10,10,(len(corrupted_indices), 1))
noise = np.random.uniform(-10, 10, (len(corrupted_indices), 1))
Y[corrupted_indices] += noise
#A GP should completely break down due to the points as they get a lot of weight
# create simple GP model
m = GPy.models.GP_regression(X,Y)
m = GPy.models.GP_regression(X, Y)
# optimize
m.ensure_default_constraints()

View file

@ -1,8 +1,14 @@
import nump as np
import numpy as np
import scipy as sp
import GPy
from GPy.util.linalg import jitchol
from functools import partial
from GPy.likelihoods.likelihood import likelihood
from GPy.util.linalg import pdinv,mdot
class Laplace(GPy.likelihoods.likelihood):
class Laplace(likelihood):
"""Laplace approximation to a posterior"""
def __init__(self,data,likelihood_function):
@ -23,8 +29,6 @@ class Laplace(GPy.likelihoods.likelihood):
:likelihood_function: @todo
"""
GPy.likelihoods.likelihood.__init__(self)
self.data = data
self.likelihood_function = likelihood_function
@ -38,7 +42,7 @@ class Laplace(GPy.likelihoods.likelihood):
GPy expects a likelihood to be Gaussian, so need to calculate the points $\tilde{Y}$ and $\tilde{Z}$
that makes the posterior match that found by a laplace approximation to a non-gaussian likelihood
"""
raise NotImplementedError
z_hat = N(f_hat|f_hat, hess_hat) / self.height_unnormalised
def fit_full(self, K):
"""
@ -46,9 +50,38 @@ class Laplace(GPy.likelihoods.likelihood):
For nomenclature see Rasmussen & Williams 2006
:K: Covariance matrix
"""
self.f = np.zeros(self.N)
f = np.zeros((self.N, 1))
print K.shape
print f.shape
print self.data.shape
(Ki, _, _, log_Kdet) = pdinv(K)
obj_constant = (0.5 * log_Kdet) - ((0.5 * self.N) * np.log(2*np.pi))
#Find \hat(f) using a newton raphson optimizer for example
#TODO: Add newton-raphson as subclass of optimizer class
#FIXME: Can we get rid of this horrible reshaping?
def obj(f):
f = f[:, None]
res = -1 * (self.likelihood_function.link_function(self.data, f) - 0.5 * mdot(f.T, (Ki, f)) + obj_constant)
return float(res)
def obj_grad(f):
f = f[:, None]
res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(Ki, f))
return np.squeeze(res)
def obj_hess(f):
f = f[:, None]
res = -1 * (np.diag(self.likelihood_function.link_hess(self.data, f)) - Ki)
return np.squeeze(res)
self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
#At this point get the hessian matrix
self.hess_hat = obj_hess(f_hat)
#Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...)
self.height_unnormalised = obj(f_hat) #FIXME: Is it -1?
return _compute_GP_variables()

View file

View file

@ -1,62 +1,72 @@
import GPy
from scipy.special import gamma, gammaln
from scipy.special import gammaln
import numpy as np
from GPy.likelihoods.likelihood_functions import likelihood_function
class student_t(GPy.likelihoods.likelihood_function):
class student_t(likelihood_function):
"""Student t likelihood distribution
For nomenclature see Bayesian Data Analysis 2003 p576
$$\ln(\frac{\Gamma(\frac{(v+1)}{2})}{\Gamma(\sqrt(v \pi \Gamma(\frac{v}{2}))})+ \ln(1+\frac{(y_i-f_i)^2}{\sigma v})^{-\frac{(v+1)}{2}}$$
TODO:Double check this
$$\ln p(y_{i}|f_{i}) = \ln \Gamma\left(\frac{v+1}{2}\right) - \ln\left(\Gamma\left(\frac{v}{2}\right)\sqrt{v \pi}\,\sigma\right) - \frac{v+1}{2}\ln\left(1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^{2}\right)$$
Laplace:
Needs functions to calculate
ln p(yi|fi)
dln p(yi|fi)_dfi
d2ln p(yi|fi)_d2fi
d2ln p(yi|fi)_d2fifj
"""
def __init__(self, deg_free, sigma=1):
self.v = deg_free
self.sigma = 1
def link_function(self, y_i, f_i):
"""link_function $\ln p(y_i|f_i)$
$$\ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2}) - \ln \frac{v \pi \sigma}{2} - \frac{v+1}{2}\ln (1 + \frac{(y_{i} - f_{i})^{2}}{v\sigma})$$
TODO: Double check this
def link_function(self, y, f):
"""link_function $\ln p(y|f)$
$$\ln p(y_{i}|f_{i}) = \ln \Gamma\left(\frac{v+1}{2}\right) - \ln\left(\Gamma\left(\frac{v}{2}\right)\sqrt{v \pi}\,\sigma\right) - \frac{v+1}{2}\ln\left(1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^{2}\right)$$
:y_i: datum number i
:f_i: latent variable f_i
:y: datum number i
:f: latent variable f
:returns: float(likelihood evaluated for this point)
"""
e = y_i - f_i
return gammaln((v+1)*0.5) - gammaln(v*0.5) - np.ln(v*np.pi*sigma)*0.5 - (v+1)*0.5*np.ln(1 + ((e/sigma)**2)/v) #Check the /v!
e = y - f
#print "Link ", y.shape, f.shape, e.shape
objective = (gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
+ np.log(self.sigma * np.sqrt(self.v * np.pi))
- (self.v + 1) * 0.5
* np.log(1 + ((e**2 / self.sigma**2) / self.v))
)
return np.sum(objective)
def link_grad(self, y_i, f_i):
"""gradient of the link function at y_i, given f_i w.r.t f_i
def link_grad(self, y, f):
"""
Gradient of the link function at y, given f w.r.t f
derivative of log((gamma((v+1)/2)/gamma(sqrt(v*pi*gamma(v/2))))*(1+(t^2)/(a*v))^((-(v+1))/2)) with respect to t
$$\frac{(y_i - f_i)(v + 1)}{\sigma v (y_{i} - f_{i})^{2}}$$
TODO: Double check this
$$\frac{d}{df}p(y_{i}|f_{i}) = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$
:y_i: datum number i
:f_i: latent variable f_i
:y: datum number i
:f: latent variable f
:returns: float(gradient of likelihood evaluated at this point)
"""
pass
def link_hess(self, y_i, f_i, f_j):
"""hessian at this point (the hessian will be 0 unless i == j)
i.e. second derivative w.r.t f_i and f_j
second derivative of
:y_i: @todo
:f_i: @todo
:f_j: @todo
:returns: @todo
e = y - f
#print "Grad ", y.shape, f.shape, e.shape
grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
return grad
def link_hess(self, y, f):
"""
if f_i =
pass
Hessian at this point (if we are only looking at the link function, not the prior); the hessian will be 0 unless i == j
i.e. second derivative of link_function at y given f, w.r.t. f_i and f_j
Will return the diagonal of the hessian, since everywhere else it is 0
$$\frac{d^{2}\ln p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y_{i} - f_{i})}{\left(v \sigma^{2} + (y_{i} - f_{i})^{2}\right)^{2}}$$ TODO: verify against the analytic second derivative
:y: datum number i
:f: latent variable f
:returns: float(second derivative of likelihood evaluated at this point)
"""
e = y - f
hess = ((self.v + 1) * e) / ((((self.sigma**2)*self.v) + e**2)**2)
return hess

View file

View file