GPy/python/likelihoods/Laplace.py

113 lines
4.3 KiB
Python
Raw Normal View History

import numpy as np
import scipy as sp
import GPy
from GPy.util.linalg import jitchol
from functools import partial
from GPy.likelihoods.likelihood import likelihood
from GPy.util.linalg import pdinv,mdot
2013-03-14 15:30:22 +00:00
from scipy.stats import norm
class Laplace(likelihood):
"""Laplace approximation to a posterior"""
2013-03-14 15:30:22 +00:00
def __init__(self, data, likelihood_function):
"""
Laplace Approximation
First find the moments \hat{f} and the hessian at this point (using Newton-Raphson)
then find the z^{prime} which allows this to be a normalised gaussian instead of a
non-normalized gaussian
Finally we must compute the GP variables (i.e. generate some Y^{squiggle} and z^{squiggle}
which makes a gaussian the same as the laplace approximation
Arguments
---------
:data: @todo
:likelihood_function: @todo
"""
self.data = data
self.likelihood_function = likelihood_function
#Inital values
self.N, self.D = self.data.shape
def _compute_GP_variables(self):
"""
Generates data Y which would give the normal distribution identical to the laplace approximation
GPy expects a likelihood to be gaussian, so need to caluclate the points Y^{squiggle} and Z^{squiggle}
that makes the posterior match that found by a laplace approximation to a non-gaussian likelihood
Given we are approximating $p(y|f)p(f)$ with a normal distribution (given $p(y|f)$ is not normal)
then we have a rescaled normal distibution z*N(f|f_hat,hess_hat^-1) with the same area as p(y|f)p(f)
due to the z rescaling.
at the moment the data Y correspond to the normal approximation z*N(f|f_hat,hess_hat^1)
This function finds the data D=(Y_tilde,X) that would produce z*N(f|f_hat,hess_hat^1)
giving a normal approximation of z_tilde*p(Y_tilde|f,X)p(f)
$$\tilde{Y} = \tilde{\Sigma} Hf$$
where
$$\tilde{\Sigma}^{-1} = H - K^{-1}$$
i.e. $$\tilde{\Sigma}^{-1} = diag(\nabla\nabla \log(y|f))$$
since $diag(\nabla\nabla \log(y|f)) = H - K^{-1}$
and $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}\tilde{\Sigma}^{-1}\tilde{Y}$$
"""
self.Sigma_tilde = self.hess_hat -
self.Z =
2013-03-14 15:30:22 +00:00
#self.Y =
#self.YYT =
#self.covariance_matrix =
#self.precision =
def fit_full(self, K):
"""
The laplace approximation algorithm
For nomenclature see Rasmussen & Williams 2006
:K: Covariance matrix
"""
f = np.zeros((self.N, 1))
2013-03-14 15:30:22 +00:00
#K = np.diag(np.ones(self.N))
(self.Ki, _, _, self.log_Kdet) = pdinv(K)
obj_constant = (0.5 * self.log_Kdet) - ((0.5 * self.N) * np.log(2 * np.pi))
#Find \hat(f) using a newton raphson optimizer for example
#TODO: Add newton-raphson as subclass of optimizer class
#FIXME: Can we get rid of this horrible reshaping?
def obj(f):
f = f[:, None]
res = -1 * (self.likelihood_function.link_function(self.data, f) - 0.5 * mdot(f.T, (self.Ki, f)) + obj_constant)
return float(res)
def obj_grad(f):
f = f[:, None]
res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(self.Ki, f))
return np.squeeze(res)
def obj_hess(f):
f = f[:, None]
res = -1 * (np.diag(self.likelihood_function.link_hess(self.data, f)) - self.Ki)
return np.squeeze(res)
self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
2013-03-14 15:30:22 +00:00
print self.f_hat
#At this point get the hessian matrix
2013-03-14 15:30:22 +00:00
self.hess_hat = obj_hess(self.f_hat)
#Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...)
self.height_unnormalised = -1*obj(self.f_hat) #FIXME: Is it - obj constant and *-1?
#z_hat is how much we need to scale the normal distribution by to get the area of our approximation close to
#the area of p(f)p(y|f) we do this by matching the height of the distributions at the mode
#z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat \sum_{n} ln p(y_n|f_n)
self.z_hat = np.exp(-0.5*np.log(np.linalg.det(hess_hat)) + self.height_unnormalised)
2013-03-14 15:30:22 +00:00
return self._compute_GP_variables()