from functools import partial

import numpy as np
import scipy as sp
import scipy.optimize  # `import scipy` alone does not load the optimize submodule
from scipy.stats import norm

import GPy
from GPy.likelihoods.likelihood import likelihood
from GPy.util.linalg import jitchol, mdot, pdinv
|
class Laplace(likelihood):
    """Laplace approximation to a posterior with a non-Gaussian likelihood."""

    def __init__(self, data, likelihood_function):
        """
        Laplace Approximation

        First find the moments \hat{f} and the hessian at this point (using Newton-Raphson)
        then find the z^{prime} which allows this to be a normalised gaussian instead of a
        non-normalized gaussian

        Finally we must compute the GP variables (i.e. generate some Y^{squiggle} and z^{squiggle}
        which makes a gaussian the same as the laplace approximation

        :param data: observed outputs, array of shape (N, D)
        :param likelihood_function: non-Gaussian likelihood object providing
            link_function, link_grad and link_hess (log-likelihood and its
            first/second derivatives w.r.t. f)
        """
        self.data = data
        self.likelihood_function = likelihood_function

        # Initial values
        self.N, self.D = self.data.shape

    def _compute_GP_variables(self):
        """
        Generate the pseudo-data which would give a Gaussian likelihood whose
        posterior matches the Laplace approximation.

        GPy expects a likelihood to be gaussian, so we calculate the points
        Y_tilde and Z_tilde that make the posterior match that found by a
        laplace approximation to a non-gaussian likelihood.

        Given we are approximating $p(y|f)p(f)$ with a normal distribution
        (given $p(y|f)$ is not normal) then we have a rescaled normal
        distribution z*N(f|f_hat, hess_hat^-1) with the same area as
        p(y|f)p(f) due to the z rescaling.

        This function finds the data D=(Y_tilde, X) that would produce
        z*N(f|f_hat, hess_hat^-1), giving a normal approximation of
        z_tilde*p(Y_tilde|f,X)p(f), via:

        $$\tilde{Y} = \tilde{\Sigma} H \hat{f}$$
        where
        $$\tilde{\Sigma}^{-1} = H - K^{-1}$$
        i.e. $$\tilde{\Sigma}^{-1} = diag(\nabla\nabla \log p(y|f))$$
        since $diag(\nabla\nabla \log p(y|f)) = H - K^{-1}$
        and
        $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}^T\tilde{\Sigma}^{-1}\tilde{Y}$$
        """
        # Sigma_tilde^{-1} = H - K^{-1}  (the diagonal likelihood curvature)
        Sigma_tilde_i = self.hess_hat - self.Ki
        (self.Sigma_tilde, _, _, _) = pdinv(Sigma_tilde_i)

        # Y_tilde = Sigma_tilde * H * f_hat  (f_hat comes back 1-d from the optimizer)
        f_hat = self.f_hat[:, None]
        self.Y_tilde = mdot(self.Sigma_tilde, self.hess_hat, f_hat)

        # ln z_tilde = ln z + (N/2) ln 2pi + (1/2) Y_tilde^T Sigma_tilde^{-1} Y_tilde
        ln_z_tilde = (np.log(self.z_hat)
                      + 0.5 * self.N * np.log(2 * np.pi)
                      + 0.5 * float(mdot(self.Y_tilde.T, Sigma_tilde_i, self.Y_tilde)))
        self.Z = np.exp(ln_z_tilde)

        # TODO(review): still need to expose the attributes GPy's gaussian
        # likelihood interface expects (Y, YYT, covariance_matrix, precision)
        # once their exact contract is confirmed against the GP code.
        return self.Y_tilde, self.Sigma_tilde, self.Z

    def fit_full(self, K):
        """
        The laplace approximation algorithm.

        For nomenclature see Rasmussen & Williams 2006 (chapter 3).

        :param K: prior covariance matrix, shape (N, N)
        :returns: result of :meth:`_compute_GP_variables`
        """
        f = np.zeros((self.N, 1))
        (self.Ki, _, _, self.log_Kdet) = pdinv(K)
        # Constant part of the log posterior; kept outside the objective so it
        # is not recomputed every optimizer step.
        obj_constant = (0.5 * self.log_Kdet) - ((0.5 * self.N) * np.log(2 * np.pi))

        # Find \hat{f} using a newton raphson optimizer for example
        # TODO: Add newton-raphson as subclass of optimizer class
        # FIXME: Can we get rid of this horrible reshaping?
        def obj(f):
            # Negative (unnormalised) log posterior: -[log p(y|f) - 1/2 f^T K^{-1} f + const]
            f = f[:, None]
            res = -1 * (self.likelihood_function.link_function(self.data, f)
                        - 0.5 * mdot(f.T, (self.Ki, f)) + obj_constant)
            return float(res)

        def obj_grad(f):
            # Gradient of the negative log posterior.
            f = f[:, None]
            res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(self.Ki, f))
            return np.squeeze(res)

        def obj_hess(f):
            # Hessian of the negative log posterior (likelihood curvature is diagonal).
            f = f[:, None]
            res = -1 * (np.diag(self.likelihood_function.link_hess(self.data, f)) - self.Ki)
            return np.squeeze(res)

        self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
        print(self.f_hat)  # debug output of the mode

        # At this point get the hessian matrix (evaluated at the mode)
        self.hess_hat = obj_hess(self.f_hat)

        # Need to add the constant as we previously were trying to avoid
        # computing it (seems like a small overhead though...)
        self.height_unnormalised = -1 * obj(self.f_hat)  # FIXME: Is it - obj constant and *-1?

        # z_hat is how much we need to scale the normal distribution by to get
        # the area of our approximation close to the area of p(f)p(y|f); we do
        # this by matching the height of the distributions at the mode.
        # slogdet avoids the overflow of log(det(H)) for large N; H should be
        # positive definite at a maximum so the sign is ignored.
        _, log_hess_det = np.linalg.slogdet(self.hess_hat)
        self.z_hat = np.exp(-0.5 * log_hess_det + self.height_unnormalised)

        return self._compute_GP_variables()
|