got an idea of how to implement! written in docs

This commit is contained in:
Alan Saul 2013-03-15 17:38:13 +00:00
parent f9535c858a
commit 34ae852eea

View file

@ -41,10 +41,26 @@ class Laplace(likelihood):
GPy expects a likelihood to be gaussian, so need to calculate the points Y^{squiggle} and Z^{squiggle} GPy expects a likelihood to be gaussian, so need to calculate the points Y^{squiggle} and Z^{squiggle}
that makes the posterior match that found by a laplace approximation to a non-gaussian likelihood that makes the posterior match that found by a laplace approximation to a non-gaussian likelihood
Given we are approximating $p(y|f)p(f)$ with a normal distribution (given $p(y|f)$ is not normal)
then we have a rescaled normal distribution z*N(f|f_hat,hess_hat^-1) with the same area as p(y|f)p(f)
due to the z rescaling.
At the moment the data Y correspond to the normal approximation z*N(f|f_hat,hess_hat^-1).
This function finds the data D=(Y_tilde,X) that would produce z*N(f|f_hat,hess_hat^-1),
giving a normal approximation of z_tilde*p(Y_tilde|f,X)p(f)
$$\tilde{Y} = \tilde{\Sigma} Hf$$
where
$$\tilde{\Sigma}^{-1} = H - K^{-1}$$
i.e. $$\tilde{\Sigma}^{-1} = diag(\nabla\nabla \log p(y|f))$$
since $diag(\nabla\nabla \log p(y|f)) = H - K^{-1}$
and $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}^{T}\tilde{\Sigma}^{-1}\tilde{Y}$$
""" """
#z_hat = N(f_hat|f_hat, hess_hat) / self.height_unnormalised self.Sigma_tilde = self.hess_hat -
normalised_approx = norm(loc=self.f_hat, scale=self.hess_hat) self.Z =
self.Z = normalised_approx.pdf(self.f_hat)/self.height_unnormalised
#self.Y = #self.Y =
#self.YYT = #self.YYT =
#self.covariance_matrix = #self.covariance_matrix =
@ -58,8 +74,8 @@ class Laplace(likelihood):
""" """
f = np.zeros((self.N, 1)) f = np.zeros((self.N, 1))
#K = np.diag(np.ones(self.N)) #K = np.diag(np.ones(self.N))
(Ki, _, _, log_Kdet) = pdinv(K) (self.Ki, _, _, self.log_Kdet) = pdinv(K)
obj_constant = (0.5 * log_Kdet) - ((0.5 * self.N) * np.log(2 * np.pi)) obj_constant = (0.5 * self.log_Kdet) - ((0.5 * self.N) * np.log(2 * np.pi))
#Find \hat(f) using a newton raphson optimizer for example #Find \hat(f) using a newton raphson optimizer for example
#TODO: Add newton-raphson as subclass of optimizer class #TODO: Add newton-raphson as subclass of optimizer class
@ -67,17 +83,17 @@ class Laplace(likelihood):
#FIXME: Can we get rid of this horrible reshaping? #FIXME: Can we get rid of this horrible reshaping?
def obj(f): def obj(f):
f = f[:, None] f = f[:, None]
res = -1 * (self.likelihood_function.link_function(self.data, f) - 0.5 * mdot(f.T, (Ki, f)) + obj_constant) res = -1 * (self.likelihood_function.link_function(self.data, f) - 0.5 * mdot(f.T, (self.Ki, f)) + obj_constant)
return float(res) return float(res)
def obj_grad(f): def obj_grad(f):
f = f[:, None] f = f[:, None]
res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(Ki, f)) res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(self.Ki, f))
return np.squeeze(res) return np.squeeze(res)
def obj_hess(f): def obj_hess(f):
f = f[:, None] f = f[:, None]
res = -1 * (np.diag(self.likelihood_function.link_hess(self.data, f)) - Ki) res = -1 * (np.diag(self.likelihood_function.link_hess(self.data, f)) - self.Ki)
return np.squeeze(res) return np.squeeze(res)
self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess) self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
@ -87,6 +103,10 @@ class Laplace(likelihood):
self.hess_hat = obj_hess(self.f_hat) self.hess_hat = obj_hess(self.f_hat)
#Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...) #Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...)
self.height_unnormalised = obj(self.f_hat) #FIXME: Is it -1? self.height_unnormalised = -1*obj(self.f_hat) #FIXME: Is it - obj constant and *-1?
#z_hat is how much we need to scale the normal distribution by to get the area of our approximation close to
#the area of p(f)p(y|f) we do this by matching the height of the distributions at the mode
#ln z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat + \sum_{n} ln p(y_n|f_n)
self.z_hat = np.exp(-0.5*np.log(np.linalg.det(hess_hat)) + self.height_unnormalised)
return self._compute_GP_variables() return self._compute_GP_variables()