Merge branch 'params' of github.com:SheffieldML/GPy into params

Conflicts:
	GPy/core/sparse_gp.py
Commit: df50d2a990
Author: Ricardo
Date:   2014-01-28 13:52:38 +00:00
7 changed files with 58 additions and 142 deletions


@@ -59,10 +59,10 @@ class GP(Model):
         self.parameters_changed()
     def parameters_changed(self):
-        self.posterior = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y)
+        self.posterior, self._log_marginal_likelihood, self._dL_dK = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y)
     def log_likelihood(self):
-        return self.posterior.log_marginal
+        return self._log_marginal_likelihood
     def dL_dtheta_K(self):
         return self.kern.dK_dtheta(self.posterior.dL_dK, self.X)
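Note: the hunk above changes the contract between models and inference methods; the log marginal likelihood and dL/dK are no longer stored on the Posterior object but are returned alongside it as a triple. A minimal, self-contained numpy sketch of an exact-Gaussian inference routine honouring that three-part return (this is not the GPy implementation; the function name and the dict standing in for the Posterior object are illustrative only):

    import numpy as np
    from scipy.linalg import cholesky, cho_solve

    def exact_inference_sketch(K, Y, noise_var):
        # K: (N, N) prior covariance, Y: (N, D) targets, noise_var: Gaussian noise variance
        N, D = Y.shape
        Ky = K + noise_var * np.eye(N)
        L = cholesky(Ky, lower=True)                     # Ky = L L^T
        alpha = cho_solve((L, True), Y)                  # Ky^{-1} Y, the "woodbury vector"
        log_marginal = (-0.5 * N * D * np.log(2 * np.pi)
                        - D * np.sum(np.log(np.diag(L)))
                        - 0.5 * np.sum(alpha * Y))
        dL_dK = 0.5 * (alpha.dot(alpha.T) - D * cho_solve((L, True), np.eye(N)))
        posterior = {'woodbury_chol': L, 'woodbury_vector': alpha, 'K': K}   # stand-in for Posterior(L, alpha, K)
        return posterior, log_marginal, dL_dK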


@@ -383,69 +383,6 @@ class Model(Parameterized):
         sgd.run()
         self.optimization_runs.append(sgd)
-    def Laplace_covariance(self):
-        """return the covariance matrix of a Laplace approximation at the current (stationary) point."""
-        # TODO add in the prior contributions for MAP estimation
-        # TODO fix the hessian for tied, constrained and fixed components
-        if hasattr(self, 'log_likelihood_hessian'):
-            A = -self.log_likelihood_hessian()
-        else:
-            print "numerically calculating Hessian. please be patient!"
-            x = self._get_params()
-            def f(x):
-                self._set_params(x)
-                return self.log_likelihood()
-            h = ndt.Hessian(f)  # @UndefinedVariable
-            A = -h(x)
-            self._set_params(x)
-        # check for almost zero components on the diagonal which screw up the cholesky
-        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
-        A[aa, aa] = 0.
-        return A
-    def Laplace_evidence(self):
-        """Returns an estiamte of the model evidence based on the Laplace approximation.
-        Uses a numerical estimate of the Hessian if none is available analytically."""
-        A = self.Laplace_covariance()
-        try:
-            hld = np.sum(np.log(np.diag(jitchol(A)[0])))
-        except:
-            return np.nan
-        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
-    # def __str__(self):
-    #     s = Parameterized.__str__(self).split('\n')
-    #     #def __str__(self, names=None):
-    #     #    if names is None:
-    #     #        names = self._get_print_names()
-    #     #s = Parameterized.__str__(self, names=names).split('\n')
-    #     # add priors to the string
-    #     if self.priors is not None:
-    #         strs = [str(p) if p is not None else '' for p in self.priors]
-    #     else:
-    #         strs = [''] * len(self._get_params())
-    #     # strs = [''] * len(self._get_param_names())
-    #     # name_indices = self.grep_param_names("|".join(names))
-    #     # strs = np.array(strs)[name_indices]
-    #     width = np.array(max([len(p) for p in strs] + [5])) + 4
-    #
-    #     log_like = self.log_likelihood()
-    #     log_prior = self.log_prior()
-    #     obj_funct = '\nLog-likelihood: {0:.3e}'.format(log_like)
-    #     if len(''.join(strs)) != 0:
-    #         obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
-    #     obj_funct += '\n\n'
-    #     s[0] = obj_funct + s[0]
-    #     s[0] += "|{h:^{col}}".format(h='prior', col=width)
-    #     s[1] += '-' * (width + 1)
-    #
-    #     for p in range(2, len(strs) + 2):
-    #         s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
-    #
-    #     return '\n'.join(s)
     def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the ,odel by comparing to a numerical
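For reference, the deleted Laplace_evidence implemented the standard Laplace approximation to the model evidence. In the notation of the removed code, with x the current parameter vector, d = x.size and A the negative Hessian of the log likelihood at x, the returned quantity is

    \log p(\mathbf{y}) \;\approx\; \log p(\mathbf{y} \mid x) + \tfrac{d}{2}\log 2\pi - \tfrac{1}{2}\log\det A,
    \qquad A = L L^\top, \quad \tfrac{1}{2}\log\det A = \sum_i \log L_{ii},

which is exactly what the code computed: hld is the sum of the log-diagonal of the jitchol factor of A.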


@@ -5,6 +5,7 @@ import numpy as np
 from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
 from gp import GP
 from parameterization.param import Param
+from ..inference.latent_function_inference import varDTC
 class SparseGP(GP):
     """
@@ -54,49 +55,16 @@ class SparseGP(GP):
         self.add_parameter(self.kern, gradient=self.dL_dtheta)
         self.add_parameter(self.likelihood, gradient=lambda:self.likelihood._gradients(partial=self.partial_for_likelihood))
     def parameters_changed(self):
-        # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
+        self.posterior = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
+        #The derivative of the bound wrt the inducing inputs Z
+        self.Z.gradient = self.kern.dK_dX(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
-            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance)
-            self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
+            self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
+            self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            self.psi0 = self.kern.Kdiag(self.X)
-            self.psi1 = self.kern.K(self.X, self.Z)
-            self.psi2 = None
-        #self.posterior = self.inference_method.inference(??)
-        super(SparseGP, self).parameters_changed()
+            self.Z.gradient += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
-    def dL_dtheta(self):
-        """
-        Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
-        """
-        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
-        if self.has_uncertain_inputs:
-            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
-        else:
-            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z)
-            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
-        return dL_dtheta
-    def dL_dZ(self):
-        """
-        The derivative of the bound wrt the inducing inputs Z
-        """
-        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
-        if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
-        else:
-            dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
-        return dL_dZ
     def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
         """

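The _raw_predict signature left unchanged above can be written purely in terms of the "woodbury" quantities documented in the Posterior class further down. A hedged numpy sketch of that predictive computation for the latent function (the function and argument names are assumptions rather than GPy's API; it extends the training-point identities in the Posterior docstring to test inputs in the usual way, and assumes a GPy-style kernel exposing K(X, X2)):

    import numpy as np
    from scipy.linalg import solve_triangular

    def raw_predict_sketch(kern, X, Xnew, woodbury_chol, woodbury_vector):
        # mean = K_{*x} M  and  cov = K_{**} - K_{*x} L^{-T} L^{-1} K_{x*},
        # with M = woodbury_vector and L = woodbury_chol (see the Posterior docstring below).
        Kxs = kern.K(X, Xnew)                                    # (N, N*) cross-covariance
        Kss = kern.K(Xnew, Xnew)                                 # (N*, N*) test covariance
        mu = Kxs.T.dot(woodbury_vector)                          # predictive mean, (N*, D)
        tmp = solve_triangular(woodbury_chol, Kxs, lower=True)   # L^{-1} K_{x*}
        cov = Kss - tmp.T.dot(tmp)                               # full predictive covariance
        return mu, cov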

@@ -53,6 +53,6 @@ class ExactGaussianInference(object):
         likelihood.update_gradients(np.diag(dL_dK))
-        return Posterior(log_marginal, dL_dK, LW, alpha, K)
+        return Posterior(LW, alpha, K), log_marginal, dL_dK


@@ -14,10 +14,8 @@
     schemes and the model classes.
     """
-    def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
+    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
         """
-        log_marginal: log p(Y|X)
-        dL_dK: d/dK log p(Y|X)
         woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
         woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
         K : the proir covariance (required for lazy computation of various quantities)

@@ -26,12 +24,10 @@ class Posterior(object):
         Not all of the above need to be supplied! You *must* supply:
-          log_marginal
-          dL_dK
           K (for lazy computation)
         You may supply either:
-          cc
           woodbury_chol
           woodbury_vector

@@ -46,8 +42,6 @@ class Posterior(object):
         From the supplied quantities, all of the others will be computed on demand (lazy computation)
         """
         #obligatory
-        self.log_marginal = log_marginal
-        self.dL_dK = dL_dK
         self._K = K
         if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):

@@ -64,7 +58,7 @@ class Posterior(object):
         self._covariance = cov
         self._K_chol = K_chol
-        #copmute this lazily
+        #compute this lazily
         self._precision = None
     @property
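With log_marginal and dL_dK removed from the constructor, a Posterior is now built from the prior covariance plus one of the two parametrisations named in the docstring. An assumed usage sketch (the toy values exist only so the lines run, and the import path is inferred from the relative imports elsewhere in this commit, not shown in this diff):

    import numpy as np
    from GPy.inference.latent_function_inference.posterior import Posterior  # path inferred

    K = np.eye(3)                                    # toy prior covariance
    LW = np.linalg.cholesky(K + 0.1 * np.eye(3))     # toy woodbury_chol
    alpha = np.ones((3, 1))                          # toy woodbury_vector

    post = Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K)
    # or, equivalently, from an explicit mean and covariance:
    # post = Posterior(mean=mu, cov=Sigma, K=K)
    # log_marginal and dL_dK now travel alongside the posterior instead:
    # posterior, log_marginal, dL_dK = inference_method.inference(...)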


@@ -2,10 +2,11 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 from posterior import Posterior
-from .../util.linalg import pdinv, dpotrs, tdot
+from ...util.linalg import pdinv, dpotrs, tdot
+import numpy as np
 log_2_pi = np.log(2*np.pi)
-class DTCVar(object):
+class VarDTC(object):
     """
     An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

@@ -32,15 +33,29 @@ class DTCVar(object):
         #if Y in self.cache, return self.Cache[Y], else stor Y in cache and return L.
         raise NotImplementedError, 'TODO' #TODO
-    def inference(self, Kmm, Kmn, Knn_diag, likelihood, Y):
-        num_inducing, num_data = Kmn.shape
+    def inference(self, kern, X, X_variance, Z, likelihood, Y):
+        num_inducing, _ = Z.shape
+        num_data, output_dim = Y.shape
+        #see whether we're using variational uncertain inputs
+        uncertain_inputs = (X_variance is None)
+        # kernel computations, using BGPLVM notation
+        Kmm = kern.K(Z)
+        if uncertain_inputs:
+            psi0 = kern.psi0(Z, X, X_variance)
+            psi1 = kern.psi1(Z, X, X_variance)
+            psi2 = kern.psi2(Z, X, X_variance)
+        else:
+            psi0 = kern.Kdiag(X)
+            psi1 = kern.K(X, Z)
+            psi2 = None
         #factor Kmm # TODO: cache?
-        _Lm = jitchol(Kmm)
+        Lm = jitchol(Kmm)
         # The rather complex computations of A
-        if has_uncertain_inputs:
+        if uncertain_inputs:
             if likelihood.is_heteroscedastic:
                 psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
             else:

@@ -56,7 +71,7 @@ class DTCVar(object):
             tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
         else:
             tmp = psi1 * (np.sqrt(likelihood.precision))
-        tmp, _ = dtrtrs(_Lm, np.asfortranarray(tmp.T), lower=1)
+        tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
         A = tdot(tmp)
         # factor B

@@ -67,11 +82,23 @@ class DTCVar(object):
         psi1Vf = np.dot(psi1.T, likelihood.VVT_factor)
         # back substutue C into psi1Vf
-        tmp, info1 = dtrtrs(_Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
+        tmp, info1 = dtrtrs(Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
         _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0)
         # tmp, info2 = dpotrs(LB, tmp, lower=1)
         tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
-        Cpsi1Vf, info3 = dtrtrs(_Lm, tmp, lower=1, trans=1)
+        Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)
+        #compute log marginal likelihood
+        if likelihood.is_heteroscedastic:
+            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
+            B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
+        else:
+            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
+            B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
+        C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
+        D = 0.5 * data_fit
+        log_marginal = A + B + C + D
         # Compute dL_dKmm
         tmp = tdot(_LBi_Lmi_psi1Vf)

@@ -80,12 +107,12 @@ class DTCVar(object):
         tmp = -0.5 * DBi_plus_BiPBi
         tmp += -0.5 * B * output_dim
         tmp += output_dim * np.eye(num_inducing)
-        dL_dKmm = backsub_both_sides(_Lm, tmp)
+        dL_dKmm = backsub_both_sides(Lm, tmp)
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
         dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
         dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
-        dL_dpsi2_beta = 0.5 * backsub_both_sides(_Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
         if likelihood.is_heteroscedastic:

@@ -117,7 +144,7 @@ class DTCVar(object):
         else:
             LBi = chol_inv(LB)
-            Lmi_psi1, nil = dtrtrs(_Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
+            Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
             _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)

@@ -135,14 +162,4 @@ class DTCVar(object):
             partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
             partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)
-    #def log_likelihood(self):
-        if likelihood.is_heteroscedastic:
-            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
-            B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
-        else:
-            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
-            B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
-        C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
-        D = 0.5 * data_fit
-        return A + B + C + D + likelihood.Z
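Reading the homoscedastic branch of the A/B/C/D terms (identical in the new in-place computation above and in the deleted log_likelihood), the quantity assembled is a Titsias-style collapsed variational DTC bound. With beta = likelihood.precision, p = output_dim, N = num_data, psi0 and Psi2 the kernel statistics (Psi2 = Psi1^T Psi1 for certain inputs), K_mm = L_m L_m^T, and the matrices A = beta L_m^{-1} Psi2 L_m^{-T} and B = I + A = L_B L_B^T factored earlier in the method (not the scalars of the same name), the terms appear to be

    A = -\tfrac{Np}{2}\log 2\pi + \tfrac{Np}{2}\log\beta - \tfrac{\beta}{2}\,\mathrm{tr}(YY^\top)
    B = -\tfrac{p}{2}\big(\beta \textstyle\sum_n \psi_0^{(n)} - \mathrm{tr}(A)\big)
    C = -p \textstyle\sum_i \log (L_B)_{ii} = -\tfrac{p}{2}\log\det B
    D = \tfrac{1}{2}\,\mathrm{data\_fit}

with log_marginal = A + B + C + D; data_fit (the quadratic term in Y) and the matrix _A are computed in parts of the file not shown in this hunk, and the deleted method additionally added a likelihood.Z term contributed by the likelihood object.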


@@ -71,8 +71,8 @@ def align_subplots(N,M,xlim=None, ylim=None):
     removeUpperTicks()
 def align_subplot_array(axes,xlim=None, ylim=None):
-    """make all of the axes in the array hae the same limits, turn off unnecessary ticks
+    """
+    Make all of the axes in the array hae the same limits, turn off unnecessary ticks
     use pb.subplots() to get an array of axes
     """
     #find sensible xlim,ylim