mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
tidying in sparse gp
This commit is contained in:
parent
d074280213
commit
052b888793
3 changed files with 49 additions and 65 deletions
|
|
@ -3,9 +3,10 @@
|
|||
|
||||
import numpy as np
|
||||
import pylab as pb
|
||||
from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
|
||||
from gp import GP
|
||||
from parameterization.param import Param
|
||||
from ..inference.latent_function_inference import varDTC
|
||||
from posterior import Posterior
|
||||
|
||||
class SparseGP(GP):
|
||||
"""
|
||||
|
|
@ -55,49 +56,16 @@ class SparseGP(GP):
|
|||
self.add_parameter(self.kern, gradient=self.dL_dtheta)
|
||||
self.add_parameter(self.likelihood, gradient=lambda:self.likelihood._gradients(partial=self.partial_for_likelihood))
|
||||
|
||||
|
||||
def parameters_changed(self):
|
||||
# kernel computations, using BGPLVM notation
|
||||
self.Kmm = self.kern.K(self.Z)
|
||||
self.posterior = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
|
||||
|
||||
#The derivative of the bound wrt the inducing inputs Z
|
||||
self.Z.gradient = self.kern.dK_dX(self.dL_dKmm, self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
|
||||
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance)
|
||||
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
|
||||
self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
|
||||
self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
||||
else:
|
||||
self.psi0 = self.kern.Kdiag(self.X)
|
||||
self.psi1 = self.kern.K(self.X, self.Z)
|
||||
self.psi2 = None
|
||||
|
||||
#self.posterior = self.inference_method.inference(??)
|
||||
super(SparseGP, self).parameters_changed()
|
||||
|
||||
|
||||
def dL_dtheta(self):
|
||||
"""
|
||||
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
|
||||
"""
|
||||
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
|
||||
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance)
|
||||
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
||||
else:
|
||||
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z)
|
||||
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
|
||||
|
||||
return dL_dtheta
|
||||
|
||||
def dL_dZ(self):
|
||||
"""
|
||||
The derivative of the bound wrt the inducing inputs Z
|
||||
"""
|
||||
dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
|
||||
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
||||
else:
|
||||
dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
|
||||
return dL_dZ
|
||||
self.Z.gradient += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
|
||||
|
||||
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -32,15 +32,29 @@ class DTCVar(object):
|
|||
#if Y in self.cache, return self.Cache[Y], else stor Y in cache and return L.
|
||||
raise NotImplementedError, 'TODO' #TODO
|
||||
|
||||
def inference(self, Kmm, Kmn, Knn_diag, likelihood, Y):
|
||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||
|
||||
num_inducing, num_data = Kmn.shape
|
||||
num_inducing, _ = Z.shape
|
||||
num_data, output_dim = Y.shape
|
||||
|
||||
#see whether we're using variational uncertain inputs
|
||||
uncertain_inputs = (X_variance is None)
|
||||
|
||||
# kernel computations, using BGPLVM notation
|
||||
Kmm = kern.K(Z)
|
||||
if uncertain_inputs:
|
||||
psi0 = kern.psi0(Z, X, X_variance)
|
||||
psi1 = kern.psi1(Z, X, X_variance)
|
||||
psi2 = kern.psi2(Z, X, X_variance)
|
||||
else:
|
||||
psi0 = kern.Kdiag(X)
|
||||
psi1 = kern.K(X, Z)
|
||||
|
||||
#factor Kmm # TODO: cache?
|
||||
_Lm = jitchol(Kmm)
|
||||
Lm = jitchol(Kmm)
|
||||
|
||||
# The rather complex computations of A
|
||||
if has_uncertain_inputs:
|
||||
if uncertain_inputs:
|
||||
if likelihood.is_heteroscedastic:
|
||||
psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
|
||||
else:
|
||||
|
|
@ -56,7 +70,7 @@ class DTCVar(object):
|
|||
tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
|
||||
else:
|
||||
tmp = psi1 * (np.sqrt(likelihood.precision))
|
||||
tmp, _ = dtrtrs(_Lm, np.asfortranarray(tmp.T), lower=1)
|
||||
tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
|
||||
A = tdot(tmp)
|
||||
|
||||
# factor B
|
||||
|
|
@ -67,11 +81,23 @@ class DTCVar(object):
|
|||
psi1Vf = np.dot(psi1.T, likelihood.VVT_factor)
|
||||
|
||||
# back substutue C into psi1Vf
|
||||
tmp, info1 = dtrtrs(_Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
|
||||
tmp, info1 = dtrtrs(Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
|
||||
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0)
|
||||
# tmp, info2 = dpotrs(LB, tmp, lower=1)
|
||||
tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
|
||||
Cpsi1Vf, info3 = dtrtrs(_Lm, tmp, lower=1, trans=1)
|
||||
Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||
|
||||
#compute log marginal likelihood
|
||||
if likelihood.is_heteroscedastic:
|
||||
A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
|
||||
B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
|
||||
else:
|
||||
A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
|
||||
B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
|
||||
C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
|
||||
D = 0.5 * data_fit
|
||||
log_marginal = A + B + C + D
|
||||
|
||||
|
||||
# Compute dL_dKmm
|
||||
tmp = tdot(_LBi_Lmi_psi1Vf)
|
||||
|
|
@ -80,12 +106,12 @@ class DTCVar(object):
|
|||
tmp = -0.5 * DBi_plus_BiPBi
|
||||
tmp += -0.5 * B * output_dim
|
||||
tmp += output_dim * np.eye(num_inducing)
|
||||
dL_dKmm = backsub_both_sides(_Lm, tmp)
|
||||
dL_dKmm = backsub_both_sides(Lm, tmp)
|
||||
|
||||
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
|
||||
dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
|
||||
dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
|
||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(_Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
|
||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
|
||||
|
||||
if likelihood.is_heteroscedastic:
|
||||
|
||||
|
|
@ -117,7 +143,7 @@ class DTCVar(object):
|
|||
else:
|
||||
|
||||
LBi = chol_inv(LB)
|
||||
Lmi_psi1, nil = dtrtrs(_Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
|
||||
Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
|
||||
_LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
|
||||
|
||||
|
||||
|
|
@ -135,14 +161,4 @@ class DTCVar(object):
|
|||
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
|
||||
partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)
|
||||
|
||||
#def log_likelihood(self):
|
||||
if likelihood.is_heteroscedastic:
|
||||
A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
|
||||
B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
|
||||
else:
|
||||
A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
|
||||
B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
|
||||
C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
|
||||
D = 0.5 * data_fit
|
||||
return A + B + C + D + likelihood.Z
|
||||
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@ class Posterior(object):
|
|||
"""
|
||||
An object to represent a Gaussian posterior over latent function values.
|
||||
This may be computed exactly for Gaussian likelihoods, or approximated for
|
||||
non-Gaussian likelihoods.
|
||||
non-Gaussian likelihoods.
|
||||
|
||||
The purpose of this class is to serve as an interface between the inference
|
||||
schemes and the model classes.
|
||||
schemes and the model classes.
|
||||
|
||||
"""
|
||||
def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
|
||||
|
|
@ -31,7 +31,7 @@ class Posterior(object):
|
|||
K (for lazy computation)
|
||||
|
||||
You may supply either:
|
||||
cc
|
||||
|
||||
woodbury_chol
|
||||
woodbury_vector
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ class Posterior(object):
|
|||
self._covariance = cov
|
||||
self._K_chol = K_chol
|
||||
|
||||
#copmute this lazily
|
||||
#compute this lazily
|
||||
self._precision = None
|
||||
|
||||
@property
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue