some hacking on sparse_gp inference

James Hensman 2014-01-29 17:02:44 +00:00
parent ae03b63afb
commit a31ff3acc3
3 changed files with 50 additions and 40 deletions

GPy/core/gp.py

@@ -48,9 +48,9 @@ class GP(Model):
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
                 inference_method = exact_gaussian_inference.ExactGaussianInference()
             else:
                 inference_method = expectation_propagation
             print "defaulting to ", inference_method, "for latent function inference"
         self.inference_method = inference_method
         self.add_parameter(self.kern)
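
The effect of the None default above: a Gaussian likelihood gets exact inference, anything else falls through to expectation propagation, with a printed notice either way. A hypothetical call, for orientation only (the GP constructor signature is not part of this hunk):

    # hypothetical usage; kernel is any GPy kernel instance
    lik = likelihoods.Gaussian()
    m = GP(X, Y, kernel, lik)     # no inference_method given
    # prints: defaulting to <ExactGaussianInference instance> for latent function inference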

GPy/core/sparse_gp.py

@@ -6,6 +6,7 @@ from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv,
 from gp import GP
 from parameterization.param import Param
 from ..inference.latent_function_inference import varDTC
+from .. import likelihoods

 class SparseGP(GP):
     """
@@ -35,24 +36,21 @@ class SparseGP(GP):
         #pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
-                inference_method = varDTC.Gaussian_inference()
+                inference_method = varDTC.VarDTC()
             else:
                 #inference_method = ??
                 raise NotImplementedError, "what to do what to do?"
             print "defaulting to ", inference_method, "for latent function inference"

-        GP.__init__(self, X, Y, likelihood, inference_method, kernel, name)

         self.Z = Z
         self.num_inducing = Z.shape[0]

-        if X_variance is None:
-            self.has_uncertain_inputs = False
-            self.X_variance = None
-        else:
+        if not (X_variance is None):
             assert X_variance.shape == X.shape
-            self.has_uncertain_inputs = True
             self.X_variance = X_variance

+        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
+
         self.Z = Param('inducing inputs', self.Z)
         self.add_parameter(self.Z, gradient=self.dL_dZ, index=0)
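
With this change the uncertain-inputs bookkeeping reduces to X_variance itself; the has_uncertain_inputs flag is gone, and the inference code below tests X_variance is None directly. A minimal construction sketch, assuming the argument order (X, Y, Z, kernel, likelihood, ...) used informally here (the full signature is not shown in this diff):

    import numpy as np
    # kernel, likelihood: any GPy kernel/likelihood instances (placeholders here)

    N, Q, M = 100, 2, 10                     # data, input dims, inducing points
    X, Y = np.random.randn(N, Q), np.random.randn(N, 1)
    Z = X[:M].copy()                         # inducing inputs live in input space

    m = SparseGP(X, Y, Z, kernel, likelihood)             # certain inputs
    m_u = SparseGP(X, Y, Z, kernel, likelihood,
                   X_variance=0.1 * np.ones((N, Q)))      # uncertain inputs; shape must match X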

GPy/inference/latent_function_inference/varDTC.py

@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from posterior import Posterior
-from ...util.linalg import pdinv, dpotrs, tdot
+from ...util.linalg import jitchol, backsub_both_sides, tdot
 import numpy as np

 log_2_pi = np.log(2*np.pi)
@@ -17,7 +17,7 @@ class VarDTC(object):
     """
     def __init__(self):
-        self._YYTfactor_cache = caching.cache()
+        #self._YYTfactor_cache = caching.cache()
         self.const_jitter = 1e-6

     def get_YYTfactor(self, Y):
@@ -42,7 +42,13 @@ class VarDTC(object):
         num_data, output_dim = Y.shape

         #see whether we're using variational uncertain inputs
-        uncertain_inputs = (X_variance is None)
+        uncertain_inputs = not (X_variance is None)
+
+        #see whether we've got a different noise variance for each datum
+        beta = 1./np.squeeze(likelihood.variance)
+        het_noise = False
+        if beta.size > 1:
+            het_noise = True

         # kernel computations, using BGPLVM notation
         Kmm = kern.K(Z)
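
The squeeze-then-size test above distinguishes a shared noise variance from a per-datum one. A small self-contained sketch of just that logic (plain numpy, not GPy code):

    import numpy as np

    def is_het(variance):
        # a scalar-like variance squeezes to a 0-d array (size 1);
        # a per-datum variance keeps one entry per datum (size N > 1)
        beta = 1. / np.squeeze(variance)
        return beta.size > 1

    assert not is_het(np.array([[0.1]]))      # homoscedastic
    assert is_het(np.full((100, 1), 0.1))     # heteroscedastic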
@@ -59,7 +65,7 @@ class VarDTC(object):
         # The rather complex computations of A
         if uncertain_inputs:
-            if likelihood.is_heteroscedastic:
+            if het_noise:
                 psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
             else:
                 psi2_beta = psi2.sum(0) * likelihood.precision
@@ -70,10 +76,10 @@ class VarDTC(object):
             tmp = evecs * np.sqrt(clipped_evals)
             tmp = tmp.T
         else:
-            if likelihood.is_heteroscedastic:
-                tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
+            if het_noise:
+                tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
             else:
-                tmp = psi1 * (np.sqrt(likelihood.precision))
+                tmp = psi1 * (np.sqrt(beta))
         tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
         A = tdot(tmp)
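
Both branches of the computation above land on the same num_inducing x num_inducing matrix. In the certain-inputs, homoscedastic case the two shared lines compute

    A = beta * Lm^{-1} Psi_1^T Psi_1 Lm^{-T}

since dtrtrs solves the triangular system Lm X = (sqrt(beta) Psi_1)^T and tdot(X) forms X X^T. With uncertain inputs, Psi_1^T Psi_1 is replaced by the eigendecomposition of psi2_beta = sum_n beta_n psi_2^(n), the eigenvalues clipped to be nonnegative so the square root is safe.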
@@ -82,7 +88,7 @@ class VarDTC(object):
         LB = jitchol(B)

         # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
-        VVT_factor = self.get_VVTfactor(Y, likelihood.precision)
+        VVT_factor = self.get_VVTfactor(Y, beta)
         psi1Vf = np.dot(psi1.T, VVT_factor)

         # back substitute C into psi1Vf
@@ -92,12 +98,12 @@ class VarDTC(object):
         Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)

         #compute log marginal likelihood
-        if likelihood.is_heteroscedastic:
-            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
-            B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
+        if het_noise:
+            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
+            B = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(_A))
         else:
-            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
-            B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
+            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(beta)) - 0.5 * beta * likelihood.trYYT
+            B = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(_A))
         C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
         D = 0.5 * data_fit
         log_marginal = A + B + C + D
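
Reading the homoscedastic branch as math (note the scalar A here collides with the matrix A above, which the code tracks as _A):

    log p(Y) >= - (N*D/2) log(2*pi) + (N*D/2) log(beta) - (beta/2) tr(Y Y^T)    [A]
                - (D/2) (beta * psi_0 - tr(_A))                                 [B]
                - D * sum_i log([L_B]_ii)                                       [C]
                + (1/2) data_fit                                                [D]

which has the shape of the Titsias (2009) variational DTC lower bound; the het_noise branch replaces the scalar beta with per-datum beta_n inside the sums.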
@@ -112,21 +118,20 @@ class VarDTC(object):
             tmp += output_dim * np.eye(num_inducing)
         dL_dKmm = backsub_both_sides(Lm, tmp)

-        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
-        dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
+        # Compute dL_dpsi
+        dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
         dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
         dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)

-        if likelihood.is_heteroscedastic:
-            if has_uncertain_inputs:
-                dL_dpsi2 = likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
+        if het_noise:
+            if uncertain_inputs:
+                dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
             else:
                 dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * likelihood.precision.reshape(num_data, 1)).T).T
                 dL_dpsi2 = None
         else:
             dL_dpsi2 = likelihood.precision * dL_dpsi2_beta
-            if has_uncertain_inputs:
+            if uncertain_inputs:
                 # repeat for each of the N psi_2 matrices
                 dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
             else:
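
The het_noise + uncertain_inputs branch relies on numpy broadcasting to scale one (M, M) gradient slab per datum. A tiny standalone check of the shapes involved:

    import numpy as np

    N, M = 4, 3                                   # num_data, num_inducing
    beta = np.ones(N)                             # per-datum precisions
    dL_dpsi2_beta = np.ones((M, M))
    # (N,1,1) * (1,M,M) broadcasts to (N,M,M): one matrix per datum
    dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
    assert dL_dpsi2.shape == (N, M, M)

The homoscedastic + uncertain branch reaches the same (N, M, M) shape via np.repeat instead, since every datum shares one slab.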
@@ -139,18 +144,14 @@ class VarDTC(object):
         if likelihood.size == 0:
             # save computation here.
             partial_for_likelihood = None
-        elif likelihood.is_heteroscedastic:
-            if has_uncertain_inputs:
+        elif het_noise:
+            if uncertain_inputs:
                 raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
             else:
                 LBi = chol_inv(LB)
                 Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
                 _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
                 partial_for_likelihood = -0.5 * likelihood.precision + 0.5 * likelihood.V**2
                 partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * likelihood.precision**2
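
partial_for_likelihood collects per-datum derivatives of the bound with respect to the noise parameter, to be chained into the likelihood's own parameterisation (the likelihood.update_gradients call added in the next hunk). If the bound is differentiated with respect to the precision beta = 1/sigma^2, the chain-rule factor needed to reach the variance is pure calculus:

    dL/dsigma^2 = (dL/dbeta) * (dbeta/dsigma^2) = -beta^2 * (dL/dbeta)

Which of the two parameterisations this code targets is not visible in the hunk, so read the identity as orientation rather than as a statement about the implementation.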
@@ -165,15 +166,26 @@ class VarDTC(object):
             partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
             partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)

+        #put the gradients in the right places
+        likelihood.update_gradients(np.diag(partial_for_likelihood))
+        if uncertain_inputs:
+            kern.update_gradients_variational(??)
+            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
+        else:
+            kern.update_gradients_sparse(??)
+            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}

         #get sufficient things for posterior prediction
         if VVT_factor.shape[1] == Y.shape[1]:
-            Cpsi1V = Cpsi1Vf
+            woodbury_vector = Cpsi1Vf # == Cpsi1V
+            woodbury_chol = ??
         else:
             raise NotImplementedError #TODO

         #construct a posterior object
-        post = Posterior(woodbury_chol=None, woodbury_vector=Cpsi1V, K=None, mean=None, cov=None, K_chol=None)
+        post = Posterior(woodbury_chol=woodbury_chol, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

-        return
+        return post, log_marginal, grad_dict
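
A closing note on what the Posterior pieces are for, under the usual GPy Woodbury conventions (assumed here, not spelled out in this commit): prediction at test points X_* needs only

    mean(X_*) = K_*u . woodbury_vector
    cov(X_*)  = K_** - K_*u . woodbury_inv . K_u*

where K_*u is the test/inducing cross-covariance, woodbury_inv is recovered from its Cholesky factor woodbury_chol (still an open ?? placeholder above), and K=Kmm with K_chol=Lm supply the prior pieces. grad_dict carries dL_dKmm and the dL_dpsi terms back out to the model, matching the kern.update_gradients_* calls sketched earlier in this hunk.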