From d074280213863f18f0a085f6b75f452e3e566ada Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 27 Jan 2014 13:04:23 +0000 Subject: [PATCH 1/3] removed some superfluous things from the model class --- GPy/core/model.py | 63 ----------------------------------------------- 1 file changed, 63 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 38ddc069..f4de0405 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -383,69 +383,6 @@ class Model(Parameterized): sgd.run() self.optimization_runs.append(sgd) - def Laplace_covariance(self): - """return the covariance matrix of a Laplace approximation at the current (stationary) point.""" - # TODO add in the prior contributions for MAP estimation - # TODO fix the hessian for tied, constrained and fixed components - if hasattr(self, 'log_likelihood_hessian'): - A = -self.log_likelihood_hessian() - - else: - print "numerically calculating Hessian. please be patient!" - x = self._get_params() - def f(x): - self._set_params(x) - return self.log_likelihood() - h = ndt.Hessian(f) # @UndefinedVariable - A = -h(x) - self._set_params(x) - # check for almost zero components on the diagonal which screw up the cholesky - aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0] - A[aa, aa] = 0. - return A - - def Laplace_evidence(self): - """Returns an estiamte of the model evidence based on the Laplace approximation. - Uses a numerical estimate of the Hessian if none is available analytically.""" - A = self.Laplace_covariance() - try: - hld = np.sum(np.log(np.diag(jitchol(A)[0]))) - except: - return np.nan - return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld - -# def __str__(self): -# s = Parameterized.__str__(self).split('\n') -# #def __str__(self, names=None): -# # if names is None: -# # names = self._get_print_names() -# #s = Parameterized.__str__(self, names=names).split('\n') -# # add priors to the string -# if self.priors is not None: -# strs = [str(p) if p is not None else '' for p in self.priors] -# else: -# strs = [''] * len(self._get_params()) -# # strs = [''] * len(self._get_param_names()) -# # name_indices = self.grep_param_names("|".join(names)) -# # strs = np.array(strs)[name_indices] -# width = np.array(max([len(p) for p in strs] + [5])) + 4 -# -# log_like = self.log_likelihood() -# log_prior = self.log_prior() -# obj_funct = '\nLog-likelihood: {0:.3e}'.format(log_like) -# if len(''.join(strs)) != 0: -# obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior) -# obj_funct += '\n\n' -# s[0] = obj_funct + s[0] -# s[0] += "|{h:^{col}}".format(h='prior', col=width) -# s[1] += '-' * (width + 1) -# -# for p in range(2, len(strs) + 2): -# s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width) -# -# return '\n'.join(s) - - def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): """ Check the gradient of the ,odel by comparing to a numerical From 052b8887935df51a51a013c3dfe82465b6511186 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 27 Jan 2014 15:02:25 +0000 Subject: [PATCH 2/3] tidying in sparse gp --- GPy/core/sparse_gp.py | 50 +++-------------- .../latent_function_inference/dtcvar.py | 56 ++++++++++++------- .../latent_function_inference/posterior.py | 8 +-- 3 files changed, 49 insertions(+), 65 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index a05bb362..5a1dc595 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -3,9 +3,10 @@ import numpy as np import pylab as pb -from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri from gp import GP from parameterization.param import Param +from ..inference.latent_function_inference import varDTC +from posterior import Posterior class SparseGP(GP): """ @@ -55,49 +56,16 @@ class SparseGP(GP): self.add_parameter(self.kern, gradient=self.dL_dtheta) self.add_parameter(self.likelihood, gradient=lambda:self.likelihood._gradients(partial=self.partial_for_likelihood)) - def parameters_changed(self): - # kernel computations, using BGPLVM notation - self.Kmm = self.kern.K(self.Z) + self.posterior = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) + + #The derivative of the bound wrt the inducing inputs Z + self.Z.gradient = self.kern.dK_dX(self.dL_dKmm, self.Z) if self.has_uncertain_inputs: - self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance) - self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance) - self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance) + self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) + self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) else: - self.psi0 = self.kern.Kdiag(self.X) - self.psi1 = self.kern.K(self.X, self.Z) - self.psi2 = None - - #self.posterior = self.inference_method.inference(??) - super(SparseGP, self).parameters_changed() - - - def dL_dtheta(self): - """ - Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel - """ - dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) - if self.has_uncertain_inputs: - dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance) - dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance) - dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance) - else: - dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z) - dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) - - return dL_dtheta - - def dL_dZ(self): - """ - The derivative of the bound wrt the inducing inputs Z - """ - dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z) - if self.has_uncertain_inputs: - dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) - dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) - else: - dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X) - return dL_dZ + self.Z.gradient += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X) def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): """ diff --git a/GPy/inference/latent_function_inference/dtcvar.py b/GPy/inference/latent_function_inference/dtcvar.py index d65a1758..af1dc87c 100644 --- a/GPy/inference/latent_function_inference/dtcvar.py +++ b/GPy/inference/latent_function_inference/dtcvar.py @@ -32,15 +32,29 @@ class DTCVar(object): #if Y in self.cache, return self.Cache[Y], else stor Y in cache and return L. raise NotImplementedError, 'TODO' #TODO - def inference(self, Kmm, Kmn, Knn_diag, likelihood, Y): + def inference(self, kern, X, X_variance, Z, likelihood, Y): - num_inducing, num_data = Kmn.shape + num_inducing, _ = Z.shape + num_data, output_dim = Y.shape + + #see whether we're using variational uncertain inputs + uncertain_inputs = (X_variance is None) + + # kernel computations, using BGPLVM notation + Kmm = kern.K(Z) + if uncertain_inputs: + psi0 = kern.psi0(Z, X, X_variance) + psi1 = kern.psi1(Z, X, X_variance) + psi2 = kern.psi2(Z, X, X_variance) + else: + psi0 = kern.Kdiag(X) + psi1 = kern.K(X, Z) #factor Kmm # TODO: cache? - _Lm = jitchol(Kmm) + Lm = jitchol(Kmm) # The rather complex computations of A - if has_uncertain_inputs: + if uncertain_inputs: if likelihood.is_heteroscedastic: psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0) else: @@ -56,7 +70,7 @@ class DTCVar(object): tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1))) else: tmp = psi1 * (np.sqrt(likelihood.precision)) - tmp, _ = dtrtrs(_Lm, np.asfortranarray(tmp.T), lower=1) + tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1) A = tdot(tmp) # factor B @@ -67,11 +81,23 @@ class DTCVar(object): psi1Vf = np.dot(psi1.T, likelihood.VVT_factor) # back substutue C into psi1Vf - tmp, info1 = dtrtrs(_Lm, np.asfortranarray(psi1Vf), lower=1, trans=0) + tmp, info1 = dtrtrs(Lm, np.asfortranarray(psi1Vf), lower=1, trans=0) _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0) # tmp, info2 = dpotrs(LB, tmp, lower=1) tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) - Cpsi1Vf, info3 = dtrtrs(_Lm, tmp, lower=1, trans=1) + Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1) + + #compute log marginal likelihood + if likelihood.is_heteroscedastic: + A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y) + B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A)) + else: + A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT + B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A)) + C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2)) + D = 0.5 * data_fit + log_marginal = A + B + C + D + # Compute dL_dKmm tmp = tdot(_LBi_Lmi_psi1Vf) @@ -80,12 +106,12 @@ class DTCVar(object): tmp = -0.5 * DBi_plus_BiPBi tmp += -0.5 * B * output_dim tmp += output_dim * np.eye(num_inducing) - dL_dKmm = backsub_both_sides(_Lm, tmp) + dL_dKmm = backsub_both_sides(Lm, tmp) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten() dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T) - dL_dpsi2_beta = 0.5 * backsub_both_sides(_Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) + dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) if likelihood.is_heteroscedastic: @@ -117,7 +143,7 @@ class DTCVar(object): else: LBi = chol_inv(LB) - Lmi_psi1, nil = dtrtrs(_Lm, np.asfortranarray(psi1.T), lower=1, trans=0) + Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0) _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0) @@ -135,14 +161,4 @@ class DTCVar(object): partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision) partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit) - #def log_likelihood(self): - if likelihood.is_heteroscedastic: - A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y) - B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A)) - else: - A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT - B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A)) - C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2)) - D = 0.5 * data_fit - return A + B + C + D + likelihood.Z diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 609ba3bc..ae5bff37 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -8,10 +8,10 @@ class Posterior(object): """ An object to represent a Gaussian posterior over latent function values. This may be computed exactly for Gaussian likelihoods, or approximated for - non-Gaussian likelihoods. + non-Gaussian likelihoods. The purpose of this class is to serve as an interface between the inference - schemes and the model classes. + schemes and the model classes. """ def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None): @@ -31,7 +31,7 @@ class Posterior(object): K (for lazy computation) You may supply either: - cc + woodbury_chol woodbury_vector @@ -64,7 +64,7 @@ class Posterior(object): self._covariance = cov self._K_chol = K_chol - #copmute this lazily + #compute this lazily self._precision = None @property From ca1cb4eb2213a3eebfae64426f8c647f5d4a50bd Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 27 Jan 2014 15:37:20 +0000 Subject: [PATCH 3/3] removed marginal and derivative from posterior object --- GPy/core/gp.py | 4 ++-- GPy/core/sparse_gp.py | 1 - .../latent_function_inference/exact_gaussian_inference.py | 2 +- GPy/inference/latent_function_inference/posterior.py | 8 +------- .../latent_function_inference/{dtcvar.py => varDTC.py} | 7 ++++--- 5 files changed, 8 insertions(+), 14 deletions(-) rename GPy/inference/latent_function_inference/{dtcvar.py => varDTC.py} (98%) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5b356744..6f02d7df 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -60,10 +60,10 @@ class GP(Model): self.parameters_changed() def parameters_changed(self): - self.posterior = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y) + self.posterior, self._log_marginal_likelihood, self._dL_dK = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y) def log_likelihood(self): - return self.posterior.log_marginal + return self._log_marginal_likelihood def dL_dtheta_K(self): return self.kern.dK_dtheta(self.posterior.dL_dK, self.X) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 5a1dc595..d91c2ca1 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -6,7 +6,6 @@ import pylab as pb from gp import GP from parameterization.param import Param from ..inference.latent_function_inference import varDTC -from posterior import Posterior class SparseGP(GP): """ diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 3c874a06..9313316c 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -53,6 +53,6 @@ class ExactGaussianInference(object): likelihood.update_gradients(np.diag(dL_dK)) - return Posterior(log_marginal, dL_dK, LW, alpha, K) + return Posterior(LW, alpha, K), log_marginal, dL_dK diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index ae5bff37..c4b0ec62 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -14,10 +14,8 @@ class Posterior(object): schemes and the model classes. """ - def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None): + def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None): """ - log_marginal: log p(Y|X) - dL_dK: d/dK log p(Y|X) woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M K : the proir covariance (required for lazy computation of various quantities) @@ -26,8 +24,6 @@ class Posterior(object): Not all of the above need to be supplied! You *must* supply: - log_marginal - dL_dK K (for lazy computation) You may supply either: @@ -46,8 +42,6 @@ class Posterior(object): From the supplied quantities, all of the others will be computed on demand (lazy computation) """ #obligatory - self.log_marginal = log_marginal - self.dL_dK = dL_dK self._K = K if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)): diff --git a/GPy/inference/latent_function_inference/dtcvar.py b/GPy/inference/latent_function_inference/varDTC.py similarity index 98% rename from GPy/inference/latent_function_inference/dtcvar.py rename to GPy/inference/latent_function_inference/varDTC.py index af1dc87c..bd4a59e1 100644 --- a/GPy/inference/latent_function_inference/dtcvar.py +++ b/GPy/inference/latent_function_inference/varDTC.py @@ -2,10 +2,11 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from posterior import Posterior -from .../util.linalg import pdinv, dpotrs, tdot +from ...util.linalg import pdinv, dpotrs, tdot +import numpy as np log_2_pi = np.log(2*np.pi) -class DTCVar(object): +class VarDTC(object): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. @@ -19,7 +20,7 @@ class DTCVar(object): self._YYTfactor_cache = caching.cache() self.const_jitter = 1e-6 - def get_YYTfactor(self, Y): + def get_YYTfactor(self, Y): """ find a matrix L which satisfies LLT = YYT.