From d074280213863f18f0a085f6b75f452e3e566ada Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 27 Jan 2014 13:04:23 +0000
Subject: [PATCH 1/3] removed some superfluous things from the model class

---
 GPy/core/model.py | 63 -----------------------------------------------
 1 file changed, 63 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 38ddc069..f4de0405 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -383,69 +383,6 @@ class Model(Parameterized):
         sgd.run()
         self.optimization_runs.append(sgd)
 
-    def Laplace_covariance(self):
-        """return the covariance matrix of a Laplace approximation at the current (stationary) point."""
-        # TODO add in the prior contributions for MAP estimation
-        # TODO fix the hessian for tied, constrained and fixed components
-        if hasattr(self, 'log_likelihood_hessian'):
-            A = -self.log_likelihood_hessian()
-
-        else:
-            print "numerically calculating Hessian. please be patient!"
-            x = self._get_params()
-            def f(x):
-                self._set_params(x)
-                return self.log_likelihood()
-            h = ndt.Hessian(f) # @UndefinedVariable
-            A = -h(x)
-            self._set_params(x)
-        # check for almost zero components on the diagonal which screw up the cholesky
-        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
-        A[aa, aa] = 0.
-        return A
-
-    def Laplace_evidence(self):
-        """Returns an estiamte of the model evidence based on the Laplace approximation.
-        Uses a numerical estimate of the Hessian if none is available analytically."""
-        A = self.Laplace_covariance()
-        try:
-            hld = np.sum(np.log(np.diag(jitchol(A)[0])))
-        except:
-            return np.nan
-        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
-
-#     def __str__(self):
-#         s = Parameterized.__str__(self).split('\n')
-#         #def __str__(self, names=None):
-#         #    if names is None:
-#         #        names = self._get_print_names()
-#         #s = Parameterized.__str__(self, names=names).split('\n')
-#         # add priors to the string
-#         if self.priors is not None:
-#             strs = [str(p) if p is not None else '' for p in self.priors]
-#         else:
-#             strs = [''] * len(self._get_params())
-#        #         strs = [''] * len(self._get_param_names())
-#        #     name_indices = self.grep_param_names("|".join(names))
-#        #     strs = np.array(strs)[name_indices]
-#         width = np.array(max([len(p) for p in strs] + [5])) + 4
-# 
-#         log_like = self.log_likelihood()
-#         log_prior = self.log_prior()
-#         obj_funct = '\nLog-likelihood: {0:.3e}'.format(log_like)
-#         if len(''.join(strs)) != 0:
-#             obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
-#         obj_funct += '\n\n'
-#         s[0] = obj_funct + s[0]
-#         s[0] += "|{h:^{col}}".format(h='prior', col=width)
-#         s[1] += '-' * (width + 1)
-# 
-#         for p in range(2, len(strs) + 2):
-#             s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
-# 
-#         return '\n'.join(s)
-
-
     def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the ,odel by comparing to a numerical

From 052b8887935df51a51a013c3dfe82465b6511186 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 27 Jan 2014 15:02:25 +0000
Subject: [PATCH 2/3] tidying in sparse gp

---
 GPy/core/sparse_gp.py                         | 50 +++--------------
 .../latent_function_inference/dtcvar.py       | 56 ++++++++++++-------
 .../latent_function_inference/posterior.py    |  8 +--
 3 files changed, 49 insertions(+), 65 deletions(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index a05bb362..5a1dc595 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -3,9 +3,10 @@
 
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
 from gp import GP
 from parameterization.param import Param
+from ..inference.latent_function_inference import varDTC
+from posterior import Posterior
 
 class SparseGP(GP):
     """
@@ -55,49 +56,16 @@ class SparseGP(GP):
         self.add_parameter(self.kern, gradient=self.dL_dtheta)
         self.add_parameter(self.likelihood, gradient=lambda:self.likelihood._gradients(partial=self.partial_for_likelihood))
 
-
     def parameters_changed(self):
-        # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
+        self.posterior = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
+
+                #The derivative of the bound wrt the inducing inputs Z
+        self.Z.gradient = self.kern.dK_dX(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
-            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance)
-            self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
+            self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
+            self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            self.psi0 = self.kern.Kdiag(self.X)
-            self.psi1 = self.kern.K(self.X, self.Z)
-            self.psi2 = None
-
-        #self.posterior = self.inference_method.inference(??)
-        super(SparseGP, self).parameters_changed()
-
-
-    def dL_dtheta(self):
-        """
-        Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
-        """
-        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
-        if self.has_uncertain_inputs:
-            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
-        else:
-            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z)
-            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
-
-        return dL_dtheta
-
-    def dL_dZ(self):
-        """
-        The derivative of the bound wrt the inducing inputs Z
-        """
-        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
-        if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
-        else:
-            dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
-        return dL_dZ
+            self.Z.gradient += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
 
     def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
         """
diff --git a/GPy/inference/latent_function_inference/dtcvar.py b/GPy/inference/latent_function_inference/dtcvar.py
index d65a1758..af1dc87c 100644
--- a/GPy/inference/latent_function_inference/dtcvar.py
+++ b/GPy/inference/latent_function_inference/dtcvar.py
@@ -32,15 +32,29 @@ class DTCVar(object):
             #if Y in self.cache, return self.Cache[Y], else stor Y in cache and return L.
             raise NotImplementedError, 'TODO' #TODO
 
-    def inference(self, Kmm, Kmn, Knn_diag, likelihood, Y):
+    def inference(self, kern, X, X_variance, Z, likelihood, Y):
 
-        num_inducing, num_data = Kmn.shape
+        num_inducing, _ = Z.shape
+        num_data, output_dim = Y.shape
+
+        #see whether we're using variational uncertain inputs
+        uncertain_inputs = (X_variance is None)
+
+        # kernel computations, using BGPLVM notation
+        Kmm = kern.K(Z)
+        if uncertain_inputs:
+            psi0 = kern.psi0(Z, X, X_variance)
+            psi1 = kern.psi1(Z, X, X_variance)
+            psi2 = kern.psi2(Z, X, X_variance)
+        else:
+            psi0 = kern.Kdiag(X)
+            psi1 = kern.K(X, Z)
 
         #factor Kmm # TODO: cache?
-        _Lm = jitchol(Kmm)
+        Lm = jitchol(Kmm)
 
         # The rather complex computations of A
-        if has_uncertain_inputs:
+        if uncertain_inputs:
             if likelihood.is_heteroscedastic:
                 psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
             else:
@@ -56,7 +70,7 @@ class DTCVar(object):
                 tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
             else:
                 tmp = psi1 * (np.sqrt(likelihood.precision))
-        tmp, _ = dtrtrs(_Lm, np.asfortranarray(tmp.T), lower=1)
+        tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
         A = tdot(tmp)
 
         # factor B
@@ -67,11 +81,23 @@ class DTCVar(object):
         psi1Vf = np.dot(psi1.T, likelihood.VVT_factor)
 
         # back substutue C into psi1Vf
-        tmp, info1 = dtrtrs(_Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
+        tmp, info1 = dtrtrs(Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
         _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0)
         # tmp, info2 = dpotrs(LB, tmp, lower=1)
         tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
-        Cpsi1Vf, info3 = dtrtrs(_Lm, tmp, lower=1, trans=1)
+        Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)
+
+        #compute log marginal likelihood
+        if likelihood.is_heteroscedastic:
+            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
+            B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
+        else:
+            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
+            B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
+        C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
+        D = 0.5 * data_fit
+        log_marginal = A + B + C + D
+
 
         # Compute dL_dKmm
         tmp = tdot(_LBi_Lmi_psi1Vf)
@@ -80,12 +106,12 @@ class DTCVar(object):
         tmp = -0.5 * DBi_plus_BiPBi
         tmp += -0.5 * B * output_dim
         tmp += output_dim * np.eye(num_inducing)
-        dL_dKmm = backsub_both_sides(_Lm, tmp)
+        dL_dKmm = backsub_both_sides(Lm, tmp)
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
         dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
         dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
-        dL_dpsi2_beta = 0.5 * backsub_both_sides(_Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
 
         if likelihood.is_heteroscedastic:
 
@@ -117,7 +143,7 @@ class DTCVar(object):
             else:
 
                 LBi = chol_inv(LB)
-                Lmi_psi1, nil = dtrtrs(_Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
+                Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
                 _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
 
 
@@ -135,14 +161,4 @@ class DTCVar(object):
             partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
             partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)
 
-    #def log_likelihood(self):
-        if likelihood.is_heteroscedastic:
-            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
-            B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
-        else:
-            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
-            B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
-        C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
-        D = 0.5 * data_fit
-        return A + B + C + D + likelihood.Z
 
diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py
index 609ba3bc..ae5bff37 100644
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@@ -8,10 +8,10 @@ class Posterior(object):
     """
     An object to represent a Gaussian posterior over latent function values.
     This may be computed exactly for Gaussian likelihoods, or approximated for
-    non-Gaussian likelihoods. 
+    non-Gaussian likelihoods.
 
     The purpose of this class is to serve as an interface between the inference
-    schemes and the model classes. 
+    schemes and the model classes.
 
     """
     def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
@@ -31,7 +31,7 @@ class Posterior(object):
           K (for lazy computation)
 
        You may supply either:
-         cc
+
           woodbury_chol
           woodbury_vector
 
@@ -64,7 +64,7 @@ class Posterior(object):
         self._covariance = cov
         self._K_chol = K_chol
 
-        #copmute this lazily
+        #compute this lazily
         self._precision = None
 
     @property

From ca1cb4eb2213a3eebfae64426f8c647f5d4a50bd Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 27 Jan 2014 15:37:20 +0000
Subject: [PATCH 3/3] removed marginal and derivative from posterior object

---
 GPy/core/gp.py                                            | 4 ++--
 GPy/core/sparse_gp.py                                     | 1 -
 .../latent_function_inference/exact_gaussian_inference.py | 2 +-
 GPy/inference/latent_function_inference/posterior.py      | 8 +-------
 .../latent_function_inference/{dtcvar.py => varDTC.py}    | 7 ++++---
 5 files changed, 8 insertions(+), 14 deletions(-)
 rename GPy/inference/latent_function_inference/{dtcvar.py => varDTC.py} (98%)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 5b356744..6f02d7df 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -60,10 +60,10 @@ class GP(Model):
         self.parameters_changed()
 
     def parameters_changed(self):
-        self.posterior = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y)
+        self.posterior, self._log_marginal_likelihood, self._dL_dK = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y)
 
     def log_likelihood(self):
-        return self.posterior.log_marginal
+        return self._log_marginal_likelihood
 
     def dL_dtheta_K(self):
         return self.kern.dK_dtheta(self.posterior.dL_dK, self.X)
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 5a1dc595..d91c2ca1 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -6,7 +6,6 @@ import pylab as pb
 from gp import GP
 from parameterization.param import Param
 from ..inference.latent_function_inference import varDTC
-from posterior import Posterior
 
 class SparseGP(GP):
     """
diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
index 3c874a06..9313316c 100644
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@@ -53,6 +53,6 @@ class ExactGaussianInference(object):
 
         likelihood.update_gradients(np.diag(dL_dK))
 
-        return Posterior(log_marginal, dL_dK, LW, alpha, K)
+        return Posterior(LW, alpha, K), log_marginal, dL_dK
 
 
diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py
index ae5bff37..c4b0ec62 100644
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@@ -14,10 +14,8 @@ class Posterior(object):
     schemes and the model classes.
 
     """
-    def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
+    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
         """
-        log_marginal: log p(Y|X)
-        dL_dK: d/dK log p(Y|X)
         woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
         woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
         K : the proir covariance (required for lazy computation of various quantities)
@@ -26,8 +24,6 @@ class Posterior(object):
 
         Not all of the above need to be supplied! You *must* supply:
 
-          log_marginal
-          dL_dK
           K (for lazy computation)
 
        You may supply either:
@@ -46,8 +42,6 @@ class Posterior(object):
         From the supplied quantities, all of the others will be computed on demand (lazy computation)
         """
         #obligatory
-        self.log_marginal = log_marginal
-        self.dL_dK = dL_dK
         self._K = K
 
         if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):
diff --git a/GPy/inference/latent_function_inference/dtcvar.py b/GPy/inference/latent_function_inference/varDTC.py
similarity index 98%
rename from GPy/inference/latent_function_inference/dtcvar.py
rename to GPy/inference/latent_function_inference/varDTC.py
index af1dc87c..bd4a59e1 100644
--- a/GPy/inference/latent_function_inference/dtcvar.py
+++ b/GPy/inference/latent_function_inference/varDTC.py
@@ -2,10 +2,11 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 from posterior import Posterior
-from .../util.linalg import pdinv, dpotrs, tdot
+from ...util.linalg import pdinv, dpotrs, tdot
+import numpy as np
 log_2_pi = np.log(2*np.pi)
 
-class DTCVar(object):
+class VarDTC(object):
     """
     An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
 
@@ -19,7 +20,7 @@ class DTCVar(object):
         self._YYTfactor_cache = caching.cache()
         self.const_jitter = 1e-6
 
-     def get_YYTfactor(self, Y):
+    def get_YYTfactor(self, Y):
         """
         find a matrix L which satisfies LLT = YYT.