From 7812a7dc7d8cbf622141d9a9469a4e86e807fb29 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Thu, 5 Dec 2013 10:56:18 +0000
Subject: [PATCH] moved functional part of sparseGP to inference/dtcvar

---
 GPy/core/sparse_gp.py                        | 116 --------------
 .../latent_function_inference/dtcvar.py      | 149 ++++++++++++++++++
 2 files changed, 149 insertions(+), 116 deletions(-)
 create mode 100644 GPy/inference/latent_function_inference/dtcvar.py

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 2ac47419..6445988a 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -87,122 +87,6 @@ class SparseGP(GPBase):
         self.psi0 = self.kern.Kdiag(self.X)
         self.psi1 = self.kern.K(self.X, self.Z)
         self.psi2 = None
-
-    def _computations(self):
-        if self._const_jitter is None or not(self._const_jitter.shape[0] == self.num_inducing):
-            self._const_jitter = np.eye(self.num_inducing) * 1e-8
-
-        # factor Kmm
-        self._Lm = jitchol(self.Kmm + self._const_jitter)
-
-        # The rather complex computations of self._A
-        if self.has_uncertain_inputs:
-            if self.likelihood.is_heteroscedastic:
-                psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
-            else:
-                psi2_beta = self.psi2.sum(0) * self.likelihood.precision
-            evals, evecs = linalg.eigh(psi2_beta)
-            clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
-            if not np.array_equal(evals, clipped_evals):
-                pass # print evals
-            tmp = evecs * np.sqrt(clipped_evals)
-            tmp = tmp.T
-        else:
-            if self.likelihood.is_heteroscedastic:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1)))
-            else:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
-        tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
-        self._A = tdot(tmp)
-
-        # factor B
-        self.B = np.eye(self.num_inducing) + self._A
-        self.LB = jitchol(self.B)
-
-        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
-        self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
-
-        # back substutue C into psi1Vf
-        tmp, info1 = dtrtrs(self._Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
-        self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
-        # tmp, info2 = dpotrs(self.LB, tmp, lower=1)
-        tmp, info2 = dtrtrs(self.LB, self._LBi_Lmi_psi1Vf, lower=1, trans=1)
-        self.Cpsi1Vf, info3 = dtrtrs(self._Lm, tmp, lower=1, trans=1)
-
-        # Compute dL_dKmm
-        tmp = tdot(self._LBi_Lmi_psi1Vf)
-        self.data_fit = np.trace(tmp)
-        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
-        tmp = -0.5 * self.DBi_plus_BiPBi
-        tmp += -0.5 * self.B * self.output_dim
-        tmp += self.output_dim * np.eye(self.num_inducing)
-        self.dL_dKmm = backsub_both_sides(self._Lm, tmp)
-
-        # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
-        self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
-        self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
-        dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
-
-        if self.likelihood.is_heteroscedastic:
-
-            if self.has_uncertain_inputs:
-                self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
-            else:
-                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (self.psi1 * self.likelihood.precision.reshape(self.num_data, 1)).T).T
-                self.dL_dpsi2 = None
-        else:
-            dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
-            if self.has_uncertain_inputs:
-                # repeat for each of the N psi_2 matrices
-                self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
-            else:
-                # subsume back into psi1 (==Kmn)
-                self.dL_dpsi1 += 2.*np.dot(self.psi1, dL_dpsi2)
-                self.dL_dpsi2 = None
-
-
-        # the partial derivative vector for the likelihood
-        if self.likelihood.size == 0:
-            # save computation here.
-            self.partial_for_likelihood = None
-        elif self.likelihood.is_heteroscedastic:
-
-            if self.has_uncertain_inputs:
-                raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
-
-            else:
-
-                LBi = chol_inv(self.LB)
-                Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
-                _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
-
-
-                self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
-                self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
-
-                self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
-
-                self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
-                self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
-
-        else:
-            # likelihood is not heteroscedatic
-            self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-            self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
-            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
-
-    def log_likelihood(self):
-        """ Compute the (lower bound on the) log marginal likelihood """
-        if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
-            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self._A))
-        else:
-            A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
-            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self._A))
-        C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
-        D = 0.5 * self.data_fit
-        return A + B + C + D + self.likelihood.Z
-
     def parameters_changed(self):
         self._compute_kernel_matrices()
         self._computations()
diff --git a/GPy/inference/latent_function_inference/dtcvar.py b/GPy/inference/latent_function_inference/dtcvar.py
new file mode 100644
index 00000000..7a956a9a
--- /dev/null
+++ b/GPy/inference/latent_function_inference/dtcvar.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from scipy import linalg
+
+from posterior import Posterior
+from ...util.linalg import pdinv, dpotrs, dtrtrs, tdot, jitchol, backsub_both_sides, mdot, chol_inv
+log_2_pi = np.log(2*np.pi)
+
+class DTCVar(object):
+    """
+    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
+
+    The function self.inference returns a Posterior object, which summarizes
+    the posterior.
+
+    For efficiency, we sometimes work with the Cholesky factor of Y*Y.T. To save
+    repeatedly recomputing this, we cache it.
+
+    """
+    def __init__(self):
+        self._YYTfactor_cache = caching.cache() # TODO: the caching helper is not implemented/imported yet
+        self.const_jitter = 1e-6
+
+    def get_YYTfactor(self, Y):
+        """
+        Find a matrix L which satisfies L*L.T = Y*Y.T.
+
+        Note that L may have fewer columns than Y.
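+
+        (If N > D, Y itself is such a factor, since tdot(Y) = Y*Y.T; otherwise an
+        N-column factor, e.g. the Cholesky factor of Y*Y.T, can be used instead.)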
+        """
+        N, D = Y.shape
+        if N > D:
+            return Y
+        else:
+            # if Y is in self._YYTfactor_cache, return the stored factor;
+            # otherwise store it and return L.
+            raise NotImplementedError, 'TODO' # TODO
+
+    def inference(self, Kmm, Kmn, Knn_diag, likelihood, Y):
+
+        num_inducing, num_data = Kmn.shape
+        output_dim = Y.shape[1]
+        const_jitter = np.eye(num_inducing) * self.const_jitter
+
+        # NB: for now this method assumes certain (deterministic) inputs, for which
+        # the psi statistics reduce to psi0 = diag(Knn), psi1 = Knm, and psi2 is unused.
+        # TODO: accept psi0, psi1, psi2 as arguments to handle uncertain inputs.
+        has_uncertain_inputs = False
+        psi0, psi1, psi2 = Knn_diag, Kmn.T, None
+
+        # factor Kmm # TODO: cache?
+        _Lm = jitchol(Kmm + const_jitter)
+
+        # The rather complex computations of A
+        if has_uncertain_inputs:
+            if likelihood.is_heteroscedastic:
+                psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
+            else:
+                psi2_beta = psi2.sum(0) * likelihood.precision
+            evals, evecs = linalg.eigh(psi2_beta)
+            clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
+            if not np.array_equal(evals, clipped_evals):
+                pass # print evals
+            tmp = evecs * np.sqrt(clipped_evals)
+            tmp = tmp.T
+        else:
+            if likelihood.is_heteroscedastic:
+                tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
+            else:
+                tmp = psi1 * (np.sqrt(likelihood.precision))
+        tmp, _ = dtrtrs(_Lm, np.asfortranarray(tmp.T), lower=1)
+        A = tdot(tmp)
+
+        # factor B
+        B = np.eye(num_inducing) + A
+        LB = jitchol(B)
+
+        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
+        psi1Vf = np.dot(psi1.T, likelihood.VVT_factor)
+
+        # back substitute C into psi1Vf
+        tmp, info1 = dtrtrs(_Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
+        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0)
+        # tmp, info2 = dpotrs(LB, tmp, lower=1)
+        tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
+        Cpsi1Vf, info3 = dtrtrs(_Lm, tmp, lower=1, trans=1)
+
+        # Compute dL_dKmm
+        tmp = tdot(_LBi_Lmi_psi1Vf)
+        data_fit = np.trace(tmp)
+        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + tmp)
+        tmp = -0.5 * DBi_plus_BiPBi
+        tmp += -0.5 * B * output_dim
+        tmp += output_dim * np.eye(num_inducing)
+        dL_dKmm = backsub_both_sides(_Lm, tmp)
+
+        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
+        dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
+        dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(_Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
+
+        if likelihood.is_heteroscedastic:
+            if has_uncertain_inputs:
+                dL_dpsi2 = likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
+            else:
+                dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * likelihood.precision.reshape(num_data, 1)).T).T
+                dL_dpsi2 = None
+        else:
+            dL_dpsi2 = likelihood.precision * dL_dpsi2_beta
+            if has_uncertain_inputs:
+                # repeat for each of the N psi_2 matrices
+                dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
+            else:
+                # subsume back into psi1 (==Kmn)
+                dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
+                dL_dpsi2 = None
+
+        # the partial derivative vector for the likelihood
+        if likelihood.size == 0:
+            # save computation here.
+            partial_for_likelihood = None
+        elif likelihood.is_heteroscedastic:
+            if has_uncertain_inputs:
+                raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
+            else:
+                LBi = chol_inv(LB)
+                Lmi_psi1, nil = dtrtrs(_Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
+                _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+
+                partial_for_likelihood = -0.5 * likelihood.precision + 0.5 * likelihood.V**2
+                partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2, 0))[:, None] * likelihood.precision**2
+                partial_for_likelihood += 0.5 * np.sum(mdot(LBi.T, LBi, Lmi_psi1) * Lmi_psi1, 0)[:, None] * likelihood.precision**2
+                partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T, _LBi_Lmi_psi1).T * likelihood.Y * likelihood.precision**2
+                partial_for_likelihood += 0.5 * np.dot(_LBi_Lmi_psi1Vf.T, _LBi_Lmi_psi1).T**2 * likelihood.precision**2
+        else:
+            # likelihood is not heteroscedastic
+            partial_for_likelihood = -0.5 * num_data * output_dim * likelihood.precision + 0.5 * likelihood.trYYT * likelihood.precision ** 2
+            partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(A) * likelihood.precision)
+            partial_for_likelihood += likelihood.precision * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
+
+        # compute the (lower bound on the) log marginal likelihood; the four terms
+        # get fresh names so as not to clobber the matrices A and B computed above
+        if likelihood.is_heteroscedastic:
+            lik_1 = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
+            lik_2 = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(A))
+        else:
+            lik_1 = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
+            lik_2 = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(A))
+        lik_3 = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2)
+        lik_4 = 0.5 * data_fit
+        log_marginal = lik_1 + lik_2 + lik_3 + lik_4 + likelihood.Z
+
+        # TODO: also construct and return a Posterior object (and the gradients
+        # computed above), as promised in the class docstring.
+        return log_marginal
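
Note: the scalar returned by DTCVar.inference above is the variational lower bound
on the log marginal likelihood for sparse GP regression, i.e. the usual DTC Gaussian
terms plus the trace correction -0.5*beta*(sum(psi0) - trace(A)), in the spirit of
Titsias (2009). Below is a minimal sketch of how the new class might be driven once
merged. It is illustrative only and not part of this patch: the GPy.kern.rbf and
GPy.likelihoods.Gaussian constructors are assumed from the 2013-era API, and the
likelihood object is assumed to expose the precision, trYYT, VVT_factor and Z
attributes used in the code above.

    import numpy as np
    import GPy
    from GPy.inference.latent_function_inference.dtcvar import DTCVar

    # toy 1-d regression data and a handful of inducing inputs
    X = np.random.rand(100, 1)
    Y = np.sin(3. * X) + 0.05 * np.random.randn(100, 1)
    Z = np.random.rand(10, 1)

    kern = GPy.kern.rbf(1)                    # assumed 2013-era kernel constructor
    Kmm = kern.K(Z)                           # covariance among inducing inputs (M x M)
    Kmn = kern.K(Z, X)                        # cross-covariance (M x N)
    Knn_diag = kern.Kdiag(X)                  # diagonal of the data covariance (N,)

    likelihood = GPy.likelihoods.Gaussian(Y)  # assumed likelihood wrapper
    bound = DTCVar().inference(Kmm, Kmn, Knn_diag, likelihood, Y)
    print bound                               # lower bound on the log marginal likelihood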