diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index ba81f096..5be3e944 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -74,25 +74,27 @@ class GP(Model):
 
     def _raw_predict(self, _Xnew, full_cov=False):
         """
-        Internal helper function for making predictions, does not account
-        for normalization or likelihood
+        Make predictions of the latent function values; does not account for normalization or likelihood.
 
         full_cov is a boolean which defines whether the full covariance
         matrix of the prediction is computed. If full_cov is False
         (default), only the diagonal of the covariance is returned.
+
+        $$
+        p(f_*|X_*, X, Y) = \int p(f_*|f, X_*)\,p(f|X, Y)\,df
+                         = N(f_* \mid K_{x_*x}(K_{xx} + \Sigma)^{-1}Y,\; K_{x_*x_*} - K_{x_*x}(K_{xx} + \Sigma)^{-1}K_{xx_*})
+        \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        $$
         """
         Kx = self.kern.K(_Xnew, self.X).T
-        #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
         WiKx = np.dot(self.posterior.woodbury_inv, Kx)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
             Kxx = self.kern.K(_Xnew)
-            #var = Kxx - tdot(LiKx.T)
-            var = np.dot(Kx.T, WiKx)
+            var = Kxx - np.dot(Kx.T, WiKx)
         else:
             Kxx = self.kern.Kdiag(_Xnew)
-            #var = Kxx - np.sum(LiKx*LiKx, 0)
             var = Kxx - np.sum(WiKx*Kx, 0)
             var = var.reshape(-1, 1)
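The predictive identities added to the docstring above can be checked outside of GPy with plain numpy. The following is a minimal sketch for the Gaussian likelihood case with a unit RBF kernel written out by hand; none of the names below (rbf, Winv, mu, var) are GPy API.

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.uniform(-3., 3., (20, 1))    # training inputs
    Y = np.sin(X)                        # training targets
    Xs = rng.uniform(-3., 3., (5, 1))    # test inputs X*
    noise = 0.1                          # Likelihood.variance, so Sigma = noise * I

    def rbf(A, B):
        # unit-variance, unit-lengthscale RBF kernel for 1-D inputs
        return np.exp(-0.5 * (A - B.T) ** 2)

    Kxx = rbf(X, X)     # K_{xx}
    Ksx = rbf(Xs, X)    # K_{x*x}
    Kss = rbf(Xs, Xs)   # K_{x*x*}

    Winv = np.linalg.inv(Kxx + noise * np.eye(len(X)))  # (K_{xx} + Sigma)^{-1}
    mu = Ksx.dot(Winv).dot(Y)                # K_{x*x} (K_{xx} + Sigma)^{-1} Y
    var = Kss - Ksx.dot(Winv).dot(Ksx.T)     # matches the full_cov=True branch above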
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 0b796171..7bf0ca2a 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -88,8 +88,9 @@ class SparseGP(GP):
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
             Kxx = self.kern.K(Xnew)
-            #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
-            var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
+            var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
+            #var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
+            var = var.squeeze()
         else:
             Kxx = self.kern.Kdiag(Xnew)
             var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py
index a996e1df..d3989731 100644
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@@ -73,20 +73,37 @@ class Posterior(object):
 
     @property
     def mean(self):
+        """
+        Posterior mean
+        $$
+        K_{xx}v
+        v := \texttt{Woodbury vector}
+        $$
+        """
         if self._mean is None:
             self._mean = np.dot(self._K, self.woodbury_vector)
         return self._mean
 
     @property
     def covariance(self):
+        """
+        Posterior covariance
+        $$
+        K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
+        W_{xx}^{-1} := \texttt{Woodbury inv}
+        $$
+        """
         if self._covariance is None:
             #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
-            self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
-            #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
+            self._covariance = self._K[:, :, None] - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
+            #old_covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
         return self._covariance.squeeze()
 
     @property
     def precision(self):
+        """
+        Inverse of the posterior covariance
+        """
         if self._precision is None:
             cov = np.atleast_3d(self.covariance)
             self._precision = np.zeros(cov.shape) # if one covariance per dimension
@@ -96,8 +113,15 @@
 
     @property
     def woodbury_chol(self):
+        """
+        return $L_{W}$, the lower triangular Cholesky decomposition of the Woodbury matrix
+        $$
+        L_{W}L_{W}^{\top} = W
+        W^{-1} := \texttt{Woodbury inv}
+        $$
+        """
         if self._woodbury_chol is None:
-            #compute woodbury chol from
+            #compute the woodbury chol from whichever quantities are available
             if self._woodbury_inv is not None:
                 winv = np.atleast_3d(self._woodbury_inv)
                 self._woodbury_chol = np.zeros(winv.shape)
@@ -121,6 +145,13 @@
 
     @property
     def woodbury_inv(self):
+        """
+        The inverse of the Woodbury matrix; in the Gaussian likelihood case it is defined as
+        $$
+        (K_{xx} + \Sigma_{xx})^{-1}
+        \Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        $$
+        """
         if self._woodbury_inv is None:
             self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
             #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
@@ -129,17 +160,22 @@
 
     @property
     def woodbury_vector(self):
+        """
+        Woodbury vector; in the Gaussian likelihood case it is defined as
+        $$
+        (K_{xx} + \Sigma)^{-1}Y
+        \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        $$
+        """
         if self._woodbury_vector is None:
             self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
         return self._woodbury_vector
 
     @property
     def K_chol(self):
+        """
+        Cholesky of the prior covariance K
+        """
         if self._K_chol is None:
             self._K_chol = jitchol(self._K)
         return self._K_chol
-
-
-
-
-
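In the Gaussian likelihood case, the quantities documented above reduce to a handful of linear algebra identities that can be sanity-checked with numpy/scipy. A minimal sketch, using a generic positive definite matrix as a stand-in for the kernel matrix; the variable names are illustrative, not GPy API.

    import numpy as np
    from scipy.linalg import cho_solve

    rng = np.random.RandomState(1)
    A = rng.randn(6, 6)
    K = A.dot(A.T) + 6 * np.eye(6)   # stand-in for the prior covariance K_{xx}
    Sigma = 0.1 * np.eye(6)          # Likelihood.variance
    Y = rng.randn(6, 1)

    W = K + Sigma                            # Woodbury matrix
    L = np.linalg.cholesky(W)                # woodbury_chol: L L^T = W
    Winv = cho_solve((L, True), np.eye(6))   # woodbury_inv: (K_{xx} + Sigma)^{-1}
    v = cho_solve((L, True), Y)              # woodbury_vector: (K_{xx} + Sigma)^{-1} Y

    post_mean = K.dot(v)                 # Posterior.mean: K_{xx} v
    post_cov = K - K.dot(Winv).dot(K)    # Posterior.covariance: K - K W^{-1} K

    np.testing.assert_allclose(Winv, np.linalg.inv(W))
    np.testing.assert_allclose(post_cov, post_cov.T)  # covariance is symmetric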
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/model_tests.py
similarity index 83%
rename from GPy/testing/unit_tests.py
rename to GPy/testing/model_tests.py
index 37a9f07d..2767b559 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/model_tests.py
@@ -6,6 +6,60 @@ import unittest
 import numpy as np
 import GPy
 
+class MiscTests(unittest.TestCase):
+    def setUp(self):
+        self.N = 20
+        self.N_new = 50
+        self.D = 1
+        self.X = np.random.uniform(-3., 3., (self.N, 1))
+        self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
+        self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
+
+    def test_raw_predict(self):
+        k = GPy.kern.RBF(1)
+        m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
+        m.randomize()
+        Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N) * m.Gaussian_noise.variance)
+        K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
+        mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(self.Y)
+
+        mu, covar = m._raw_predict(self.X_new, full_cov=True)
+        self.assertEqual(mu.shape, (self.N_new, self.D))
+        self.assertEqual(covar.shape, (self.N_new, self.N_new))
+        np.testing.assert_almost_equal(K_hat, covar)
+        np.testing.assert_almost_equal(mu_hat, mu)
+
+        mu, var = m._raw_predict(self.X_new)
+        self.assertEqual(mu.shape, (self.N_new, self.D))
+        self.assertEqual(var.shape, (self.N_new, 1))
+        np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
+        np.testing.assert_almost_equal(mu_hat, mu)
+
+    def test_sparse_raw_predict(self):
+        k = GPy.kern.RBF(1)
+        m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
+        m.randomize()
+        Z = m.Z[:]
+        X = self.X[:]
+
+        #Not easy to check that woodbury_inv itself is correct, as that requires a lengthy derivation; check the predictive covariance instead
+        Kinv = m.posterior.woodbury_inv
+        K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
+
+        mu, covar = m._raw_predict(self.X_new, full_cov=True)
+        self.assertEqual(mu.shape, (self.N_new, self.D))
+        self.assertEqual(covar.shape, (self.N_new, self.N_new))
+        np.testing.assert_almost_equal(K_hat, covar)
+        #np.testing.assert_almost_equal(mu_hat, mu)
+
+        mu, var = m._raw_predict(self.X_new)
+        self.assertEqual(mu.shape, (self.N_new, self.D))
+        self.assertEqual(var.shape, (self.N_new, 1))
+        np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
+        #np.testing.assert_almost_equal(mu_hat, mu)
+
+
 class GradientTests(unittest.TestCase):
     def setUp(self):
         ######################################
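The new tests also imply a quick interactive consistency check: the full_cov=False branch of _raw_predict should return exactly the diagonal of the full_cov=True branch. A short sketch using only calls that already appear in this patch:

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (20, 1))
    Y = np.sin(X) + np.random.randn(20, 1) * 0.05
    m = GPy.models.GPRegression(X, Y, kernel=GPy.kern.RBF(1))

    X_new = np.random.uniform(-3., 3., (50, 1))
    mu_full, covar = m._raw_predict(X_new, full_cov=True)
    mu_diag, var = m._raw_predict(X_new)

    np.testing.assert_almost_equal(mu_full, mu_diag)
    np.testing.assert_almost_equal(np.diag(covar)[:, None], var)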