From 0e2ec01839d9ba78e6da3816e3c582e46858b5f3 Mon Sep 17 00:00:00 2001
From: Andrei Paleyes <paleyes@amazon.com>
Date: Fri, 5 Jan 2018 11:40:59 +0000
Subject: [PATCH] Implemented utility function to compute covariance between
 points in GP Model

---
 GPy/core/gp.py             | 22 +++++++++++++
 GPy/testing/model_tests.py | 63 ++++++++++++++++++++++++++++----------
 2 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 7bad7648..6e28d5b9 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -8,6 +8,7 @@ from .mapping import Mapping
 from .. import likelihoods
 from .. import kern
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
+from ..util.linalg import dtrtrs
 from ..util.normalizer import Standardize
 from paramz import ObsAr
 
@@ -678,3 +679,24 @@ class GP(Model):
         """
         mu_star, var_star = self._raw_predict(x_test)
         return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
+
+    def posterior_covariance(self, X1, X2):
+        """
+        Computes the posterior covariance between points.
+
+        :param X1: some input observations
+        :param X2: other input observations
+        """
+        # ndim == 3 is a model for missing data
+        if self.posterior.woodbury_chol.ndim != 2:
+            raise RuntimeError("This method does not support posterior for missing data models")
+
+        Kx1 = self.kern.K(self.X, X1)
+        Kx2 = self.kern.K(self.X, X2)
+        K12 = self.kern.K(X1, X2)
+
+        tmp1 = dtrtrs(self.posterior.woodbury_chol, Kx1)[0]
+        tmp2 = dtrtrs(self.posterior.woodbury_chol, Kx2)[0]
+        var = K12 - tmp1.T.dot(tmp2)
+
+        return var
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index 68e95ec0..c8b10a54 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -259,29 +259,18 @@ class MiscTests(unittest.TestCase):
         np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
 
     def test_missing_data(self):
-        from GPy import kern
-        from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
-        from GPy.examples.dimensionality_reduction import _simulate_matern
+        Q = 4
 
-        D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
-        _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
-        Y = Ylist[0]
-
-        inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # 80% missing data
-        Ymissing = Y.copy()
-        Ymissing[inan] = np.nan
-
-        k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
-        m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
-                          kernel=k, missing_data=True)
+        k = GPy.kern.Linear(Q, ARD=True) + GPy.kern.White(Q, np.exp(-2)) # + kern.bias(Q)
+        m = _create_missing_data_model(k, Q)
         assert(m.checkgrad())
         mul, varl = m.predict(m.X)
 
-        k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
-        m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
-                          kernel=k, missing_data=True)
+        k = GPy.kern.RBF(Q, ARD=True) + GPy.kern.White(Q, np.exp(-2)) # + kern.bias(Q)
+        m2 = _create_missing_data_model(k, Q)
         assert(m.checkgrad())
         m2.kern.rbf.lengthscale[:] = 1e6
+
         m2.X[:] = m.X.param_array
         m2.likelihood[:] = m.likelihood[:]
         m2.kern.white[:] = m.kern.white[:]
@@ -1082,6 +1071,46 @@ class GradientTests(np.testing.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
+    def test_posterior_covariance(self):
+        k = GPy.kern.Poly(2, order=1)
+        X1 = np.array([
+                 [-2, 2],
+                 [-1, 1]
+             ])
+        X2 = np.array([
+                 [2, 3],
+                 [-1, 3]
+             ])
+        Y = np.array([[1], [2]])
+        m = GPy.models.GPRegression(X1, Y, kernel=k)
+
+        result = m.posterior_covariance(X1, X2)
+        expected = np.array([[0.4, 2.2], [1.0, 1.0]]) / 3.0
+
+        self.assertTrue(np.allclose(result, expected))
+
+    def test_posterior_covariance_missing_data(self):
+        Q = 4
+        k = GPy.kern.Linear(Q, ARD=True)
+        m = _create_missing_data_model(k, Q)
+
+        with self.assertRaises(RuntimeError):
+            m.posterior_covariance(np.array([[1], [2]]), np.array([[3], [4]]))
+
+def _create_missing_data_model(kernel, Q):
+    D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
+    _, _, Ylist = GPy.examples.dimensionality_reduction._simulate_matern(D1, D2, D3, N, num_inducing, False)
+    Y = Ylist[0]
+
+    inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # 80% missing data
+    Ymissing = Y.copy()
+    Ymissing[inan] = np.nan
+
+    m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
+                      kernel=kernel, missing_data=True)
+
+    return m
+
 if __name__ == "__main__":
     print("Running unit tests, please be (very) patient...")
     unittest.main()