From 2001cd6dfd77300e1286245cf68897c17d3f0af0 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 1 Apr 2016 15:27:03 +0100 Subject: [PATCH] [predict] added noiseless convenience function to gp, bc of whining about it... --- GPy/core/gp.py | 49 ++++++++++++++++++++++++++++++++------ GPy/testing/model_tests.py | 24 +++++++++---------- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index d9db66ef..c2e67338 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -217,9 +217,13 @@ class GP(Model): mu += self.mean_function.f(Xnew) return mu, var - def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None): + def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True): """ - Predict the function(s) at the new point(s) Xnew. + Predict the function(s) at the new point(s) Xnew. This includes the likelihood + variance added to the predicted underlying function (usually referred to as f). + + In order to predict without adding in the likelihood give + `include_likelihood=False`, or refer to self.predict_noiseless(). :param Xnew: The points at which to make a prediction :type Xnew: np.ndarray (Nnew x self.input_dim) @@ -229,6 +233,8 @@ class GP(Model): :param Y_metadata: metadata about the predicting point to pass to the likelihood :param kern: The kernel to use for prediction (defaults to the model kern). this is useful for examining e.g. subprocesses. + :param bool include_likelihood: Whether or not to add likelihood noise to the predicted underlying latent function f. 
+ :returns: (mean, var): mean: posterior mean, a Numpy array, Nnew x self.input_dim var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise @@ -243,11 +249,40 @@ class GP(Model): if self.normalizer is not None: mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var) - # now push through likelihood - if likelihood is None: - likelihood = self.likelihood - mean, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata) - return mean, var + if include_likelihood: + # now push through likelihood + if likelihood is None: + likelihood = self.likelihood + mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata) + return mu, var + + def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None): + """ + Convenience function to predict the underlying function of the GP (often + referred to as f) without adding the likelihood variance on the + prediction function. + + This is most likely what you want to use for your predictions. + + :param Xnew: The points at which to make a prediction + :type Xnew: np.ndarray (Nnew x self.input_dim) + :param full_cov: whether to return the full covariance matrix, or just + the diagonal + :type full_cov: bool + :param Y_metadata: metadata about the predicting point to pass to the likelihood + :param kern: The kernel to use for prediction (defaults to the model + kern). this is useful for examining e.g. subprocesses. + + :returns: (mean, var): + mean: posterior mean, a Numpy array, Nnew x self.input_dim + var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise + + If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. + This is to allow for different normalizations of the output dimensions. + + Note: If you want the predictive quantiles (e.g. 
95% confidence interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`. """ + return self.predict(Xnew, full_cov, Y_metadata, kern, None, False) def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None): """ diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 59a086cd..a148e43d 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -27,7 +27,7 @@ class MiscTests(unittest.TestCase): Test whether the predicted variance of normal GP goes negative under numerical unstable situation. Thanks simbartonels@github for reporting the bug and providing the following example. """ - + # set seed for reproducability np.random.seed(3) # Definition of the Branin test function @@ -69,13 +69,13 @@ class MiscTests(unittest.TestCase): K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new)) mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized) - mu, covar = m._raw_predict(self.X_new, full_cov=True) + mu, covar = m.predict_noiseless(self.X_new, full_cov=True) self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(covar.shape, (self.N_new, self.N_new)) np.testing.assert_almost_equal(K_hat, covar) np.testing.assert_almost_equal(mu_hat, mu) - mu, var = m._raw_predict(self.X_new) + mu, var = m.predict_noiseless(self.X_new) self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(var.shape, (self.N_new, 1)) np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) @@ -166,7 +166,7 @@ class MiscTests(unittest.TestCase): # S = \int (f(x) - m)^2q(x|mu,S) dx = \int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S Y_mu_true = 2*X_pred_mu Y_var_true = 4*X_pred_var - Y_mu_pred, Y_var_pred = m._raw_predict(X_pred) + Y_mu_pred, Y_var_pred = m.predict_noiseless(X_pred) np.testing.assert_allclose(Y_mu_true, Y_mu_pred, rtol=1e-4) np.testing.assert_allclose(Y_var_true, Y_var_pred, rtol=1e-4) @@ -181,13 +181,13 @@ class 
MiscTests(unittest.TestCase): K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new)) K_hat = np.clip(K_hat, 1e-15, np.inf) - mu, covar = m._raw_predict(self.X_new, full_cov=True) + mu, covar = m.predict_noiseless(self.X_new, full_cov=True) self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(covar.shape, (self.N_new, self.N_new)) np.testing.assert_almost_equal(K_hat, covar) # np.testing.assert_almost_equal(mu_hat, mu) - mu, var = m._raw_predict(self.X_new) + mu, var = m.predict_noiseless(self.X_new) self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(var.shape, (self.N_new, 1)) np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) @@ -365,7 +365,7 @@ class MiscTests(unittest.TestCase): warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k, warping_function=warp_f) warp_m.optimize() warp_preds = warp_m.predict(self.X) - + np.testing.assert_almost_equal(preds, warp_preds) @unittest.skip('Comment this to plot the modified sine function') @@ -378,7 +378,7 @@ class MiscTests(unittest.TestCase): Y = np.sin(X) + np.random.normal(0,0.1,151) Y = np.exp(Y) - 5 #Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y]) + 0 - + #np.seterr(over='raise') import matplotlib.pyplot as plt warp_k = GPy.kern.RBF(1) @@ -390,7 +390,7 @@ class MiscTests(unittest.TestCase): #warp_m.randomize() #warp_m['.*warp_tanh.psi*'][:,0:2].constrain_bounded(0,100) #warp_m['.*warp_tanh.psi*'][:,0:1].constrain_fixed(1) - + #print(warp_m.checkgrad()) #warp_m.plot() #plt.show() @@ -406,7 +406,7 @@ class MiscTests(unittest.TestCase): warp_f.plot(X.min()-10, X.max()+10) plt.show() - + class GradientTests(np.testing.TestCase): def setUp(self): @@ -738,12 +738,12 @@ class GradientTests(np.testing.TestCase): lik = GPy.likelihoods.Gaussian() m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik) self.assertTrue(m.checkgrad()) - + def test_ssgplvm(self): from GPy import kern from GPy.models import 
SSGPLVM from GPy.examples.dimensionality_reduction import _simulate_matern - + D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False) Y = Ylist[0]