Fix the issue of negative predictive variance in the exact (normal) GP

This commit is contained in:
Zhenwen Dai 2016-01-21 11:22:57 +00:00
parent 5f417565fb
commit 8b279175c5
6 changed files with 84 additions and 5 deletions

View file

@ -212,6 +212,12 @@ class GP(Model):
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
"""
if hasattr(self.posterior, '_raw_predict'):
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
return mu, var
if kern is None:
kern = self.kern

View file

@ -126,7 +126,12 @@ class SparseGP(GP):
The implementation of that will follow. However, for each dimension the
covariance changes, so if full_cov is False (standard), we return the variance
for each dimension [NxD].
"""
"""
if hasattr(self.posterior, '_raw_predict'):
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
return mu, var
if kern is None: kern = self.kern

View file

@ -1,14 +1,13 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .posterior import Posterior
from .posterior import PosteriorExact as Posterior
from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class ExactGaussianInference(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian.

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot
class Posterior(object):
"""
@ -187,3 +187,35 @@ class Posterior(object):
if self._K_chol is None:
self._K_chol = jitchol(self._K)
return self._K_chol
class PosteriorExact(Posterior):
    """
    Posterior for exact Gaussian inference.

    Overrides prediction to compute the predictive variance through the
    Cholesky factor of the woodbury matrix (triangular solves) instead of an
    explicit inverse, which keeps the predicted variance non-negative in
    numerically unstable situations.
    """

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        """
        Predict the latent function at new inputs.

        :param kern: kernel used to compute cross/test covariances.
        :param Xnew: (M x Q) array of test inputs.
        :param pred_var: inputs the posterior was formed on (the predictive
            variable, e.g. the training inputs or inducing inputs).
        :param full_cov: if True return the full (M x M) predictive
            covariance, otherwise only the per-point variances.
        :returns: (mu, var) — posterior mean and (co)variance of the latent
            function at Xnew.
        """
        # Cross-covariance between the predictive variable and the test inputs.
        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
        if mu.ndim == 1:
            # Normalise the mean to an (M x 1) column vector.
            mu = mu.reshape(-1, 1)

        if full_cov:
            Kxx = kern.K(Xnew)
            if self._woodbury_chol.ndim == 2:
                # var = Kxx - Kx^T (L L^T)^{-1} Kx, computed via a triangular
                # solve so the subtracted term is a Gram matrix (PSD).
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                var = Kxx - tdot(tmp.T)
            elif self._woodbury_chol.ndim == 3:
                # Missing data: one Cholesky factor per output dimension.
                var = np.empty((Kxx.shape[0], Kxx.shape[1], self._woodbury_chol.shape[2]))
                for i in range(var.shape[2]):
                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, :, i] = Kxx - tdot(tmp.T)
        else:
            Kxx = kern.Kdiag(Xnew)
            if self._woodbury_chol.ndim == 2:
                # Diagonal analogue: subtract the column norms of the solve.
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                var = (Kxx - np.square(tmp).sum(0))[:, None]
            elif self._woodbury_chol.ndim == 3:
                # Missing data: one Cholesky factor per output dimension.
                var = np.empty((Kxx.shape[0], self._woodbury_chol.shape[2]))
                for i in range(var.shape[1]):
                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, i] = Kxx - np.square(tmp).sum(0)
        return mu, var

View file

@ -50,6 +50,5 @@ class InferenceXTestCase(unittest.TestCase):
x, mi = m.infer_newX(m.Y, optimize=True)
np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)
if __name__ == "__main__":
unittest.main()

View file

@ -22,6 +22,44 @@ class MiscTests(unittest.TestCase):
self.assertTrue(m.checkgrad())
m.predict(m.X)
def test_raw_predict_numerical_stability(self):
"""
Test whether the predicted variance of normal GP goes negative under numerical unstable situation.
Thanks simbartonels@github for reporting the bug and providing the following example.
"""
# set seed for reproducability
np.random.seed(3)
# Definition of the Branin test function
def branin(X):
y = (X[:,1]-5.1/(4*np.pi**2)*X[:,0]**2+5*X[:,0]/np.pi-6)**2
y += 10*(1-1/(8*np.pi))*np.cos(X[:,0])+10
return(y)
# Training set defined as a 5*5 grid:
xg1 = np.linspace(-5,10,5)
xg2 = np.linspace(0,15,5)
X = np.zeros((xg1.size * xg2.size,2))
for i,x1 in enumerate(xg1):
for j,x2 in enumerate(xg2):
X[i+xg1.size*j,:] = [x1,x2]
Y = branin(X)[:,None]
# Fit a GP
# Create an exponentiated quadratic plus bias covariance function
k = GPy.kern.RBF(input_dim=2, ARD = True)
# Build a GP model
m = GPy.models.GPRegression(X,Y,k)
# fix the noise variance
m.likelihood.variance.fix(1e-5)
# Randomize the model and optimize
m.randomize()
m.optimize()
# Compute the mean of model prediction on 1e5 Monte Carlo samples
Xp = np.random.uniform(size=(1e5,2))
Xp[:,0] = Xp[:,0]*15-5
Xp[:,1] = Xp[:,1]*15
_, var = m.predict(Xp)
self.assertTrue(np.all(var>=0.))
def test_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)