Fix the issue of negative predicted variance for the normal GP

Zhenwen Dai 2016-01-21 11:22:57 +00:00
parent 5f417565fb
commit 8b279175c5
6 changed files with 84 additions and 5 deletions
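In short, the fix stops the predictive variance from being computed through an explicitly formed (K_xx + Sigma)^{-1} and instead uses triangular solves against its Cholesky factor. Below is a minimal standalone sketch (plain numpy/scipy, not GPy code; the kernel, sizes and the 1e-8 noise level are illustrative assumptions) of why that matters:

    # Minimal sketch (not GPy code): with a tiny noise variance, K + sigma^2*I is
    # ill-conditioned, and evaluating  k(x*,x*) - k*^T (K + sigma^2 I)^{-1} k*
    # through an explicit inverse can round to a small negative number.  A
    # triangular solve against the Cholesky factor (what this commit switches to
    # via dtrtrs) is far better behaved.
    import numpy as np
    from scipy.linalg import solve_triangular

    def rbf(A, B, lengthscale=1.0):
        # Squared-exponential kernel matrix between the rows of A and B.
        d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
        return np.exp(-0.5 * d2 / lengthscale ** 2)

    np.random.seed(0)
    X = np.random.randn(300, 2)                    # training inputs
    Xs = X + 1e-8 * np.random.randn(*X.shape)      # test points almost on top of them
    noise = 1e-8                                   # tiny, fixed noise variance

    K = rbf(X, X) + noise * np.eye(len(X))         # K_xx + Sigma
    Ks = rbf(X, Xs)                                # K_{x x*}
    kss = np.ones(len(Xs))                         # k(x*, x*) for the RBF kernel

    # Route 1: explicit inverse -- the diagonal can dip below zero in floating point.
    var_inv = kss - np.sum(Ks * (np.linalg.inv(K) @ Ks), axis=0)

    # Route 2: triangular solve against the Cholesky factor (the approach of the fix).
    L = np.linalg.cholesky(K)
    tmp = solve_triangular(L, Ks, lower=True)
    var_chol = kss - np.square(tmp).sum(0)

    print(var_inv.min(), var_chol.min())  # route 1 typically goes (slightly) negative here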


@@ -212,6 +212,12 @@ class GP(Model):
            = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{x*x}(K_{xx} + \Sigma)^{-1}K_{xx*})
            \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
        """
        if hasattr(self.posterior, '_raw_predict'):
            # Delegate to the posterior's own (numerically stable) predictor when it provides one.
            mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
            if self.mean_function is not None:
                mu += self.mean_function.f(Xnew)
            return mu, var
        if kern is None:
            kern = self.kern
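For reference, the predictive distribution from the docstring above, written out, together with the Cholesky-based form that the delegated _raw_predict evaluates below. This is a sketch of the algebra, assuming (as in exact Gaussian inference) that woodbury_chol holds a Cholesky factor L of K_{xx} + \Sigma and woodbury_vector holds \alpha:

    p(f_* \mid X_*, X, Y) = \mathcal{N}\left( K_{x_* x}(K_{xx}+\Sigma)^{-1} Y,\; K_{x_* x_*} - K_{x_* x}(K_{xx}+\Sigma)^{-1} K_{x x_*} \right)

    \alpha = (K_{xx}+\Sigma)^{-1} Y, \qquad L L^\top = K_{xx}+\Sigma

    \mu_* = K_{x_* x}\,\alpha, \qquad v = L^{-1} K_{x x_*}, \qquad \mathrm{Var}[f_*] = K_{x_* x_*} - v^\top v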


@@ -127,6 +127,11 @@ class SparseGP(GP):
        covariance changes, so if full_cov is False (standard), we return the variance
        for each dimension [NxD].
        """
        if hasattr(self.posterior, '_raw_predict'):
            # Same delegation as in GP above: use the posterior's own predictor when available.
            mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
            if self.mean_function is not None:
                mu += self.mean_function.f(Xnew)
            return mu, var
        if kern is None: kern = self.kern


@@ -1,14 +1,13 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

-from .posterior import Posterior
+from .posterior import PosteriorExact as Posterior
from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np
from . import LatentFunctionInference

log_2_pi = np.log(2*np.pi)

class ExactGaussianInference(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian.


@@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
-from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
+from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot

class Posterior(object):
    """
@@ -187,3 +187,35 @@ class Posterior(object):
        if self._K_chol is None:
            self._K_chol = jitchol(self._K)
        return self._K_chol

class PosteriorExact(Posterior):

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        # Cross-covariance between the predictive variable (X for GP, Z for SparseGP)
        # and the test inputs.
        Kx = kern.K(pred_var, Xnew)
        # Predictive mean: K_{x*x} (K_{xx} + Sigma)^{-1} Y, via the cached woodbury vector.
        mu = np.dot(Kx.T, self.woodbury_vector)
        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        if full_cov:
            Kxx = kern.K(Xnew)
            if self._woodbury_chol.ndim == 2:
                # Triangular solve against the woodbury Cholesky factor instead of an
                # explicit inverse: much less prone to producing negative variances.
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                var = Kxx - tdot(tmp.T)
            elif self._woodbury_chol.ndim == 3:  # Missing data
                var = np.empty((Kxx.shape[0], Kxx.shape[1], self._woodbury_chol.shape[2]))
                for i in range(var.shape[2]):
                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, :, i] = (Kxx - tdot(tmp.T))
        else:
            Kxx = kern.Kdiag(Xnew)
            if self._woodbury_chol.ndim == 2:
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                # Diagonal of the predictive covariance: k(x*,x*) minus a sum of squares
                # of the solved columns.
                var = (Kxx - np.square(tmp).sum(0))[:, None]
            elif self._woodbury_chol.ndim == 3:  # Missing data
                var = np.empty((Kxx.shape[0], self._woodbury_chol.shape[2]))
                for i in range(var.shape[1]):
                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, i] = (Kxx - np.square(tmp).sum(0))
        return mu, var
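To connect this back to the model-level hunks above: GP and SparseGP now delegate here whenever the posterior exposes _raw_predict. A hedged usage sketch follows; the argument names are taken from the GP hunk, and `m` and `Xnew` are assumed to be a fitted GPy.models.GPRegression instance and a test-input array:

    # Illustrative only: mirrors the call made in the GP hunk above.
    mu, var = m.posterior._raw_predict(kern=m.kern,
                                       Xnew=Xnew,
                                       pred_var=m._predictive_variable,  # X for GP, Z for SparseGP
                                       full_cov=False)
    # For the single-output (ndim == 2) branch, `mu` and `var` are both (N*, 1);
    # `var` is the latent-function variance, with the noise term handled by the
    # likelihood at the predict() level.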


@@ -50,6 +50,5 @@ class InferenceXTestCase(unittest.TestCase):
        x, mi = m.infer_newX(m.Y, optimize=True)
        np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)

if __name__ == "__main__":
    unittest.main()


@@ -22,6 +22,44 @@ class MiscTests(unittest.TestCase):
        self.assertTrue(m.checkgrad())
        m.predict(m.X)
    def test_raw_predict_numerical_stability(self):
        """
        Check that the predicted variance of a normal GP does not go negative
        in numerically unstable situations.
        Thanks simbartonels@github for reporting the bug and providing the
        following example.
        """
        # set seed for reproducibility
        np.random.seed(3)
        # Definition of the Branin test function
        def branin(X):
            y = (X[:, 1] - 5.1/(4*np.pi**2)*X[:, 0]**2 + 5*X[:, 0]/np.pi - 6)**2
            y += 10*(1 - 1/(8*np.pi))*np.cos(X[:, 0]) + 10
            return y
        # Training set defined as a 5*5 grid:
        xg1 = np.linspace(-5, 10, 5)
        xg2 = np.linspace(0, 15, 5)
        X = np.zeros((xg1.size * xg2.size, 2))
        for i, x1 in enumerate(xg1):
            for j, x2 in enumerate(xg2):
                X[i + xg1.size*j, :] = [x1, x2]
        Y = branin(X)[:, None]
        # Fit a GP
        # Create an exponentiated quadratic (RBF) covariance function with ARD
        k = GPy.kern.RBF(input_dim=2, ARD=True)
        # Build a GP regression model
        m = GPy.models.GPRegression(X, Y, k)
        # Fix the noise variance to a small value to provoke ill-conditioning
        m.likelihood.variance.fix(1e-5)
        # Randomize the model and optimize
        m.randomize()
        m.optimize()
        # Predict at 1e5 points drawn uniformly over the Branin domain
        Xp = np.random.uniform(size=(int(1e5), 2))
        Xp[:, 0] = Xp[:, 0]*15 - 5
        Xp[:, 1] = Xp[:, 1]*15
        _, var = m.predict(Xp)
        # The predicted variance must never be negative
        self.assertTrue(np.all(var >= 0.))
    def test_raw_predict(self):
        k = GPy.kern.RBF(1)
        m = GPy.models.GPRegression(self.X, self.Y, kernel=k)