mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-11 13:02:38 +02:00
fix the issue of negative prediction variance of normal GP
This commit is contained in:
parent
5f417565fb
commit
8b279175c5
6 changed files with 84 additions and 5 deletions
|
|
@ -212,6 +212,12 @@ class GP(Model):
|
||||||
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
|
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
|
||||||
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
|
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||||
"""
|
"""
|
||||||
|
if hasattr(self.posterior, '_raw_predict'):
|
||||||
|
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
|
||||||
|
if self.mean_function is not None:
|
||||||
|
mu += self.mean_function.f(Xnew)
|
||||||
|
return mu, var
|
||||||
|
|
||||||
if kern is None:
|
if kern is None:
|
||||||
kern = self.kern
|
kern = self.kern
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -127,6 +127,11 @@ class SparseGP(GP):
|
||||||
covariance changes, so if full_cov is False (standard), we return the variance
|
covariance changes, so if full_cov is False (standard), we return the variance
|
||||||
for each dimension [NxD].
|
for each dimension [NxD].
|
||||||
"""
|
"""
|
||||||
|
if hasattr(self.posterior, '_raw_predict'):
|
||||||
|
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
|
||||||
|
if self.mean_function is not None:
|
||||||
|
mu += self.mean_function.f(Xnew)
|
||||||
|
return mu, var
|
||||||
|
|
||||||
if kern is None: kern = self.kern
|
if kern is None: kern = self.kern
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,13 @@
|
||||||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from .posterior import Posterior
|
from .posterior import PosteriorExact as Posterior
|
||||||
from ...util.linalg import pdinv, dpotrs, tdot
|
from ...util.linalg import pdinv, dpotrs, tdot
|
||||||
from ...util import diag
|
from ...util import diag
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from . import LatentFunctionInference
|
from . import LatentFunctionInference
|
||||||
log_2_pi = np.log(2*np.pi)
|
log_2_pi = np.log(2*np.pi)
|
||||||
|
|
||||||
|
|
||||||
class ExactGaussianInference(LatentFunctionInference):
|
class ExactGaussianInference(LatentFunctionInference):
|
||||||
"""
|
"""
|
||||||
An object for inference when the likelihood is Gaussian.
|
An object for inference when the likelihood is Gaussian.
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
|
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot
|
||||||
|
|
||||||
class Posterior(object):
|
class Posterior(object):
|
||||||
"""
|
"""
|
||||||
|
|
@ -187,3 +187,35 @@ class Posterior(object):
|
||||||
if self._K_chol is None:
|
if self._K_chol is None:
|
||||||
self._K_chol = jitchol(self._K)
|
self._K_chol = jitchol(self._K)
|
||||||
return self._K_chol
|
return self._K_chol
|
||||||
|
|
||||||
|
class PosteriorExact(Posterior):
    """
    Posterior for exact (Gaussian-likelihood) inference.

    Prediction performs triangular solves against the woodbury Cholesky
    factor instead of multiplying by an explicitly formed inverse; the
    variance is then a PSD quadratic form subtracted from the prior, which
    prevents round-off from driving predictive variances negative.
    """

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        """
        Predict the latent function at the new inputs Xnew.

        :param kern: kernel to use for the cross-covariances (project kernel
            object exposing ``K`` and ``Kdiag``).
        :param Xnew: test inputs.
        :param pred_var: the inputs the posterior was computed on (training
            or inducing inputs).
        :param full_cov: if True return the full predictive covariance,
            otherwise only the per-point variance.
        :returns: tuple ``(mu, var)``; ``mu`` is always a 2D column array.
        :raises ValueError: if the stored Cholesky factor has an
            unexpected number of dimensions.
        """
        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
        if mu.ndim == 1:
            # Normalize to a column vector so callers always get 2D output.
            mu = mu.reshape(-1, 1)

        woodbury_chol = self._woodbury_chol
        if full_cov:
            Kxx = kern.K(Xnew)
            if woodbury_chol.ndim == 2:
                # var = Kxx - Kx^T (L L^T)^{-1} Kx via one triangular solve.
                tmp = dtrtrs(woodbury_chol, Kx)[0]
                var = Kxx - tdot(tmp.T)
            elif woodbury_chol.ndim == 3:
                # Missing data: one Cholesky factor per output dimension.
                var = np.empty((Kxx.shape[0], Kxx.shape[1], woodbury_chol.shape[2]))
                for i in range(var.shape[2]):
                    tmp = dtrtrs(woodbury_chol[:, :, i], Kx)[0]
                    var[:, :, i] = Kxx - tdot(tmp.T)
            else:
                # Original code fell through here leaving `var` unbound
                # (NameError); fail loudly instead.
                raise ValueError("Unexpected woodbury_chol dimensionality: %d"
                                 % woodbury_chol.ndim)
        else:
            Kxx = kern.Kdiag(Xnew)
            if woodbury_chol.ndim == 2:
                tmp = dtrtrs(woodbury_chol, Kx)[0]
                # Diagonal of the quadratic form: column-wise squared norms.
                var = (Kxx - np.square(tmp).sum(0))[:, None]
            elif woodbury_chol.ndim == 3:  # Missing data
                var = np.empty((Kxx.shape[0], woodbury_chol.shape[2]))
                for i in range(var.shape[1]):
                    tmp = dtrtrs(woodbury_chol[:, :, i], Kx)[0]
                    var[:, i] = Kxx - np.square(tmp).sum(0)
            else:
                raise ValueError("Unexpected woodbury_chol dimensionality: %d"
                                 % woodbury_chol.ndim)
        return mu, var
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,5 @@ class InferenceXTestCase(unittest.TestCase):
|
||||||
x, mi = m.infer_newX(m.Y, optimize=True)
|
x, mi = m.infer_newX(m.Y, optimize=True)
|
||||||
np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)
|
np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,44 @@ class MiscTests(unittest.TestCase):
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
m.predict(m.X)
|
m.predict(m.X)
|
||||||
|
|
||||||
|
def test_raw_predict_numerical_stability(self):
    """
    Test that the predicted variance of a normal GP stays non-negative
    in a numerically unstable setting (tiny fixed noise variance).

    Thanks simbartonels@github for reporting the bug and providing the
    following example.
    """
    # Set seed for reproducibility.
    np.random.seed(3)

    # Definition of the Branin test function.
    def branin(X):
        y = (X[:, 1] - 5.1 / (4 * np.pi ** 2) * X[:, 0] ** 2 + 5 * X[:, 0] / np.pi - 6) ** 2
        y += 10 * (1 - 1 / (8 * np.pi)) * np.cos(X[:, 0]) + 10
        return y

    # Training set defined as a 5*5 grid.
    xg1 = np.linspace(-5, 10, 5)
    xg2 = np.linspace(0, 15, 5)
    X = np.zeros((xg1.size * xg2.size, 2))
    for i, x1 in enumerate(xg1):
        for j, x2 in enumerate(xg2):
            X[i + xg1.size * j, :] = [x1, x2]
    Y = branin(X)[:, None]

    # Fit a GP with an ARD exponentiated-quadratic covariance function.
    k = GPy.kern.RBF(input_dim=2, ARD=True)
    m = GPy.models.GPRegression(X, Y, k)
    # Fix the noise variance at a tiny value to provoke instability.
    m.likelihood.variance.fix(1e-5)
    # Randomize the model and optimize.
    m.randomize()
    m.optimize()

    # Predict at 1e5 Monte Carlo samples. The `size` argument must be
    # integers: a float like 1e5 raises TypeError in modern numpy.
    Xp = np.random.uniform(size=(100000, 2))
    Xp[:, 0] = Xp[:, 0] * 15 - 5
    Xp[:, 1] = Xp[:, 1] * 15
    _, var = m.predict(Xp)
    self.assertTrue(np.all(var >= 0.))
|
||||||
|
|
||||||
def test_raw_predict(self):
|
def test_raw_predict(self):
|
||||||
k = GPy.kern.RBF(1)
|
k = GPy.kern.RBF(1)
|
||||||
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
|
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue