the Opper-Archambeau method is now implemented as an inference method in the GPy style

This commit is contained in:
James Hensman 2015-08-18 11:39:27 +01:00
parent ea3bfbb597
commit 7ba26eda1c
4 changed files with 34 additions and 86 deletions

View file

@ -183,7 +183,7 @@ class GP(Model):
"""
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False, kern=None):
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
For making predictions, does not account for normalization or likelihood
@ -199,24 +199,30 @@ class GP(Model):
if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
Kx = kern.K(self.X, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx)
Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
var[var<0.] = 0.
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):

View file

@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
from .dtc import DTC
from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch
#from .svgp import SVGP
from .var_gauss import VarGauss
# class FullLatentFunctionData(object):
#

View file

@ -2,19 +2,16 @@
# Distributed under the terms of the GNU General public License, see LICENSE.txt
import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core import GP
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..likelihoods import Gaussian
from ..inference.latent_function_inference import VarGauss
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
class GPVariationalGaussianApproximation(GP):
"""
The Variational Gaussian Approximation revisited
@ -26,71 +23,15 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
Model.__init__(self,'Variational GP')
if likelihood is None:
likelihood = Gaussian()
# accept the construction arguments
self.X = ObsAr(X)
self.Y = Y
self.num_data, self.input_dim = self.X.shape
self.Y_metadata = Y_metadata
def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
self.kern = kernel
self.likelihood = likelihood
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
num_data = Y.shape[0]
self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(num_data))
inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
def log_likelihood(self):
return self._log_lik
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
if dF_dthetaL is not None:
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
self._log_lik = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
"""
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
Kux = self.kern.K(self.X, Xnew)
mu = np.dot(Kux.T, self.alpha)
WiKux = np.dot(Wi, Kux)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return mu, var.reshape(-1,1)

View file

@ -509,7 +509,8 @@ class GradientTests(np.testing.TestCase):
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
self.assertTrue(m.checkgrad())