diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index aeee34ac..ddef0647 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -183,7 +183,7 @@ class GP(Model):
         """
         return self._log_marginal_likelihood
 
-    def _raw_predict(self, _Xnew, full_cov=False, kern=None):
+    def _raw_predict(self, Xnew, full_cov=False, kern=None):
         """
         For making predictions, does not account for normalization or likelihood
 
@@ -199,24 +199,30 @@ class GP(Model):
         if kern is None:
             kern = self.kern
 
-        Kx = kern.K(_Xnew, self.X).T
-        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
+        Kx = kern.K(self.X, Xnew)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
-            Kxx = kern.K(_Xnew)
-            var = Kxx - np.dot(Kx.T, WiKx)
+            Kxx = kern.K(Xnew)
+            if self.posterior.woodbury_inv.ndim == 2:
+                var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
+            elif self.posterior.woodbury_inv.ndim == 3:
+                var = np.empty((Kxx.shape[0], Kxx.shape[1], self.posterior.woodbury_inv.shape[2]))
+                for i in range(var.shape[2]):
+                    var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
+            var = var
         else:
-            Kxx = kern.Kdiag(_Xnew)
-            var = Kxx - np.sum(WiKx*Kx, 0)
-            var = var.reshape(-1, 1)
-            var[var<0.] = 0.
+            Kxx = kern.Kdiag(Xnew)
+            if self.posterior.woodbury_inv.ndim == 2:
+                var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:, None]
+            elif self.posterior.woodbury_inv.ndim == 3:
+                var = np.empty((Kxx.shape[0], self.posterior.woodbury_inv.shape[2]))
+                for i in range(var.shape[1]):
+                    var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
+            var = var
+        #add in the mean function
+        if self.mean_function is not None:
+            mu += self.mean_function.f(Xnew)
+
-        #force mu to be a column vector
-        if len(mu.shape)==1: mu = mu[:,None]
-
-        #add the mean function in
-        if not self.mean_function is None:
-            mu += self.mean_function.f(_Xnew)
         return mu, var
 
     def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py
index f927aba4..7bdd6ff2 100644
--- a/GPy/inference/latent_function_inference/__init__.py
+++ b/GPy/inference/latent_function_inference/__init__.py
@@ -69,7 +69,7 @@
 from .expectation_propagation_dtc import EPDTC
 from .dtc import DTC
 from .fitc import FITC
 from .var_dtc_parallel import VarDTC_minibatch
-#from .svgp import SVGP
+from .var_gauss import VarGauss
 # class FullLatentFunctionData(object):
 #
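
Note on the _raw_predict change above: when woodbury_inv is 3-dimensional (one slice per output), the diagonal-variance branch loops over outputs. The following is a standalone NumPy sketch, not part of the patch and not the GPy API (array names are stand-ins), checking that the patch's loop agrees with an equivalent einsum:

# Stand-in arrays, not the GPy API: verify the looped diagonal-variance
# computation for a 3-d woodbury_inv against a vectorised einsum.
import numpy as np

N, M, P = 5, 3, 2                       # training points, test points, outputs
rng = np.random.default_rng(0)
Kx = rng.normal(size=(N, M))            # stand-in for kern.K(self.X, Xnew)
Kxx = rng.normal(size=M) ** 2           # stand-in for kern.Kdiag(Xnew)
Wi = rng.normal(size=(N, N, P))         # stand-in for posterior.woodbury_inv

# looped form, as in the patch
var_loop = np.empty((M, P))
for i in range(P):
    var_loop[:, i] = Kxx - np.sum(np.dot(Wi[:, :, i].T, Kx) * Kx, 0)

# vectorised equivalent: var[m, p] = Kxx[m] - sum_{n,k} Kx[n,m] Wi[n,k,p] Kx[k,m]
var_einsum = Kxx[:, None] - np.einsum('nm,nkp,km->mp', Kx, Wi, Kx)
assert np.allclose(var_loop, var_einsum)
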
diff --git a/GPy/models/gp_var_gauss.py b/GPy/models/gp_var_gauss.py
index 9f2229f0..ca68ed14 100644
--- a/GPy/models/gp_var_gauss.py
+++ b/GPy/models/gp_var_gauss.py
@@ -2,19 +2,16 @@
 # Distributed under the terms of the GNU General public License, see LICENSE.txt
 
 import numpy as np
-from scipy import stats
-from scipy.special import erf
-from ..core.model import Model
+from ..core import GP
 from ..core.parameterization import ObsAr
 from .. import kern
 from ..core.parameterization.param import Param
-from ..util.linalg import pdinv
-from ..likelihoods import Gaussian
+from ..inference.latent_function_inference import VarGauss
 
 log_2_pi = np.log(2*np.pi)
 
-class GPVariationalGaussianApproximation(Model):
+class GPVariationalGaussianApproximation(GP):
     """
     The Variational Gaussian Approximation revisited
 
@@ -26,71 +23,15 @@ class GPVariationalGaussianApproximation(Model):
         pages = {786--792},
         }
     """
-    def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
-        Model.__init__(self,'Variational GP')
-        if likelihood is None:
-            likelihood = Gaussian()
-        # accept the construction arguments
-        self.X = ObsAr(X)
-        self.Y = Y
-        self.num_data, self.input_dim = self.X.shape
-        self.Y_metadata = Y_metadata
+    def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
 
-        self.kern = kernel
-        self.likelihood = likelihood
-        self.link_parameter(self.kern)
-        self.link_parameter(self.likelihood)
+        num_data = Y.shape[0]
+        self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
+        self.beta = Param('beta', np.ones(num_data))
+
+        inf = VarGauss(self.alpha, self.beta)
+        super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
 
-        self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
-        self.beta = Param('beta', np.ones(self.num_data))
         self.link_parameter(self.alpha)
         self.link_parameter(self.beta)
 
-    def log_likelihood(self):
-        return self._log_lik
-
-    def parameters_changed(self):
-        K = self.kern.K(self.X)
-        m = K.dot(self.alpha)
-        KB = K*self.beta[:, None]
-        BKB = KB*self.beta[None, :]
-        A = np.eye(self.num_data) + BKB
-        Ai, LA, _, Alogdet = pdinv(A)
-        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
-        var = np.diag(Sigma).reshape(-1,1)
-
-        F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
-        if dF_dthetaL is not None:
-            self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
-        dF_da = np.dot(K, dF_dm)
-        SigmaB = Sigma*self.beta
-        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
-        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
-        dKL_da = m
-        A_A2 = Ai - Ai.dot(Ai)
-        dKL_db = np.diag(np.dot(KB.T, A_A2))
-        self._log_lik = F.sum() - KL
-        self.alpha.gradient = dF_da - dKL_da
-        self.beta.gradient = dF_db - dKL_db
-
-        # K-gradients
-        dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
-        tmp = Ai*self.beta[:, None]/self.beta[None, :]
-        dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
-        self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
-
-    def _raw_predict(self, Xnew):
-        """
-        Predict the function(s) at the new point(s) Xnew.
-
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        """
-        Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
-        Kux = self.kern.K(self.X, Xnew)
-        mu = np.dot(Kux.T, self.alpha)
-        WiKux = np.dot(Wi, Kux)
-        Kxx = self.kern.Kdiag(Xnew)
-        var = Kxx - np.sum(WiKux*Kux, 0)
-
-        return mu, var.reshape(-1,1)
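
The parameters_changed and _raw_predict maths removed here moves into the VarGauss inference object; the model keeps only its variational parameters alpha and beta. For reference, a standalone sketch, not part of the patch, of the Opper and Archambeau covariance identity the removed code relied on: with B = diag(beta), Sigma = (K^-1 + B^2)^-1 equals diag(beta)^-2 - B^-1 A^-1 B^-1 where A = I + B K B, which avoids inverting K directly. Names below are stand-ins:

# Stand-in names, plain NumPy: check the stable A = I + B K B form of the
# posterior covariance against the direct (less stable) inverse.
import numpy as np

N = 6
rng = np.random.default_rng(1)
X = rng.normal(size=(N, 1))
K = np.exp(-0.5 * (X - X.T) ** 2) + 1e-6 * np.eye(N)   # toy RBF Gram matrix
beta = rng.uniform(0.5, 2.0, size=N)

A = np.eye(N) + beta[:, None] * K * beta[None, :]      # I + B K B
Ai = np.linalg.inv(A)
Sigma = np.diag(beta ** -2) - Ai / beta[:, None] / beta[None, :]

Sigma_direct = np.linalg.inv(np.linalg.inv(K) + np.diag(beta ** 2))
assert np.allclose(Sigma, Sigma_direct)
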
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index f7cacb13..06e197f8 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -509,7 +509,8 @@ class GradientTests(np.testing.TestCase):
         X = X[:, None]
         Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
         kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
-        m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
+        lik = GPy.likelihoods.Gaussian()
+        m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
         self.assertTrue(m.checkgrad())
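
Construction after this patch mirrors the updated test: the likelihood is passed explicitly, and because the model now subclasses GP it inherits the standard optimize() and predict() machinery. A usage sketch, assuming VarGauss populates a posterior object that the base GP._raw_predict shown above can consume:

# Usage sketch (not from the patch): regression with an explicit Gaussian likelihood.
import numpy as np
import GPy

X = np.linspace(0, 10, 50)[:, None]
Y = np.sin(X) + 0.1 * np.random.randn(50, 1)

kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
m.optimize()                                            # inherited from the GP/Model base
mu, var = m.predict(np.linspace(0, 10, 100)[:, None])   # inherited GP.predict
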