Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-07-26 17:11:06 +02:00 · 2015-08-20 17:33:56 +01:00 · 2015-08-20 17:33:56 +01:00 · 0e01474aec
commit 0e01474aec
parent 1d2b771e09 0d995a56b2
14 changed files with 217 additions and 96 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -17,7 +17,7 @@ before_install:
  - sudo ln -s /run/shm /dev/shm

 install:
-  - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
+  - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.9 scipy=0.16 matplotlib nose sphinx pip nose
  #- pip install . 
  - python setup.py build_ext --inplace
  #--use-mirrors
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -183,7 +183,7 @@ class GP(Model):
        """
        return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, full_cov=False, kern=None):
+    def _raw_predict(self, Xnew, full_cov=False, kern=None):
        """
        For making predictions, does not account for normalization or likelihood

@ -199,24 +199,30 @@ class GP(Model):
        if kern is None:
            kern = self.kern

-        Kx = kern.K(_Xnew, self.X).T
-        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
+        Kx = kern.K(self.X, Xnew)
        mu = np.dot(Kx.T, self.posterior.woodbury_vector)
        if full_cov:
-            Kxx = kern.K(_Xnew)
-            var = Kxx - np.dot(Kx.T, WiKx)
+            Kxx = kern.K(Xnew)
+            if self.posterior.woodbury_inv.ndim == 2:
+                var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
+            elif self.posterior.woodbury_inv.ndim == 3:
+                var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
+                for i in range(var.shape[2]):
+                    var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
+            var = var
        else:
-            Kxx = kern.Kdiag(_Xnew)
-            var = Kxx - np.sum(WiKx*Kx, 0)
-            var = var.reshape(-1, 1)
-            var[var<0.] = 0.
+            Kxx = kern.Kdiag(Xnew)
+            if self.posterior.woodbury_inv.ndim == 2:
+                var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
+            elif self.posterior.woodbury_inv.ndim == 3:
+                var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
+                for i in range(var.shape[1]):
+                    var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
+            var = var
+        #add in the mean function
+        if self.mean_function is not None:
+            mu += self.mean_function.f(Xnew)

-        #force mu to be a column vector
-        if len(mu.shape)==1: mu = mu[:,None]
-
-        #add the mean function in
-        if not self.mean_function is None:
-            mu += self.mean_function.f(_Xnew)
        return mu, var

    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -249,7 +255,7 @@ class GP(Model):
        mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
        return mean, var

-    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None):
        """
        Get the predictive quantiles around the prediction at X

@ -257,10 +263,12 @@ class GP(Model):
        :type X: np.ndarray (Xnew x self.input_dim)
        :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
        :type quantiles: tuple
+        :param kern: optional kernel to use for prediction
+        :type predict_kw: dict
        :returns: list of quantiles for each X and predictive quantiles for interval combination
        :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
        """
-        m, v = self._raw_predict(X,  full_cov=False)
+        m, v = self._raw_predict(X,  full_cov=False, kern=kern)
        if self.normalizer is not None:
            m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
        return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
--- a/GPy/inference/latent_function_inference/init.py
+++ b/GPy/inference/latent_function_inference/init.py
@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
 from .dtc import DTC
 from .fitc import FITC
 from .var_dtc_parallel import VarDTC_minibatch
-#from .svgp import SVGP
+from .var_gauss import VarGauss

 # class FullLatentFunctionData(object):
 #
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@ -171,7 +171,9 @@ class Laplace(LatentFunctionInference):
        #define the objective function (to be maximised)
        def obj(Ki_f, f):
            ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
+            print ll
            if np.isnan(ll):
+                import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
                return -np.inf
            else:
                return ll
--- a/GPy/inference/latent_function_inference/var_gauss.py
+++ b/GPy/inference/latent_function_inference/var_gauss.py
@ -0,0 +1,66 @@
+# Copyright (c) 2015, James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+import numpy as np
+from ...util.linalg import pdinv
+from .posterior import Posterior
+from . import LatentFunctionInference
+log_2_pi = np.log(2*np.pi)
+
+class VarGauss(LatentFunctionInference):
+    """
+    The Variational Gaussian Approximation revisited
+
+    @article{Opper:2009,
+        title = {The Variational Gaussian Approximation Revisited},
+        author = {Opper, Manfred and Archambeau, C{\'e}dric},
+        journal = {Neural Comput.},
+        year = {2009},
+        pages = {786--792},
+    }
+    """
+    def __init__(self, alpha, beta):
+        """
+        :param alpha: GPy.core.Param varational parameter
+        :param beta: GPy.core.Param varational parameter
+        """
+        self.alpha, self.beta = alpha, beta
+
+    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
+        if mean_function is not None:
+            raise NotImplementedError
+        num_data, output_dim = Y.shape
+        assert output_dim ==1, "Only one output supported"
+
+        K = kern.K(X)
+        m = K.dot(self.alpha)
+        KB = K*self.beta[:, None]
+        BKB = KB*self.beta[None, :]
+        A = np.eye(num_data) + BKB
+        Ai, LA, _, Alogdet = pdinv(A)
+        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :]  # posterior coavairance: need full matrix for gradients
+        var = np.diag(Sigma).reshape(-1,1)
+
+        F, dF_dm, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, m, var, Y_metadata=Y_metadata)
+        if dF_dthetaL is not None:
+            dL_dthetaL = dF_dthetaL.sum(1).sum(1)
+        else:
+            dL_dthetaL = np.array([])
+        dF_da = np.dot(K, dF_dm)
+        SigmaB = Sigma*self.beta
+        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
+        KL = 0.5*(Alogdet + np.trace(Ai) - num_data + np.sum(m*self.alpha))
+        dKL_da = m
+        A_A2 = Ai - Ai.dot(Ai)
+        dKL_db = np.diag(np.dot(KB.T, A_A2))
+        log_marginal = F.sum() - KL
+        self.alpha.gradient = dF_da - dKL_da
+        self.beta.gradient = dF_db - dKL_db
+
+        # K-gradients
+        dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
+        tmp = Ai*self.beta[:, None]/self.beta[None, :]
+        dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
+
+        return Posterior(mean=m, cov=Sigma ,K=K),\
+               log_marginal,\
+               {'dL_dK':dF_dK-dKL_dK, 'dL_dthetaL':dL_dthetaL}
--- a/GPy/kern/_src/coregionalize.py
+++ b/GPy/kern/_src/coregionalize.py
@ -94,7 +94,7 @@ class Coregionalize(Kern):
            dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)


-        dkappa = np.diag(dL_dK_small)
+        dkappa = np.diag(dL_dK_small).copy()
        dL_dK_small += dL_dK_small.T
        dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)

--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -85,6 +85,7 @@ class Bernoulli(Likelihood):
                gh_x, gh_w = gh_points


+            gh_w = gh_w / np.sqrt(np.pi)
            shape = m.shape
            m,v,Y = m.flatten(), v.flatten(), Y.flatten()
            Ysign = np.where(Y==1,1,-1)
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -315,6 +315,9 @@ class Gaussian(Likelihood):
        return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v

    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
+        if not isinstance(self.gp_link, link_functions.Identity):
+            return super(Gaussian, self).variational_expectations(Y=Y, m=m, v=v, gh_points=gh_points, Y_metadata=Y_metadata)
+
        lik_var = float(self.variance)
        F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
        dF_dmu = (Y - m)/lik_var
--- a/GPy/models/gp_var_gauss.py
+++ b/GPy/models/gp_var_gauss.py
@ -2,19 +2,16 @@
 # Distributed under the terms of the GNU General public License, see LICENSE.txt

 import numpy as np
-from scipy import stats
-from scipy.special import erf
-from ..core.model import Model
+from ..core import GP
 from ..core.parameterization import ObsAr
 from .. import kern
 from ..core.parameterization.param import Param
-from ..util.linalg import pdinv
-from ..likelihoods import Gaussian
+from ..inference.latent_function_inference import VarGauss

 log_2_pi = np.log(2*np.pi)


-class GPVariationalGaussianApproximation(Model):
+class GPVariationalGaussianApproximation(GP):
    """
    The Variational Gaussian Approximation revisited

@ -26,70 +23,15 @@ class GPVariationalGaussianApproximation(Model):
        pages = {786--792},
    }
    """
-    def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
-        Model.__init__(self,'Variational GP')
-        if likelihood is None:
-            likelihood = Gaussian()
-        # accept the construction arguments
-        self.X = ObsAr(X)
-        self.Y = Y
-        self.num_data, self.input_dim = self.X.shape
-        self.Y_metadata = Y_metadata
+    def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):

-        self.kern = kernel
-        self.likelihood = likelihood
-        self.link_parameter(self.kern)
-        self.link_parameter(self.likelihood)
+        num_data = Y.shape[0]
+        self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
+        self.beta = Param('beta', np.ones(num_data))
+
+        inf = VarGauss(self.alpha, self.beta)
+        super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)

-        self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
-        self.beta = Param('beta', np.ones(self.num_data))
        self.link_parameter(self.alpha)
        self.link_parameter(self.beta)

-    def log_likelihood(self):
-        return self._log_lik
-
-    def parameters_changed(self):
-        K = self.kern.K(self.X)
-        m = K.dot(self.alpha)
-        KB = K*self.beta[:, None]
-        BKB = KB*self.beta[None, :]
-        A = np.eye(self.num_data) + BKB
-        Ai, LA, _, Alogdet = pdinv(A)
-        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :]  # posterior coavairance: need full matrix for gradients
-        var = np.diag(Sigma).reshape(-1,1)
-
-        F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
-        self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
-        dF_da = np.dot(K, dF_dm)
-        SigmaB = Sigma*self.beta
-        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
-        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
-        dKL_da = m
-        A_A2 = Ai - Ai.dot(Ai)
-        dKL_db = np.diag(np.dot(KB.T, A_A2))
-        self._log_lik = F.sum() - KL
-        self.alpha.gradient = dF_da - dKL_da
-        self.beta.gradient = dF_db - dKL_db
-
-        # K-gradients
-        dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
-        tmp = Ai*self.beta[:, None]/self.beta[None, :]
-        dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
-        self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
-
-    def _raw_predict(self, Xnew):
-        """
-        Predict the function(s) at the new point(s) Xnew.
-
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        """
-        Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
-        Kux = self.kern.K(self.X, Xnew)
-        mu = np.dot(Kux.T, self.alpha)
-        WiKux = np.dot(Wi, Kux)
-        Kxx = self.kern.Kdiag(Xnew)
-        var = Kxx - np.sum(WiKux*Kux, 0)
-
-        return mu, var.reshape(-1,1)
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -110,7 +110,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
                else:
                    Y_metadata['output_index'] = extra_data
            m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
-            lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
+            fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw)
+            lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)


        for d in which_data_ycols:
--- a/GPy/testing/inference_tests.py
+++ b/GPy/testing/inference_tests.py
@ -8,7 +8,7 @@ The test cases for various inference algorithms
 import unittest, itertools
 import numpy as np
 import GPy
-
+#np.seterr(invalid='raise')

 class InferenceXTestCase(unittest.TestCase):

--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@ -9,8 +9,7 @@ import inspect
 from GPy.likelihoods import link_functions
 from GPy.core.parameterization import Param
 from functools import partial
-#np.random.seed(300)
-#np.random.seed(4)
+fixed_seed = 7

 #np.seterr(divide='raise')
 def dparam_partial(inst_func, *args):
@ -105,6 +104,7 @@ class TestNoiseModels(object):
    Generic model checker
    """
    def setUp(self):
+        np.random.seed(fixed_seed)
        self.N = 15
        self.D = 3
        self.X = np.random.rand(self.N, self.D)*10
@ -218,7 +218,8 @@ class TestNoiseModels(object):
                    "constraints": [(".*variance", self.constrain_positive)]
                },
                "laplace": True,
-                "ep": False # FIXME: Should be True when we have it working again
+                "ep": False, # FIXME: Should be True when we have it working again
+                "variational_expectations": True,
            },
            "Gaussian_log": {
                "model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
@ -227,7 +228,8 @@ class TestNoiseModels(object):
                    "vals": [self.var],
                    "constraints": [(".*variance", self.constrain_positive)]
                },
-                "laplace": True
+                "laplace": True,
+                "variational_expectations": True
            },
            #"Gaussian_probit": {
            #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
@ -252,7 +254,8 @@ class TestNoiseModels(object):
                "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
                "laplace": True,
                "Y": self.binary_Y,
-                "ep": False # FIXME: Should be True when we have it working again
+                "ep": False, # FIXME: Should be True when we have it working again
+                "variational_expectations": True
            },
            "Exponential_default": {
                "model": GPy.likelihoods.Exponential(),
@ -347,6 +350,10 @@ class TestNoiseModels(object):
                ep = attributes["ep"]
            else:
                ep = False
+            if "variational_expectations" in attributes:
+                var_exp = attributes["variational_expectations"]
+            else:
+                var_exp = False

            #if len(param_vals) > 1:
                #raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -377,6 +384,11 @@ class TestNoiseModels(object):
            if ep:
                #ep likelihood gradcheck
                yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
+            if var_exp:
+                #Need to specify mu and var!
+                yield self.t_varexp, model, Y, Y_metadata
+                yield self.t_dexp_dmu, model, Y, Y_metadata
+                yield self.t_dexp_dvar, model, Y, Y_metadata


        self.tearDown()
@ -603,6 +615,87 @@ class TestNoiseModels(object):
        print(m)
        assert m.checkgrad(verbose=1, step=step)

+    ################
+    # variational expectations #
+    ################
+    @with_setup(setUp, tearDown)
+    def t_varexp(self, model, Y, Y_metadata):
+        #Test that the analytic implementation (if it exists) matches the generic gauss
+        #hermite implementation
+        print("\n{}".format(inspect.stack()[0][3]))
+        #Make mu and var (marginal means and variances of q(f)) draws from a GP
+        k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
+        L = GPy.util.linalg.jitchol(k)
+        mu = L.dot(np.random.randn(*Y.shape))
+        #Variance must be positive
+        var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
+
+        expectation = model.variational_expectations(Y=Y, m=mu, v=var, gh_points=None, Y_metadata=Y_metadata)[0]
+
+        #Implementation of gauss hermite integration
+        shape = mu.shape
+        gh_x, gh_w= np.polynomial.hermite.hermgauss(50)
+        m,v,Y = mu.flatten(), var.flatten(), Y.flatten()
+        #make a grid of points
+        X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
+        #evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
+        # broadcast needs to be handled carefully.
+        logp = model.logpdf(X, Y[:,None], Y_metadata=Y_metadata)
+        #average over the gird to get derivatives of the Gaussian's parameters
+        #division by pi comes from fact that for each quadrature we need to scale by 1/sqrt(pi)
+        expectation_gh  = np.dot(logp, gh_w)/np.sqrt(np.pi)
+        expectation_gh = expectation_gh.reshape(*shape)
+
+        np.testing.assert_almost_equal(expectation, expectation_gh, decimal=5)
+
+    @with_setup(setUp, tearDown)
+    def t_dexp_dmu(self, model, Y, Y_metadata):
+        print("\n{}".format(inspect.stack()[0][3]))
+        #Make mu and var (marginal means and variances of q(f)) draws from a GP
+        k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
+        L = GPy.util.linalg.jitchol(k)
+        mu = L.dot(np.random.randn(*Y.shape))
+        #Variance must be positive
+        var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
+        expectation = functools.partial(model.variational_expectations, Y=Y, v=var, gh_points=None, Y_metadata=Y_metadata)
+
+        #Function to get the nth returned value
+        def F(mu):
+            return expectation(m=mu)[0]
+        def dmu(mu):
+            return expectation(m=mu)[1]
+
+        grad = GradientChecker(F, dmu, mu.copy(), 'm')
+
+        grad.randomize()
+        print(grad)
+        print(model)
+        assert grad.checkgrad(verbose=1)
+
+    @with_setup(setUp, tearDown)
+    def t_dexp_dvar(self, model, Y, Y_metadata):
+        print("\n{}".format(inspect.stack()[0][3]))
+        #Make mu and var (marginal means and variances of q(f)) draws from a GP
+        k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
+        L = GPy.util.linalg.jitchol(k)
+        mu = L.dot(np.random.randn(*Y.shape))
+        #Variance must be positive
+        var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
+        expectation = functools.partial(model.variational_expectations, Y=Y, m=mu, gh_points=None, Y_metadata=Y_metadata)
+
+        #Function to get the nth returned value
+        def F(var):
+            return expectation(v=var)[0]
+        def dvar(var):
+            return expectation(v=var)[2]
+
+        grad = GradientChecker(F, dvar, var.copy(), 'v')
+
+        self.constrain_positive('v', grad)
+        #grad.randomize()
+        print(grad)
+        print(model)
+        assert grad.checkgrad(verbose=1)

 class LaplaceTests(unittest.TestCase):
    """
@ -610,6 +703,7 @@ class LaplaceTests(unittest.TestCase):
    """

    def setUp(self):
+        np.random.seed(fixed_seed)
        self.N = 15
        self.D = 1
        self.X = np.random.rand(self.N, self.D)*10
--- a/GPy/testing/misc_tests.py
+++ b/GPy/testing/misc_tests.py
@ -13,10 +13,13 @@ class MiscTests(np.testing.TestCase):

    def test_safe_exp_upper(self):
        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter('always')  # always print
            assert np.isfinite(np.exp(self._lim_val_exp))
            assert np.isinf(np.exp(self._lim_val_exp + 1))
            assert np.isfinite(GPy.util.misc.safe_exp(self._lim_val_exp + 1))

+            print w
+            print len(w)
            assert len(w)==1 # should have one overflow warning

    def test_safe_exp_lower(self):
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -509,7 +509,8 @@ class GradientTests(np.testing.TestCase):
        X = X[:, None]
        Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
        kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
-        m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
+        lik = GPy.likelihoods.Gaussian()
+        m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
        self.assertTrue(m.checkgrad())