mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-11 04:52:37 +02:00
committing code for some changes to the API for calculating ep_gradients, also fixing some issues with Gauss–Hermite quadrature; now we have both available ...
This commit is contained in:
parent
ae27e1225d
commit
8b621a409c
3 changed files with 37 additions and 54 deletions
|
|
@ -179,14 +179,14 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
elif self.ep_mode=="alternated":
|
elif self.ep_mode=="alternated":
|
||||||
if getattr(self, '_ep_approximation', None) is None:
|
if getattr(self, '_ep_approximation', None) is None:
|
||||||
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
|
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
|
||||||
post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
||||||
else:
|
else:
|
||||||
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
|
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
|
||||||
post_params, ga_approx, log_Z_tilde = self._ep_approximation
|
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
|
||||||
else:
|
else:
|
||||||
raise ValueError("ep_mode value not valid")
|
raise ValueError("ep_mode value not valid")
|
||||||
|
|
||||||
return self._inference(Y, K, ga_approx, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
|
return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
|
||||||
|
|
||||||
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
|
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
|
||||||
|
|
||||||
|
|
@ -221,7 +221,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
# These terms cancel with the corresponding terms in the marginal loglikelihood
|
# These terms cancel with the corresponding terms in the marginal loglikelihood
|
||||||
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
|
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
|
||||||
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
|
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
|
||||||
return (post_params, ga_approx, log_Z_tilde)
|
return (post_params, ga_approx, cav_params, log_Z_tilde)
|
||||||
|
|
||||||
def _init_approximations(self, K, num_data):
|
def _init_approximations(self, K, num_data):
|
||||||
#initial values - Gaussian factors
|
#initial values - Gaussian factors
|
||||||
|
|
@ -281,7 +281,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
|
|
||||||
return log_marginal, post_params
|
return log_marginal, post_params
|
||||||
|
|
||||||
def _inference(self, Y, K, ga_approx, likelihood, Z_tilde, Y_metadata=None):
|
def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
|
||||||
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
|
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
|
||||||
|
|
||||||
tau_tilde_root = np.sqrt(ga_approx.tau)
|
tau_tilde_root = np.sqrt(ga_approx.tau)
|
||||||
|
|
@ -294,10 +294,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
|
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
|
||||||
|
|
||||||
dL_dK = 0.5 * (tdot(alpha) - Wi)
|
dL_dK = 0.5 * (tdot(alpha) - Wi)
|
||||||
if not isinstance(likelihood, Gaussian):
|
dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
|
||||||
dL_dthetaL = likelihood.ep_gradients(Y, ga_approx.tau, ga_approx.v, Y_metadata=Y_metadata)
|
|
||||||
else:
|
|
||||||
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
|
|
||||||
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
|
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ class Gaussian(Likelihood):
|
||||||
def update_gradients(self, grad):
|
def update_gradients(self, grad):
|
||||||
self.variance.gradient = grad
|
self.variance.gradient = grad
|
||||||
|
|
||||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag,Y_metadata=None):
|
||||||
return dL_dKdiag.sum()
|
return dL_dKdiag.sum()
|
||||||
|
|
||||||
def _preprocess_values(self, Y):
|
def _preprocess_values(self, Y):
|
||||||
|
|
|
||||||
|
|
@ -227,10 +227,10 @@ class Likelihood(Parameterized):
|
||||||
self.__gh_points = np.polynomial.hermite.hermgauss(T)
|
self.__gh_points = np.polynomial.hermite.hermgauss(T)
|
||||||
return self.__gh_points
|
return self.__gh_points
|
||||||
|
|
||||||
def ep_gradients(self, Y, tau, v, Y_metadata=None, gh_points=None, approx=False, boost_grad=1.):
|
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
|
||||||
if self.size > 0:
|
if self.size > 0:
|
||||||
shape = Y.shape
|
shape = Y.shape
|
||||||
tau,v,Y = tau.flatten(), v.flatten(),Y.flatten()
|
tau,v,Y = cav_tau.flatten(), cav_v.flatten(),Y.flatten()
|
||||||
mu = v/tau
|
mu = v/tau
|
||||||
sigma2 = 1./tau
|
sigma2 = 1./tau
|
||||||
|
|
||||||
|
|
@ -245,14 +245,19 @@ class Likelihood(Parameterized):
|
||||||
Y_metadata_i[key] = Y_metadata[key][index,:]
|
Y_metadata_i[key] = Y_metadata[key][index,:]
|
||||||
Y_metadata_list.append(Y_metadata_i)
|
Y_metadata_list.append(Y_metadata_i)
|
||||||
|
|
||||||
val = self.site_derivatives_ep(Y[index], tau[index], v[index], Y_metadata_i)
|
if quad_mode == 'gk':
|
||||||
dlik_dtheta[:, index] = val.ravel()
|
f = partial(self.integrate_gk)
|
||||||
|
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten()), Y_metadata_list))
|
||||||
f = partial(self.integrate)
|
quads = np.vstack(quads)
|
||||||
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
|
quads.reshape(self.size, shape[0], shape[1])
|
||||||
quads = np.vstack(quads)
|
elif quad_mode == 'gh':
|
||||||
quads.reshape(self.size, shape[0], shape[1])
|
f = partial(self.integrate_gh)
|
||||||
|
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
|
||||||
|
quads = np.hstack(quads)
|
||||||
|
quads = quads.T
|
||||||
|
else:
|
||||||
|
raise Exception("no other quadrature mode available")
|
||||||
|
# do a gaussian-hermite integration
|
||||||
dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
|
dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
|
||||||
dL_dtheta = boost_grad * np.nansum(quads, axis=1)
|
dL_dtheta = boost_grad * np.nansum(quads, axis=1)
|
||||||
# dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
|
# dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
|
||||||
|
|
@ -261,23 +266,23 @@ class Likelihood(Parameterized):
|
||||||
return dL_dtheta
|
return dL_dtheta
|
||||||
|
|
||||||
|
|
||||||
def integrate(self, Y, mu, sigma):
|
def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
|
||||||
|
# gaussian-kronrod integration.
|
||||||
fmin = -np.inf
|
fmin = -np.inf
|
||||||
fmax = np.inf
|
fmax = np.inf
|
||||||
SQRT_2PI = np.sqrt(2.*np.pi)
|
SQRT_2PI = np.sqrt(2.*np.pi)
|
||||||
def generate_integral(f):
|
def generate_integral(f):
|
||||||
a = np.exp(self.logpdf_link(f, Y)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
|
a = np.exp(self.logpdf_link(f, Y, Y_metadata_i)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
|
||||||
SQRT_2PI * sigma)
|
SQRT_2PI * sigma)
|
||||||
fn1 = a * self.dlogpdf_dtheta(f, Y)
|
fn1 = a * self.dlogpdf_dtheta(f, Y, Y_metadata_i)
|
||||||
fn = fn1
|
fn = fn1
|
||||||
return fn
|
return fn
|
||||||
|
|
||||||
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
|
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
|
||||||
return dF_dtheta_i
|
return dF_dtheta_i
|
||||||
|
|
||||||
|
def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
|
||||||
|
# gaussian-hermite quadrature.
|
||||||
def site_derivatives_ep(self,obs,tau,v,Y_metadata_i=None, approx=False, gh_points=None):
|
|
||||||
# "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
|
# "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
|
||||||
# and the expectation is over the exact marginal posterior, which is not gaussian- and is
|
# and the expectation is over the exact marginal posterior, which is not gaussian- and is
|
||||||
# unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
|
# unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
|
||||||
|
|
@ -288,42 +293,23 @@ class Likelihood(Parameterized):
|
||||||
# "writing it explicitly "
|
# "writing it explicitly "
|
||||||
# use them for gaussian-hermite quadrature
|
# use them for gaussian-hermite quadrature
|
||||||
|
|
||||||
mu = v/tau
|
|
||||||
sigma2 = 1./tau
|
|
||||||
sigma = np.sqrt(sigma2)
|
|
||||||
|
|
||||||
# yc = 1 - Y_metadata_i['censored']
|
|
||||||
fmin = -np.inf
|
|
||||||
fmax = np.inf
|
|
||||||
SQRT_2PI = np.sqrt(2.*np.pi)
|
SQRT_2PI = np.sqrt(2.*np.pi)
|
||||||
|
|
||||||
def get_integral_limits(obs, tau, v, yc):
|
|
||||||
# find the mode fi, fmin, and fmax - limit the range of integration to gather better weights
|
|
||||||
pass
|
|
||||||
|
|
||||||
if gh_points is None:
|
if gh_points is None:
|
||||||
gh_x, gh_w = self._gh_points(32)
|
gh_x, gh_w = self._gh_points(32)
|
||||||
else:
|
else:
|
||||||
gh_x, gh_w = gh_points
|
gh_x, gh_w = gh_points
|
||||||
|
|
||||||
X = gh_x[None,:]*np.sqrt(2.*sigma2) + mu
|
X = gh_x[None,:]*np.sqrt(2.)*sigma + mu
|
||||||
# X = gh_x[None,:]
|
|
||||||
|
|
||||||
logp = self.logpdf(X, obs, Y_metadata=Y_metadata_i)
|
# Here X is a grid vector of possible fi values, while Y is just a single value which will be broadcasted.
|
||||||
dlogp_dtheta = self.dlogpdf_dtheta(X, obs, Y_metadata=Y_metadata_i)
|
a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
|
||||||
|
a = a.repeat(self.num_params,0)
|
||||||
|
b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
|
||||||
|
old_shape = b.shape
|
||||||
|
fn = np.array([i*j for i,j in zip(a.flatten(), b.flatten())])
|
||||||
|
fn = fn.reshape(old_shape)
|
||||||
|
|
||||||
|
dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
|
||||||
if not approx:
|
|
||||||
def generate_integral(x):
|
|
||||||
a = np.exp(self.logpdf_link(x, obs, Y_metadata=Y_metadata_i)) * np.exp(-0.5 * np.square((x - mu) / sigma)) / (
|
|
||||||
SQRT_2PI * sigma)
|
|
||||||
fn1 = a * self.dlogpdf_dtheta(x, obs, Y_metadata=Y_metadata_i)
|
|
||||||
fn = fn1
|
|
||||||
return fn
|
|
||||||
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin,fmax=fmax)
|
|
||||||
return dF_dtheta_i
|
|
||||||
|
|
||||||
dF_dtheta_i = np.dot(dlogp_dtheta, gh_w)/np.sqrt(np.pi)
|
|
||||||
return dF_dtheta_i
|
return dF_dtheta_i
|
||||||
|
|
||||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue