diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py
index 20e6ea3d..80d38c04 100644
--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@@ -179,14 +179,14 @@ class EP(EPBase, ExactGaussianInference):
         elif self.ep_mode=="alternated":
             if getattr(self, '_ep_approximation', None) is None:
                 #if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
-                post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
+                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
             else:
                 #if we've already run EP, just use the existing approximation stored in self._ep_approximation
-                post_params, ga_approx, log_Z_tilde = self._ep_approximation
+                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
         else:
             raise ValueError("ep_mode value not valid")

-        return self._inference(Y, K, ga_approx, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
+        return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)

     def expectation_propagation(self, K, Y, likelihood, Y_metadata):

@@ -221,7 +221,7 @@ class EP(EPBase, ExactGaussianInference):
         # This terms cancel with the coreresponding terms in the marginal loglikelihood
         log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
         # - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)

-        return (post_params, ga_approx, log_Z_tilde)
+        return (post_params, ga_approx, cav_params, log_Z_tilde)

     def _init_approximations(self, K, num_data):
         #initial values - Gaussian factors
@@ -281,7 +281,7 @@ class EP(EPBase, ExactGaussianInference):

         return log_marginal, post_params

-    def _inference(self, Y, K, ga_approx, likelihood, Z_tilde, Y_metadata=None):
+    def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
         log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)

         tau_tilde_root = np.sqrt(ga_approx.tau)
@@ -294,10 +294,7 @@ class EP(EPBase, ExactGaussianInference):
         symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)

         dL_dK = 0.5 * (tdot(alpha) - Wi)

-        if not isinstance(likelihood, Gaussian):
-            dL_dthetaL = likelihood.ep_gradients(Y, ga_approx.tau, ga_approx.v, Y_metadata=Y_metadata)
-        else:
-            dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
+        dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')

         return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 533c6558..f0b7a315 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -57,7 +57,7 @@ class Gaussian(Likelihood):
     def update_gradients(self, grad):
         self.variance.gradient = grad

-    def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
+    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag,Y_metadata=None):
         return dL_dKdiag.sum()

     def _preprocess_values(self, Y):
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index d59cffdd..308c6a76 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -227,10 +227,10 @@ class Likelihood(Parameterized):
             self.__gh_points = np.polynomial.hermite.hermgauss(T)
         return self.__gh_points

-    def ep_gradients(self, Y, tau, v, Y_metadata=None, gh_points=None, approx=False, boost_grad=1.):
+    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
         if self.size > 0:
             shape = Y.shape
-            tau,v,Y = tau.flatten(), v.flatten(),Y.flatten()
+            tau,v,Y = cav_tau.flatten(), cav_v.flatten(),Y.flatten()
             mu = v/tau
             sigma2 = 1./tau

@@ -245,14 +245,19 @@ class Likelihood(Parameterized):
                         Y_metadata_i[key] = Y_metadata[key][index,:]
                 Y_metadata_list.append(Y_metadata_i)

-                val = self.site_derivatives_ep(Y[index], tau[index], v[index], Y_metadata_i)
-                dlik_dtheta[:, index] = val.ravel()
-
-            f = partial(self.integrate)
-            quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
-            quads = np.vstack(quads)
-            quads.reshape(self.size, shape[0], shape[1])
-
+            if quad_mode == 'gk':
+                f = partial(self.integrate_gk)
+                quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten()), Y_metadata_list))
+                quads = np.vstack(quads)
+                quads.reshape(self.size, shape[0], shape[1])
+            elif quad_mode == 'gh':
+                f = partial(self.integrate_gh)
+                quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
+                quads = np.hstack(quads)
+                quads = quads.T
+            else:
+                raise Exception("no other quadrature mode available")
+            # do a gaussian-hermite integration
             dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
             dL_dtheta = boost_grad * np.nansum(quads, axis=1)
             # dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
@@ -261,23 +266,23 @@ class Likelihood(Parameterized):

         return dL_dtheta

-    def integrate(self, Y, mu, sigma):
+    def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
+        # gaussian-kronrod integration.
         fmin = -np.inf
         fmax = np.inf
         SQRT_2PI = np.sqrt(2.*np.pi)
         def generate_integral(f):
-            a = np.exp(self.logpdf_link(f, Y)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
+            a = np.exp(self.logpdf_link(f, Y, Y_metadata_i)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
                 SQRT_2PI * sigma)
-            fn1 = a * self.dlogpdf_dtheta(f, Y)
+            fn1 = a * self.dlogpdf_dtheta(f, Y, Y_metadata_i)
             fn = fn1
             return fn
         dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
         return dF_dtheta_i

-
-
-    def site_derivatives_ep(self,obs,tau,v,Y_metadata_i=None, approx=False, gh_points=None):
+    def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
+        # gaussian-hermite quadrature.
         # "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
         # and the expectation is over the exact marginal posterior, which is not gaussian- and is
         # unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
@@ -288,42 +293,23 @@ class Likelihood(Parameterized):
         # "writing it explicitly "

         # use them for gaussian-hermite quadrature
-        mu = v/tau
-        sigma2 = 1./tau
-        sigma = np.sqrt(sigma2)
-
-        # yc = 1 - Y_metadata_i['censored']
-        fmin = -np.inf
-        fmax = np.inf
         SQRT_2PI = np.sqrt(2.*np.pi)
-
-        def get_integral_limits(obs, tau, v, yc):
-            # find the mode fi, fmin, and fmax - limit the range of integration to gather better weights
-            pass
-
         if gh_points is None:
             gh_x, gh_w = self._gh_points(32)
         else:
             gh_x, gh_w = gh_points
-        X = gh_x[None,:]*np.sqrt(2.*sigma2) + mu
-        # X = gh_x[None,:]
+        X = gh_x[None,:]*np.sqrt(2.)*sigma + mu

-        logp = self.logpdf(X, obs, Y_metadata=Y_metadata_i)
-        dlogp_dtheta = self.dlogpdf_dtheta(X, obs, Y_metadata=Y_metadata_i)
+        # Here X is a grid vector of possible fi values, while Y is just a single value which will be broadcasted.
+        a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
+        a = a.repeat(self.num_params,0)
+        b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
+        old_shape = b.shape
+        fn = np.array([i*j for i,j in zip(a.flatten(), b.flatten())])
+        fn = fn.reshape(old_shape)

-
-        if not approx:
-            def generate_integral(x):
-                a = np.exp(self.logpdf_link(x, obs, Y_metadata=Y_metadata_i)) * np.exp(-0.5 * np.square((x - mu) / sigma)) / (
-                    SQRT_2PI * sigma)
-                fn1 = a * self.dlogpdf_dtheta(x, obs, Y_metadata=Y_metadata_i)
-                fn = fn1
-                return fn
-            dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin,fmax=fmax)
-            return dF_dtheta_i
-
-        dF_dtheta_i = np.dot(dlogp_dtheta, gh_w)/np.sqrt(np.pi)
+        dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
         return dF_dtheta_i

     def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
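
Note on the two quadrature modes wired in above: ep_gradients now dispatches either to integrate_gk (adaptive Gauss-Kronrod via quadgk_int, quad_mode='gk') or to integrate_gh (fixed-order Gauss-Hermite, quad_mode='gh'). Both estimate expectations of the form E_{f ~ N(mu, sigma^2)}[ d log p(y|f)/dtheta ], with mu = cav_v/cav_tau and sigma^2 = 1/cav_tau built from the cavity parameters that the EP loop now threads through to the likelihood. The following is a minimal sketch of the two integration rules under those assumptions, not GPy code: the helper names are illustrative only, and scipy's quad (QUADPACK, Gauss-Kronrod based) stands in for quadgk_int.

# Sketch only: illustrative helpers, not part of the GPy API.
import numpy as np
from scipy.integrate import quad

def gauss_hermite_expectation(g, mu, sigma, degree=32):
    # Nodes/weights for the weight function exp(-x^2); substituting
    # f = sqrt(2)*sigma*x + mu turns the weighted sum into an expectation
    # under N(mu, sigma^2), up to the 1/sqrt(pi) normalisation.
    gh_x, gh_w = np.polynomial.hermite.hermgauss(degree)
    f = np.sqrt(2.) * sigma * gh_x + mu
    return np.dot(g(f), gh_w) / np.sqrt(np.pi)

def gauss_kronrod_expectation(g, mu, sigma):
    # Adaptive quadrature over the whole real line; the integrand is g(f)
    # times the Gaussian density, mirroring generate_integral in integrate_gk.
    dens = lambda f: np.exp(-0.5 * ((f - mu) / sigma) ** 2) / (np.sqrt(2. * np.pi) * sigma)
    val, _ = quad(lambda f: g(f) * dens(f), -np.inf, np.inf)
    return val

# Sanity check: E[f^2] under N(1, 0.5^2) is mu^2 + sigma^2 = 1.25 in both modes.
print(gauss_hermite_expectation(lambda f: f ** 2, mu=1.0, sigma=0.5))
print(gauss_kronrod_expectation(lambda f: f ** 2, mu=1.0, sigma=0.5))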