diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 2835ee29..37c424ad 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -4,7 +4,7 @@
 import numpy as np
 import itertools
 from ...util.caching import Cache_this
-from .kern import CombinationKernel
+from .kern import CombinationKernel, Kern
 from functools import reduce
 
 class Add(CombinationKernel):
@@ -21,8 +21,22 @@ class Add(CombinationKernel):
 
             for part in kern.parts[::-1]:
                 kern.unlink_parameter(part)
                 subkerns.insert(i, part)
-        super(Add, self).__init__(subkerns, name)
+        super(Add, self).__init__(subkerns, name)
+        self._exact_psicomp = self._check_exact_psicomp()
+
+    def _check_exact_psicomp(self):
+        from .. import RBF,Linear,Bias,White
+        n_kerns = len(self.parts)
+        n_rbf = len([k for k in self.parts if isinstance(k,RBF)])
+        n_linear = len([k for k in self.parts if isinstance(k,Linear)])
+        n_bias = len([k for k in self.parts if isinstance(k,Bias)])
+        n_white = len([k for k in self.parts if isinstance(k,White)])
+        n_others = n_kerns - n_rbf - n_linear - n_bias - n_white
+        if n_rbf+n_linear<=1 and n_bias<=1 and n_white<=1 and n_others==0:
+            return True
+        else:
+            return False
 
     @Cache_this(limit=2, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
@@ -87,14 +100,17 @@
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi0(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi0(self,Z,variational_posterior)
         return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi1(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi1(self,Z,variational_posterior)
         return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi2(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi2(self,Z,variational_posterior)
         psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
         #return psi2
         # compute the "cross" terms
@@ -130,6 +146,7 @@
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi2n(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi2n(self, Z, variational_posterior)
         psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
         #return psi2
         # compute the "cross" terms
@@ -164,6 +181,7 @@
         return psi2
 
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         for p1 in self.parts:
             #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
@@ -180,6 +198,7 @@
             p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
 
     def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         target = np.zeros(Z.shape)
         for p1 in self.parts:
@@ -198,6 +217,7 @@
         return target
 
     def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
         for p1 in self.parameters:
diff --git a/GPy/kern/_src/psi_comp/gaussherm.py b/GPy/kern/_src/psi_comp/gaussherm.py
index 923b3eb0..bb44b219 100644
--- a/GPy/kern/_src/psi_comp/gaussherm.py
+++ b/GPy/kern/_src/psi_comp/gaussherm.py
@@ -13,9 +13,6 @@
 from ....util.linalg import tdot
 from . import PSICOMP
 
 class PSICOMP_GH(PSICOMP):
-    """
-    TODO: support Psi2 with shape NxMxM
-    """
     def __init__(self, degree=5, cache_K=True):
         self.degree = degree
@@ -48,7 +45,7 @@
 
         psi0 = np.zeros((N,))
         psi1 = np.zeros((N,M))
-        psi2 = np.zeros((M,M))
+        psi2 = np.zeros((N,M,M)) if return_psi2_n else np.zeros((M,M))
         for i in xrange(self.degree):
             if self.cache_K:
                 X = Xs[i]
@@ -57,7 +54,10 @@
             psi0 += self.weights[i]* kern.Kdiag(X)
             Kfu = kern.K(X,Z)
             psi1 += self.weights[i]* Kfu
-            psi2 += self.weights[i]* tdot(Kfu.T)
+            if return_psi2_n:
+                psi2 += self.weights[i]* Kfu[:,None,:]*Kfu[:,:,None]
+            else:
+                psi2 += self.weights[i]* tdot(Kfu.T)
         return psi0, psi1, psi2
 
     @Cache_this(limit=10, ignore_args=(0, 2,3,4))
@@ -84,7 +84,10 @@
             dtheta += kern.gradient
             dX = kern.gradients_X_diag(dL_dpsi0_i, X)
             Kfu = kern.K(X,Z)
-            dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
+            if len(dL_dpsi2.shape)==2:
+                dL_dkfu = (dL_dpsi1+ Kfu.dot(dL_dpsi2+dL_dpsi2.T))*self.weights[i]
+            else:
+                dL_dkfu = (dL_dpsi1+ (Kfu[:,:,None]*(dL_dpsi2+np.swapaxes(dL_dpsi2, 1,2))).sum(1))*self.weights[i]
             kern.update_gradients_full(dL_dkfu, X, Z)
             dtheta += kern.gradient
             dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z)
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index cf102f99..5dcead8a 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -469,9 +469,10 @@ class Kernel_Psi_statistics_GradientTests(unittest.TestCase):
         self.w3n = self.w3n+np.swapaxes(self.w3n, 1,2)
 
     def test_kernels(self):
-        from GPy.kern import RBF,Linear,MLP
+        from GPy.kern import RBF,Linear,MLP,Bias,White
         Q = self.Z.shape[1]
-        kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True)]
+        kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True),MLP(Q,ARD=True), RBF(Q,ARD=True)+Linear(Q,ARD=True)+Bias(Q)+White(Q)
+                   ,RBF(Q,ARD=True)+Bias(Q)+White(Q), Linear(Q,ARD=True)+Bias(Q)+White(Q)]
         for k in kernels:
             k.randomize()
 