diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index ba72c912..cd2f9a97 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -352,13 +352,16 @@ class GP(Model):
         for i in range(self._predictive_variable.shape[0]):
             dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
 
+        if full_cov:
+            dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
+        else:
+            dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
+
         def compute_cov_inner(wi):
             if full_cov: # full covariance gradients:
-                dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
                 var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
             else:
-                dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
                 var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
             return var_jac
@@ -371,7 +374,7 @@ class GP(Model):
             var_jac = compute_cov_inner(self.posterior.woodbury_inv)
         return mean_jac, var_jac
 
-    def predict_wishard_embedding(self, Xnew, kern=None):
+    def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
         """
         Predict the wishard embedding G of the GP. This is the density of the
         input of the GP defined by the probabilistic function mapping f.
@@ -391,13 +394,16 @@ class GP(Model):
         mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
         if var_jac.ndim == 3:
             Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac)
-            G = mumuT + Sigma
         else:
-            Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac)
-            G = mumuT + self.output_dim*Sigma
+            Sigma = self.output_dim*np.einsum('iq,ip->iqp', var_jac, var_jac)
+        G = 0.
+        if mean:
+            G += mumuT
+        if covariance:
+            G += Sigma
         return G
 
-    def predict_magnification(self, Xnew, kern=None):
+    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
         """
         Predict the magnification factor as
@@ -405,7 +411,7 @@ class GP(Model):
 
         for each point N in Xnew
         """
-        G = self.predict_wishard_embedding(Xnew, kern)
+        G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
         from ..util.linalg import jitchol
         return np.array([np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])])
         #return np.array([np.sqrt(np.linalg.det(G[n, :, :])) for n in range(Xnew.shape[0])])
@@ -651,7 +657,7 @@ class GP(Model):
                                resolution=50, ax=None, marker='o', s=40,
                                fignum=None, legend=True,
                                plot_limits=None,
-                               aspect='auto', updates=False, **kwargs):
+                               aspect='auto', updates=False, plot_inducing=True, kern=None, **kwargs):
         import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
@@ -659,7 +665,7 @@ class GP(Model):
         return dim_reduction_plots.plot_magnification(self, labels, which_indices,
                                                    resolution, ax, marker, s,
-                                                   fignum, False, legend,
-                                                   plot_limits, aspect, updates, **kwargs)
+                                                   fignum, plot_inducing, legend,
+                                                   plot_limits, aspect, updates, kern=kern, **kwargs)
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 5ab302db..ac5fb62b 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -181,18 +181,3 @@ class SparseGP(GP):
             var[i] = np.diag(var_)+p0-t2
         return mu, var
-
-    def plot_magnification(self, labels=None, which_indices=None,
-                           resolution=50, ax=None, marker='o', s=40,
-                           fignum=None, legend=True,
-                           plot_limits=None,
-                           aspect='auto', updates=False, plot_inducing=True, **kwargs):
-
-        import sys
-        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import dim_reduction_plots
-
-        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
-                                                   resolution, ax, marker, s,
-                                                   fignum, plot_inducing, legend,
-                                                   plot_limits, aspect, updates, **kwargs)
\ No newline at end of file
diff --git a/GPy/inference/latent_function_inference/inferenceX.py b/GPy/inference/latent_function_inference/inferenceX.py
index fd213784..f253a31e 100644
--- a/GPy/inference/latent_function_inference/inferenceX.py
+++ b/GPy/inference/latent_function_inference/inferenceX.py
@@ -4,6 +4,7 @@ import numpy as np
 
 from ...core import Model
 from ...core.parameterization import variational
+from ...util.linalg import tdot
 from GPy.core.parameterization.variational import VariationalPosterior
 
 def infer_newX(model, Y_new, optimize=True, init='L2'):
@@ -127,13 +128,13 @@ class InferenceX(Model):
             wv = wv[:,self.valid_dim]
             output_dim = self.valid_dim.sum()
             if self.ninan is not None:
-                self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - np.einsum('md,od->mo',wv, wv)[:, :, None]).sum(-1)
+                self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - tdot(wv)[:, :, None]).sum(-1)
             else:
-                self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))
+                self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - tdot(wv))
             self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T)
             self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0])
         else:
-            self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2.
+            self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - tdot(wv))/2. #np.einsum('md,od->mo',wv, wv)
             self.dL_dpsi1 = beta*np.dot(self.Y, wv.T)
             self.dL_dpsi0 = -beta/2.*output_dim* np.ones(self.Y.shape[0])
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 696a8b04..3c97a08a 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -14,7 +14,7 @@ class Add(CombinationKernel):
 
     This kernel will take over the active dims of it's subkernels passed in.
""" - def __init__(self, subkerns, name='add'): + def __init__(self, subkerns, name='sum'): for i, kern in enumerate(subkerns[:]): if isinstance(kern, Add): del subkerns[i] @@ -71,11 +71,24 @@ class Add(CombinationKernel): target = np.zeros(X.shape) [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] return target - + + def gradients_XX(self, dL_dK, X, X2): + if X2 is None: + target = np.zeros((X.shape[0], X.shape[0], X.shape[1])) + else: + target = np.zeros((X.shape[0], X2.shape[0], X.shape[1])) + [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts] + return target + + def gradients_XX_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts] + return target + @Cache_this(limit=2, force_kwargs=['which_parts']) def psi0(self, Z, variational_posterior): return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts)) - + @Cache_this(limit=2, force_kwargs=['which_parts']) def psi1(self, Z, variational_posterior): return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts)) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index aae4460a..d5482b80 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -102,7 +102,7 @@ class Kern(Parameterized): raise NotImplementedError def gradients_XX(self, dL_dK, X, X2): raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel") - def gradients_XX_diag(self, dL_dK, X, X2): + def gradients_XX_diag(self, dL_dKdiag, X): raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel") def gradients_X_diag(self, dL_dKdiag, X): raise NotImplementedError @@ -180,7 +180,7 @@ class Kern(Parameterized): def __iadd__(self, other): return self.add(other) - def add(self, other, name='add'): + def add(self, other, name='sum'): """ Add another kernel to this one. diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index 64d14018..e7bee6c2 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -24,6 +24,13 @@ class Static(Kern): def gradients_X_diag(self, dL_dKdiag, X): return np.zeros(X.shape) + def gradients_XX(self, dL_dK, X, X2): + if X2 is None: + X2 = X + return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) + def gradients_XX_diag(self, dL_dKdiag, X): + return np.zeros(X.shape) + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return np.zeros(Z.shape) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index ff0d6855..3ac703fe 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -137,20 +137,6 @@ class BayesianGPLVM(SparseGP_MPI): fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) - def plot_magnification(self, labels=None, which_indices=None, - resolution=50, ax=None, marker='o', s=40, - fignum=None, legend=True, - plot_limits=None, - aspect='auto', updates=False, **kwargs): - import sys - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
-        from ..plotting.matplot_dep import dim_reduction_plots
-
-        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
-                                                    resolution, ax, marker, s,
-                                                    fignum, False, legend,
-                                                    plot_limits, aspect, updates, **kwargs)
-
     def do_test_latents(self, Y):
         """
         Compute the latent representation for a set of new points Y
diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py
index 6db46a81..a36f168d 100644
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@@ -9,7 +9,8 @@ import itertools
 try:
     import Tango
     from matplotlib.cm import get_cmap
-    import pylab as pb
+    from matplotlib import pyplot as pb
+    from matplotlib import cm
 except:
     pass
@@ -137,7 +138,7 @@ def plot_latent(model, labels=None, which_indices=None,
     view = ImshowController(ax, plot_function,
                            (xmin, ymin, xmax, ymax),
                            resolution, aspect=aspect, interpolation='bilinear',
-                           cmap=pb.cm.binary, **imshow_kwargs)
+                           cmap=cm.binary, **imshow_kwargs)
 
     # make sure labels are in order of input:
     labels = np.asarray(labels)
@@ -192,18 +193,18 @@ def plot_latent(model, labels=None, which_indices=None,
 
     if updates:
         try:
-            ax.figure.canvas.show()
+            fig.canvas.show()
         except Exception as e:
             print("Could not invoke show: {}".format(e))
-        raw_input('Enter to continue')
-        view.deactivate()
+        #raw_input('Enter to continue')
+        return view
     return ax
 
 def plot_magnification(model, labels=None, which_indices=None,
                 resolution=60, ax=None, marker='o', s=40,
                 fignum=None, plot_inducing=False, legend=True,
                 plot_limits=None,
-                aspect='auto', updates=False):
+                aspect='auto', updates=False, mean=True, covariance=True, kern=None):
     """
     :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
     :param resolution: the resolution of the grid on which to evaluate the predictive variance
@@ -211,6 +212,8 @@ def plot_magnification(model, labels=None, which_indices=None,
     if ax is None:
         fig = pb.figure(num=fignum)
         ax = fig.add_subplot(111)
+    else:
+        fig = ax.figure
 
     Tango.reset()
     if labels is None:
@@ -295,13 +298,13 @@ def plot_magnification(model, labels=None, which_indices=None,
     def plot_function(x):
         Xtest_full = np.zeros((x.shape[0], X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
-        mf = model.predict_magnification(Xtest_full)
+        mf = model.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance)
         return mf
 
     view = ImshowController(ax, plot_function,
                            (xmin, ymin, xmax, ymax),
                            resolution, aspect=aspect, interpolation='bilinear',
-                           cmap=pb.cm.gray)
+                           cmap=cm.gray)
 
     # make sure labels are in order of input:
     ulabels = []
@@ -317,7 +320,7 @@ def plot_magnification(model, labels=None, which_indices=None,
         elif type(ul) is np.int64:
             this_label = 'class %i' % ul
         else:
-            this_label = 'class %i' % i
+            this_label = unicode(ul)
         m = marker.next()
         index = np.nonzero(labels == ul)[0]
@@ -327,7 +330,7 @@ def plot_magnification(model, labels=None, which_indices=None,
         else:
             x = X[index, input_1]
             y = X[index, input_2]
-            ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
+            ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)
 
     ax.set_xlabel('latent dimension %i' % input_1)
     ax.set_ylabel('latent dimension %i' % input_2)
@@ -337,18 +340,27 @@ def plot_magnification(model, labels=None, which_indices=None,
         ax.set_xlim((xmin, xmax))
         ax.set_ylim((ymin, ymax))
 
-    ax.grid(b=False) # remove the grid if present, it doesn't look good
-    ax.set_aspect('auto') # set a nice aspect ratio
-
-    if plot_inducing:
+    if plot_inducing and hasattr(model, 'Z'):
         Z = model.Z
         ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7)
 
-    if updates:
-        fig.canvas.show()
-        raw_input('Enter to continue')
+    try:
+        fig.canvas.draw()
+        fig.tight_layout()
+        fig.canvas.draw()
+    except Exception as e:
+        print("Could not invoke tight layout: {}".format(e))
+        pass
 
-    pb.title('Magnification Factor')
+    if updates:
+        try:
+            fig.canvas.draw()
+            fig.canvas.show()
+        except Exception as e:
+            print("Could not invoke show: {}".format(e))
+        #raw_input('Enter to continue')
+        return view
     return ax
diff --git a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py
index 62b622c5..15fcd098 100644
--- a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py
+++ b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py
@@ -9,6 +9,9 @@ class AxisEventController(object):
     def __init__(self, ax):
         self.ax = ax
         self.activate()
+    def __del__(self):
+        self.deactivate()
+        return self
     def deactivate(self):
         for cb_class in self.ax.callbacks.callbacks.values():
             for cb_num in cb_class.keys():
@@ -81,9 +84,9 @@ class BufferedAxisChangedController(AxisChangedController):
     def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs):
         """
         Buffered axis changed controller. Controls the buffer and handles update events for when the axes changed.
-        
+
         Updated plotting will be after first reload (first time will be within plot limits, after that the limits will be buffered)
-        
+
         :param plot_function: function to use for creating image for plotting (return ndarray-like)
             plot_function gets called with (2D!) Xtest grid if replotting required
diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py
index e8d2456e..a21dc84e 100644
--- a/GPy/util/__init__.py
+++ b/GPy/util/__init__.py
@@ -16,4 +16,5 @@ from . import diag
 from . import initialization
 from . import multioutput
 from . import linalg_gpu
+from . import parallel
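
Note on the gp.py API change above: predict_wishard_embedding() and predict_magnification() now take mean and covariance flags, so the embedding G (the mean-Jacobian term plus the output-scaled variance term) can be restricted to either contribution. Since the plot_magnification overrides are removed from SparseGP and BayesianGPLVM, both classes now fall back to the GP implementation, which forwards plot_inducing and kern. A minimal usage sketch under assumed settings (the toy data, latent dimension, seed and optimizer arguments below are illustrative only, not part of this patch):

    import numpy as np
    import GPy

    np.random.seed(0)
    Y = np.random.randn(40, 5)          # toy observations: 40 points, 5 output dims
    m = GPy.models.BayesianGPLVM(Y, 2)  # 2-d latent space, default kernel
    m.optimize(max_iters=100, messages=False)

    Xnew = np.random.randn(10, 2)       # query points in the latent space

    # full embedding: mean-Jacobian term plus covariance term, shape (10, 2, 2)
    G = m.predict_wishard_embedding(Xnew)

    # the new keywords select the individual terms of G
    mf_mean = m.predict_magnification(Xnew, covariance=False)  # mean term only
    mf_cov = m.predict_magnification(Xnew, mean=False)         # covariance term only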