fixed merge conflict on BPGLVM

2026-05-30 14:35:15 +02:00 · 2013-04-12 13:35:00 +01:00 · 2013-04-12 13:35:00 +01:00 · a42d84274d
commit a42d84274d
parent c8d64a4a69 51ff92e591
10 changed files with 367 additions and 67 deletions
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -3,29 +3,30 @@
 import numpy as np
 import pylab as pb
-from matplotlib import pyplot as plt
+from matplotlib import pyplot as plt, pyplot
 import GPy
 from GPy.models.mrd import MRD
 default_seed = np.random.seed(123344)
-def BGPLVM(seed = default_seed):
+def BGPLVM(seed=default_seed):
    N = 10
    M = 3
    Q = 2
    D = 4
-    #generate GPLVM-like data
+    # generate GPLVM-like data
    X = np.random.rand(N, Q)
    k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
    K = k.K(X)
-    Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+    Y = np.random.multivariate_normal(np.zeros(N), K, D).T
-    k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
+    k = GPy.kern.linear(Q, ARD=True) + GPy.kern.white(Q)
    # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
    # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
    # k = GPy.kern.rbf(Q, ARD = False)  + GPy.kern.white(Q, 0.00001)
-    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
+    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
    m.constrain_positive('(rbf|bias|noise|white|S)')
    # m.constrain_fixed('S', 1)
@ -38,44 +39,44 @@ def BGPLVM(seed = default_seed):
    # pb.title('After optimisation')
    m.ensure_default_constraints()
    m.randomize()
-    m.checkgrad(verbose = 1)
+    m.checkgrad(verbose=1)
    return m
-def GPLVM_oil_100(optimize=True,M=15):
+def GPLVM_oil_100(optimize=True, M=15):
    data = GPy.util.datasets.oil_100()
    # create simple GP model
-    kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
+    kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
    m = GPy.models.GPLVM(data['X'], 6, kernel=kernel, M=M)
    m.data_labels = data['Y'].argmax(axis=1)
    # optimize
    m.ensure_default_constraints()
    if optimize:
-        m.optimize('scg',messages=1)
+        m.optimize('scg', messages=1)
    # plot
    print(m)
    m.plot_latent(labels=m.data_labels)
    return m
-def BGPLVM_oil(optimize=True,N=100,Q=10,M=15):
+def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
    data = GPy.util.datasets.oil()
    # create simple GP model
-    kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q,0.001)
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001)
-    m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel = kernel,M=M)
+    m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=kernel, M=M)
    m.data_labels = data['Y'][:N].argmax(axis=1)
    # optimize
    if optimize:
-        m.constrain_fixed('noise',0.05)
+        m.constrain_fixed('noise', 0.05)
        m.ensure_default_constraints()
-        m.optimize('scg',messages=1)
+        m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
        m.unconstrain('noise')
        m.constrain_positive('noise')
-        m.optimize('scg',messages=1)
+        m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
    else:
        m.ensure_default_constraints()
@ -83,7 +84,7 @@ def BGPLVM_oil(optimize=True,N=100,Q=10,M=15):
    print(m)
    m.plot_latent(labels=m.data_labels)
    pb.figure()
-    pb.bar(np.arange(m.kern.D),1./m.input_sensitivity())
+    pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity())
    return m
 def oil_100():
@ -96,7 +97,52 @@ def oil_100():
    # plot
    print(m)
-    #m.plot_latent(labels=data['Y'].argmax(axis=1))
+    # m.plot_latent(labels=data['Y'].argmax(axis=1))
    return m
 def mrd_simulation():
    # num = 2
    ard1 = np.array([1., 1, 0, 0], dtype=float)
    ard2 = np.array([0., 1, 1, 0], dtype=float)
    ard1[ard1 == 0] = 1E-10
    ard2[ard2 == 0] = 1E-10
 #     ard1i = 1. / ard1
 #     ard2i = 1. / ard2
 #     k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
 #     Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
 #     Y1 -= Y1.mean(0)
 #
 #     k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
 #     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
 #     Y2 -= Y2.mean(0)
 #     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
    D1, D2, N, M, Q = 50, 100, 150, 15, 4
    x = np.linspace(0, 2 * np.pi, N)[:, None]
    s1 = np.vectorize(lambda x: np.sin(x))
    s2 = np.vectorize(lambda x: np.cos(x))
    sS = np.vectorize(lambda x: np.sin(2 * x))
    S1 = np.hstack([s1(x), sS(x)])
    S2 = np.hstack([s2(x), sS(x)])
    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False)
    m.ensure_default_constraints()
 #     fig = pyplot.figure("expected", figsize=(8, 3))
 #     ax = fig.add_subplot(121)
 #     ax.bar(np.arange(ard1.size) + .1, ard1)
 #     ax = fig.add_subplot(122)
 #     ax.bar(np.arange(ard2.size) + .1, ard2)
    return m
 def brendan_faces():
@ -109,7 +155,7 @@ def brendan_faces():
    m.optimize(messages=1, max_f_eval=10000)
    ax = m.plot_latent()
-    y = m.likelihood.Y[0,:]
+    y = m.likelihood.Y[0, :]
    data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
    lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
    raw_input('Press enter to finish')
@ -126,10 +172,39 @@ def stick():
    m.optimize(messages=1, max_f_eval=10000)
    ax = m.plot_latent()
-    y = m.likelihood.Y[0,:]
+    y = m.likelihood.Y[0, :]
    data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
    lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
    raw_input('Press enter to finish')
    plt.close('all')
    return m
 # def BGPLVM_oil():
 #     data = GPy.util.datasets.oil()
 #     Y, X = data['Y'], data['X']
 #     X -= X.mean(axis=0)
 #     X /= X.std(axis=0)
 #
 #     Q = 10
 #     M = 30
 #
 #     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
 #     m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
 #     # m.scale_factor = 100.0
 #     m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
 #     from sklearn import cluster
 #     km = cluster.KMeans(M, verbose=10)
 #     Z = km.fit(m.X).cluster_centers_
 #     # Z = GPy.util.misc.kmm_init(m.X, M)
 #     m.set('iip', Z)
 #     m.set('bias', 1e-4)
 #     # optimize
 #     # m.ensure_default_constraints()
 #
 #     import pdb; pdb.set_trace()
 #     m.optimize('tnc', messages=1)
 #     print m
 #     m.plot_latent(labels=data['Y'].argmax(axis=1))
 #     return m
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
        iteration += 1
        if display:
-            print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
+            print 'Iteration: {0:<5g}  Objective:{1:< 12g}  Scale:{2:< 12g}\r'.format(iteration, fnow, beta),
            # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
            sys.stdout.flush()
        if success:
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@ -52,7 +52,7 @@ class kern(parameterised):
        parameterised.__init__(self)
-    def plot_ARD(self):
+    def plot_ARD(self, ax=pb.gca()):
        """
        If an ARD kernel is present, it bar-plots the ARD parameters
@ -60,17 +60,17 @@ class kern(parameterised):
        """
        for p in self.parts:
            if hasattr(p, 'ARD') and p.ARD:
-                pb.figure()
+                ax.set_title('ARD parameters, %s kernel' % p.name)
                pb.title('ARD parameters, %s kernel' % p.name)
                if p.name == 'linear':
                    ard_params = p.variances
                else:
                    ard_params = 1./p.lengthscale
-                pb.bar(np.arange(len(ard_params))-0.4, ard_params)
+                ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
-
+                ax.set_xticks(np.arange(len(ard_params)),
-
+                              ["${}$".format(i + 1) for i in range(len(ard_params))])
        return ax
    def _transform_gradients(self,g):
        x = self._get_params()
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@ -173,7 +173,7 @@ class rbf(kernpart):
        """Think N,M,M,Q """
        self._psi_computations(Z,mu,S)
        tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_mu += -2.*(dL_dpsi2[:,:,:,None]*tmp*self._psi2_mudist).sum(1).sum(1)
        target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
@ -207,7 +207,6 @@ class rbf(kernpart):
        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
            #something's changed. recompute EVERYTHING
            #TODO: make more efficient for large Q (using NDL's dot product trick)
            #psi1
            self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
            self._psi1_dist = Z[None,:,:]-mu[:,None,:]
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@ -22,7 +22,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
    :type init: 'PCA'|'random'
    """
-    def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
        if X == None:
            X = self.initialise_latent(init, Q, Y)
@ -31,7 +31,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        if Z is None:
            Z = np.random.permutation(X.copy())[:M]
-        assert Z.shape[1]==X.shape[1]
+        assert Z.shape[1] == X.shape[1]
        if kernel is None:
            kernel = kern.rbf(Q) + kern.white(Q)
@ -40,8 +40,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
    def _get_param_names(self):
-        X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
+        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
-        S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
+        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
        return (X_names + S_names + sparse_GP._get_param_names(self))
    def _get_params(self):
@ -56,35 +56,43 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        """
        return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
-    def _set_params(self,x):
+    def _set_params(self, x):
        N, Q = self.N, self.Q
-        self.X = x[:self.X.size].reshape(N,Q).copy()
+        self.X = x[:self.X.size].reshape(N, Q).copy()
-        self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
+        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
-        sparse_GP._set_params(self, x[(2*N*Q):])
+        sparse_GP._set_params(self, x[(2 * N * Q):])
    def dKL_dmuS(self):
        dKL_dS = (1. - (1. / self.X_variance)) * 0.5
        dKL_dmu = self.X
        return dKL_dmu, dKL_dS
    def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance)
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
-        dKL_dS = (1. - (1./self.X_variance))*0.5
+        return dL_dmu, dL_dS
        dKL_dmu = self.X
        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
    def KL_divergence(self):
        var_mean = np.square(self.X).sum()
        var_S = np.sum(self.X_variance - np.log(self.X_variance))
-        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
+        return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
    def log_likelihood(self):
        return sparse_GP.log_likelihood(self) - self.KL_divergence()
    def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
+        dKL_dmu, dKL_dS = self.dKL_dmuS()
        dL_dmu, dL_dS = self.dL_dmuS()
        # TODO: find way to make faster
        dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
-    def plot_latent(self, which_indices=None,*args, **kwargs):
+    def plot_latent(self, which_indices=None, *args, **kwargs):
        if which_indices is None:
            try:
@ -93,7 +101,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
        else:
            input_1, input_2 = which_indices
-        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
+        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
        ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
        return ax
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@ -60,7 +60,7 @@ class GPLVM(GP):
        mu, var, upper, lower = self.predict(Xnew)
        pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
-    def plot_latent(self,labels=None, which_indices=None, resolution=50):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()):
        """
        :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
        :param resolution: the resolution of the grid on which to evaluate the predictive variance
@ -90,7 +90,7 @@ class GPLVM(GP):
        Xtest_full[:, :2] = Xtest
        mu, var, low, up = self.predict(Xtest_full)
        var = var[:, :1] 
-        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],
+        ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
        for i,ul in enumerate(np.unique(labels)):
@ -110,15 +110,15 @@ class GPLVM(GP):
                y = self.X[index,input_2]
            pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
-        pb.xlabel('latent dimension %i'%input_1)
+        ax.set_xlabel('latent dimension %i' % input_1)
-        pb.ylabel('latent dimension %i'%input_2)
+        ax.set_ylabel('latent dimension %i' % input_2)
        if not np.all(labels==1.):
-            pb.legend(loc=0,numpoints=1)
+            ax.legend(loc=0, numpoints=1)
-        pb.xlim(xmin[0],xmax[0])
+        ax.set_xlim(xmin[0], xmax[0])
-        pb.ylim(xmin[1],xmax[1])
+        ax.set_ylim(xmin[1], xmax[1])
-        pb.grid(b=False) # remove the grid if present, it doesn't look good
+        ax.grid(b=False)  # remove the grid if present, it doesn't look good
-        ax = pb.gca()
+        # ax = pb.gca()
        ax.set_aspect('auto') # set a nice aspect ratio
        return ax
--- a/GPy/models/init.py
+++ b/GPy/models/init.py
@ -11,4 +11,7 @@ from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from Bayesian_GPLVM import Bayesian_GPLVM
 import mrd
 MRD = mrd.MRD
 del mrd
 from generalized_FITC import generalized_FITC
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -0,0 +1,180 @@
 '''
 Created on 10 Apr 2013
@author: Max Zwiessele
 '''
 from GPy.core import model
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
 import numpy
 from GPy.models.sparse_GP import sparse_GP
 import itertools
 from matplotlib import pyplot
 import pylab
 class MRD(model):
    """
    Do MRD on given Datasets in Ylist.
    All Ys in Ylist are in [N x Dn], where Dn can be different per Yn,
    N must be shared across datasets though.
    :param Ylist...: observed datasets
    :type Ylist: [np.ndarray]
    :param names: names for different gplvm models
    :type names: [str]
    :param Q: latent dimensionality
    :type Q: int
    :param init: initialisation method for the latent space
    :type init: 'PCA'|'random'
    :param X:
        Initial latent space
    :param X_variance:
        Initial latent space variance
    :param init: [PCA|random]
        initialization method to use
    :param M:
        number of inducing inputs to use
    :param Z:
        initial inducing inputs
    :param kernel:
        kernel to use
    """
    def __init__(self, *Ylist, **kwargs):
        self._debug = False
        if kwargs.has_key("_debug"):
            self._debug = kwargs['_debug']
            del kwargs['_debug']
        if kwargs.has_key("names"):
            self.names = kwargs['names']
            del kwargs['names']
        else:
            self.names = ["{}".format(i + 1) for i in range(len(Ylist))]
        if kwargs.has_key('kernel'):
            kernel = kwargs['kernel']
            k = lambda: kernel.copy()
            del kwargs['kernel']
        else:
            k = lambda: None
        self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), **kwargs) for Y in Ylist]
        self.gref = self.bgplvms[0]
        nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
        self.nparams = nparams.cumsum()
        self.Q = self.gref.Q
        self.N = self.gref.N
        self.NQ = self.N * self.Q
        self.M = self.gref.M
        self.MQ = self.M * self.Q
        model.__init__(self)  # @UndefinedVariable
    def _get_param_names(self):
        # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
        # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
        n1 = self.gref._get_param_names()
        n1var = n1[:self.NQ * 2 + self.MQ]
        map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
                                         itertools.izip(ns,
                                                        itertools.repeat(name)))
        return list(itertools.chain(n1var, *(map_names(\
                sparse_GP._get_param_names(g)[self.MQ:], n) \
                for g, n in zip(self.bgplvms, self.names))))
    def _get_params(self):
        """
        return parameter list containing private and shared parameters as follows:
        =================================================================
        | mu | S | Z || theta1 | theta2 | .. | thetaN |
        =================================================================
        """
        X = self.gref.X.flatten()
        X_var = self.gref.X_variance.flatten()
        Z = self.gref.Z.flatten()
        thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
        params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
        return params
    def _set_var_params(self, g, X, X_var, Z):
        g.X = X
        g.X_variance = X_var
        g.Z = Z
    def _set_kern_params(self, g, p):
        g.kern._set_params(p[:g.kern.Nparam])
        g.likelihood._set_params(p[g.kern.Nparam:])
    def _set_params(self, x):
        start = 0; end = self.NQ
        X = x[start:end].reshape(self.N, self.Q).copy()
        start = end; end += start
        X_var = x[start:end].reshape(self.N, self.Q).copy()
        start = end; end += self.MQ
        Z = x[start:end].reshape(self.M, self.Q).copy()
        thetas = x[end:]
        # set params for all others:
        for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
            self._set_var_params(g, X, X_var, Z)
            self._set_kern_params(g, thetas[s:e].copy())
            g._compute_kernel_matrices()
            g._computations()
    def log_likelihood(self):
        ll = +self.gref.KL_divergence()
        for g in self.bgplvms:
            ll -= sparse_GP.log_likelihood(g)
        return -ll
    def _log_likelihood_gradients(self):
        dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
        dKLmu, dKLdS = self.gref.dKL_dmuS()
        dLdmu -= dKLmu
        dLdS -= dKLdS
        dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
        dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
        return numpy.hstack((dLdmuS,
                             dldzt1,
                numpy.hstack([numpy.hstack([g.dL_dtheta(),
                                            g.likelihood._gradients(\
                                                partial=g.partial_for_likelihood)]) \
                              for g in self.bgplvms])))
    def plot_X(self):
        fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
        fig.clf()
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            ax.imshow(g.X)
        pylab.draw()
        fig.tight_layout()
        return fig
    def plot_predict(self):
        fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
        fig.clf()
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            ax.imshow(g.predict(g.X)[0])
        pylab.draw()
        fig.tight_layout()
        return fig
    def plot_scales(self, *args, **kwargs):
        fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            g.kern.plot_ARD(ax=ax, *args, **kwargs)
        pylab.draw()
        fig.tight_layout()
        return fig
    def plot_latent(self, *args, **kwargs):
        fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            g.plot_latent(ax=ax, *args, **kwargs)
        pylab.draw()
        fig.tight_layout()
        return fig
--- a/GPy/testing/mrd_tests.py
+++ b/GPy/testing/mrd_tests.py
@ -0,0 +1,32 @@
 # Copyright (c) 2013, Max Zwiessele
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 '''
 Created on 10 Apr 2013
@author: maxz
 '''
 import unittest
 import numpy as np
 import GPy
 class MRDTests(unittest.TestCase):
    def test_gradients(self):
        num_m = 3
        N, M, Q, D = 20, 8, 5, 50
        X = np.random.rand(N, Q)
        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q)
        K = k.K(X)
        Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
        m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
        m.ensure_default_constraints()
        m.randomize()
        self.assertTrue(m.checkgrad())
 if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@ -4,7 +4,7 @@ import GPy
 import numpy as np
 class lvm:
-    def __init__(self, model, data_visualize, latent_axis):
+    def __init__(self, model, data_visualize, latent_axis, latent_index=[0,1], latent_dim=2):
        self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click)
        self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move)
        self.data_visualize = data_visualize
@ -12,6 +12,8 @@ class lvm:
        self.latent_axis = latent_axis
        self.called = False
        self.move_on = False
        self.latent_index = latent_index
        self.latent_dim = latent_dim
    def on_click(self, event):
        #print 'click', event.xdata, event.ydata
@ -32,7 +34,8 @@ class lvm:
        if self.called and self.move_on:
            # Call modify code on move
            #print 'move', event.xdata, event.ydata
-            latent_values = np.array((event.xdata, event.ydata))
+            latent_values = np.zeros((1,self.latent_dim))
            latent_values[0,self.latent_index] = np.array([event.xdata, event.ydata])
            y = self.model.predict(latent_values)[0]
            self.data_visualize.modify(y)
            #print 'y', y
@ -57,7 +60,7 @@ class vector_show(data_show):
    def __init__(self, vals, axis=None):
        data_show.__init__(self, vals, axis)
        self.vals = vals.T
-        self.handle = plt.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
+        self.handle = self.axis.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
    def modify(self, vals):
        xdata, ydata = self.handle.get_data()