Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-07-26 17:11:06 +02:00 · 2013-06-03 12:06:14 +01:00 · 2013-06-03 12:06:14 +01:00 · fe3707f3d1
commit fe3707f3d1
parent 3847e584a3 85cada271e
24 changed files with 4334 additions and 236 deletions
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@ -3,4 +3,5 @@ Nicolo Fusi
 Ricardo Andrade
 Nicolas Durrande
 Alan Saul
+Max Zwiessele
 Neil D. Lawrence
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -447,7 +447,7 @@ class model(parameterised):
        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
        ll_change = epsilon + 1.
        iteration = 0
-        last_ll = -np.exp(1000)
+        last_ll = -np.inf

        convergence = False
        alpha = 0
--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@ -39,8 +39,8 @@ class logexp(transformation):
        return '(+ve)'

 class logexp_clipped(transformation):
-    max_bound = 1e250
-    min_bound = 1e-9
+    max_bound = 1e100
+    min_bound = 1e-10
    log_max_bound = np.log(max_bound)
    log_min_bound = np.log(min_bound)
    def __init__(self, lower=1e-6):
@ -49,15 +49,15 @@ class logexp_clipped(transformation):
    def f(self, x):
        exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
        f = np.log(1. + exp)
-        if np.isnan(f).any():
-            import ipdb;ipdb.set_trace()
-        return f
+#         if np.isnan(f).any():
+#             import ipdb;ipdb.set_trace()
+        return np.clip(f, self.min_bound, self.max_bound)
    def finv(self, f):
-        return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.)
+        return np.log(np.exp(f - 1.))
    def gradfactor(self, f):
        ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
        gf = (ef - 1.) / ef
-        return np.where(f < self.lower, 0, gf)
+        return gf # np.where(f < self.lower, 0, gf)
    def initialize(self, f):
        if np.any(f < 0.):
            print "Warning: changing parameters to satisfy constraints"
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -266,15 +266,19 @@ def bgplvm_simulation(optimize='scg',

    if optimize:
        print "Optimizing model:"
-        m.optimize('scg', max_iters=max_f_eval, max_f_eval=max_f_eval, messages=True)
+        m.optimize('bfgs', max_iters=max_f_eval,
+                   max_f_eval=max_f_eval,
+                   messages=True, gtol=1e-2)
    if plot:
        import pylab
        m.plot_X_1d()
-        pylab.figure(); pylab.axis(); m.kern.plot_ARD()
+        pylab.figure('BGPLVM Simulation ARD Parameters');
+        pylab.axis();
+        m.kern.plot_ARD()
    return m

 def mrd_simulation(optimize=True, plot_sim=False, **kw):
-    D1, D2, D3, N, M, Q = 150, 250, 30, 200, 3, 7
+    D1, D2, D3, N, M, Q = 150, 200, 400, 300, 3, 7
    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

    from GPy.models import mrd
@ -283,21 +287,21 @@ def mrd_simulation(optimize=True, plot_sim=False, **kw):

    reload(mrd); reload(kern)

-    k = kern.linear(Q, [0.01] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
-    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', **kw)
+    k = kern.linear(Q, [0.05] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="concat", initz='permute', **kw)

    for i, Y in enumerate(Ylist):
        m['{}_noise'.format(i + 1)] = Y.var() / 100.

-    # m.constrain('variance|noise', logexp_clipped())
+    # m.constrain('variance|noise', logexp_clipped(1e-6))
    m.ensure_default_constraints()

    # DEBUG
-    np.seterr("raise")
+    # np.seterr("raise")

    if optimize:
        print "Optimizing Model:"
-        m.optimize('scg', messages=1, max_iters=3e3)
+        m.optimize('bfgs', messages=1, max_iters=3e3)

    return m

@ -305,12 +309,13 @@ def brendan_faces():
    from GPy import kern
    data = GPy.util.datasets.brendan_faces()
    Q = 2
-    # Y = data['Y'][0:-1:2, :]
-    Y = data['Y']
+    Y = data['Y'][0:-1:10, :]
+    # Y = data['Y']
    Yn = Y - Y.mean()
    Yn /= Yn.std()

-    m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4)
+    m = GPy.models.GPLVM(Yn, Q)
+    # m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100)

    # optimize
    m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
@ -318,7 +323,7 @@ def brendan_faces():
    m.ensure_default_constraints()
    m.optimize('scg', messages=1, max_f_eval=10000)

-    ax = m.plot_latent()
+    ax = m.plot_latent(which_indices=(0,1))
    y = m.likelihood.Y[0, :]
    data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
    lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@ -25,6 +25,13 @@
 import numpy as np
 import sys

+
+def print_out(len_maxiters, display, fnow, current_grad, beta, iteration):
+    if display:
+        print '\r',
+        print '{0:>0{mi}g}  {1:> 12e}  {2:> 12e}  {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
+        sys.stdout.flush()
+
 def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
    """
    Optimisation through Scaled Conjugate Gradients (SCG)
@ -64,8 +71,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto

    iteration = 0

+    len_maxiters = len(str(maxiters))
    if display:
-        print ' {0:{mi}s}   {1:11s}    {2:11s}    {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters)))
+        print ' {0:{mi}s}   {1:11s}    {2:11s}    {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters)

    # Main optimization loop.
    while iteration < maxiters:
@ -97,7 +105,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto

        if function_eval >= max_f_eval:
            status = "Maximum number of function evaluations exceeded"
-            return x, flog, function_eval, status
+            break
+#             return x, flog, function_eval, status

        Delta = 2.*(fnew - fold) / (alpha * mu)
        if Delta >= 0.:
@ -113,17 +122,14 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
        flog.append(fnow) # Current function value

        iteration += 1
-        if display:
-            print '\r',
-            print '{0:>0{mi}g}  {1:> 12e}  {2:> 12e}  {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len(str(maxiters))),
-            # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
-            sys.stdout.flush()
+        print_out(len_maxiters, display, fnow, current_grad, beta, iteration)

        if success:
            # Test for termination
            if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
                status = 'converged'
-                return x, flog, function_eval, status
+                break
+#                 return x, flog, function_eval, status

            else:
                # Update variables for new position
@ -158,5 +164,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
        status = "maxiter exceeded"

    if display:
+        print_out(len_maxiters, display, fnow, current_grad, beta, iteration)
        print ""
    return x, flog, function_eval, status
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@ -56,6 +56,13 @@ class rbf(kernpart):
        self._Z, self._mu, self._S = np.empty(shape=(3,1))
        self._X, self._X2, self._params = np.empty(shape=(3,1))

+        #a set of optional args to pass to weave
+        self.weave_options = {'headers'           : ['<omp.h>'],
+                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
+                         'extra_link_args'   : ['-lgomp']}
+
+
+
    def _get_params(self):
        return np.hstack((self.variance,self.lengthscale))

@ -85,8 +92,43 @@ class rbf(kernpart):
        self._K_computations(X,X2)
        target[0] += np.sum(self._K_dvar*dL_dK)
        if self.ARD:
-            if X2 is None: X2 = X
-            [np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
+            dvardLdK = self._K_dvar*dL_dK
+            var_len3 = self.variance/np.power(self.lengthscale,3)
+            if X2 is None:
+                #save computation for the symmetrical case
+                dvardLdK += dvardLdK.T
+                code = """
+                int q,i,j;
+                double tmp;
+                for(q=0; q<D; q++){
+                  tmp = 0;
+                  for(i=0; i<N; i++){
+                    for(j=0; j<i; j++){
+                      tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
+                    }
+                  }
+                  target(q+1) += var_len3(q)*tmp;
+                }
+                """
+                N,M,D = X.shape[0], X.shape[0], self.D
+            else:
+                code = """
+                int q,i,j;
+                double tmp;
+                for(q=0; q<D; q++){
+                  tmp = 0;
+                  for(i=0; i<N; i++){
+                    for(j=0; j<M; j++){
+                      tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
+                    }
+                  }
+                  target(q+1) += var_len3(q)*tmp;
+                }
+                """
+                N,M,D = X.shape[0], X2.shape[0], self.D
+            #[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
+            weave.inline(code, arg_names=['N','M','D','X','X2','target','dvardLdK','var_len3'],
+                 type_converters=weave.converters.blitz,**self.weave_options)
        else:
            target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)

@ -227,10 +269,6 @@ class rbf(kernpart):
            self._Z, self._mu, self._S = Z, mu,S

    def weave_psi2(self,mu,Zhat):
-        weave_options = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
-                         'extra_link_args'   : ['-lgomp']}
-
        N,Q = mu.shape
        M = Zhat.shape[0]

@ -288,6 +326,6 @@ class rbf(kernpart):
        """
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N','M','Q','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'],
-                     type_converters=weave.converters.blitz,**weave_options)
+                     type_converters=weave.converters.blitz,**self.weave_options)

        return mudist,mudist_sq, psi2_exponent, psi2
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@ -319,7 +319,8 @@ class EP(likelihood):
            Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
            Diag = Diag0 * Iplus_Dprod_i
            P = Iplus_Dprod_i[:,None] * P0
-            L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T))
+            safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
+            L = jitchol(np.eye(M) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
            R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
            RPT = np.dot(R,P.T)
            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@ -53,7 +53,7 @@ class Gaussian(likelihood):
    def _set_params(self, x):
        x = float(x)
        if self._variance != x:
-            self.precision = 1. / max(x, 1e-6)
+            self.precision = 1. / x
            self.covariance_matrix = np.eye(self.N) * x
            self.V = (self.precision) * self.Y
            self._variance = x
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@ -14,6 +14,7 @@ import itertools
 from matplotlib.colors import colorConverter
 from matplotlib.figure import SubplotParams
 from GPy.inference.optimization import SCG
+from GPy.util import plot_latent

 class Bayesian_GPLVM(sparse_GP, GPLVM):
    """
@ -93,8 +94,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
        return x

+    def _clipped(self, x):
+        return x # np.clip(x, -1e300, 1e300)
+    
    def _set_params(self, x, save_old=True, save_count=0):
 #         try:
+            x = self._clipped(x)
            N, Q = self.N, self.Q
            self.X = x[:self.X.size].reshape(N, Q).copy()
            self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
@ -176,20 +181,10 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        # ========================
        self.dbound_dmuS = np.hstack((d_dmu, d_dS))
        self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
-        return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
+        return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)))

-    def plot_latent(self, which_indices=None, *args, **kwargs):
-
-        if which_indices is None:
-            try:
-                input_1, input_2 = np.argsort(self.input_sensitivity())[:2]
-            except:
-                raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
-        else:
-            input_1, input_2 = which_indices
-        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
-        ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
-        return ax
+    def plot_latent(self, *args, **kwargs):
+        plot_latent.plot_latent_indices(self, *args, **kwargs)

    def do_test_latents(self, Y):
        """
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@ -173,7 +173,7 @@ class GP(model):
        """
        # normalize X values
        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
-        mu, var = self._raw_predict(Xnew, which_parts=which_parts, full_cov=full_cov)
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)

        # now push through likelihood
        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@ -11,6 +11,8 @@ from ..util.linalg import pdinv, PCA
 from GP import GP
 from ..likelihoods import Gaussian
 from .. import util
+from GPy.util import plot_latent
+

 class GPLVM(GP):
    """
@ -60,75 +62,5 @@ class GPLVM(GP):
        mu, var, upper, lower = self.predict(Xnew)
        pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)

-    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
-        """
-        :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
-        :param resolution: the resolution of the grid on which to evaluate the predictive variance
-        """
-        if ax is None:
-            ax = pb.gca()
-        util.plot.Tango.reset()
-
-        if labels is None:
-            labels = np.ones(self.N)
-        if which_indices is None:
-            if self.Q==1:
-                input_1 = 0
-                input_2 = None
-            if self.Q==2:
-                input_1, input_2 = 0,1
-            else:
-                try:
-                    input_1, input_2 = np.argsort(self.input_sensitivity())[:2]
-                except:
-                    raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
-        else:
-            input_1, input_2 = which_indices
-
-        #first, plot the output variance as a function of the latent space
-        Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
-        Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
-        Xtest_full[:, :2] = Xtest
-        mu, var, low, up = self.predict(Xtest_full)
-        var = var[:, :1]
-        ax.imshow(var.reshape(resolution, resolution).T,
-                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
-
-        # make sure labels are in order of input:
-        ulabels = []
-        for lab in labels:
-            if not lab in ulabels:
-                ulabels.append(lab)
-
-        for i, ul in enumerate(ulabels):
-            if type(ul) is np.string_:
-                this_label = ul
-            elif type(ul) is np.int64:
-                this_label = 'class %i'%ul
-            else:
-                this_label = 'class %i'%i
-            if len(marker) == len(ulabels):
-                m = marker[i]
-            else:
-                m = marker
-
-            index = np.nonzero(labels==ul)[0]
-            if self.Q==1:
-                x = self.X[index,input_1]
-                y = np.zeros(index.size)
-            else:
-                x = self.X[index,input_1]
-                y = self.X[index,input_2]
-            ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), mew=1.3, label=this_label)
-
-        ax.set_xlabel('latent dimension %i'%input_1)
-        ax.set_ylabel('latent dimension %i'%input_2)
-
-        if not np.all(labels==1.):
-            ax.legend(loc=0,numpoints=1)
-
-        ax.set_xlim(xmin[0],xmax[0])
-        ax.set_ylim(xmin[1],xmax[1])
-        ax.grid(b=False) # remove the grid if present, it doesn't look good
-        ax.set_aspect('auto') # set a nice aspect ratio
-        return ax
+    def plot_latent(self, *args, **kwargs):
+        util.plot_latent.plot_latent(self, *args, **kwargs)
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -11,6 +11,7 @@ from scipy import linalg
 import numpy
 import itertools
 import pylab
+from GPy.kern.kern import kern

 class MRD(model):
    """
@ -19,7 +20,7 @@ class MRD(model):
    N must be shared across datasets though.

    :param likelihood_list...: likelihoods of observed datasets
-    :type likelihood_list: [GPy.likelihood]
+    :type likelihood_list: [GPy.likelihood] | [Y1..Yy]
    :param names: names for different gplvm models
    :type names: [str]
    :param Q: latent dimensionality (will raise 
@ -38,85 +39,33 @@ class MRD(model):
        number of inducing inputs to use
    :param Z:
        initial inducing inputs
-    :param kernel:
-        kernel to use
+    :param kernels: list of kernels or kernel shared for all BGPLVMS
+    :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
    """
-    def __init__(self,likelihood_list,Q,M=10,names=None,kernels=None,initX='PCA',initz='permute',_debug=False, **kwargs):
+    def __init__(self, likelihood_or_Y_list, Q, M=10, names=None,
+                 kernels=None, initx='PCA',
+                 initz='permute', _debug=False, **kw):
        if names is None:
-            self.names = ["{}".format(i + 1) for i in range(len(likelihood_list))]
+            self.names = ["{}".format(i + 1) for i in range(len(likelihood_or_Y_list))]

-        #sort out the kernels
+        # sort out the kernels
        if kernels is None:
-            kernels = [None]*len(likelihood_list)
-        elif isinstance(kernels,kern.kern):
-            kernels = [kernels.copy() for i in range(len(likelihood_list))]
+            kernels = [None] * len(likelihood_or_Y_list)
+        elif isinstance(kernels, kern):
+            kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
        else:
-            assert len(kernels)==len(likelihood_list), "need one kernel per output"
-            assert all([isinstance(k, kern.kern) for k in kernels]), "invalid kernel object detected!"
+            assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
+            assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
+        assert not ('kernel' in kw), "pass kernels through `kernels` argument"

        self.Q = Q
        self.M = M
-        self.N = self.gref.N
-        self.NQ = self.N * self.Q
-        self.MQ = self.M * self.Q
+        self._debug = _debug

        self._init = True
-        X = self._init_X(initx, likelihood_list)
+        X = self._init_X(initx, likelihood_or_Y_list)
        Z = self._init_Z(initz, X)
-        self.bgplvms = [Bayesian_GPLVM(l, k, X=X, Z=Z, M=self.M, **kwargs) for l,k in zip(likelihood_list,kernels)]
-
-        del self._init
-
-        self.gref = self.bgplvms[0]
-        nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
-        self.nparams = nparams.cumsum()
-
-        model.__init__(self) # @UndefinedVariable
-
-
-
-    def __init__(self, *likelihood_list, **kwargs):
-        if kwargs.has_key("_debug"):
-            self._debug = kwargs['_debug']
-            del kwargs['_debug']
-        else:
-            self._debug = False
-        if kwargs.has_key("names"):
-            self.names = kwargs['names']
-            del kwargs['names']
-        else:
-            self.names = ["{}".format(i + 1) for i in range(len(likelihood_list))]
-        if kwargs.has_key('kernel'):
-            kernel = kwargs['kernel']
-            k = lambda: kernel.copy()
-            del kwargs['kernel']
-        else:
-            k = lambda: None
-        if kwargs.has_key('initx'):
-            initx = kwargs['initx']
-            del kwargs['initx']
-        else:
-            initx = "PCA"
-        if kwargs.has_key('initz'):
-            initz = kwargs['initz']
-            del kwargs['initz']
-        else:
-            initz = "permute"
-        try:
-            self.Q = kwargs["Q"]
-        except KeyError:
-            raise ValueError("Need Q for MRD")
-        try:
-            self.M = kwargs["M"]
-            del kwargs["M"]
-        except KeyError:
-            self.M = 10
-
-        self._init = True
-        X = self._init_X(initx, likelihood_list)
-        Z = self._init_Z(initz, X)
-        self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), X=X, Z=Z, M=self.M, **kwargs) for Y in likelihood_list]
-        
+        self.bgplvms = [Bayesian_GPLVM(l, Q=Q, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
        del self._init

        self.gref = self.bgplvms[0]
@ -212,13 +161,6 @@ class MRD(model):
        X = self.gref.X.ravel()
        X_var = self.gref.X_variance.ravel()
        Z = self.gref.Z.ravel()
-
-        if self._debug:
-            for g in self.bgplvms:
-                assert numpy.allclose(g.X.ravel(), X)
-                assert numpy.allclose(g.X_variance.ravel(), X_var)
-                assert numpy.allclose(g.Z.ravel(), Z)
-
        thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
        params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
        return params
@ -241,12 +183,6 @@ class MRD(model):
        Z = x[start:end]
        thetas = x[end:]

-        if self._debug:
-            for g in self.bgplvms:
-                assert numpy.allclose(g.X, self.gref.X)
-                assert numpy.allclose(g.X_variance, self.gref.X_variance)
-                assert numpy.allclose(g.Z, self.gref.Z)
-
        # set params for all:
        for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
            g._set_params(numpy.hstack([X, X_var, Z, thetas[s:e]]))
@ -259,7 +195,7 @@ class MRD(model):
 #             g._computations()


-    def update_likelihood_approximation(self):#TODO: object oriented vs script base
+    def update_likelihood_approximation(self): # TODO: object oriented vs script base
        for bgplvm in self.bgplvms:
            bgplvm.update_likelihood_approximation()

@ -287,15 +223,21 @@ class MRD(model):
    def _init_X(self, init='PCA', likelihood_list=None):
        if likelihood_list is None:
            likelihood_list = self.likelihood_list
-        if init in "PCA_single":
-            X = numpy.zeros((likelihood_list[0].Y.shape[0], self.Q))
-            for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(likelihood_list)), likelihood_list):
-                X[:, qs] = PCA(Y.Y, len(qs))[0]
-        elif init in "PCA_concat":
-            X = PCA(numpy.hstack([l.Y for l in likelihood_list]), self.Q)[0]
-            #X = PCA(numpy.hstack(likelihood_list), self.Q)[0]
+        Ylist = []
+        for likelihood_or_Y in likelihood_list:
+            if type(likelihood_or_Y) is numpy.ndarray:
+                Ylist.append(likelihood_or_Y)
+            else:
+                Ylist.append(likelihood_or_Y.Y)
+        del likelihood_list
+        if init in "PCA_concat":
+            X = PCA(numpy.hstack(Ylist), self.Q)[0]
+        elif init in "PCA_single":
+            X = numpy.zeros((Ylist[0].shape[0], self.Q))
+            for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(Ylist)), Ylist):
+                X[:, qs] = PCA(Y, len(qs))[0]
        else: # init == 'random':
-            X = numpy.random.randn(likelihood_list[0].Y.shape[0], self.Q)
+            X = numpy.random.randn(Ylist[0].shape[0], self.Q)
        self.X = X
        return X

@ -334,7 +276,7 @@ class MRD(model):
        return fig

    def plot_predict(self, fig_num="MRD Predictions", axes=None, **kwargs):
-        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0],**kwargs))
+        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs))
        return fig

    def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs):
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@ -3,7 +3,7 @@

 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides,chol_inv
+from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
@ -149,9 +149,9 @@ class sparse_GP(GP):
        else:
            A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
            B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
-        C = -self.D * (np.sum(np.log(np.diag(self.LB))))  # + 0.5 * self.M * np.log(sf2))
+        C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
-        return A + B + C + D
+        return A + B + C + D + self.likelihood.Z

    def _set_params(self, p):
        self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
@ -173,19 +173,19 @@ class sparse_GP(GP):
        For a Gaussian likelihood, no iteration is required:
        this function does nothing
        """
-        if not isinstance(self.likelihood,Gaussian): #Updates not needed for Gaussian likelihood
-            self.likelihood.restart() #TODO check consistency with pseudo_EP
+        if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
+            self.likelihood.restart() # TODO check consistency with pseudo_EP
            if self.has_uncertain_inputs:
                Lmi = chol_inv(self.Lm)
                Kmmi = tdot(Lmi.T)
-                diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2,Kmmi)])
+                diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])

-                self.likelihood.fit_FITC(self.Kmm,self.psi1,diag_tr_psi2Kmmi) #This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
-                #raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
+                self.likelihood.fit_FITC(self.Kmm, self.psi1, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
+                # raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
            else:
                self.likelihood.fit_DTC(self.Kmm, self.psi1)
                # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
-                self._set_params(self._get_params())  # update the GP
+                self._set_params(self._get_params()) # update the GP

    def _log_likelihood_gradients(self):
        return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
@ -209,7 +209,7 @@ class sparse_GP(GP):
        """
        The derivative of the bound wrt the inducing inputs Z
        """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z)  # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
@ -229,21 +229,56 @@ class sparse_GP(GP):
            mu = np.dot(Kx.T, self.Cpsi1V)
            if full_cov:
                Kxx = self.kern.K(Xnew, which_parts=which_parts)
-                var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx)  # NOTE this won't work for plotting
+                var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
            else:
                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
                var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
        else:
            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
-            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)#, which_parts=which_parts) TODO: which_parts
+            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
            mu = np.dot(Kx, self.Cpsi1V)
            if full_cov:
                raise NotImplementedError, "TODO"
            else:
-                Kxx = self.kern.psi0(self.Z,Xnew,X_variance_new)
-                psi2 = self.kern.psi2(self.Z,Xnew,X_variance_new)
-                var = Kxx - np.sum(np.sum(psi2*Kmmi_LmiBLmi[None,:,:],1),1)
+                Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
+                psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
+                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)

        return mu, var[:, None]

+    def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+        """
+        Predict the function(s) at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.Q
+        :param X_variance_new: The uncertainty in the prediction points
+        :type X_variance_new: np.ndarray, Nnew x self.Q
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean,  a Numpy array, Nnew x self.D
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.D
+
+
+           If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
+           This is to allow for different normalizations of the output dimensions.
+
+        """
+        # normalize X values
+        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
+        if X_variance_new is not None:
+            X_variance_new = X_variance_new / self._Xstd ** 2
+
+        # here's the actual prediction by the GP model
+        mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
+
+        # now push through likelihood
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
+
+        return mean, var, _025pm, _975pm

--- a/GPy/util/init.py
+++ b/GPy/util/init.py
@ -13,3 +13,4 @@ import datasets
 import mocap
 import visualize
 import decorators
+import classification
--- a/GPy/util/classification.py
+++ b/GPy/util/classification.py
@ -0,0 +1,30 @@
+import numpy as np
+
+def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True):
+    """
+    Returns true and false positives in a binary classification problem
+    - Column names: true class of the examples
+    - Row names: classification assigned by the model
+
+    p: probabilities estimated for observation of belonging to class '1'
+    labels: observations' class
+    names: classes' names
+    threshold: probability value at which the model allocate an element to each class
+    show: whether the matrix should be shown or not
+    """
+    p = p.flatten()
+    labels = labels.flatten()
+    N = p.size
+    C = np.ones(N)
+    C[p<threshold] = 0
+    True_1 = float((labels - C)[labels-C==0].shape[0] )
+    False_1 = float((labels - C)[labels-C==-2].shape[0] )
+    True_0 = float((labels - C)[labels-C==-1].shape[0] )
+    False_0 = float((labels - C)[labels-C==1].shape[0] )
+    if show:
+        print (True_1 + True_0 + 0.)/N * 100,'% instances correctly classified'
+        print '%-10s|  %-10s|  %-10s| ' % ('',names[0],names[1])
+        print '----------|------------|------------|'
+        print '%-10s|  %-10s|  %-10s| ' % (names[0],True_1,False_0)
+        print '%-10s|  %-10s|  %-10s| ' % (names[1],False_1,True_0)
+    return True_1, False_1, True_0, False_0
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -19,7 +19,7 @@ def sample_class(f):

 def fetch_dataset(resource, save_name = None, save_file = True, messages = True):
    if messages:
-        print "Downloading resource: " , resource, " ... "
+        print "Downloading resource: " , resource, " ... ",
    response = url.urlopen(resource)
    # TODO: Some error checking...
    # ... 
--- a/GPy/util/datasets/mocap/cmu/README.txt
+++ b/GPy/util/datasets/mocap/cmu/README.txt
@ -0,0 +1 @@
+this otherwise empty directory is for storing mnocap data, which we don;t distribute
--- a/GPy/util/datasets/oil/DataTst.txt
+++ b/GPy/util/datasets/oil/DataTst.txt
--- a/GPy/util/datasets/oil/DataTstLbls.txt
+++ b/GPy/util/datasets/oil/DataTstLbls.txt
--- a/GPy/util/datasets/oil/DataVdn.txt
+++ b/GPy/util/datasets/oil/DataVdn.txt
--- a/GPy/util/datasets/oil/DataVdnLbls.txt
+++ b/GPy/util/datasets/oil/DataVdnLbls.txt
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -313,7 +313,10 @@ def symmetrify(A,upper=False):
    elif A.flags['F_CONTIGUOUS'] and not upper:
        weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
    else:
-        tmp = np.tril(A)
+        if upper:
+            tmp = np.tril(A.T)
+        else:
+            tmp = np.tril(A)
        A[:] = 0.0
        A += tmp
        A += np.tril(tmp,-1).T
--- a/GPy/util/mocap.py
+++ b/GPy/util/mocap.py
@ -702,15 +702,31 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di

    e.g.
    # Download the data, do not return anything
-    GPy.util.mocap.fetch_data(subj_motions = (['35'],[['01','02','03']]), return_motions = False)
+    GPy.util.mocap.fetch_data(subj_motions = ([35],[[1,2,3]]), return_motions = False)
    # Fetch and return the data in a list. Do not store them anywhere
-    GPy.util.mocap.fetch_data(subj_motions = (['35'],[['01','02','03']]), return_motions = True, store_motions = False)
+    GPy.util.mocap.fetch_data(subj_motions = ([35],[[1,2,3]]), return_motions = True, store_motions = False)

    In both cases above, if the data do exist in the given skel_store_dir and motion_store_dir, they are just loaded from there.
    '''
    
-    subjects = subj_motions[0]
-    motions = subj_motions[1]
+    subjectsNum = subj_motions[0]
+    motionsNum = subj_motions[1]
+
+    # Convert numbers to strings
+    subjects = []
+    motions = [list() for _ in range(len(subjectsNum))]
+    for i in range(len(subjectsNum)):
+        curSubj = str(int(subjectsNum[i]))
+        if subjectsNum[i] < 10:
+            curSubj = '0' + curSubj
+        subjects.append(curSubj)
+        for j in range(len(motionsNum[i])):
+            curMot = str(int(motionsNum[i][j]))
+            if motionsNum[i][j] < 10:
+                curMot = '0' + curMot
+            motions[i].append(curMot)
+
+
    all_skels = []
    
    assert len(subjects) == len(motions)
--- a/GPy/util/plot_latent.py
+++ b/GPy/util/plot_latent.py
@ -0,0 +1,91 @@
+import pylab as pb
+import numpy as np
+from .. import util
+
+def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
+    """
+    :param labels: a np.array of size model.N containing labels for the points (can be number, strings, etc)
+    :param resolution: the resolution of the grid on which to evaluate the predictive variance
+    """
+    if ax is None:
+        ax = pb.gca()
+    util.plot.Tango.reset()
+
+    if labels is None:
+        labels = np.ones(model.N)
+    if which_indices is None:
+        if model.Q==1:
+            input_1 = 0
+            input_2 = None
+        if model.Q==2:
+            input_1, input_2 = 0,1
+        else:
+            try:
+                input_1, input_2 = np.argsort(model.input_sensitivity())[:2]
+            except:
+                raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
+    else:
+        input_1, input_2 = which_indices
+
+    #first, plot the output variance as a function of the latent space
+    Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(model.X[:,[input_1, input_2]],resolution=resolution)
+    Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
+    Xtest_full[:, :2] = Xtest
+    mu, var, low, up = model.predict(Xtest_full)
+    var = var[:, :1]
+    ax.imshow(var.reshape(resolution, resolution).T,
+              extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
+
+    # make sure labels are in order of input:
+    ulabels = []
+    for lab in labels:
+        if not lab in ulabels:
+            ulabels.append(lab)
+
+    for i, ul in enumerate(ulabels):
+        if type(ul) is np.string_:
+            this_label = ul
+        elif type(ul) is np.int64:
+            this_label = 'class %i'%ul
+        else:
+            this_label = 'class %i'%i
+        if len(marker) == len(ulabels):
+            m = marker[i]
+        else:
+            m = marker
+
+        index = np.nonzero(labels==ul)[0]
+        if model.Q==1:
+            x = model.X[index,input_1]
+            y = np.zeros(index.size)
+        else:
+            x = model.X[index,input_1]
+            y = model.X[index,input_2]
+        ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
+
+    ax.set_xlabel('latent dimension %i'%input_1)
+    ax.set_ylabel('latent dimension %i'%input_2)
+
+    if not np.all(labels==1.):
+        ax.legend(loc=0,numpoints=1)
+
+    ax.set_xlim(xmin[0],xmax[0])
+    ax.set_ylim(xmin[1],xmax[1])
+    ax.grid(b=False) # remove the grid if present, it doesn't look good
+    ax.set_aspect('auto') # set a nice aspect ratio
+    return ax
+
+
+def plot_latent_indices(model, which_indices=None, *args, **kwargs):
+
+    if which_indices is None:
+        try:
+            input_1, input_2 = np.argsort(model.input_sensitivity())[:2]
+        except:
+            raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'"
+    else:
+        input_1, input_2 = which_indices
+    ax = plot_latent(model, which_indices=[input_1, input_2], *args, **kwargs)
+    # TODO: Here test if there are inducing points...
+    ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
+    return ax
				`@ -0,0 +1 @@`
				`this otherwise empty directory is for storing mnocap data, which we don;t distribute`