merge devel branch in

2026-07-23 17:01:06 +02:00 · 2014-05-21 10:38:34 +01:00 · 2014-05-21 10:38:34 +01:00 · 52c0be1848
commit 52c0be1848
parent a2203179f6 b520eb212c
21 changed files with 595 additions and 134 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -180,40 +180,80 @@ class GP(Model):

        return Ysim

-    def plot_f(self, *args, **kwargs):
+    def plot_f(self, plot_limits=None, which_data_rows='all',
+        which_data_ycols='all', fixed_inputs=None,
+        levels=20, samples=0, fignum=None, ax=None, resolution=None,
+        plot_raw=True,
+        linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
        """
-
-        Plot the GP's view of the world, where the data is normalized and
-        before applying a likelihood.
-
-        This is a convenience function: arguments are passed to
-        GPy.plotting.matplot_dep.models_plots.plot_f_fit
-
+        Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
+        This is a call to plot with plot_raw=True; see plot for the meaning of the arguments.
+        Data will not be plotted in this, as the GP's view of the world
+        may live in another space, or units than the data.
        """
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import models_plots
-        return models_plots.plot_fit_f(self,*args,**kwargs)
+        # fixed_inputs defaults to None instead of a [] shared between all calls
+        if fixed_inputs is None:
+            fixed_inputs = []
+        colours = (('linecol', linecol), ('fillcol', fillcol))
+        kw = dict((key, col) for key, col in colours if col is not None)  # unset colours keep plot_fit's defaults
+        return models_plots.plot_fit(self, plot_limits, which_data_rows,
+                                     which_data_ycols, fixed_inputs,
+                                     levels, samples, fignum, ax, resolution,
+                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
+                                     data_symbol=data_symbol, **kw)

-    def plot(self, *args, **kwargs):
+    def plot(self, plot_limits=None, which_data_rows='all',
+        which_data_ycols='all', fixed_inputs=None,
+        levels=20, samples=0, fignum=None, ax=None, resolution=None,
+        plot_raw=False,
+        linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
        """
        Plot the posterior of the GP.
-          - In one dimension, the function is plotted with a shaded region
-            identifying two standard deviations.
-          - In two dimsensions, a contour-plot shows the mean predicted
-            function
-          - In higher dimensions, use fixed_inputs to plot the GP  with some of
-            the inputs fixed.
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimensions, a contour-plot shows the mean predicted function
+          - In higher dimensions, use fixed_inputs to plot the GP  with some of the inputs fixed.

        Can plot only part of the data and part of the posterior functions
-        using which_data_rows which_data_ycols and which_parts
-
-        This is a convenience function: arguments are passed to
-        GPy.plotting.matplot_dep.models_plots.plot_fit
+        using which_data_rows and which_data_ycols.

+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+        :type plot_limits: np.array
+        :param which_data_rows: which of the training data to plot (default all)
+        :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
+        :param which_data_ycols: when the data has several columns (independent outputs), only plot these
+        :type which_data_ycols: 'all' or a list of integers
+        :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. None (default) fixes no input.
+        :type fixed_inputs: a list of tuples
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param levels: for 2D plotting, the number of contour levels to use
+        :type levels: int
+        :param samples: the number of a posteriori samples to plot
+        :type samples: int
+        :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on. If ax is None, create a new figure
+        :type ax: axes handle
+        :param plot_raw: if True, plot the GP before the likelihood is applied (this is what plot_f does)
+        :param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
+        :param fillcol: color of fill [Tango.colorsHex['lightBlue']]
+        :param Y_metadata: passed on unchanged to models_plots.plot_fit
+        :param data_symbol: passed on unchanged to models_plots.plot_fit (default 'kx')
        """
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import models_plots
-        return models_plots.plot_fit(self,*args,**kwargs)
+        # fixed_inputs defaults to None instead of a [] shared between all calls
+        if fixed_inputs is None:
+            fixed_inputs = []
+        colours = (('linecol', linecol), ('fillcol', fillcol))
+        kw = dict((key, col) for key, col in colours if col is not None)  # unset colours keep plot_fit's defaults
+        return models_plots.plot_fit(self, plot_limits, which_data_rows,
+                                     which_data_ycols, fixed_inputs,
+                                     levels, samples, fignum, ax, resolution,
+                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
+                                     data_symbol=data_symbol, **kw)

    def input_sensitivity(self):
        """
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@ -7,6 +7,20 @@ import numpy
 from numpy.lib.function_base import vectorize
 from lists_and_dicts import IntArrayDict

+def extract_properties_to_index(index, props):
+    """
+    Group the entries of `index` by property: `props[i]` holds the properties
+    of `index[i]`. Returns a dict mapping property -> integer numpy array.
+    """
+    collected = {}
+    for position, names in enumerate(props):
+        for name in names:
+            collected.setdefault(name, []).append(index[position])
+    for name in list(collected):
+        collected[name] = numpy.array(collected[name], dtype=int)
+    return collected
+
+
 class ParameterIndexOperations(object):
    '''
    Index operations for storing param index _properties
@ -66,8 +80,34 @@ class ParameterIndexOperations(object):
        return self._properties.values()

    def properties_for(self, index):
+        """
+        For every element of `index`, collect the properties whose stored
+        indices contain that element; the result has one entry per element.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_for([2,3,5])
+        [['one'], ['one', 'two'], ['two']]
+        """
        return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)

+    def properties_to_index_dict(self, index):
+        """
+        Build a dictionary with one key per property that applies to at
+        least one element of `index`. The value stored under a property
+        holds the elements of `index` carrying it, as an integer numpy
+        array (see extract_properties_to_index). Elements of `index`
+        without any property do not show up in the result.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_to_index_dict([2,3,5])
+        {'one':[2,3], 'two':[3,5]}
+        """
+        return extract_properties_to_index(index, self.properties_for(index))
+
    def add(self, prop, indices):
        self._properties[prop] = combine_indices(self._properties[prop], indices)

@ -174,8 +214,32 @@ class ParameterIndexOperationsView(object):


    def properties_for(self, index):
+        """
+        For every element of `index`, collect the properties whose stored
+        indices contain that element; the result has one entry per element.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_for([2,3,5])
+        [['one'], ['one', 'two'], ['two']]
+        """
        return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)

+    def properties_to_index_dict(self, index):
+        """
+        Map every property applying to at least one element of `index` to
+        the elements of `index` carrying it (as an integer numpy array).
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_to_index_dict([2,3,5])
+        {'one':[2,3], 'two':[3,5]}
+        """
+        per_element = self.properties_for(index)
+        return extract_properties_to_index(index, per_element)
+

    def add(self, prop, indices):
        self._param_index_ops.add(prop, indices+self._offset)
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED,
 import numpy as np
 import re

-__updated__ = '2014-05-15'
+__updated__ = '2014-05-20'

 class HierarchyError(Exception):
    """
@ -50,11 +50,24 @@ class Observable(object):
    self as only argument to all its observers.
    """
    _updated = True
+    _updates = True
    def __init__(self, *args, **kwargs):
        super(Observable, self).__init__()
        from lists_and_dicts import ObserverList
        self.observers = ObserverList()

+    @property
+    def updates(self):
+        """
+        Whether observers are notified about changes. The flag is kept on the
+        highest parent; each read copies it into this object's _updates.
+        """
+        top = self._highest_parent_
+        self._updates = top._updates
+        return self._updates
+
+    @updates.setter
+    def updates(self, ups):
+        """
+        Switch updates on (True) or off (False) by setting the flag on the
+        highest parent. Switching them on calls _trigger_params_changed once.
+
+        :param bool ups: the new state; anything but a bool fails the assertion
+        """
+        assert isinstance(ups, bool), "updates are either on (True) or off (False)"
+        self._highest_parent_._updates = ups
+        if not ups:
+            return
+        self._trigger_params_changed()
+
    def add_observer(self, observer, callble, priority=0):
        """
        Add an observer `observer` with the callback `callble` 
@ -91,6 +104,8 @@ class Observable(object):
        :param min_priority: only notify observers with priority > min_priority
                             if min_priority is None, notify all observers in order
        """
+        if not self.updates:
+            return
        if which is None:
            which = self
        if min_priority is None:
@ -309,6 +324,7 @@ class Indexable(Nameable, Observable):
        self._default_constraint_ = default_constraint
        from index_operations import ParameterIndexOperations
        self.constraints = ParameterIndexOperations()
+        self._old_constraints = ParameterIndexOperations()
        self.priors = ParameterIndexOperations()
        if self._default_constraint_ is not None:
            self.constrain(self._default_constraint_)
@ -371,8 +387,10 @@ class Indexable(Nameable, Observable):
        """
        if value is not None:
            self[:] = value
-        reconstrained = self.unconstrain()
-        index = self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)
+
+        index = self._raveled_index()
+        # reconstrained = self.unconstrain()
+        index = self._add_to_index_operations(self.constraints, index, __fixed__, warning)
        self._highest_parent_._set_fixed(self, index)
        self.notify_observers(self, None if trigger_parent else -np.inf)
        return index
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -272,8 +272,11 @@ class Parameterized(Parameterizable):
    def __setattr__(self, name, val):
        # override the default behaviour, if setting a param, so broadcasting can by used
        if hasattr(self, "parameters"):
-            pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
-            if name in pnames: self.parameters[pnames.index(name)][:] = val; return
+            try:
+                pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
+                if name in pnames: self.parameters[pnames.index(name)][:] = val; return
+            except AttributeError:
+                # NOTE(review): presumably parameter_names can fail on an object that is
+                # not fully set up yet (e.g. while unpickling) -- confirm. In that case
+                # the value is stored as a plain attribute below.
+                pass
        object.__setattr__(self, name, val);

    #===========================================================================
@ -281,11 +284,14 @@ class Parameterized(Parameterizable):
    #===========================================================================
    def __setstate__(self, state):
        super(Parameterized, self).__setstate__(state)
-        self._connect_parameters()
-        self._connect_fixes()
-        self._notify_parent_change()
+        # Best effort: any exception raised while reconnecting the restored
+        # object is printed as a warning and swallowed, so the steps after the
+        # failing one are skipped and the object may be only partly connected.
+        try:
+            self._connect_parameters()
+            self._connect_fixes()
+            self._notify_parent_change()
+            self.parameters_changed()
+        except Exception as e:
+            print "WARNING: caught exception {!s}, trying to continue".format(e)

-        self.parameters_changed()
    def copy(self):
        c = super(Parameterized, self).copy()
        c._connect_parameters()
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -66,7 +66,11 @@ class SparseGP(GP):
            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
            self.Z.gradient += self.kern.gradients_Z_expectations(
-                               self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
+                               self.grad_dict['dL_dpsi0'], 
+                               self.grad_dict['dL_dpsi1'], 
+                               self.grad_dict['dL_dpsi2'], 
+                               Z=self.Z, 
+                               variational_posterior=self.X)
        else:
            #gradients wrt kernel
            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -303,9 +303,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
    k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)

    inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
-    m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
-    m.inference_method = VarDTCMissingData()
-    m.Y[inan] = _np.nan
+    Y[inan] = _np.nan
+
+    m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, 
+                      inference_method=VarDTCMissingData(inan=inan), kernel=k)
+
    m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
    m.likelihood.variance = .01
    m.parameters_changed()
@ -338,7 +340,40 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
        print "Optimizing Model:"
        m.optimize(messages=verbose, max_iters=8e3, gtol=.1)
    if plot:
-        m.plot_X_1d("MRD Latent Space 1D")
+        m.X.plot("MRD Latent Space 1D")
+        m.plot_scales("MRD Scales")
+    return m
+
+def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
+    """
+    MRD on simulated data with missing values.
+
+    Each entry of every simulated dataset is replaced by NaN with
+    probability 0.6; every dataset gets its own VarDTCMissingData
+    inference method, which is handed the corresponding NaN mask.
+
+    :param optimize: run the BFGS optimization of the model
+    :param verbose: passed as `messages` to the optimizer
+    :param plot: call m.X.plot and m.plot_scales on the model
+    :param plot_sim: passed on to _simulate_sincos
+    :param kw: additional keyword arguments for the MRD constructor
+    :returns: the MRD model
+    """
+    from GPy import kern
+    from GPy.models import MRD
+    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
+
+    D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
+    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
+
+    k = kern.Linear(Q, ARD=True)
+
+    # draw one boolean mask per dataset and blank out the masked entries in place
+    masks = []
+    for Y in Ylist:
+        mask = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
+        masks.append(mask)
+        Y[mask] = _np.nan
+
+    # one inference method per dataset, each knowing its own mask
+    imlist = [VarDTCMissingData(limit=1, inan=mask) for mask in masks]
+
+    m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
+            kernel=k, inference_method=imlist,
+            initx="random", initz='permute', **kw)
+
+    if optimize:
+        print "Optimizing Model:"
+        m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
+    if plot:
+        m.X.plot("MRD Latent Space 1D")
        m.plot_scales("MRD Scales")
    return m

@ -483,21 +518,22 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
    Q = 6
    kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) 
    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
-    
+
    m.data = data
    m.likelihood.variance = 0.001
-    
+
    # optimize
-    if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300)
+    if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
    if plot:
-        plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
+        fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
        plt.sca(latent_axes)
        m.plot_latent(ax=latent_axes)
        y = m.Y[:1, :].copy()
        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
-        GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
-        plt.draw()
-        #raw_input('Press enter to finish')
+        dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
+        fig.canvas.draw()
+        fig.canvas.show()
+        raw_input('Press enter to finish')

    return m

--- a/GPy/inference/latent_function_inference/init.py
+++ b/GPy/inference/latent_function_inference/init.py
@ -38,6 +38,25 @@ class LatentFunctionInference(object):
        """
        pass

+class InferenceMethodList(LatentFunctionInference, list):
+    """
+    A list of inference methods which can be used like a single one: the
+    optimization start/end hooks are forwarded to every entry, in list order.
+    """
+
+    def on_optimization_start(self):
+        """Call on_optimization_start() on every contained inference method."""
+        for inf in self:
+            inf.on_optimization_start()
+
+    def on_optimization_end(self):
+        """Call on_optimization_end() on every contained inference method."""
+        for inf in self:
+            inf.on_optimization_end()
+
+    def __getstate__(self):
+        """Return the contained inference methods as a plain list."""
+        return list(self)
+
+    def __setstate__(self, state):
+        """
+        Restore the entries from `state`, replacing the current content.
+
+        pickle and copy already restore the items of a list subclass before
+        they call __setstate__, so appending `state` here would leave every
+        entry in the list twice.
+        """
+        self[:] = state

 from exact_gaussian_inference import ExactGaussianInference
 from laplace import Laplace
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@ -95,7 +95,7 @@ class Posterior(object):
        """
        if self._covariance is None:
            #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
-            self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
+            self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
            #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
        return self._covariance

--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@ -202,6 +202,17 @@ class VarDTCMissingData(LatentFunctionInference):
    def set_limit(self, limit):
        self._Y.limit = limit

+    def __getstate__(self):
+        """
+        Return (cache limit, NaN mask) as the picklable state.
+
+        Has to be overridden, as Cacher objects cannot be pickled.
+        """
+        cache_limit = self._Y.limit
+        return (cache_limit, self._inan)
+
+    def __setstate__(self, state):
+        """
+        Restore limit and NaN mask from `state` (as returned by __getstate__)
+        and build a fresh Cacher around _subarray_computations.
+
+        Has to be overridden, as Cacher objects cannot be pickled.
+        """
+        from ...util.caching import Cacher
+        limit = state[0]
+        self.limit = limit
+        self._inan = state[1]
+        self._Y = Cacher(self._subarray_computations, limit)
+
    def _subarray_computations(self, Y):
        if self._inan is None:
            inan = np.isnan(Y)
@ -272,7 +283,11 @@ class VarDTCMissingData(LatentFunctionInference):
            else: beta = beta_all

            VVT_factor = (beta*y)
-            VVT_factor_all[v, ind].flat = VVT_factor.flat
+            try:
+                VVT_factor_all[v, ind].flat = VVT_factor.flat
+            except ValueError:
+                mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
+                VVT_factor_all.flat[mult] = VVT_factor
            output_dim = y.shape[1]

            psi0 = psi0_all[v]
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@ -134,7 +134,7 @@ class Add(CombinationKernel):
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
                else:
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
-            target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
+            target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
        return target

    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
--- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py
+++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py
@ -0,0 +1,170 @@
+"""
+The module for psi-statistics for RBF kernel
+"""
+
+import numpy as np
+from . import PSICOMP
+from GPy.util.caching import Cache_this
+from ....util.misc import param_to_array
+from scipy import weave
+from ....util.config import *
+
+class PSICOMP_RBF(PSICOMP):
+    """Psi-statistics (psi0, psi1, psi2) of the RBF kernel and their derivatives, computed with numpy."""
+    @Cache_this(limit=1, ignore_args=(0,))
+    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
+        """
+        Compute the psi-statistics for inducing inputs Z (MxQ) under the
+        variational posterior with mean mu (NxQ) and variance S (NxQ).
+        Returns psi0 (N, filled with variance), psi1 (NxM) and psi2 (MxM,
+        the per-datapoint NxMxM statistics summed over N).
+        """
+        # mean and variance of the variational posterior over the inputs
+        # (psi0 only needs the number of datapoints N from them;
+        # psi1 and psi2 are delegated to the helpers below)
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+        
+        psi0 = np.empty(mu.shape[0])
+        psi0[:] = variance
+        psi1 = self._psi1computations(variance, lengthscale, Z, mu, S)
+        psi2 = self._psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
+        return psi0, psi1, psi2
+    
+    @Cache_this(limit=1, ignore_args=(0,))
+    def _psi1computations(self, variance, lengthscale, Z, mu, S):
+        """
+        Z - MxQ (inducing inputs)
+        mu - NxQ (variational posterior mean)
+        S - NxQ (variational posterior variance)
+        Returns psi1 - NxM
+        """
+        # psi1[n,m] = variance * exp(-0.5*(sum_q log(S[n,q]/l_q^2 + 1)
+        #                                  + sum_q (mu[n,q]-Z[m,q])^2/(S[n,q]+l_q^2)))
+        # where l is the lengthscale
+    
+        lengthscale2 = np.square(lengthscale)
+    
+        # psi1
+        _psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
+        _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
+        _psi1 = variance*np.exp(_psi1_log)
+            
+        return _psi1
+    
+    @Cache_this(limit=1, ignore_args=(0,))
+    def _psi2computations(self, variance, lengthscale, Z, mu, S):
+        """
+        Z - MxQ (inducing inputs)
+        mu - NxQ (variational posterior mean)
+        S - NxQ (variational posterior variance)
+        Returns psi2 per datapoint - NxMxM (not yet summed over N)
+        """
+        # here are the "statistics" for psi2
+        # Produced intermediate results:
+        # _psi2                NxMxM
+        
+        lengthscale2 = np.square(lengthscale)
+        
+        _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
+        _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
+        Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
+        denom = 1./(2.*S+lengthscale2)
+        _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
+        _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
+        
+    
+        return _psi2
+    
+    @Cache_this(limit=1, ignore_args=(0,1,2,3))
+    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
+        ARD = (len(lengthscale)!=1)
+        # (ARD: more than one lengthscale entry) gradients of psi1 and psi2 w.r.t. all parameters:
+        dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
+        dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
+        # psi0 equals variance for every datapoint, so its contribution is the plain sum of dL_dpsi0
+        dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
+        # a single shared (non-ARD) lengthscale collects the gradient of all input dimensions
+        dL_dlengscale = dl_psi1 + dl_psi2
+        if not ARD:
+            dL_dlengscale = dL_dlengscale.sum()
+
+        dL_dmu = dmu_psi1 + dmu_psi2
+        dL_dS = dS_psi1 + dS_psi2
+        dL_dZ = dZ_psi1 + dZ_psi2
+        # returned in the order: variance, lengthscale, Z, mu, S
+        return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
+    
+    def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S):
+        """
+        dL_dpsi1 - NxM
+        Z - MxQ
+        mu - NxQ
+        S - NxQ
+        Returns the gradients w.r.t. (variance, lengthscale, Z, mu, S)
+        """
+        # here are the "statistics" for psi1
+        # Produced intermediate results: dL_dparams w.r.t. psi1
+        # _dL_dvar          1
+        # _dL_dl            Q
+        # _dL_dZ            MxQ
+        # _dL_dmu           NxQ
+        # _dL_dS            NxQ
+        # (Lpsi1 = dL_dpsi1*psi1 is the factor shared by all of them)
+        
+        lengthscale2 = np.square(lengthscale)
+        
+        _psi1 = self._psi1computations(variance, lengthscale, Z, mu, S)
+        Lpsi1 = dL_dpsi1*_psi1
+        Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
+        denom = 1./(S+lengthscale2)
+        Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
+        _dL_dvar = Lpsi1.sum()/variance
+        _dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
+        _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
+        _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
+        _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
+        
+        return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
+    
+    def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S):
+        """
+        Z - MxQ
+        mu - NxQ
+        S - NxQ
+        dL_dpsi2 - MxM
+        Returns the gradients w.r.t. (variance, lengthscale, Z, mu, S)
+        """
+        # here are the "statistics" for psi2
+        # Produced the derivatives w.r.t. psi2:
+        # _dL_dvar           1
+        # _dL_dl             Q
+        # _dL_dZ             MxQ
+        # _dL_dmu            NxQ
+        # _dL_dS             NxQ
+        # (Lpsi2 = dL_dpsi2[None,:,:]*psi2, NxMxM, is the factor shared by all of them)
+        
+        lengthscale2 = np.square(lengthscale)
+        denom = 1./(2*S+lengthscale2)
+        denom2 = np.square(denom)
+
+        _psi2 = self._psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
+        
+        Lpsi2 = dL_dpsi2[None,:,:]*_psi2
+        Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
+        Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
+        Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
+        Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
+        Lpsi2Zhat = Lpsi2Z
+        Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
+        
+        _dL_dvar = Lpsi2sum.sum()*2/variance
+        _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
+        _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
+        _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
+                 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
+        _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
+                                 (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
+    
+        return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
+    
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -180,7 +180,7 @@ class Stationary(Kern):
        return np.zeros(X.shape)

    def input_sensitivity(self):
+        """Return np.ones(input_dim) divided by the squared lengthscale."""
-        return np.ones(self.input_dim)/self.lengthscale
+        return np.ones(self.input_dim)/self.lengthscale**2

 class Exponential(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -82,8 +82,8 @@ class BayesianGPLVM(SparseGP):
    def plot_latent(self, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=True, legend=True,
-                plot_limits=None,
-                aspect='auto', updates=False, **kwargs):
+                plot_limits=None, 
+                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import dim_reduction_plots
@ -91,7 +91,7 @@ class BayesianGPLVM(SparseGP):
        return dim_reduction_plots.plot_latent(self, labels, which_indices,
                resolution, ax, marker, s,
                fignum, plot_inducing, legend,
-                plot_limits, aspect, updates, **kwargs)
+                plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)

    def do_test_latents(self, Y):
        """
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -9,16 +9,25 @@ from ..core import Model
 from ..kern import Kern
 from ..core.parameterization.variational import NormalPosterior, NormalPrior
 from ..core.parameterization import Param, Parameterized
+from ..core.parameterization.observable_array import ObsAr
 from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
+from ..inference.latent_function_inference import InferenceMethodList
 from ..likelihoods import Gaussian
-from GPy.util.initialization import initialize_latent
+from ..util.initialization import initialize_latent
+from ..core.sparse_gp import SparseGP, GP

-class MRD(Model):
+class MRD(SparseGP):
    """
+    !WARNING: This is bleeding edge code and still in development. 
+    Functionality may change fundamentally during development!
+
    Apply MRD to all given datasets Y in Ylist.

    Y_i in [n x p_i]

+    If Ylist is a dictionary, the keys of the dictionary are the names, and the
+    values are the different datasets to compare. 
+
    The samples n in the datasets need
    to match up, whereas the dimensionality p_d can differ.

@ -39,40 +48,71 @@ class MRD(Model):
    :param num_inducing: number of inducing inputs to use
    :param Z: initial inducing inputs
    :param kernel: list of kernels or kernel to copy for each output
-    :type kernel: [GPy.kern.kern] | GPy.kern.kern | None (default)
-    :param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use
-    :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use
+    :type kernel: [GPy.kern.Kern] | GPy.kern.Kern | None (default)
+    :param :class:`~GPy.inference.latent_function_inference` inference_method:
+        InferenceMethodList of inference methods, or one inference method for all
+    :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihoods: the likelihoods to use, one per dataset
    :param str name: the name of this model
    :param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
    """
    def __init__(self, Ylist, input_dim, X=None, X_variance=None,
                 initx = 'PCA', initz = 'permute',
                 num_inducing=10, Z=None, kernel=None,
-                 inference_method=None, likelihood=None, name='mrd', Ynames=None):
-        super(MRD, self).__init__(name)
+                 inference_method=None, likelihoods=None, name='mrd', Ynames=None):
+        super(GP, self).__init__(name)

        self.input_dim = input_dim
        self.num_inducing = num_inducing

-        self.Ylist = Ylist
+        if isinstance(Ylist, dict):
+            Ynames, Ylist = zip(*Ylist.items())
+
+        self.Ylist = [ObsAr(Y) for Y in Ylist]
+
+        if Ynames is None:
+            Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
+        self.names = Ynames
+        assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
+
+        if inference_method is None:
+            self.inference_method= InferenceMethodList()
+            warned = False
+            for y in Ylist:
+                inan = np.isnan(y)
+                if np.any(inan):
+                    if not warned:
+                        print "WARING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X"
+                        warned = True
+                    self.inference_method.append(VarDTCMissingData(limit=1, inan=inan))
+                else:
+                    self.inference_method.append(VarDTC(limit=1))
+        else:
+            if not isinstance(inference_method, InferenceMethodList):
+                inference_method = InferenceMethodList(inference_method)
+            self.inference_method = inference_method
+
+
        self._in_init_ = True
-        X, fracs = self._init_X(initx, Ylist)
+        if X is None:
+            X, fracs = self._init_X(initx, Ylist)
+        else:
+            fracs = [X.var(0)]*len(Ylist)
        self.Z = Param('inducing inputs', self._init_Z(initz, X))
        self.num_inducing = self.Z.shape[0] # ensure M==N if M>N

        # sort out the kernels
        if kernel is None:
            from ..kern import RBF
-            self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))]
+            self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
        elif isinstance(kernel, Kern):
-            self.kern = []
+            self.kernels = []
            for i in range(len(Ylist)):
                k = kernel.copy()
-                self.kern.append(k)
+                self.kernels.append(k)
        else:
            assert len(kernel) == len(Ylist), "need one kernel per output"
            assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
-            self.kern = kernel
+            self.kernels = kernel

        if X_variance is None:
            X_variance = np.random.uniform(0.1, 0.2, X.shape)
@ -80,32 +120,27 @@ class MRD(Model):
        self.variational_prior = NormalPrior()
        self.X = NormalPosterior(X, X_variance)

-        if likelihood is None:
-            self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
-        else: self.likelihood = likelihood
-
-        if inference_method is None:
-            self.inference_method= []
-            for y in Ylist:
-                if np.any(np.isnan(y)):
-                    self.inference_method.append(VarDTCMissingData(limit=1))
-                else:
-                    self.inference_method.append(VarDTC(limit=1))
-        else:
-            self.inference_method = inference_method
-            self.inference_method.set_limit(len(Ylist))
+        if likelihoods is None:
+            self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
+        else: self.likelihoods = likelihoods

        self.add_parameters(self.X, self.Z)

-        if Ynames is None:
-            Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
+        self.bgplvms = []
+        self.num_data = Ylist[0].shape[0]
+
+        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
+            assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"

-        for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood):
            p = Parameterized(name=n)
            p.add_parameter(k)
+            p.kern = k
            p.add_parameter(l)
-            setattr(self, 'Y{}'.format(i), p)
+            p.likelihood = l
            self.add_parameter(p)
+            self.bgplvms.append(p)
+
+        self.posterior = None
        self._in_init_ = False

    def parameters_changed(self):
@ -114,13 +149,13 @@ class MRD(Model):
        self.Z.gradient[:] = 0.
        self.X.gradient[:] = 0.

-        for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
+        for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
            posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)

            self.posteriors.append(posterior)
            self._log_marginal_likelihood += lml

-            # likelihood gradients
+            # likelihood gradients
            l.update_gradients(grad_dict.pop('dL_dthetaL'))

            #gradients wrt kernel
@ -133,13 +168,20 @@ class MRD(Model):
            #gradients wrt Z
            self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
            self.Z.gradient += k.gradients_Z_expectations(
-                               grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
+                               grad_dict['dL_dpsi0'], 
+                               grad_dict['dL_dpsi1'], 
+                               grad_dict['dL_dpsi2'], 
+                               Z=self.Z, variational_posterior=self.X)

            dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
            self.X.mean.gradient += dL_dmean
            self.X.variance.gradient += dL_dS

        # update for the KL divergence
+        self.posterior = self.posteriors[0]
+        self.kern = self.kernels[0]
+        self.likelihood = self.likelihoods[0]
+
        self.variational_prior.update_gradients_KL(self.X)
        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

@ -151,7 +193,7 @@ class MRD(Model):
            Ylist = self.Ylist
        if init in "PCA_concat":
            X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
-            fracs = [fracs]*self.input_dim
+            fracs = [fracs]*len(Ylist)
        elif init in "PCA_single":
            X = np.zeros((Ylist[0].shape[0], self.input_dim))
            fracs = []
@ -162,7 +204,7 @@ class MRD(Model):
        else: # init == 'random':
            X = np.random.randn(Ylist[0].shape[0], self.input_dim)
            fracs = X.var(0)
-            fracs = [fracs]*self.input_dim
+            fracs = [fracs]*len(Ylist)
        X -= X.mean()
        X /= X.std()
        return X, fracs
@ -181,6 +223,7 @@ class MRD(Model):
            fig = pylab.figure(num=fignum)
        sharex_ax = None
        sharey_ax = None
+        plots = []
        for i, g in enumerate(self.bgplvms):
            try:
                if sharex:
@ -197,26 +240,36 @@ class MRD(Model):
                ax = axes[i]
            else:
                raise ValueError("Need one axes per latent dimension input_dim")
-            plotf(i, g, ax)
+            plots.append(plotf(i, g, ax))
            if sharey_ax is not None:
                pylab.setp(ax.get_yticklabels(), visible=False)
        pylab.draw()
        if axes is None:
-            fig.tight_layout()
-            return fig
-        else:
-            return pylab.gcf()
+            try:
+                fig.tight_layout()
+            except:
+                pass
+        return plots

-    def plot_X(self, fignum=None, ax=None):
-        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X))
-        return fig
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0):
+        """
+        Prediction for data set Yindex[default=0].
+        This predicts the output mean and variance for the dataset given in Ylist[Yindex]
+        """
+        self.posterior = self.posteriors[Yindex]
+        self.kern = self.kernels[Yindex]
+        self.likelihood = self.likelihoods[Yindex]
+        return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)

-    def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
-        fig = self._handle_plotting(fignum,
-                                    ax,
-                                    lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs),
-                                    sharex=sharex, sharey=sharey)
-        return fig
+    #===============================================================================
+    # TODO: Predict! Maybe even change to several bgplvms, which share an X?
+    #===============================================================================
+    #     def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
+    #         fig = self._handle_plotting(fignum,
+    #                                     ax,
+    #                                     lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs),
+    #                                     sharex=sharex, sharey=sharey)
+    #         return fig

    def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
        """
@ -228,28 +281,58 @@ class MRD(Model):
        """
        if titles is None:
            titles = [r'${}$'.format(name) for name in self.names]
-        ymax = reduce(max, [np.ceil(max(g.input_sensitivity())) for g in self.bgplvms])
+        ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms])
        def plotf(i, g, ax):
            ax.set_ylim([0,ymax])
-            g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
+            return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
        fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
        return fig

-    def plot_latent(self, fignum=None, ax=None, *args, **kwargs):
-        fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
-        return fig
+    def plot_latent(self, labels=None, which_indices=None,
+                resolution=50, ax=None, marker='o', s=40,
+                fignum=None, plot_inducing=True, legend=True,
+                plot_limits=None, 
+                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
+        """
+        See plotting.matplot_dep.dim_reduction_plots.plot_latent.
+        If predict_kwargs contains no 'Yindex', the latent space of the 0th dataset (and kernel) is plotted;
+        give predict_kwargs=dict(Yindex=index) to plot the latent space of the dataset at that index.
+        """
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import dim_reduction_plots
+        if "Yindex" not in predict_kwargs:
+            predict_kwargs['Yindex'] = 0
+        if ax is None:
+            fig = pylab.figure(num=fignum)
+            ax = fig.add_subplot(111)
+        else:
+            fig = ax.figure
+        plot = dim_reduction_plots.plot_latent(self, labels, which_indices,
+                                        resolution, ax, marker, s,
+                                        fignum, plot_inducing, legend,
+                                        plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
+        ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name)
+        try:
+            fig.tight_layout()
+        except:
+            pass

-    def _debug_plot(self):
-        self.plot_X_1d()
-        fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
-        fig.clf()
-        axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
-        self.plot_X(ax=axes)
-        axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
-        self.plot_latent(ax=axes)
-        axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
-        self.plot_scales(ax=axes)
-        pylab.draw()
-        fig.tight_layout()
+        return plot

+    def __getstate__(self):
+        # TODO:
+        import copy
+        state = copy.copy(self.__dict__)
+        del state['kernels']
+        del state['kern']
+        del state['likelihood']
+        return state

+    def __setstate__(self, state):
+        # TODO:
+        super(MRD, self).__setstate__(state)
+        self.kernels = [p.kern for p in self.bgplvms]
+        self.kern = self.kernels[0]
+        self.likelihood = self.likelihoods[0]
+        self.parameters_changed()
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@ -31,7 +31,7 @@ def plot_latent(model, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=False, legend=True,
                plot_limits=None, 
-                aspect='auto', updates=False, **kwargs):
+                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
    """
    :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
    :param resolution: the resolution of the grid on which to evaluate the predictive variance
@ -60,7 +60,7 @@ def plot_latent(model, labels=None, which_indices=None,
    def plot_function(x):
        Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
        Xtest_full[:, [input_1, input_2]] = x
-        _, var = model.predict(Xtest_full)
+        _, var = model.predict(Xtest_full, **predict_kwargs)
        var = var[:, :1]
        return np.log(var)

@ -81,7 +81,7 @@ def plot_latent(model, labels=None, which_indices=None,
    view = ImshowController(ax, plot_function,
                            (xmin, ymin, xmax, ymax),
                            resolution, aspect=aspect, interpolation='bilinear',
-                            cmap=pb.cm.binary, **kwargs)
+                            cmap=pb.cm.binary, **imshow_kwargs)

    # make sure labels are in order of input:
    ulabels = []
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

        for d in which_data_ycols:
            plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
-            plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
+            if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)

        #optionally plot some samples
        if samples: #NOTE not tested with fixed_inputs
@ -151,7 +151,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
        for d in which_data_ycols:
            m_d = m[:,d].reshape(resolution, resolution).T
            plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
-            plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
+            if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)

        #set the limits of the plot to some sensible values
        ax.set_xlim(xmin[0], xmax[0])
--- a/GPy/plotting/matplot_dep/visualize.py
+++ b/GPy/plotting/matplot_dep/visualize.py
@ -88,7 +88,6 @@ class vector_show(matplotlib_show):


 class lvm(matplotlib_show):
-    
    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1], disable_drag=False):
        """Visualize a latent variable model

@ -150,7 +149,6 @@ class lvm(matplotlib_show):
        pass

    def on_click(self, event):
-        print 'click!'
        if event.inaxes!=self.latent_axes: return
        if self.disable_drag:
            self.move_on = True
@ -228,11 +226,10 @@ class lvm_dimselect(lvm):
        self.labels = labels
        lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
        self.show_sensitivities()
-        print "use left and right mouse butons to select dimensions"
+        print "use left and right mouse buttons to select dimensions"


    def on_click(self, event):
-
        if event.inaxes==self.sense_axes:
            new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1))
            if event.button == 1:
--- a/GPy/testing/pickle_tests.py
+++ b/GPy/testing/pickle_tests.py
@ -4,7 +4,8 @@ Created on 13 Mar 2014
@author: maxz
 '''
 import unittest, itertools
-import cPickle as pickle
+#import cPickle as pickle
+import pickle
 import numpy as np
 from GPy.core.parameterization.index_operations import ParameterIndexOperations,\
    ParameterIndexOperationsView
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -687,14 +687,20 @@ def hapmap3(data_set='hapmap3'):
        import bz2
    except ImportError as i:
        raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"
-    if not data_available(data_set):
-        download_data(data_set)
+
    dirpath = os.path.join(data_path,'hapmap3')
    hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
+    unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
+    unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files))
+
+    if not unpacked_files_exist and not data_available(data_set):
+        download_data(data_set)
+
    preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
                               ['.snps.pickle',
                                '.info.pickle',
                                '.nan.pickle']]
+
    if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)):
        if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB "
                            "of memory and can take a (very) long time, continue? [Y/n]"):
@ -708,8 +714,7 @@ def hapmap3(data_set='hapmap3'):
                                                               perc="="*int(20.*progress/100.))
            stdout.write(status); stdout.flush()
            return status
-        unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
-        if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)):
+        if not unpacked_files_exist:
            status=write_status('unpacking...', 0, '')
            curr = 0
            for newfilepath in unpacked_files:
@ -726,6 +731,7 @@ def hapmap3(data_set='hapmap3'):
                            status=write_status('unpacking...', curr+12.*file_processed/(file_size), status)
                curr += 12
                status=write_status('unpacking...', curr, status)
+                os.remove(filepath)
        status=write_status('reading .ped...', 25, status)
        # Preprocess data:    
        snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
@ -796,7 +802,7 @@ def hapmap3(data_set='hapmap3'):
 def singlecell(data_set='singlecell'):
    if not data_available(data_set):
        download_data(data_set)
-    
+
    from pandas import read_csv
    dirpath = os.path.join(data_path, data_set)
    filename = os.path.join(dirpath, 'singlecell.csv')
--- a/GPy/util/pca.py
+++ b/GPy/util/pca.py
@ -106,12 +106,14 @@ class pca(object):
                ulabels.append(lab)
        nlabels = len(ulabels)
        if colors is None:
-            colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
+            colors = iter([cmap(float(i) / nlabels) for i in range(nlabels)])
+        else:
+            colors = iter(colors)
        X_ = self.project(X, self.Q)[:,dimensions]
        kwargs.update(dict(s=s))
        plots = list()
        for i, l in enumerate(ulabels):
-            kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
+            kwargs.update(dict(color=colors.next(), marker=marker[i % len(marker)]))
            plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
        ax.set_xlabel(r"PC$_1$")
        ax.set_ylabel(r"PC$_2$")
--- a/GPy/util/subarray_and_sorting.py
+++ b/GPy/util/subarray_and_sorting.py
@ -4,7 +4,7 @@
 .. moduleauthor:: Max Zwiessele <ibinbei@gmail.com>

 '''
-__updated__ = '2013-12-02'
+__updated__ = '2014-05-20'

 import numpy as np