From f820e3b0bff71e41f0b3fdf3f887de538a7330d0 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Thu, 15 May 2014 18:47:20 +0100 Subject: [PATCH 01/15] [examples] stick demo --- GPy/examples/dimensionality_reduction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a15c2a93..a783cfaf 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -496,7 +496,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect']) GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) plt.draw() - #raw_input('Press enter to finish') + plt.show() + raw_input('Press enter to finish') return m From efc1f4413c11ce9f2e2880721f5e73a7e53f8641 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 16 May 2014 11:20:50 +0100 Subject: [PATCH 02/15] [ploting] dim reduction --- GPy/plotting/matplot_dep/dim_reduction_plots.py | 6 +++--- GPy/plotting/matplot_dep/models_plots.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index f8413671..0ba082df 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -31,7 +31,7 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True, plot_limits=None, - aspect='auto', updates=False, **kwargs): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive 
variance @@ -60,7 +60,7 @@ def plot_latent(model, labels=None, which_indices=None, def plot_function(x): Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full[:, [input_1, input_2]] = x - _, var = model.predict(Xtest_full) + _, var = model.predict(Xtest_full, **predict_kwargs) var = var[:, :1] return np.log(var) @@ -81,7 +81,7 @@ def plot_latent(model, labels=None, which_indices=None, view = ImshowController(ax, plot_function, (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', - cmap=pb.cm.binary, **kwargs) + cmap=pb.cm.binary, **imshow_kwargs) # make sure labels are in order of input: ulabels = [] diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 84747d05..8f3e55b0 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) - plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) #optionally plot some samples if samples: #NOTE not tested with fixed_inputs @@ -151,7 +151,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) 
+ if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) From 94c84a23a3d5c9e1ed93b40e5848a83cf791b83f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 16 May 2014 11:21:08 +0100 Subject: [PATCH 03/15] [mrd] plotting, init, inference etc. --- GPy/core/gp.py | 82 ++++++++--- GPy/core/parameterization/parameterized.py | 19 ++- GPy/core/sparse_gp.py | 6 +- GPy/examples/dimensionality_reduction.py | 43 +++++- .../latent_function_inference/__init__.py | 19 +++ GPy/models/bayesian_gplvm.py | 4 +- GPy/models/mrd.py | 132 +++++++++++++----- GPy/testing/pickle_tests.py | 3 +- 8 files changed, 236 insertions(+), 72 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 29d9032a..ca2583e0 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -180,40 +180,80 @@ class GP(Model): return Ysim - def plot_f(self, *args, **kwargs): + def plot_f(self, plot_limits=None, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], + levels=20, samples=0, fignum=None, ax=None, resolution=None, + plot_raw=True, + linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'): """ - - Plot the GP's view of the world, where the data is normalized and - before applying a likelihood. - - This is a convenience function: arguments are passed to - GPy.plotting.matplot_dep.models_plots.plot_f_fit - + Plot the GP's view of the world, where the data is normalized and before applying a likelihood. + This is a call to plot with plot_raw=True. + Data will not be plotted in this, as the GP's view of the world + may live in another space, or units then the data. """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ..plotting.matplot_dep import models_plots - return models_plots.plot_fit_f(self,*args,**kwargs) + kw = {} + if linecol is not None: + kw['linecol'] = linecol + if fillcol is not None: + kw['fillcol'] = fillcol + return models_plots.plot_fit(self, plot_limits, which_data_rows, + which_data_ycols, fixed_inputs, + levels, samples, fignum, ax, resolution, + plot_raw=plot_raw, Y_metadata=Y_metadata, + data_symbol=data_symbol, **kw) - def plot(self, *args, **kwargs): + def plot(self, plot_limits=None, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], + levels=20, samples=0, fignum=None, ax=None, resolution=None, + plot_raw=False, + linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'): """ Plot the posterior of the GP. - - In one dimension, the function is plotted with a shaded region - identifying two standard deviations. - - In two dimsensions, a contour-plot shows the mean predicted - function - - In higher dimensions, use fixed_inputs to plot the GP with some of - the inputs fixed. + - In one dimension, the function is plotted with a shaded region identifying two standard deviations. + - In two dimsensions, a contour-plot shows the mean predicted function + - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed. Can plot only part of the data and part of the posterior functions - using which_data_rows which_data_ycols and which_parts - - This is a convenience function: arguments are passed to - GPy.plotting.matplot_dep.models_plots.plot_fit + using which_data_rowsm which_data_ycols. + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. 
Defaluts to data limits + :type plot_limits: np.array + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :type output: integer (first output is 0) + :param linecol: color of line to plot [Tango.colorsHex['darkBlue']] + :type linecol: + :param fillcol: color of fill [Tango.colorsHex['lightBlue']] + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ..plotting.matplot_dep import models_plots - return models_plots.plot_fit(self,*args,**kwargs) + kw = {} + if linecol is not None: + kw['linecol'] = linecol + if fillcol is not None: + kw['fillcol'] = fillcol + return models_plots.plot_fit(self, plot_limits, which_data_rows, + which_data_ycols, fixed_inputs, + levels, samples, fignum, ax, resolution, + plot_raw=plot_raw, Y_metadata=Y_metadata, + data_symbol=data_symbol, **kw) def input_sensitivity(self): """ diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index cfb2171d..48eb2ddc 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -272,8 +272,11 @@ class Parameterized(Parameterizable): def __setattr__(self, name, val): # override the default behaviour, if setting a param, so broadcasting can by used if hasattr(self, "parameters"): - pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) - if name in pnames: self.parameters[pnames.index(name)][:] = val; return + try: + pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) + if name in pnames: self.parameters[pnames.index(name)][:] = val; return + except AttributeError: + pass object.__setattr__(self, name, val); #=========================================================================== @@ -281,11 +284,15 @@ class Parameterized(Parameterizable): #=========================================================================== def __setstate__(self, state): super(Parameterized, self).__setstate__(state) - self._connect_parameters() - self._connect_fixes() - self._notify_parent_change() + try: + self._connect_parameters() + self._connect_fixes() + self._notify_parent_change() + + self.parameters_changed() + except Exception as e: + print "WARNING: caught exception {!s}, trying to continue".format(e) - self.parameters_changed() def copy(self): c = super(Parameterized, self).copy() c._connect_parameters() diff 
--git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 388b682a..ac752c36 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -66,7 +66,11 @@ class SparseGP(GP): #gradients wrt Z self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient += self.kern.gradients_Z_expectations( - self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + self.grad_dict['dL_dpsi0'], + self.grad_dict['dL_dpsi1'], + self.grad_dict['dL_dpsi2'], + Z=self.Z, + variational_posterior=self.X) else: #gradients wrt kernel self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 8acf5489..2a319e8a 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -303,9 +303,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) - m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) - m.inference_method = VarDTCMissingData() - m.Y[inan] = _np.nan + Y[inan] = _np.nan + + m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, + inference_method=VarDTCMissingData(inan=inan), kernel=k) + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) m.likelihood.variance = .01 m.parameters_changed() @@ -338,7 +340,40 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): print "Optimizing Model:" m.optimize(messages=verbose, max_iters=8e3, gtol=.1) if plot: - m.plot_X_1d("MRD Latent Space 1D") + m.X.plot("MRD Latent Space 1D") + m.plot_scales("MRD Scales") + return m + +def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): + from GPy import kern + from GPy.models import MRD + from 
GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData + + D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 + _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) + + #Ylist = [Ylist[0]] + k = kern.Linear(Q, ARD=True) + inanlist = [] + + for Y in Ylist: + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) + inanlist.append(inan) + Y[inan] = _np.nan + + imlist = [] + for inan in inanlist: + imlist.append(VarDTCMissingData(limit=1, inan=inan)) + + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, + kernel=k, inference_method=imlist, + initx="random", initz='permute', **kw) + + if optimize: + print "Optimizing Model:" + m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1) + if plot: + m.X.plot("MRD Latent Space 1D") m.plot_scales("MRD Scales") return m diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 878c1e4f..8589a60e 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -38,6 +38,25 @@ class LatentFunctionInference(object): """ pass +class InferenceMethodList(LatentFunctionInference, list): + + def on_optimization_start(self): + for inf in self: + inf.on_optimization_start() + + def on_optimization_end(self): + for inf in self: + inf.on_optimization_end() + + def __getstate__(self): + state = [] + for inf in self: + state.append(inf) + return state + + def __setstate__(self, state): + for inf in state: + self.append(inf) from exact_gaussian_inference import ExactGaussianInference from laplace import Laplace diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 2bcbe0b2..26f4cd53 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -83,7 +83,7 @@ class BayesianGPLVM(SparseGP): resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=True, legend=True, plot_limits=None, - aspect='auto', 
updates=False, **kwargs): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import dim_reduction_plots @@ -91,7 +91,7 @@ class BayesianGPLVM(SparseGP): return dim_reduction_plots.plot_latent(self, labels, which_indices, resolution, ax, marker, s, fignum, plot_inducing, legend, - plot_limits, aspect, updates, **kwargs) + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) def do_test_latents(self, Y): """ diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 458a70a1..73b267ba 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -11,9 +11,11 @@ from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..likelihoods import Gaussian -from GPy.util.initialization import initialize_latent +from ..util.initialization import initialize_latent +from ..core.sparse_gp import SparseGP, GP +from ..inference.latent_function_inference import InferenceMethodList -class MRD(Model): +class MRD(SparseGP): """ Apply MRD to all given datasets Y in Ylist. 
@@ -39,17 +41,18 @@ class MRD(Model): :param num_inducing: number of inducing inputs to use :param Z: initial inducing inputs :param kernel: list of kernels or kernel to copy for each output - :type kernel: [GPy.kern.kern] | GPy.kern.kern | None (default) - :param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use - :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use + :type kernel: [GPy.kernels.kernels] | GPy.kernels.kernels | None (default) + :param :class:`~GPy.inference.latent_function_inference inference_method: + InferenceMethodList of inferences, or one inference method for all + :param :class:`~GPy.likelihoodss.likelihoods.likelihoods` likelihoods: the likelihoods to use :param str name: the name of this model :param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None """ def __init__(self, Ylist, input_dim, X=None, X_variance=None, initx = 'PCA', initz = 'permute', num_inducing=10, Z=None, kernel=None, - inference_method=None, likelihood=None, name='mrd', Ynames=None): - super(MRD, self).__init__(name) + inference_method=None, likelihoods=None, name='mrd', Ynames=None): + super(GP, self).__init__(name) self.input_dim = input_dim self.num_inducing = num_inducing @@ -63,16 +66,16 @@ class MRD(Model): # sort out the kernels if kernel is None: from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] + self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] elif isinstance(kernel, Kern): - self.kern = [] + self.kernels = [] for i in range(len(Ylist)): k = kernel.copy() - self.kern.append(k) + self.kernels.append(k) else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" 
- self.kern = kernel + self.kernels = kernel if X_variance is None: X_variance = np.random.uniform(0.1, 0.2, X.shape) @@ -80,32 +83,44 @@ class MRD(Model): self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) - if likelihood is None: - self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] - else: self.likelihood = likelihood + if likelihoods is None: + self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] + else: self.likelihoods = likelihoods if inference_method is None: - self.inference_method= [] + self.inference_method= InferenceMethodList() for y in Ylist: - if np.any(np.isnan(y)): - self.inference_method.append(VarDTCMissingData(limit=1)) + inan = np.isnan(y) + if np.any(inan): + self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) else: self.inference_method.append(VarDTC(limit=1)) else: + if not isinstance(inference_method, InferenceMethodList): + inference_method = InferenceMethodList(inference_method) self.inference_method = inference_method - self.inference_method.set_limit(len(Ylist)) self.add_parameters(self.X, self.Z) if Ynames is None: Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] + self.names = Ynames + + self.bgplvms = [] + self.num_data = Ylist[0].shape[0] + + for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist): + assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another" - for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood): p = Parameterized(name=n) p.add_parameter(k) + p.kern = k p.add_parameter(l) - setattr(self, 'Y{}'.format(i), p) + p.likelihood = l self.add_parameter(p) + self.bgplvms.append(p) + + self.posterior = None self._in_init_ = False def parameters_changed(self): @@ -114,13 +129,13 @@ class MRD(Model): self.Z.gradient[:] = 0. self.X.gradient[:] = 0. 
- for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): + for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) self.posteriors.append(posterior) self._log_marginal_likelihood += lml - # likelihood gradients + # likelihoods gradients l.update_gradients(grad_dict.pop('dL_dthetaL')) #gradients wrt kernel @@ -133,13 +148,20 @@ class MRD(Model): #gradients wrt Z self.Z.gradient += k.gradients_X(dL_dKmm, self.Z) self.Z.gradient += k.gradients_Z_expectations( - grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + grad_dict['dL_dpsi0'], + grad_dict['dL_dpsi1'], + grad_dict['dL_dpsi2'], + Z=self.Z, variational_posterior=self.X) dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict) self.X.mean.gradient += dL_dmean self.X.variance.gradient += dL_dS # update for the KL divergence + self.posterior = self.posteriors[0] + self.kern = self.kernels[0] + self.likelihood = self.likelihoods[0] + self.variational_prior.update_gradients_KL(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) @@ -207,16 +229,25 @@ class MRD(Model): else: return pylab.gcf() - def plot_X(self, fignum=None, ax=None): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) - return fig + def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0): + """ + Prediction for data set Yindex[default=0]. 
+ This predicts the output mean and variance for the dataset given in Ylist[Yindex] + """ + self.posterior = self.posteriors[Yindex] + self.kern = self.kernels[Yindex] + self.likelihood = self.likelihoods[Yindex] + return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern) - def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): - fig = self._handle_plotting(fignum, - ax, - lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs), - sharex=sharex, sharey=sharey) - return fig + #=============================================================================== + # TODO: Predict! Maybe even change to several bgplvms, which share an X? + #=============================================================================== + # def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): + # fig = self._handle_plotting(fignum, + # ax, + # lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs), + # sharex=sharex, sharey=sharey) + # return fig def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): """ @@ -228,16 +259,28 @@ class MRD(Model): """ if titles is None: titles = [r'${}$'.format(name) for name in self.names] - ymax = reduce(max, [np.ceil(max(g.input_sensitivity())) for g in self.bgplvms]) + ymax = reduce(max, [np.ceil(max(g.kernels.input_sensitivity())) for g in self.bgplvms]) def plotf(i, g, ax): ax.set_ylim([0,ymax]) - g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + g.kernels.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig - def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) - return fig + def plot_latent(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + 
fignum=None, plot_inducing=True, legend=True, + plot_limits=None, + aspect='auto', updates=False, predict_kwargs=dict(Yindex=0), imshow_kwargs={}): + """ + """ + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) def _debug_plot(self): self.plot_X_1d() @@ -252,4 +295,19 @@ class MRD(Model): pylab.draw() fig.tight_layout() + def __getstate__(self): + # TODO: + import copy + state = copy.copy(self.__dict__) + del state['kernels'] + del state['kern'] + del state['likelihood'] + return state + def __setstate__(self, state): + # TODO: + super(MRD, self).__setstate__(state) + self.kernels = [p.kern for p in self.bgplvms] + self.kern = self.kernels[0] + self.likelihood = self.likelihoods[0] + self.parameters_changed() \ No newline at end of file diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index 94504645..13a67744 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -4,7 +4,8 @@ Created on 13 Mar 2014 @author: maxz ''' import unittest, itertools -import cPickle as pickle +#import cPickle as pickle +import pickle import numpy as np from GPy.core.parameterization.index_operations import ParameterIndexOperations,\ ParameterIndexOperationsView From 01d6b91f9079dfc4aab01c6531d5e9ff4a6b326e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 16 May 2014 15:12:19 +0100 Subject: [PATCH 04/15] [mrd] missing data implemented, and plotting better --- GPy/core/parameterization/parameterized.py | 1 - .../latent_function_inference/var_dtc.py | 11 ++++ GPy/models/mrd.py | 55 ++++++++++--------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/GPy/core/parameterization/parameterized.py 
b/GPy/core/parameterization/parameterized.py index 48eb2ddc..dd9a07c4 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -288,7 +288,6 @@ class Parameterized(Parameterizable): self._connect_parameters() self._connect_fixes() self._notify_parent_change() - self.parameters_changed() except Exception as e: print "WARNING: caught exception {!s}, trying to continue".format(e) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 3043a7e8..b5e6787b 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -202,6 +202,17 @@ class VarDTCMissingData(LatentFunctionInference): def set_limit(self, limit): self._Y.limit = limit + def __getstate__(self): + # has to be overridden, as Cacher objects cannot be pickled. + return self._Y.limit, self._inan + + def __setstate__(self, state): + # has to be overridden, as Cacher objects cannot be pickled. 
+ from ...util.caching import Cacher + self.limit = state[0] + self._inan = state[1] + self._Y = Cacher(self._subarray_computations, self.limit) + def _subarray_computations(self, Y): if self._inan is None: inan = np.isnan(Y) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 73b267ba..c8067c01 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -203,6 +203,7 @@ class MRD(SparseGP): fig = pylab.figure(num=fignum) sharex_ax = None sharey_ax = None + plots = [] for i, g in enumerate(self.bgplvms): try: if sharex: @@ -219,15 +220,16 @@ class MRD(SparseGP): ax = axes[i] else: raise ValueError("Need one axes per latent dimension input_dim") - plotf(i, g, ax) + plots.append(plotf(i, g, ax)) if sharey_ax is not None: pylab.setp(ax.get_yticklabels(), visible=False) pylab.draw() if axes is None: - fig.tight_layout() - return fig - else: - return pylab.gcf() + try: + fig.tight_layout() + except: + pass + return plots def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0): """ @@ -259,10 +261,10 @@ class MRD(SparseGP): """ if titles is None: titles = [r'${}$'.format(name) for name in self.names] - ymax = reduce(max, [np.ceil(max(g.kernels.input_sensitivity())) for g in self.bgplvms]) + ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms]) def plotf(i, g, ax): ax.set_ylim([0,ymax]) - g.kernels.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig @@ -270,30 +272,33 @@ class MRD(SparseGP): resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=True, legend=True, plot_limits=None, - aspect='auto', updates=False, predict_kwargs=dict(Yindex=0), imshow_kwargs={}): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): """ + see plotting.matplot_dep.dim_reduction_plots.plot_latent + if predict_kwargs is None, will plot latent spaces for 0th 
dataset (and kernel), otherwise give + predict_kwargs=dict(Yindex='index') for plotting only the latent space of dataset with 'index'. """ import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import dim_reduction_plots + if "Yindex" not in predict_kwargs: + predict_kwargs['Yindex'] = 0 + if ax is None: + fig = pylab.figure(num=fignum) + ax = fig.add_subplot(111) + else: + fig = ax.figure + plot = dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) + ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name) + try: + fig.tight_layout() + except: + pass - return dim_reduction_plots.plot_latent(self, labels, which_indices, - resolution, ax, marker, s, - fignum, plot_inducing, legend, - plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) - - def _debug_plot(self): - self.plot_X_1d() - fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9)) - fig.clf() - axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))] - self.plot_X(ax=axes) - axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))] - self.plot_latent(ax=axes) - axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))] - self.plot_scales(ax=axes) - pylab.draw() - fig.tight_layout() + return plot def __getstate__(self): # TODO: From dafad623637e3d0d8ef797bd5ab17f861b52fcd3 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:43:18 +0100 Subject: [PATCH 05/15] [lmv_dimselect] we need to keep a pointer to the lvm_dimselect object, as the updates are weak references: dim_select = ... 
--- GPy/examples/dimensionality_reduction.py | 14 +++++++------- GPy/plotting/matplot_dep/visualize.py | 5 +---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 2a319e8a..a216eec6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -518,21 +518,21 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): Q = 6 kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) - + m.data = data m.likelihood.variance = 0.001 - + # optimize - if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300) + if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10) if plot: - plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) + fig, (latent_axes, sense_axes) = plt.subplots(1, 2) plt.sca(latent_axes) m.plot_latent(ax=latent_axes) y = m.Y[:1, :].copy() data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect']) - GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) - plt.draw() - plt.show() + dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) + fig.canvas.draw() + fig.canvas.show() raw_input('Press enter to finish') return m diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index d7884730..3ba120aa 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -88,7 +88,6 @@ class vector_show(matplotlib_show): class lvm(matplotlib_show): - def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): """Visualize a latent variable model @@ -147,7 +146,6 @@ 
class lvm(matplotlib_show): pass def on_click(self, event): - print 'click!' if event.inaxes!=self.latent_axes: return self.move_on = not self.move_on self.called = True @@ -220,11 +218,10 @@ class lvm_dimselect(lvm): self.labels = labels lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) self.show_sensitivities() - print "use left and right mouse butons to select dimensions" + print "use left and right mouse buttons to select dimensions" def on_click(self, event): - if event.inaxes==self.sense_axes: new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) if event.button == 1: From dfa23b77c51d6962ec2456f5b0b51da318f834d7 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:43:58 +0100 Subject: [PATCH 06/15] [posterior] adjusted for more than one covariance per output --- GPy/inference/latent_function_inference/posterior.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 309cb7e5..70b0e0ea 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -95,7 +95,7 @@ class Posterior(object): """ if self._covariance is None: #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) - self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() + self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) return self._covariance From 8c8d06c8aec83c44b86e08c95b641c8d80213660 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:44:59 +0100 Subject: [PATCH 07/15] [pca] colors now as iterator --- GPy/util/pca.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git
a/GPy/util/pca.py b/GPy/util/pca.py index 6c548b3d..967d0e1b 100644 --- a/GPy/util/pca.py +++ b/GPy/util/pca.py @@ -106,12 +106,14 @@ class pca(object): ulabels.append(lab) nlabels = len(ulabels) if colors is None: - colors = [cmap(float(i) / nlabels) for i in range(nlabels)] + colors = iter([cmap(float(i) / nlabels) for i in range(nlabels)]) + else: + colors = iter(colors) X_ = self.project(X, self.Q)[:,dimensions] kwargs.update(dict(s=s)) plots = list() for i, l in enumerate(ulabels): - kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)])) + kwargs.update(dict(color=colors.next(), marker=marker[i % len(marker)])) plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs)) ax.set_xlabel(r"PC$_1$") ax.set_ylabel(r"PC$_2$") From 99699e9e020959f18d17a0b250049da52e25aa42 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:45:34 +0100 Subject: [PATCH 08/15] [vardtc missing data] can handle non broadcastable selections --- GPy/inference/latent_function_inference/var_dtc.py | 6 +++++- GPy/util/subarray_and_sorting.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index b5e6787b..a9a137dc 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -283,7 +283,11 @@ class VarDTCMissingData(LatentFunctionInference): else: beta = beta_all VVT_factor = (beta*y) - VVT_factor_all[v, ind].flat = VVT_factor.flat + try: + VVT_factor_all[v, ind].flat = VVT_factor.flat + except ValueError: + mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape) + VVT_factor_all.flat[mult] = VVT_factor output_dim = y.shape[1] psi0 = psi0_all[v] diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py index ab7b7672..f1707394 100644 --- a/GPy/util/subarray_and_sorting.py +++ b/GPy/util/subarray_and_sorting.py @@ 
-4,7 +4,7 @@ .. moduleauthor:: Max Zwiessele ''' -__updated__ = '2013-12-02' +__updated__ = '2014-05-20' import numpy as np From 533e5fb744fdbbc52f00f2768a485fc5e8b85ba8 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:46:29 +0100 Subject: [PATCH 09/15] [datasets] delete packed data in hapmap dataset --- GPy/util/datasets.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 4d89ece2..d140fe3a 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -687,14 +687,20 @@ def hapmap3(data_set='hapmap3'): import bz2 except ImportError as i: raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset" - if not data_available(data_set): - download_data(data_set) + dirpath = os.path.join(data_path,'hapmap3') hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly' + unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']] + unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files)) + + if not unpacked_files_exist and not data_available(data_set): + download_data(data_set) + preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \ ['.snps.pickle', '.info.pickle', '.nan.pickle']] + if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)): if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB " "of memory and can take a (very) long time, continue? 
[Y/n]"): @@ -708,8 +714,7 @@ def hapmap3(data_set='hapmap3'): perc="="*int(20.*progress/100.)) stdout.write(status); stdout.flush() return status - unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']] - if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)): + if not unpacked_files_exist: status=write_status('unpacking...', 0, '') curr = 0 for newfilepath in unpacked_files: @@ -726,6 +731,7 @@ def hapmap3(data_set='hapmap3'): status=write_status('unpacking...', curr+12.*file_processed/(file_size), status) curr += 12 status=write_status('unpacking...', curr, status) + os.remove(filepath) status=write_status('reading .ped...', 25, status) # Preprocess data: snpstrnp = np.loadtxt(unpacked_files[0], dtype=str) @@ -796,7 +802,7 @@ def hapmap3(data_set='hapmap3'): def singlecell(data_set='singlecell'): if not data_available(data_set): download_data(data_set) - + from pandas import read_csv dirpath = os.path.join(data_path, data_set) filename = os.path.join(dirpath, 'singlecell.csv') From 76718a8fcc14dd435c8f8d2a9b153fb55fccc5fd Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:47:28 +0100 Subject: [PATCH 10/15] [stationary] input_sensitiviy is now 1/(l**2) --- GPy/kern/_src/stationary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index f561baa4..ef8900f9 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -180,7 +180,7 @@ class Stationary(Kern): return np.zeros(X.shape) def input_sensitivity(self): - return np.ones(self.input_dim)/self.lengthscale + return np.ones(self.input_dim)/self.lengthscale**2 class Exponential(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'): From 1ebcee967a8df5073d7e868a96de073aac465fe1 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:47:57 +0100 Subject: [PATCH 
11/15] [psi-stats] add kernel was missing a psi zero call --- GPy/kern/_src/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 1c237a6c..12f5d444 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -134,7 +134,7 @@ class Add(CombinationKernel): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. - target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): From 995de897b3472fb6682a419f42bca377f88191f2 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:48:53 +0100 Subject: [PATCH 12/15] [core updates] first try to switch updates off and on. Use m.updates = False to switch updates off, and vice-versa --- GPy/core/parameterization/parameter_core.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index f0fbf4ea..9329651d 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, import numpy as np import re -__updated__ = '2014-05-15' +__updated__ = '2014-05-20' class HierarchyError(Exception): """ @@ -50,11 +50,24 @@ class Observable(object): self as only argument to all its observers. 
""" _updated = True + _updates = True def __init__(self, *args, **kwargs): super(Observable, self).__init__() from lists_and_dicts import ObserverList self.observers = ObserverList() + @property + def updates(self): + self._updates = self._highest_parent_._updates + return self._updates + + @updates.setter + def updates(self, ups): + assert isinstance(ups, bool), "updates are either on (True) or off (False)" + self._highest_parent_._updates = ups + if ups: + self._trigger_params_changed() + def add_observer(self, observer, callble, priority=0): """ Add an observer `observer` with the callback `callble` @@ -91,6 +104,8 @@ class Observable(object): :param min_priority: only notify observers with priority > min_priority if min_priority is None, notify all observers in order """ + if not self.updates: + return if which is None: which = self if min_priority is None: From ef256223d142b7752bad6a9e9dd4d52fbe3be1f3 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:49:20 +0100 Subject: [PATCH 13/15] [mrd] more control for init, some missing data adjustements, init greatly improved --- GPy/models/mrd.py | 66 ++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index c8067c01..0e6547bb 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -9,18 +9,25 @@ from ..core import Model from ..kern import Kern from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized +from ..core.parameterization.observable_array import ObsAr from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC +from ..inference.latent_function_inference import InferenceMethodList from ..likelihoods import Gaussian from ..util.initialization import initialize_latent from ..core.sparse_gp import SparseGP, GP -from ..inference.latent_function_inference import InferenceMethodList class 
MRD(SparseGP): """ + !WARNING: This is bleeding edge code and still in development. + Functionality may change fundamentally during development! + Apply MRD to all given datasets Y in Ylist. Y_i in [n x p_i] + If Ylist is a dictionary, the keys of the dictionary are the names, and the + values are the different datasets to compare. + The samples n in the datasets need to match up, whereas the dimensionality p_d can differ. @@ -57,16 +64,46 @@ class MRD(SparseGP): self.input_dim = input_dim self.num_inducing = num_inducing - self.Ylist = Ylist + if isinstance(Ylist, dict): + Ynames, Ylist = zip(*Ylist.items()) + + self.Ylist = [ObsAr(Y) for Y in Ylist] + + if Ynames is None: + Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] + self.names = Ynames + assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict" + + if inference_method is None: + self.inference_method= InferenceMethodList() + warned = False + for y in Ylist: + inan = np.isnan(y) + if np.any(inan): + if not warned: + print "WARING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X" + warned = True + self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) + else: + self.inference_method.append(VarDTC(limit=1)) + else: + if not isinstance(inference_method, InferenceMethodList): + inference_method = InferenceMethodList(inference_method) + self.inference_method = inference_method + + self._in_init_ = True - X, fracs = self._init_X(initx, Ylist) + if X is None: + X, fracs = self._init_X(initx, Ylist) + else: + fracs = [X.var(0)]*len(Ylist) self.Z = Param('inducing inputs', self._init_Z(initz, X)) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N # sort out the kernels if kernel is None: from ..kern import RBF - self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] + self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in 
range(len(Ylist))] elif isinstance(kernel, Kern): self.kernels = [] for i in range(len(Ylist)): @@ -87,25 +124,8 @@ class MRD(SparseGP): self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] else: self.likelihoods = likelihoods - if inference_method is None: - self.inference_method= InferenceMethodList() - for y in Ylist: - inan = np.isnan(y) - if np.any(inan): - self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) - else: - self.inference_method.append(VarDTC(limit=1)) - else: - if not isinstance(inference_method, InferenceMethodList): - inference_method = InferenceMethodList(inference_method) - self.inference_method = inference_method - self.add_parameters(self.X, self.Z) - if Ynames is None: - Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] - self.names = Ynames - self.bgplvms = [] self.num_data = Ylist[0].shape[0] @@ -173,7 +193,7 @@ class MRD(SparseGP): Ylist = self.Ylist if init in "PCA_concat": X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist)) - fracs = [fracs]*self.input_dim + fracs = [fracs]*len(Ylist) elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) fracs = [] @@ -184,7 +204,7 @@ class MRD(SparseGP): else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) fracs = X.var(0) - fracs = [fracs]*self.input_dim + fracs = [fracs]*len(Ylist) X -= X.mean() X /= X.std() return X, fracs From c507cfe4ab85fed86d102f16cbe483f4e19dc88f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 16:06:56 +0100 Subject: [PATCH 14/15] [index operations] added lookup for properties for a given index as dict for given index --- GPy/core/parameterization/index_operations.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 1f3ac934..7fc76d07 100644 --- a/GPy/core/parameterization/index_operations.py +++ 
b/GPy/core/parameterization/index_operations.py @@ -7,6 +7,20 @@ import numpy from numpy.lib.function_base import vectorize from lists_and_dicts import IntArrayDict +def extract_properties_to_index(index, props): + prop_index = dict() + for i, cl in enumerate(props): + for c in cl: + ind = prop_index.get(c, list()) + ind.append(index[i]) + prop_index[c] = ind + + for c, i in prop_index.items(): + prop_index[c] = numpy.array(i, dtype=int) + + return prop_index + + class ParameterIndexOperations(object): ''' Index operations for storing param index _properties @@ -66,8 +80,34 @@ class ParameterIndexOperations(object): return self._properties.values() def properties_for(self, index): + """ + Returns a list of properties, such that each entry in the list corresponds + to the element of the index given. + + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_for([2,3,5]) + [['one'], ['one', 'two'], ['two']] + """ return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index) + def properties_to_index_dict(self, index): + """ + Return a dictionary, containing properties as keys and indices as index + Thus, the indices for each constraint, which is contained will be collected as + one dictionary + + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_to_index_dict([2,3,5]) + {'one':[2,3], 'two':[3,5]} + """ + props = self.properties_for(index) + prop_index = extract_properties_to_index(index, props) + return prop_index + def add(self, prop, indices): self._properties[prop] = combine_indices(self._properties[prop], indices) @@ -174,8 +214,32 @@ class ParameterIndexOperationsView(object): def properties_for(self, index): + """ + Returns a list of properties, such that each entry in the list corresponds + to the element of the index given. 
+ + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_for([2,3,5]) + [['one'], ['one', 'two'], ['two']] + """ return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index) + def properties_to_index_dict(self, index): + """ + Return a dictionary, containing properties as keys and indices as index + Thus, the indices for each constraint, which is contained will be collected as + one dictionary + + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_to_index_dict([2,3,5]) + {'one':[2,3], 'two':[3,5]} + """ + return extract_properties_to_index(index, self.properties_for(index)) + def add(self, prop, indices): self._param_index_ops.add(prop, indices+self._offset) From b520eb212c2a1171b8bea319dc32b91b8d07c070 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 16:39:52 +0100 Subject: [PATCH 15/15] [fixing] fixing now saves the old constraint --- GPy/core/parameterization/parameter_core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 9329651d..9cedc46d 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -324,6 +324,7 @@ class Indexable(Nameable, Observable): self._default_constraint_ = default_constraint from index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() + self._old_constraints = ParameterIndexOperations() self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: self.constrain(self._default_constraint_) @@ -386,8 +387,10 @@ class Indexable(Nameable, Observable): """ if value is not None: self[:] = value - reconstrained = self.unconstrain() - index = self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning) + + index = self._raveled_index() + # reconstrained = self.unconstrain() + 
index = self._add_to_index_operations(self.constraints, index, __fixed__, warning) self._highest_parent_._set_fixed(self, index) self.notify_observers(self, None if trigger_parent else -np.inf) return index