From 6b3888f1631bd94f7cd7b8010946d42a960cef76 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Mon, 3 Nov 2014 11:16:34 +0000
Subject: [PATCH 01/22] [VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly

---
 GPy/core/sparse_gp.py                  | 282 +++-----------------
 GPy/core/sparse_gp_mpi.py              |   6 +-
 GPy/models/bayesian_gplvm_minibatch.py | 267 +++++++++++++++++++
 GPy/models/sparse_gp_minibatch.py      | 347 +++++++++++++++++++++++++
 GPy/testing/model_tests.py             |   6 +-
 5 files changed, 655 insertions(+), 253 deletions(-)
 create mode 100644 GPy/models/bayesian_gplvm_minibatch.py
 create mode 100644 GPy/models/sparse_gp_minibatch.py

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 7f88503a..73c80c76 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -40,8 +40,7 @@ class SparseGP(GP):
     """
 
     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
-                 name='sparse gp', Y_metadata=None, normalizer=False,
-                 missing_data=False, stochastic=False, batchsize=1):
+                 name='sparse gp', Y_metadata=None, normalizer=False):
         #pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
@@ -55,260 +54,51 @@ class SparseGP(GP):
         self.num_inducing = Z.shape[0]
 
         GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
-        self.missing_data = missing_data
-
-        if stochastic and missing_data:
-            self.missing_data = True
-            self.ninan = ~np.isnan(Y)
-            self.stochastics = SparseGPStochastics(self, batchsize)
-        elif stochastic and not missing_data:
-            self.missing_data = False
-            self.stochastics = SparseGPStochastics(self, batchsize)
-        elif missing_data:
-            self.missing_data = True
-            self.ninan = ~np.isnan(Y)
-            self.stochastics = SparseGPMissing(self)
-        else:
-            self.stochastics = False
 
         logger.info("Adding Z as parameter")
         self.link_parameter(self.Z, index=0)
-        if self.missing_data:
-            self.Ylist = []
-            overall = self.Y_normalized.shape[1]
-            m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
-            message = m_f(-1)
-            print message,
-            for d in xrange(overall):
-                self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
-                print ' '*(len(message)+1) + '\r',
-                message = m_f(d)
-                print message,
-            print ''
-
         self.posterior = None
 
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)
 
-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
-        """
-        This is the standard part, which usually belongs in parameters_changed.
-
-        For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner
-        loop, in order to ensure a different handling of gradients etc of different
-        subsets of data.
-
-        The dict in current_values will be passed aroung as current_values for
-        the rest of the algorithm, so this is the place to store current values,
-        such as subsets etc, if necessary.
-
-        If Lm and dL_dKmm can be precomputed (or only need to be computed once)
-        pass them in here, so they will be passed to the inference_method.
-
-        subset_indices is a dictionary of indices. you can put the indices however you
-        like them into this dictionary for inner use of the indices inside the
-        algorithm.
- """ - try: - posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None) - except: - posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata) - current_values = {} - likelihood.update_gradients(grad_dict['dL_dthetaL']) - current_values['likgrad'] = likelihood.gradient.copy() - if subset_indices is None: - subset_indices = {} - if isinstance(X, VariationalPosterior): - #gradients wrt kernel - dL_dKmm = grad_dict['dL_dKmm'] - kern.update_gradients_full(dL_dKmm, Z, None) - current_values['kerngrad'] = kern.gradient.copy() - kern.update_gradients_expectations(variational_posterior=X, - Z=Z, - dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2']) - current_values['kerngrad'] += kern.gradient - - #gradients wrt Z - current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z) - current_values['Zgrad'] += kern.gradients_Z_expectations( - grad_dict['dL_dpsi0'], - grad_dict['dL_dpsi1'], - grad_dict['dL_dpsi2'], - Z=Z, - variational_posterior=X) - else: - #gradients wrt kernel - kern.update_gradients_diag(grad_dict['dL_dKdiag'], X) - current_values['kerngrad'] = kern.gradient.copy() - kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z) - current_values['kerngrad'] += kern.gradient - kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None) - current_values['kerngrad'] += kern.gradient - #gradients wrt Z - current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z) - current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X) - return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices - - def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): - """ - This is for automatic updates of values in the inner loop of missing - data handling. Both arguments are dictionaries and the values in - full_values will be updated by the current_gradients. - - If a key from current_values does not exist in full_values, it will be - initialized to the value in current_values. - - If there is indices needed for the update, value_indices can be used for - that. If value_indices has the same key, as current_values, the update - in full_values will be indexed by the indices in value_indices. - - grads: - dictionary of standing gradients (you will have to carefully make sure, that - the ordering is right!). The values in here will be updated such that - full_values[key] += current_values[key] forall key in full_gradients.keys() - - gradients: - dictionary of gradients in the current set of parameters. - - value_indices: - dictionary holding indices for the update in full_values. - if the key exists the update rule is:def df(x): - full_values[key][value_indices[key]] += current_values[key] - """ - for key in current_values.keys(): - if value_indices is not None and value_indices.has_key(key): - index = value_indices[key] - else: - index = slice(None) - if full_values.has_key(key): - full_values[key][index] += current_values[key] - else: - full_values[key] = current_values[key] - - def _inner_values_update(self, current_values): - """ - This exists if there is more to do with the current values. - It will be called allways in the inner loop, so that - you can do additional inner updates for the inside of the missing data - loop etc. This can also be used for stochastic updates, when only working on - one dimension of the output. 
- """ - pass - - def _outer_values_update(self, full_values): - """ - Here you put the values, which were collected before in the right places. - E.g. set the gradients of parameters, etc. - """ - self.likelihood.gradient = full_values['likgrad'] - self.kern.gradient = full_values['kerngrad'] - self.Z.gradient = full_values['Zgrad'] - - def _outer_init_full_values(self): - """ - If full_values has indices in values_indices, we might want to initialize - the full_values differently, so that subsetting is possible. - - Here you can initialize the full_values for the values needed. - - Keep in mind, that if a key does not exist in full_values when updating - values, it will be set (so e.g. for Z there is no need to initialize Zgrad, - as there is no subsetting needed. For X in BGPLVM on the other hand we probably need - to initialize the gradients for the mean and the variance in order to - have the full gradient for indexing) - """ - return {} - - def _outer_loop_for_missing_data(self): - Lm = None - dL_dKmm = None - - self._log_marginal_likelihood = 0 - self.full_values = self._outer_init_full_values() - - if self.posterior is None: - woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim)) - woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) - else: - woodbury_inv = self.posterior._woodbury_inv - woodbury_vector = self.posterior._woodbury_vector - - if not self.stochastics: - m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim) - message = m_f(-1) - print message, - - for d in self.stochastics.d: - ninan = self.ninan[:, d] - - if not self.stochastics: - print ' '*(len(message)) + '\r', - message = m_f(d) - print message, - - posterior, log_marginal_likelihood, \ - grad_dict, current_values, value_indices = self._inner_parameters_changed( - self.kern, self.X[ninan], - self.Z, self.likelihood, - self.Ylist[d], self.Y_metadata, - Lm, dL_dKmm, - subset_indices=dict(outputs=d, samples=ninan)) - - self._inner_take_over_or_update(self.full_values, current_values, value_indices) - self._inner_values_update(current_values) - - Lm = posterior.K_chol - dL_dKmm = grad_dict['dL_dKmm'] - woodbury_inv[:, :, d] = posterior.woodbury_inv - woodbury_vector[:, d:d+1] = posterior.woodbury_vector - self._log_marginal_likelihood += log_marginal_likelihood - if not self.stochastics: - print '' - - if self.posterior is None: - self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, - K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) - self._outer_values_update(self.full_values) - - def _outer_loop_without_missing_data(self): - self._log_marginal_likelihood = 0 - - if self.posterior is None: - woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim)) - woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) - else: - woodbury_inv = self.posterior._woodbury_inv - woodbury_vector = self.posterior._woodbury_vector - - d = self.stochastics.d - posterior, log_marginal_likelihood, \ - grad_dict, self.full_values, _ = self._inner_parameters_changed( - self.kern, self.X, - self.Z, self.likelihood, - self.Y_normalized[:, d], self.Y_metadata) - self.grad_dict = grad_dict - - self._log_marginal_likelihood += log_marginal_likelihood - - self._outer_values_update(self.full_values) - - woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None] - woodbury_vector[:, d] = posterior.woodbury_vector - if self.posterior is None: - self.posterior = Posterior(woodbury_inv=woodbury_inv, 
woodbury_vector=woodbury_vector, - K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) - def parameters_changed(self): - if self.missing_data: - self._outer_loop_for_missing_data() - elif self.stochastics: - self._outer_loop_without_missing_data() + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) + + self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) + + if isinstance(self.X, VariationalPosterior): + #gradients wrt kernel + dL_dKmm = self.grad_dict['dL_dKmm'] + self.kern.update_gradients_full(dL_dKmm, self.Z, None) + kerngrad = self.kern.gradient.copy() + self.kern.update_gradients_expectations(variational_posterior=self.X, + Z=self.Z, + dL_dpsi0=self.grad_dict['dL_dpsi0'], + dL_dpsi1=self.grad_dict['dL_dpsi1'], + dL_dpsi2=self.grad_dict['dL_dpsi2']) + self.kern.gradient += kerngrad + + #gradients wrt Z + self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) + self.Z.gradient += self.kern.gradients_Z_expectations( + self.grad_dict['dL_dpsi0'], + self.grad_dict['dL_dpsi1'], + self.grad_dict['dL_dpsi2'], + Z=self.Z, + variational_posterior=self.X) else: - self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) - self._outer_values_update(self.full_values) + #gradients wrt kernel + self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) + kerngrad = self.kern.gradient.copy() + self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z) + kerngrad += self.kern.gradient + self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) + self.kern.gradient += kerngrad + #gradients wrt Z + self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) + def _raw_predict(self, Xnew, full_cov=False, kern=None): """ diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py index 52f75d45..733d0e1c 100644 --- a/GPy/core/sparse_gp_mpi.py +++ b/GPy/core/sparse_gp_mpi.py @@ -34,8 +34,7 @@ class SparseGP_MPI(SparseGP): """ - def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False, - missing_data=False, stochastic=False, batchsize=1): + def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False): self._IN_OPTIMIZATION_ = False if mpi_comm != None: if inference_method is None: @@ -43,8 +42,7 @@ class SparseGP_MPI(SparseGP): else: assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!' 
- super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer, - missing_data=missing_data, stochastic=stochastic, batchsize=batchsize) + super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) self.update_model(False) self.link_parameter(self.X, index=0) if variational_prior is not None: diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py new file mode 100644 index 00000000..56b46244 --- /dev/null +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -0,0 +1,267 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from .. import kern +from ..likelihoods import Gaussian +from ..core.parameterization.variational import NormalPosterior, NormalPrior +from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch +from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU +import logging +from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch + +class BayesianGPLVMMiniBatch(SparseGPMiniBatch): + """ + Bayesian Gaussian Process Latent Variable Model + + :param Y: observed data (np.ndarray) or GPy.likelihood + :type Y: np.ndarray| GPy.likelihood instance + :param input_dim: latent dimensionality + :type input_dim: int + :param init: initialisation method for the latent space + :type init: 'PCA'|'random' + + """ + def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, + Z=None, kernel=None, inference_method=None, likelihood=None, + name='bayesian gplvm', normalizer=None, + missing_data=False, stochastic=False, batchsize=1): + self.__IN_OPTIMIZATION__ = False + + self.logger = logging.getLogger(self.__class__.__name__) + if X is None: + from ..util.initialization import initialize_latent + self.logger.info("initializing latent space X with method {}".format(init)) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) + + self.init = init + + if X_variance is None: + self.logger.info("initializing latent space variance ~ uniform(0,.1)") + X_variance = np.random.uniform(0,.1,X.shape) + + if Z is None: + self.logger.info("initializing inducing inputs") + Z = np.random.permutation(X.copy())[:num_inducing] + assert Z.shape[1] == X.shape[1] + + if kernel is None: + self.logger.info("initializing kernel RBF") + kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) #+ kern.Bias(input_dim) + kern.White(input_dim) + + if likelihood is None: + likelihood = Gaussian() + + self.variational_prior = NormalPrior() + X = NormalPosterior(X, X_variance) + + if inference_method is None: + from ..inference.latent_function_inference.var_dtc import VarDTC + self.logger.debug("creating inference_method var_dtc") + inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1]) + + if kernel.useGPU and isinstance(inference_method, VarDTC_GPU): + kernel.psicomp.GPU_direct = True + + super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood, + name=name, inference_method=inference_method, + normalizer=normalizer, + missing_data=missing_data, stochastic=stochastic, + batchsize=batchsize) + + def set_X_gradients(self, X, X_grad): + """Set the gradients of the posterior distribution of X in its specific form.""" + X.mean.gradient, X.variance.gradient = X_grad + + def 
get_X_gradients(self, X): + """Get the gradients of the posterior distribution of X in its specific form.""" + return X.mean.gradient, X.variance.gradient + + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None): + posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices) + + kl_fctr = 1. + if self.missing_data: + d = self.output_dim + log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d + else: + log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X) + + current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( + variational_posterior=X, + Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=grad_dict['dL_dpsi1'], + dL_dpsi2=grad_dict['dL_dpsi2']) + + # Subsetting Variational Posterior objects, makes the gradients + # empty. We need them to be 0 though: + X.mean.gradient[:] = 0 + X.variance.gradient[:] = 0 + + self.variational_prior.update_gradients_KL(X) + if self.missing_data: + current_values['meangrad'] += kl_fctr*X.mean.gradient/d + current_values['vargrad'] += kl_fctr*X.variance.gradient/d + else: + current_values['meangrad'] += kl_fctr*X.mean.gradient + current_values['vargrad'] += kl_fctr*X.variance.gradient + + if subset_indices is not None: + value_indices['meangrad'] = subset_indices['samples'] + value_indices['vargrad'] = subset_indices['samples'] + return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices + + def _outer_values_update(self, full_values): + """ + Here you put the values, which were collected before in the right places. + E.g. set the gradients of parameters, etc. 
+ """ + super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) + self.X.mean.gradient = full_values['meangrad'] + self.X.variance.gradient = full_values['vargrad'] + + def _outer_init_full_values(self): + return dict(meangrad=np.zeros(self.X.mean.shape), + vargrad=np.zeros(self.X.variance.shape)) + + def parameters_changed(self): + super(BayesianGPLVMMiniBatch,self).parameters_changed() + if isinstance(self.inference_method, VarDTC_minibatch): + return + + #super(BayesianGPLVM, self).parameters_changed() + #self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) + + #self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) + + # This is testing code ------------------------- +# i = np.random.randint(self.X.shape[0]) +# X_ = self.X.mean +# which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51)) +# _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata) +# grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) +# +# self.X.mean.gradient[:] = 0 +# self.X.variance.gradient[:] = 0 +# self.X.mean.gradient[which] = grad[0] +# self.X.variance.gradient[which] = grad[1] + + # update for the KL divergence +# self.variational_prior.update_gradients_KL(self.X, which) + # ----------------------------------------------- + + # update for the KL divergence + #self.variational_prior.update_gradients_KL(self.X) + + def plot_latent(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, plot_inducing=True, legend=True, + plot_limits=None, + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) + + def do_test_latents(self, Y): + """ + Compute the latent representation for a set of new points Y + + Notes: + This will only work with a univariate Gaussian likelihood (for now) + """ + N_test = Y.shape[0] + input_dim = self.Z.shape[1] + + means = np.zeros((N_test, input_dim)) + covars = np.zeros((N_test, input_dim)) + + dpsi0 = -0.5 * self.input_dim / self.likelihood.variance + dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. 
likelihoods + V = Y/self.likelihood.variance + + #compute CPsi1V + #if self.Cpsi1V is None: + # psi1V = np.dot(self.psi1.T, self.likelihood.V) + # tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0) + # tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1) + # self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1) + + dpsi1 = np.dot(self.posterior.woodbury_vector, V.T) + + #start = np.zeros(self.input_dim * 2) + + + from scipy.optimize import minimize + + for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]): + args = (input_dim, self.kern.copy(), self.Z, dpsi0, dpsi1_n.T, dpsi2) + res = minimize(latent_cost_and_grad, jac=True, x0=np.hstack((means[n], covars[n])), args=args, method='BFGS') + xopt = res.x + mu, log_S = xopt.reshape(2, 1, -1) + means[n] = mu[0].copy() + covars[n] = np.exp(log_S[0]).copy() + + X = NormalPosterior(means, covars) + + return X + + def dmu_dX(self, Xnew): + """ + Calculate the gradient of the prediction at Xnew w.r.t Xnew. + """ + dmu_dX = np.zeros_like(Xnew) + for i in range(self.Z.shape[0]): + dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :]) + return dmu_dX + + def dmu_dXnew(self, Xnew): + """ + Individual gradient of prediction at Xnew w.r.t. each sample in Xnew + """ + gradients_X = np.zeros((Xnew.shape[0], self.num_inducing)) + ones = np.ones((1, 1)) + for i in range(self.Z.shape[0]): + gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1) + return np.dot(gradients_X, self.grad_dict['dL_dpsi1']) + + def plot_steepest_gradient_map(self, *args, ** kwargs): + """ + See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map + """ + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) + + +def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): + """ + objective function for fitting the latent variables for test points + (negative log-likelihood: should be minimised!) + """ + mu = mu_S[:input_dim][None] + log_S = mu_S[input_dim:][None] + S = np.exp(log_S) + + X = NormalPosterior(mu, S) + + psi0 = kern.psi0(Z, X) + psi1 = kern.psi1(Z, X) + psi2 = kern.psi2(Z, X) + + lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) + + dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X) + dmu = dLdmu - mu + # dS = S0 + S1 + S2 -0.5 + .5/S + dlnS = S * (dLdS - 0.5) + .5 + + return -lik, -np.hstack((dmu.flatten(), dlnS.flatten())) diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py new file mode 100644 index 00000000..00929462 --- /dev/null +++ b/GPy/models/sparse_gp_minibatch.py @@ -0,0 +1,347 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from ..core.parameterization.param import Param +from ..core.gp import GP +from ..inference.latent_function_inference import var_dtc +from .. import likelihoods +from ..core.parameterization.variational import VariationalPosterior + +import logging +from GPy.inference.latent_function_inference.posterior import Posterior +from GPy.inference.optimization.stochastics import SparseGPStochastics,\ + SparseGPMissing +#no stochastics.py file added! 
from GPy.inference.optimization.stochastics import SparseGPStochastics,\ + #SparseGPMissing +logger = logging.getLogger("sparse gp") + +class SparseGPMiniBatch(GP): + """ + A general purpose Sparse GP model +''' +Created on 3 Nov 2014 + +@author: maxz +''' + + This model allows (approximate) inference using variational DTC or FITC + (Gaussian likelihoods) as well as non-conjugate sparse methods based on + these. + + :param X: inputs + :type X: np.ndarray (num_data x input_dim) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) + :param kernel: the kernel (covariance function). See link kernels + :type kernel: a GPy.kern.kern instance + :param X_variance: The uncertainty in the measurements of X (Gaussian variance) + :type X_variance: np.ndarray (num_data x input_dim) | None + :param Z: inducing inputs + :type Z: np.ndarray (num_inducing x input_dim) + :param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None) + :type num_inducing: int + + """ + + def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, + name='sparse gp', Y_metadata=None, normalizer=False, + missing_data=False, stochastic=False, batchsize=1): + #pick a sensible inference method + if inference_method is None: + if isinstance(likelihood, likelihoods.Gaussian): + inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1]) + else: + #inference_method = ?? + raise NotImplementedError, "what to do what to do?" + print "defaulting to ", inference_method, "for latent function inference" + + self.Z = Param('inducing inputs', Z) + self.num_inducing = Z.shape[0] + + GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + self.missing_data = missing_data + + if stochastic and missing_data: + self.missing_data = True + self.ninan = ~np.isnan(Y) + self.stochastics = SparseGPStochastics(self, batchsize) + elif stochastic and not missing_data: + self.missing_data = False + self.stochastics = SparseGPStochastics(self, batchsize) + elif missing_data: + self.missing_data = True + self.ninan = ~np.isnan(Y) + self.stochastics = SparseGPMissing(self) + else: + self.stochastics = False + + logger.info("Adding Z as parameter") + self.link_parameter(self.Z, index=0) + if self.missing_data: + self.Ylist = [] + overall = self.Y_normalized.shape[1] + m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall) + message = m_f(-1) + print message, + for d in xrange(overall): + self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None]) + print ' '*(len(message)+1) + '\r', + message = m_f(d) + print message, + print '' + + self.posterior = None + + def has_uncertain_inputs(self): + return isinstance(self.X, VariationalPosterior) + + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None): + """ + This is the standard part, which usually belongs in parameters_changed. + + For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner + loop, in order to ensure a different handling of gradients etc of different + subsets of data. + + The dict in current_values will be passed aroung as current_values for + the rest of the algorithm, so this is the place to store current values, + such as subsets etc, if necessary. 
+ + If Lm and dL_dKmm can be precomputed (or only need to be computed once) + pass them in here, so they will be passed to the inference_method. + + subset_indices is a dictionary of indices. you can put the indices however you + like them into this dictionary for inner use of the indices inside the + algorithm. + """ + try: + posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None) + except: + posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata) + current_values = {} + likelihood.update_gradients(grad_dict['dL_dthetaL']) + current_values['likgrad'] = likelihood.gradient.copy() + if subset_indices is None: + subset_indices = {} + if isinstance(X, VariationalPosterior): + #gradients wrt kernel + dL_dKmm = grad_dict['dL_dKmm'] + kern.update_gradients_full(dL_dKmm, Z, None) + current_values['kerngrad'] = kern.gradient.copy() + kern.update_gradients_expectations(variational_posterior=X, + Z=Z, + dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=grad_dict['dL_dpsi1'], + dL_dpsi2=grad_dict['dL_dpsi2']) + current_values['kerngrad'] += kern.gradient + + #gradients wrt Z + current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z) + current_values['Zgrad'] += kern.gradients_Z_expectations( + grad_dict['dL_dpsi0'], + grad_dict['dL_dpsi1'], + grad_dict['dL_dpsi2'], + Z=Z, + variational_posterior=X) + else: + #gradients wrt kernel + kern.update_gradients_diag(grad_dict['dL_dKdiag'], X) + current_values['kerngrad'] = kern.gradient.copy() + kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z) + current_values['kerngrad'] += kern.gradient + kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None) + current_values['kerngrad'] += kern.gradient + #gradients wrt Z + current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z) + current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X) + return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices + + def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): + """ + This is for automatic updates of values in the inner loop of missing + data handling. Both arguments are dictionaries and the values in + full_values will be updated by the current_gradients. + + If a key from current_values does not exist in full_values, it will be + initialized to the value in current_values. + + If there is indices needed for the update, value_indices can be used for + that. If value_indices has the same key, as current_values, the update + in full_values will be indexed by the indices in value_indices. + + grads: + dictionary of standing gradients (you will have to carefully make sure, that + the ordering is right!). The values in here will be updated such that + full_values[key] += current_values[key] forall key in full_gradients.keys() + + gradients: + dictionary of gradients in the current set of parameters. + + value_indices: + dictionary holding indices for the update in full_values. 
+ if the key exists the update rule is:def df(x): + full_values[key][value_indices[key]] += current_values[key] + """ + for key in current_values.keys(): + if value_indices is not None and value_indices.has_key(key): + index = value_indices[key] + else: + index = slice(None) + if full_values.has_key(key): + full_values[key][index] += current_values[key] + else: + full_values[key] = current_values[key] + + def _inner_values_update(self, current_values): + """ + This exists if there is more to do with the current values. + It will be called allways in the inner loop, so that + you can do additional inner updates for the inside of the missing data + loop etc. This can also be used for stochastic updates, when only working on + one dimension of the output. + """ + pass + + def _outer_values_update(self, full_values): + """ + Here you put the values, which were collected before in the right places. + E.g. set the gradients of parameters, etc. + """ + self.likelihood.gradient = full_values['likgrad'] + self.kern.gradient = full_values['kerngrad'] + self.Z.gradient = full_values['Zgrad'] + + def _outer_init_full_values(self): + """ + If full_values has indices in values_indices, we might want to initialize + the full_values differently, so that subsetting is possible. + + Here you can initialize the full_values for the values needed. + + Keep in mind, that if a key does not exist in full_values when updating + values, it will be set (so e.g. for Z there is no need to initialize Zgrad, + as there is no subsetting needed. For X in BGPLVM on the other hand we probably need + to initialize the gradients for the mean and the variance in order to + have the full gradient for indexing) + """ + return {} + + def _outer_loop_for_missing_data(self): + Lm = None + dL_dKmm = None + + self._log_marginal_likelihood = 0 + self.full_values = self._outer_init_full_values() + + if self.posterior is None: + woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim)) + woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) + else: + woodbury_inv = self.posterior._woodbury_inv + woodbury_vector = self.posterior._woodbury_vector + + if not self.stochastics: + m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim) + message = m_f(-1) + print message, + + for d in self.stochastics.d: + ninan = self.ninan[:, d] + + if not self.stochastics: + print ' '*(len(message)) + '\r', + message = m_f(d) + print message, + + posterior, log_marginal_likelihood, \ + grad_dict, current_values, value_indices = self._inner_parameters_changed( + self.kern, self.X[ninan], + self.Z, self.likelihood, + self.Ylist[d], self.Y_metadata, + Lm, dL_dKmm, + subset_indices=dict(outputs=d, samples=ninan)) + + self._inner_take_over_or_update(self.full_values, current_values, value_indices) + self._inner_values_update(current_values) + + Lm = posterior.K_chol + dL_dKmm = grad_dict['dL_dKmm'] + woodbury_inv[:, :, d] = posterior.woodbury_inv + woodbury_vector[:, d:d+1] = posterior.woodbury_vector + self._log_marginal_likelihood += log_marginal_likelihood + if not self.stochastics: + print '' + + if self.posterior is None: + self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, + K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) + self._outer_values_update(self.full_values) + + def _outer_loop_without_missing_data(self): + self._log_marginal_likelihood = 0 + + if self.posterior is None: + woodbury_inv = np.zeros((self.num_inducing, 
self.num_inducing, self.output_dim)) + woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) + else: + woodbury_inv = self.posterior._woodbury_inv + woodbury_vector = self.posterior._woodbury_vector + + d = self.stochastics.d + posterior, log_marginal_likelihood, \ + grad_dict, self.full_values, _ = self._inner_parameters_changed( + self.kern, self.X, + self.Z, self.likelihood, + self.Y_normalized[:, d], self.Y_metadata) + self.grad_dict = grad_dict + + self._log_marginal_likelihood += log_marginal_likelihood + + self._outer_values_update(self.full_values) + + woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None] + woodbury_vector[:, d] = posterior.woodbury_vector + if self.posterior is None: + self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, + K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) + + def parameters_changed(self): + if self.missing_data: + self._outer_loop_for_missing_data() + elif self.stochastics: + self._outer_loop_without_missing_data() + else: + self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) + self._outer_values_update(self.full_values) + + def _raw_predict(self, Xnew, full_cov=False, kern=None): + """ + Make a prediction for the latent function values + """ + + if kern is None: kern = self.kern + + if not isinstance(Xnew, VariationalPosterior): + Kx = kern.K(self.Z, Xnew) + mu = np.dot(Kx.T, self.posterior.woodbury_vector) + if full_cov: + Kxx = kern.K(Xnew) + if self.posterior.woodbury_inv.ndim == 2: + var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) + elif self.posterior.woodbury_inv.ndim == 3: + var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) + var = var + else: + Kxx = kern.Kdiag(Xnew) + var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T + else: + Kx = kern.psi1(self.Z, Xnew) + mu = np.dot(Kx, self.posterior.woodbury_vector) + if full_cov: + raise NotImplementedError, "TODO" + else: + Kxx = kern.psi0(self.Z, Xnew) + psi2 = kern.psi2(self.Z, Xnew) + var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) + return mu, var diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index d05e33de..55dfafec 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -112,7 +112,7 @@ class MiscTests(unittest.TestCase): def test_missing_data(self): from GPy import kern - from GPy.models import BayesianGPLVM + from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch from GPy.examples.dimensionality_reduction import _simulate_matern D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4 @@ -124,12 +124,12 @@ class MiscTests(unittest.TestCase): Ymissing[inan] = np.nan k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q) - m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, + m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing, kernel=k, missing_data=True) assert(m.checkgrad()) k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q) - m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, + m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing, kernel=k, missing_data=True) assert(m.checkgrad()) From 625eedd6ac092e1adb2b596632ae3f41a592f434 Mon Sep 17 
00:00:00 2001
From: Alan Saul
Date: Mon, 3 Nov 2014 11:54:55 +0000
Subject: [PATCH 02/22] Catch nosetests import error if not installed, now ignore GPy.tests() when nosetests GPy is called, but allows GPy.tests() to be called, and throws error if this is tried without nose being installed

---
 GPy/__init__.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index ceff68a0..99a91fe8 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -14,7 +14,6 @@ import examples
 import likelihoods
 import testing
 from numpy.testing import Tester
-from nose.tools import nottest
 import kern
 import plotting
 
@@ -22,10 +21,16 @@ import plotting
 from core import Model
 from core.parameterization import Param, Parameterized, ObsAr
 
-@nottest
-def tests():
-    Tester(testing).test(verbose=10)
-
+#@nottest
+try:
+    #Get rid of nose dependency by only ignoring if you have nose installed
+    from nose.tools import nottest
+    @nottest
+    def tests():
+        Tester(testing).test(verbose=10)
+except:
+    def tests():
+        Tester(testing).test(verbose=10)
 
 def load(file_path):
     """
@@ -36,4 +41,4 @@ def load(file_path):
     import cPickle as pickle
     with open(file_path, 'rb') as f:
         m = pickle.load(f)
-    return m
\ No newline at end of file
+    return m

From f7ecfed179a0eadab5f029a99d77cabf2be4617c Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Mon, 3 Nov 2014 12:04:46 +0000
Subject: [PATCH 03/22] add documentation for hmc

---
 GPy/inference/mcmc/hmc.py | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/GPy/inference/mcmc/hmc.py b/GPy/inference/mcmc/hmc.py
index 8c65cdf0..455e9411 100644
--- a/GPy/inference/mcmc/hmc.py
+++ b/GPy/inference/mcmc/hmc.py
@@ -4,7 +4,19 @@
 import numpy as np
 
 class HMC:
-    def __init__(self,model,M=None,stepsize=1e-1):
+    """
+    An implementation of Hybrid Monte Carlo (HMC) for GPy models
+    """
+    def __init__(self, model, M=None,stepsize=1e-1):
+        """
+        Initialize an object for HMC sampling. Note that the status of the model (model parameters) will be changed during sampling
+        :param model: the GPy model that will be sampled
+        :type model: GPy.core.Model
+        :param M: the mass matrix (an identity matrix by default)
+        :type M: numpy.ndarray
+        :param stepsize: the step size for HMC sampling
+        :type stepsize: float
+        """
         self.model = model
         self.stepsize = stepsize
         self.p = np.empty_like(model.optimizer_array.copy())
@@ -14,9 +26,18 @@ class HMC:
         self.M = M
         self.Minv = np.linalg.inv(self.M)
 
-    def sample(self, m_iters=1000, hmc_iters=20):
-        params = np.empty((m_iters,self.p.size))
-        for i in xrange(m_iters):
+    def sample(self, num_samples=1000, hmc_iters=20):
+        """
+        Sample the (unfixed) model parameters.
+        :param num_samples: the number of samples to draw (1000 by default)
+        :type num_samples: int
+        :param hmc_iters: the number of leap-frog iterations (20 by default)
+        :type hmc_iters: int
+        :return: the list of parameters samples with the size N x P (N - the number of samples, P - the number of parameters to sample)
+        :rtype: numpy.ndarray
+        """
+        params = np.empty((num_samples,self.p.size))
+        for i in xrange(num_samples):
             self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
             H_old = self._computeH()
             theta_old = self.model.optimizer_array.copy()
@@ -125,8 +146,6 @@ class HMC_shortcut:
                     break
                 else:
                     Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
-#                     print Hlist
-#                     print self._testH(H_buf[Hlist])
                     if self._testH(H_buf[Hlist]):
                         pos += -1
 
@@ -139,14 +158,10 @@ class HMC_shortcut:
                 pos_new = pos + r
                 self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
                 self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
-#                 print reversal[0],pos,pos_new
-#                 print H_buf
                 break
 
     def _testH(self, Hlist):
        Hstd = np.std(Hlist)
-#         print Hlist
-#         print Hstd
        if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
            return False
        else:

From b39188a5cd76c2b3ef10bf50242ea486326ebcce Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Mon, 3 Nov 2014 12:05:49 +0000
Subject: [PATCH 04/22] add __init__.py to mcmc

---
 GPy/inference/mcmc/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 GPy/inference/mcmc/__init__.py

diff --git a/GPy/inference/mcmc/__init__.py b/GPy/inference/mcmc/__init__.py
new file mode 100644
index 00000000..e69de29b

From 74765d0e0dc11a24cfa20ee4f75032d489174e6e Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Mon, 3 Nov 2014 12:18:04 +0000
Subject: [PATCH 05/22] refine the docstring for hmc

---
 GPy/inference/mcmc/hmc.py          | 24 +++++++++++++-----------
 doc/GPy.inference.optimization.rst | 24 ------------------------
 doc/GPy.inference.rst              |  1 +
 doc/GPy.testing.rst                |  8 --------
 4 files changed, 14 insertions(+), 43 deletions(-)

diff --git a/GPy/inference/mcmc/hmc.py b/GPy/inference/mcmc/hmc.py
index 455e9411..54893769 100644
--- a/GPy/inference/mcmc/hmc.py
+++ b/GPy/inference/mcmc/hmc.py
@@ -1,22 +1,23 @@
-"""HMC implementation"""
+# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
 import numpy as np
 
 class HMC:
     """
-    An implementation of Hybrid Monte Carlo (HMC) for GPy models
+    An implementation of Hybrid Monte Carlo (HMC) for GPy models
+
+    Initialize an object for HMC sampling. Note that the status of the model (model parameters) will be changed during sampling.
+
+    :param model: the GPy model that will be sampled
+    :type model: GPy.core.Model
+    :param M: the mass matrix (an identity matrix by default)
+    :type M: numpy.ndarray
+    :param stepsize: the step size for HMC sampling
+    :type stepsize: float
     """
     def __init__(self, model, M=None,stepsize=1e-1):
-        """
-        Initialize an object for HMC sampling. Note that the status of the model (model parameters) will be changed during sampling
-        :param model: the GPy model that will be sampled
-        :type model: GPy.core.Model
-        :param M: the mass matrix (an identity matrix by default)
-        :type M: numpy.ndarray
-        :param stepsize: the step size for HMC sampling
-        :type stepsize: float
-        """
         self.model = model
         self.stepsize = stepsize
         self.p = np.empty_like(model.optimizer_array.copy())
@@ -29,6 +30,7 @@ class HMC:
     def sample(self, num_samples=1000, hmc_iters=20):
         """
         Sample the (unfixed) model parameters.
+ :param num_samples: the number of samples to draw (1000 by default) :type num_samples: int :param hmc_iters: the number of leap-frog iterations (20 by default) diff --git a/doc/GPy.inference.optimization.rst b/doc/GPy.inference.optimization.rst index 81fa877a..a81a8e68 100644 --- a/doc/GPy.inference.optimization.rst +++ b/doc/GPy.inference.optimization.rst @@ -4,14 +4,6 @@ GPy.inference.optimization package Submodules ---------- -GPy.inference.optimization.BayesOpt module ------------------------------------------- - -.. automodule:: GPy.inference.optimization.BayesOpt - :members: - :undoc-members: - :show-inheritance: - GPy.inference.optimization.conjugate_gradient_descent module ------------------------------------------------------------ @@ -28,14 +20,6 @@ GPy.inference.optimization.gradient_descent_update_rules module :undoc-members: :show-inheritance: -GPy.inference.optimization.hmc module -------------------------------------- - -.. automodule:: GPy.inference.optimization.hmc - :members: - :undoc-members: - :show-inheritance: - GPy.inference.optimization.optimization module ---------------------------------------------- @@ -44,14 +28,6 @@ GPy.inference.optimization.optimization module :undoc-members: :show-inheritance: -GPy.inference.optimization.samplers module ------------------------------------------- - -.. automodule:: GPy.inference.optimization.samplers - :members: - :undoc-members: - :show-inheritance: - GPy.inference.optimization.scg module ------------------------------------- diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst index 2aa7839f..235f804b 100644 --- a/doc/GPy.inference.rst +++ b/doc/GPy.inference.rst @@ -7,6 +7,7 @@ Subpackages .. toctree:: GPy.inference.latent_function_inference + GPy.inference.mcmc GPy.inference.optimization Module contents diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst index bef24a30..2d1132d7 100644 --- a/doc/GPy.testing.rst +++ b/doc/GPy.testing.rst @@ -84,14 +84,6 @@ GPy.testing.prior_tests module :undoc-members: :show-inheritance: -GPy.testing.sparse_tests module -------------------------------- - -.. automodule:: GPy.testing.sparse_tests - :members: - :undoc-members: - :show-inheritance: - Module contents --------------- From 02c903c4eb732399bf5aaa98abc6e6a2fdd8db8b Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 3 Nov 2014 13:36:26 +0000 Subject: [PATCH 06/22] Tidied up laplace warnings --- .../latent_function_inference/laplace.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 01d2f997..2c741b9d 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -14,6 +14,9 @@ import numpy as np from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv from posterior import Posterior import warnings +def warning_on_one_line(message, category, filename, lineno, file=None, line=None): + return ' %s:%s: %s:%s\n' % (filename, lineno, category.__name__, message) +warnings.formatwarning = warning_on_one_line from scipy import optimize from . 
import LatentFunctionInference @@ -29,8 +32,11 @@ class Laplace(LatentFunctionInference): """ self._mode_finding_tolerance = 1e-7 - self._mode_finding_max_iter = 40 - self.bad_fhat = True + self._mode_finding_max_iter = 60 + self.bad_fhat = False + #Store whether it is the first run of the inference so that we can choose whether we need + #to calculate things or reuse old variables + self.first_run = True self._previous_Ki_fhat = None def inference(self, kern, X, likelihood, Y, Y_metadata=None): @@ -42,8 +48,9 @@ class Laplace(LatentFunctionInference): K = kern.K(X) #Find mode - if self.bad_fhat: + if self.bad_fhat or self.first_run: Ki_f_init = np.zeros_like(Y) + first_run = False else: Ki_f_init = self._previous_Ki_fhat @@ -123,11 +130,11 @@ class Laplace(LatentFunctionInference): #Warn of bad fits if difference > self._mode_finding_tolerance: if not self.bad_fhat: - warnings.warn("Not perfect f_hat fit difference: {}".format(difference)) + warnings.warn("Not perfect mode found (f_hat). difference: {}, iteration: {} out of max {}".format(difference, iteration, self._mode_finding_max_iter)) self.bad_fhat = True elif self.bad_fhat: self.bad_fhat = False - warnings.warn("f_hat now fine again") + warnings.warn("f_hat now fine again. difference: {}, iteration: {} out of max {}".format(difference, iteration, self._mode_finding_max_iter)) return f, Ki_f From 1385c3b2711277034e654874090da277cc25a277 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 3 Nov 2014 13:36:46 +0000 Subject: [PATCH 07/22] Removed pod dependency for pickle tests --- GPy/examples/regression.py | 2 +- GPy/testing/pickle_tests.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 8e481b02..6923b20d 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -468,7 +468,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): k = GPy.kern.RBF(1) # create simple GP Model - no input uncertainty on this one - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z) + m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index dfabe54e..b541d21e 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -17,10 +17,15 @@ from GPy.kern._src.rbf import RBF from GPy.kern._src.linear import Linear from GPy.kern._src.static import Bias, White from GPy.examples.dimensionality_reduction import mrd_simulation -from GPy.examples.regression import toy_rbf_1d_50 from GPy.core.parameterization.variational import NormalPosterior from GPy.models.gp_regression import GPRegression +def toy_model(): + X = np.linspace(0,1,50)[:, None] + Y = np.sin(X) + m = GPRegression(X=X, Y=Y) + return m + class ListDictTestCase(unittest.TestCase): def assertListDictEquals(self, d1, d2, msg=None): for k,v in d1.iteritems(): @@ -105,7 +110,7 @@ class Test(ListDictTestCase): self.assertSequenceEqual(str(par), str(pcopy)) def test_model(self): - par = toy_rbf_1d_50(optimize=0, plot=0) + par = toy_model() pcopy = par.copy() self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) @@ -124,7 +129,7 @@ class Test(ListDictTestCase): self.assert_(pcopy.checkgrad()) def test_modelrecreation(self): - par = toy_rbf_1d_50(optimize=0, plot=0) + par = toy_model() pcopy = 
GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) np.testing.assert_allclose(par.param_array, pcopy.param_array) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) @@ -135,7 +140,7 @@ class Test(ListDictTestCase): self.assert_(np.any(pcopy.gradient!=0.0)) pcopy.optimize('bfgs') par.optimize('bfgs') - np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=.001) + np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=1e-6) with tempfile.TemporaryFile('w+b') as f: par.pickle(f) f.seek(0) @@ -193,7 +198,7 @@ class Test(ListDictTestCase): @unittest.skip def test_add_observer(self): - par = toy_rbf_1d_50(optimize=0, plot=0) + par = toy_model() par.name = "original" par.count = 0 par.add_observer(self, self._callback, 1) @@ -211,4 +216,4 @@ class Test(ListDictTestCase): if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_parameter_index_operations'] - unittest.main() \ No newline at end of file + unittest.main() From 78b50db138beff11ea528e6155d2a1bd28554d2c Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 3 Nov 2014 13:38:28 +0000 Subject: [PATCH 08/22] add new inference X --- GPy/core/parameterization/variational.py | 6 + .../latent_function_inference/inferenceX.py | 127 ++++++++++++++++++ GPy/models/bayesian_gplvm.py | 16 +++ 3 files changed, 149 insertions(+) create mode 100644 GPy/inference/latent_function_inference/inferenceX.py diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 251ec7db..e2a24008 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -94,6 +94,9 @@ class VariationalPosterior(Parameterized): if self.has_uncertain_inputs(): assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion" + def set_gradients(self, grad): + self.mean.gradient, self.variance.gradient = grad + def _raveled_index(self): index = np.empty(dtype=int, shape=0) size = 0 @@ -158,6 +161,9 @@ class SpikeAndSlabPosterior(VariationalPosterior): self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10)) self.link_parameter(self.gamma) + def set_gradients(self, grad): + self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad + def __getitem__(self, s): if isinstance(s, (int, slice, tuple, list, np.ndarray)): import copy diff --git a/GPy/inference/latent_function_inference/inferenceX.py b/GPy/inference/latent_function_inference/inferenceX.py new file mode 100644 index 00000000..9828c4a1 --- /dev/null +++ b/GPy/inference/latent_function_inference/inferenceX.py @@ -0,0 +1,127 @@ +""" +""" +import numpy as np +from ...core import Model +from ...core.parameterization import variational + +def infer_newX(model, Y_new, optimize=True, init='L2'): + """ + Infer the distribution of X for the new observed data *Y_new*. + + :param model: the GPy model used in inference + :type model: GPy.core.Model + :param Y_new: the new observed data for inference + :type Y_new: numpy.ndarray + :param optimize: whether to optimize the location of new X (True by default) + :type optimize: boolean + :return: a tuple containing the estimated posterior distribution of X and the model that optimize X + :rtype: (GPy.core.parameterization.variational.VariationalPosterior, GPy.core.Model) + """ + infr_m = InferenceX(model, Y_new, init=init) + + if optimize: + infr_m.optimize() + + return infr_m.X, infr_m + +class InferenceX(Model): + """ + The class for inference of new X with given new Y. 
(do_test_latent) + + :param model: the GPy model used in inference + :type model: GPy.core.Model + :param Y: the new observed data for inference + :type Y: numpy.ndarray + """ + def __init__(self, model, Y, name='inferenceX', init='L2'): + if np.isnan(Y).any(): + assert Y.shape[0]==1, "The current implementation of inference X only support one data point at a time with missing data!" + self.missing_data = True + self.valid_dim = np.logical_not(np.isnan(Y[0])) + else: + self.missing_data = False + super(InferenceX, self).__init__(name) + self.likelihood = model.likelihood.copy() + self.kern = model.kern.copy() + if model.kern.useGPU: + from ...models import SSGPLVM + if isinstance(model, SSGPLVM): + self.kern.GPU_SSRBF(True) + else: + self.kern.GPU(True) + from copy import deepcopy + self.posterior = deepcopy(model.posterior) + self.variational_prior = model.variational_prior.copy() + self.Z = model.Z.copy() + self.Y = Y + self.X = self._init_X(model, Y, init=init) + self.compute_dL() + + self.link_parameter(self.X) + + def _init_X(self, model, Y_new, init='L2'): + # Initialize the new X by finding the nearest point in Y space. + + Y = model.Y + if self.missing_data: + Y = Y[:,self.valid_dim] + Y_new = Y_new[:,self.valid_dim] + dist = -2.*Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:,None]+ np.square(Y).sum(axis=1)[None,:] + else: + if init=='L2': + dist = -2.*Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:,None]+ np.square(Y).sum(axis=1)[None,:] + elif init=='NCC': + dist = Y_new.dot(Y.T) + idx = dist.argmin(axis=1) + + from ...models import SSGPLVM + from ...util.misc import param_to_array + if isinstance(model, SSGPLVM): + X = variational.SpikeAndSlabPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]), param_to_array(model.X.gamma[idx])) + if model.group_spike: + X.gamma.fix() + else: + X = variational.NormalPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx])) + + return X + + def compute_dL(self): + # Common computation + beta = 1./np.fmax(self.likelihood.variance, 1e-6) + output_dim = self.Y.shape[-1] + wv = self.posterior.woodbury_vector + if self.missing_data: + wv = wv[:,self.valid_dim] + output_dim = self.valid_dim.sum() + self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2. + self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T) + self.dL_dpsi0 = -output_dim * beta/2.* np.ones(self.Y.shape[0]) + else: + self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2. 
+ self.dL_dpsi1 = beta*np.dot(self.Y, wv.T) + self.dL_dpsi0 = -output_dim * beta/2.* np.ones(self.Y.shape[0]) + + def parameters_changed(self): + psi0 = self.kern.psi0(self.Z, self.X) + psi1 = self.kern.psi1(self.Z, self.X) + psi2 = self.kern.psi2(self.Z, self.X) + + self._log_marginal_likelihood = (self.dL_dpsi2*psi2).sum()+(self.dL_dpsi1*psi1).sum()+(self.dL_dpsi0*psi0).sum() + X_grad = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.dL_dpsi0, dL_dpsi1=self.dL_dpsi1, dL_dpsi2=self.dL_dpsi2) + self.X.set_gradients(X_grad) + + from ...core.parameterization.variational import SpikeAndSlabPrior + if isinstance(self.variational_prior, SpikeAndSlabPrior): + # Update Log-likelihood + KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0]) + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0]) + else: + # Update Log-likelihood + KL_div = self.variational_prior.KL_divergence(self.X) + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.X) + + def log_likelihood(self): + return self._log_marginal_likelihood + diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index c4d0b6e9..b2eed718 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -175,6 +175,22 @@ class BayesianGPLVM(SparseGP_MPI): resolution, ax, marker, s, fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) + + def infer_newX(self, Y_new, optimize=True, ): + """ + Infer the distribution of X for the new observed data *Y_new*. + + :param model: the GPy model used in inference + :type model: GPy.core.Model + :param Y_new: the new observed data for inference + :type Y_new: numpy.ndarray + :param optimize: whether to optimize the location of new X (True by default) + :type optimize: boolean + :return: a tuple containing the estimated posterior distribution of X and the model that optimize X + :rtype: (GPy.core.parameterization.variational.VariationalPosterior, GPy.core.Model) + """ + from ..inference.latent_function_inference.inferenceX import infer_newX + return infer_newX(self, Y_new, optimize=optimize) def do_test_latents(self, Y): """ From 24ad425a1ba75ddfbdad8c546307d69657527b36 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 3 Nov 2014 14:11:46 +0000 Subject: [PATCH 09/22] bug fix for inferenceX --- GPy/inference/latent_function_inference/inferenceX.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/inference/latent_function_inference/inferenceX.py b/GPy/inference/latent_function_inference/inferenceX.py index 9828c4a1..7480c910 100644 --- a/GPy/inference/latent_function_inference/inferenceX.py +++ b/GPy/inference/latent_function_inference/inferenceX.py @@ -95,11 +95,11 @@ class InferenceX(Model): output_dim = self.valid_dim.sum() self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2. self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T) - self.dL_dpsi0 = -output_dim * beta/2.* np.ones(self.Y.shape[0]) + self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0]) else: self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2. 
self.dL_dpsi1 = beta*np.dot(self.Y, wv.T) - self.dL_dpsi0 = -output_dim * beta/2.* np.ones(self.Y.shape[0]) + self.dL_dpsi0 = -beta/2.* np.ones(self.Y.shape[0]) #self.dL_dpsi0[:] = 0 def parameters_changed(self): psi0 = self.kern.psi0(self.Z, self.X) @@ -121,6 +121,7 @@ class InferenceX(Model): KL_div = self.variational_prior.KL_divergence(self.X) # update for the KL divergence self.variational_prior.update_gradients_KL(self.X) + self._log_marginal_likelihood += -KL_div def log_likelihood(self): return self._log_marginal_likelihood From d0a5420f2f3e27931065c4f79c0146135d4620bf Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 3 Nov 2014 14:19:03 +0000 Subject: [PATCH 10/22] [minbatch var dtc] adjustments to bgplvm minibatch --- GPy/models/bayesian_gplvm.py | 64 ++++++-------------------- GPy/models/bayesian_gplvm_minibatch.py | 4 +- 2 files changed, 17 insertions(+), 51 deletions(-) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index c4d0b6e9..73fd6f8f 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -75,8 +75,7 @@ class BayesianGPLVM(SparseGP_MPI): name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior, - missing_data=missing_data, stochastic=stochastic, - batchsize=batchsize) + ) def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" @@ -86,55 +85,22 @@ class BayesianGPLVM(SparseGP_MPI): """Get the gradients of the posterior distribution of X in its specific form.""" return X.mean.gradient, X.variance.gradient - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None): - posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices) - - kl_fctr = 1. - if self.missing_data: - d = self.output_dim - log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d - else: - log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X) - - current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( - variational_posterior=X, - Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2']) - - # Subsetting Variational Posterior objects, makes the gradients - # empty. We need them to be 0 though: - X.mean.gradient[:] = 0 - X.variance.gradient[:] = 0 - - self.variational_prior.update_gradients_KL(X) - if self.missing_data: - current_values['meangrad'] += kl_fctr*X.mean.gradient/d - current_values['vargrad'] += kl_fctr*X.variance.gradient/d - else: - current_values['meangrad'] += kl_fctr*X.mean.gradient - current_values['vargrad'] += kl_fctr*X.variance.gradient - - if subset_indices is not None: - value_indices['meangrad'] = subset_indices['samples'] - value_indices['vargrad'] = subset_indices['samples'] - return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices - - def _outer_values_update(self, full_values): - """ - Here you put the values, which were collected before in the right places. - E.g. set the gradients of parameters, etc. 
- """ - super(BayesianGPLVM, self)._outer_values_update(full_values) - self.X.mean.gradient = full_values['meangrad'] - self.X.variance.gradient = full_values['vargrad'] - - def _outer_init_full_values(self): - return dict(meangrad=np.zeros(self.X.mean.shape), - vargrad=np.zeros(self.X.variance.shape)) - def parameters_changed(self): super(BayesianGPLVM,self).parameters_changed() + + kl_fctr = 1. + self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) + + self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations( + variational_posterior=self.X, + Z=self.Z, + dL_dpsi0=self.grad_dict['dL_dpsi0'], + dL_dpsi1=self.grad_dict['dL_dpsi1'], + dL_dpsi2=self.grad_dict['dL_dpsi2']) + + self.variational_prior.update_gradients_KL(self.X) + + if isinstance(self.inference_method, VarDTC_minibatch): return diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 56b46244..7100fa72 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -26,8 +26,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', normalizer=None, missing_data=False, stochastic=False, batchsize=1): - self.__IN_OPTIMIZATION__ = False - self.logger = logging.getLogger(self.__class__.__name__) if X is None: from ..util.initialization import initialize_latent @@ -70,6 +68,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize) + self.X = X + self.link_parameter(self.X, 0) def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" From 6242a75f6a28b41fbe699a1210442b5d7354c12f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 3 Nov 2014 14:19:38 +0000 Subject: [PATCH 11/22] [parameterized] handle updates inside init --- GPy/core/parameterization/index_operations.py | 20 +++++++++---------- GPy/core/parameterization/parameter_core.py | 6 +++--- GPy/core/parameterization/parameterized.py | 15 ++++++++++---- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 78c0d2b9..ddd689ed 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -26,39 +26,39 @@ class ParameterIndexOperations(object): This object wraps a dictionary, whos keys are _operations_ that we'd like to apply to a parameter array, and whose values are np integer arrays which index the parameter array appropriately. - + A model instance will contain one instance of this class for each thing that needs indexing (i.e. constraints, ties and priors). Parameters within the model constain instances of the ParameterIndexOperationsView class, which can map from a 'local' index (starting 0) to this global index. - + Here's an illustration: - + #======================================================================= model : 0 1 2 3 4 5 6 7 8 9 key1: 4 5 key2: 7 8 - + param1: 0 1 2 3 4 5 key1: 2 3 key2: 5 - + param2: 0 1 2 3 4 key1: 0 key2: 2 3 #======================================================================= - + The views of this global index have a subset of the keys in this global (model) index. - + Adding a new key (e.g. 
a constraint) to a view will cause the view to pass the new key to the global index, along with the local index and an offset. This global index then stores the key and the appropriate global index (which can be seen by the view). - + See also: ParameterIndexOperationsView - + """ _offset = 0 def __init__(self, constraints=None): @@ -221,8 +221,6 @@ class ParameterIndexOperationsView(object): def shift_left(self, start, size): self._param_index_ops.shift_left(start+self._offset, size) - self._offset -= size - self._size -= size def clear(self): for i, ind in self.items(): diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index b1e0d8d2..115acf3a 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -18,7 +18,7 @@ import numpy as np import re import logging -__updated__ = '2014-10-28' +__updated__ = '2014-11-03' class HierarchyError(Exception): """ @@ -924,7 +924,7 @@ class Parameterizable(OptimizationHandlable): !WARNING!: setting the parameter array MUST always be done in memory: m.param_array[:] = m_copy.param_array """ - if self.__dict__.get('_param_array_', None) is None: + if (self.__dict__.get('_param_array_', None) is None) or (self._param_array_.size != self.size): self._param_array_ = np.empty(self.size, dtype=np.float64) return self._param_array_ @@ -1002,7 +1002,7 @@ class Parameterizable(OptimizationHandlable): #========================================================================= @property def gradient(self): - if self.__dict__.get('_gradient_array_', None) is None: + if (self.__dict__.get('_gradient_array_', None) is None) or self._gradient_array_.size != self.size: self._gradient_array_ = np.empty(self.size, dtype=np.float64) return self._gradient_array_ diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 0bd72be4..2c91b235 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -9,6 +9,7 @@ from param import ParamConcatenation from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing import logging +from GPy.core.parameterization.index_operations import ParameterIndexOperationsView logger = logging.getLogger("parameters changed meta") class ParametersChangedMeta(type): @@ -20,7 +21,7 @@ class ParametersChangedMeta(type): self._in_init_ = False logger.debug("connecting parameters") self._highest_parent_._connect_parameters() - self._highest_parent_._notify_parent_change() + #self._highest_parent_._notify_parent_change() self._highest_parent_._connect_fixes() logger.debug("calling parameters changed") self.parameters_changed() @@ -140,6 +141,8 @@ class Parameterized(Parameterizable): self.priors.shift_right(start, param.size) self.constraints.update(param.constraints, self.size) self.priors.update(param.priors, self.size) + param._parent_ = self + param._parent_index_ = len(self.parameters) self.parameters.append(param) else: start = sum(p.size for p in self.parameters[:index]) @@ -147,19 +150,23 @@ class Parameterized(Parameterizable): self.priors.shift_right(start, param.size) self.constraints.update(param.constraints, start) self.priors.update(param.priors, start) + param._parent_ = self + param._parent_index_ = index if index>=0 else len(self.parameters[:index]) + for p in self.parameters[index:]: + p._parent_index_ += 1 self.parameters.insert(index, param) - self._notify_parent_change() param.add_observer(self, 
self._pass_through_notify_observers, -np.inf) parent = self while parent is not None: parent.size += param.size parent = parent._parent_ + self._notify_parent_change() if not self._in_init_: - self._connect_parameters() - self._notify_parent_change() + #self._connect_parameters() + #self._notify_parent_change() self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) self._highest_parent_._notify_parent_change() From 871405e3e4721ae4f31efb5add8dc0e6d48500df Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 3 Nov 2014 14:22:01 +0000 Subject: [PATCH 12/22] add test cases for inference new X for bayesian GPLVM --- GPy/testing/inference_tests.py | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 GPy/testing/inference_tests.py diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py new file mode 100644 index 00000000..c7efa821 --- /dev/null +++ b/GPy/testing/inference_tests.py @@ -0,0 +1,71 @@ + +""" +The test cases for various inference algorithms +""" + +import unittest, itertools +import numpy as np +import GPy + + +class InferenceXTestCase(unittest.TestCase): + + def genData(self): + D1,D2,N = 12,12,50 + np.random.seed(1234) + + x = np.linspace(0, 4 * np.pi, N)[:, None] + s1 = np.vectorize(lambda x: np.sin(x)) + s2 = np.vectorize(lambda x: np.cos(x)**2) + s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x))) + sS = np.vectorize(lambda x: np.cos(x)) + + s1 = s1(x) + s2 = s2(x) + s3 = s3(x) + sS = sS(x) + + s1 -= s1.mean(); s1 /= s1.std(0) + s2 -= s2.mean(); s2 /= s2.std(0) + s3 -= s3.mean(); s3 /= s3.std(0) + sS -= sS.mean(); sS /= sS.std(0) + + S1 = np.hstack([s1, sS]) + S2 = np.hstack([s3, sS]) + + P1 = np.random.randn(S1.shape[1], D1) + P2 = np.random.randn(S2.shape[1], D2) + + Y1 = S1.dot(P1) + Y2 = S2.dot(P2) + + Y1 += .01 * np.random.randn(*Y1.shape) + Y2 += .01 * np.random.randn(*Y2.shape) + + Y1 -= Y1.mean(0) + Y2 -= Y2.mean(0) + Y1 /= Y1.std(0) + Y2 /= Y2.std(0) + + slist = [s1, s2, s3, sS] + slist_names = ["s1", "s2", "s3", "sS"] + Ylist = [Y1, Y2] + + return Ylist + + def test_inferenceX_BGPLVM(self): + Ys = self.genData() + m = GPy.models.BayesianGPLVM(Ys[0],5,kernel=GPy.kern.Linear(5,ARD=True)) + + x,mi = m.infer_newX(m.Y, optimize=False) + self.assertTrue(mi.checkgrad()) + + m.optimize(max_iters=10000) + x,mi = m.infer_newX(m.Y) + + self.assertTrue(np.allclose(m.X.mean, mi.X.mean)) + self.assertTrue(np.allclose(m.X.variance, mi.X.variance)) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 13fd45e31f146faef9a921b591a8b24e6788db9e Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 3 Nov 2014 14:39:17 +0000 Subject: [PATCH 13/22] Minor doc changes, fixed MPI dependency and 'stop' in var_dtc --- .../latent_function_inference/var_dtc.py | 2 +- GPy/util/parallel.py | 11 +- ...Py.inference.latent_function_inference.rst | 8 + doc/GPy.inference.mcmc.rst | 30 +++ doc/GPy.kern.parts.rst | 246 ------------------ doc/GPy.likelihoods.noise_models.rst | 70 ----- doc/GPy.models.rst | 16 ++ doc/GPy.testing.rst | 8 + ...atent_space_visualizations.controllers.rst | 30 --- doc/GPy.util.latent_space_visualizations.rst | 17 -- doc/conf.py | 10 +- 11 files changed, 74 insertions(+), 374 deletions(-) create mode 100644 doc/GPy.inference.mcmc.rst delete mode 100644 doc/GPy.kern.parts.rst delete mode 100644 doc/GPy.likelihoods.noise_models.rst delete mode 100644 doc/GPy.util.latent_space_visualizations.controllers.rst delete mode 100644 doc/GPy.util.latent_space_visualizations.rst 
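Before the documentation-only diffs of this patch, a minimal usage sketch of the inference-X entry point exercised by the new test case above. It only mirrors what test_inferenceX_BGPLVM does; the toy data, kernel choice and latent dimensionality are illustrative assumptions and not part of the patch series.

import numpy as np
import GPy

# Toy data: N points in a D-dimensional observed space (illustrative only).
np.random.seed(0)
N, D, Q = 50, 12, 5
Y = np.random.randn(N, D)

# Train a Bayesian GPLVM as in GPy/testing/inference_tests.py.
m = GPy.models.BayesianGPLVM(Y, Q, kernel=GPy.kern.Linear(Q, ARD=True))
m.optimize(max_iters=100)

# Infer the variational posterior over X for new observations (here simply the
# training outputs again); infer_newX returns the estimated posterior and the
# small InferenceX model that was optimised to produce it.
x_new, infr_model = m.infer_newX(Y, optimize=True)
print(x_new.mean.shape)      # (N, Q)
print(x_new.variance.shape)  # (N, Q)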
diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 0459132a..cfc2bb47 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -167,7 +167,7 @@ class VarDTC(LatentFunctionInference): woodbury_vector = Cpsi1Vf # == Cpsi1V else: print 'foobar' - stop + import ipdb; ipdb.set_trace() psi1V = np.dot(Y.T*beta, psi1).T tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) tmp, _ = dpotrs(LB, tmp, lower=1) diff --git a/GPy/util/parallel.py b/GPy/util/parallel.py index fd8791d4..bc847c95 100644 --- a/GPy/util/parallel.py +++ b/GPy/util/parallel.py @@ -4,11 +4,10 @@ The module of tools for parallelization (MPI) try: from mpi4py import MPI + def get_id_within_node(comm=MPI.COMM_WORLD): + rank = comm.rank + nodename = MPI.Get_processor_name() + nodelist = comm.allgather(nodename) + return len([i for i in nodelist[:rank] if i==nodename]) except: pass - -def get_id_within_node(comm=MPI.COMM_WORLD): - rank = comm.rank - nodename = MPI.Get_processor_name() - nodelist = comm.allgather(nodename) - return len([i for i in nodelist[:rank] if i==nodename]) diff --git a/doc/GPy.inference.latent_function_inference.rst b/doc/GPy.inference.latent_function_inference.rst index c47da33a..98d16705 100644 --- a/doc/GPy.inference.latent_function_inference.rst +++ b/doc/GPy.inference.latent_function_inference.rst @@ -44,6 +44,14 @@ GPy.inference.latent_function_inference.fitc module :undoc-members: :show-inheritance: +GPy.inference.latent_function_inference.inferenceX module +--------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.inferenceX + :members: + :undoc-members: + :show-inheritance: + GPy.inference.latent_function_inference.laplace module ------------------------------------------------------ diff --git a/doc/GPy.inference.mcmc.rst b/doc/GPy.inference.mcmc.rst new file mode 100644 index 00000000..273658b7 --- /dev/null +++ b/doc/GPy.inference.mcmc.rst @@ -0,0 +1,30 @@ +GPy.inference.mcmc package +========================== + +Submodules +---------- + +GPy.inference.mcmc.hmc module +----------------------------- + +.. automodule:: GPy.inference.mcmc.hmc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.mcmc.samplers module +---------------------------------- + +.. automodule:: GPy.inference.mcmc.samplers + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.inference.mcmc + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.kern.parts.rst b/doc/GPy.kern.parts.rst deleted file mode 100644 index ec0661b4..00000000 --- a/doc/GPy.kern.parts.rst +++ /dev/null @@ -1,246 +0,0 @@ -GPy.kern.parts package -====================== - -Submodules ----------- - -GPy.kern.parts.Brownian module ------------------------------- - -.. automodule:: GPy.kern.parts.Brownian - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.Matern32 module ------------------------------- - -.. automodule:: GPy.kern.parts.Matern32 - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.Matern52 module ------------------------------- - -.. automodule:: GPy.kern.parts.Matern52 - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.bias module --------------------------- - -.. 
automodule:: GPy.kern.parts.bias - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.coregionalize module ------------------------------------ - -.. automodule:: GPy.kern.parts.coregionalize - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.exponential module ---------------------------------- - -.. automodule:: GPy.kern.parts.exponential - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.finite_dimensional module ----------------------------------------- - -.. automodule:: GPy.kern.parts.finite_dimensional - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.fixed module ---------------------------- - -.. automodule:: GPy.kern.parts.fixed - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.gibbs module ---------------------------- - -.. automodule:: GPy.kern.parts.gibbs - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.hetero module ----------------------------- - -.. automodule:: GPy.kern.parts.hetero - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.hierarchical module ----------------------------------- - -.. automodule:: GPy.kern.parts.hierarchical - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.independent_outputs module ------------------------------------------ - -.. automodule:: GPy.kern.parts.independent_outputs - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.kernpart module ------------------------------- - -.. automodule:: GPy.kern.parts.kernpart - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.linear module ----------------------------- - -.. automodule:: GPy.kern.parts.linear - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.mlp module -------------------------- - -.. automodule:: GPy.kern.parts.mlp - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.periodic_Matern32 module ---------------------------------------- - -.. automodule:: GPy.kern.parts.periodic_Matern32 - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.periodic_Matern52 module ---------------------------------------- - -.. automodule:: GPy.kern.parts.periodic_Matern52 - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.periodic_exponential module ------------------------------------------- - -.. automodule:: GPy.kern.parts.periodic_exponential - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.poly module --------------------------- - -.. automodule:: GPy.kern.parts.poly - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.prod module --------------------------- - -.. automodule:: GPy.kern.parts.prod - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.prod_orthogonal module -------------------------------------- - -.. automodule:: GPy.kern.parts.prod_orthogonal - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.rational_quadratic module ----------------------------------------- - -.. automodule:: GPy.kern.parts.rational_quadratic - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.rbf module -------------------------- - -.. automodule:: GPy.kern.parts.rbf - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.rbf_inv module ------------------------------ - -.. automodule:: GPy.kern.parts.rbf_inv - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.rbfcos module ----------------------------- - -.. 
automodule:: GPy.kern.parts.rbfcos - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.spline module ----------------------------- - -.. automodule:: GPy.kern.parts.spline - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.symmetric module -------------------------------- - -.. automodule:: GPy.kern.parts.symmetric - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.sympykern module -------------------------------- - -.. automodule:: GPy.kern.parts.sympykern - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.parts.white module ---------------------------- - -.. automodule:: GPy.kern.parts.white - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: GPy.kern.parts - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/GPy.likelihoods.noise_models.rst b/doc/GPy.likelihoods.noise_models.rst deleted file mode 100644 index d1a4f451..00000000 --- a/doc/GPy.likelihoods.noise_models.rst +++ /dev/null @@ -1,70 +0,0 @@ -GPy.likelihoods.noise_models package -==================================== - -Submodules ----------- - -GPy.likelihoods.noise_models.binomial_noise module --------------------------------------------------- - -.. automodule:: GPy.likelihoods.noise_models.binomial_noise - :members: - :undoc-members: - :show-inheritance: - -GPy.likelihoods.noise_models.exponential_noise module ------------------------------------------------------ - -.. automodule:: GPy.likelihoods.noise_models.exponential_noise - :members: - :undoc-members: - :show-inheritance: - -GPy.likelihoods.noise_models.gamma_noise module ------------------------------------------------ - -.. automodule:: GPy.likelihoods.noise_models.gamma_noise - :members: - :undoc-members: - :show-inheritance: - -GPy.likelihoods.noise_models.gaussian_noise module --------------------------------------------------- - -.. automodule:: GPy.likelihoods.noise_models.gaussian_noise - :members: - :undoc-members: - :show-inheritance: - -GPy.likelihoods.noise_models.gp_transformations module ------------------------------------------------------- - -.. automodule:: GPy.likelihoods.noise_models.gp_transformations - :members: - :undoc-members: - :show-inheritance: - -GPy.likelihoods.noise_models.noise_distributions module -------------------------------------------------------- - -.. automodule:: GPy.likelihoods.noise_models.noise_distributions - :members: - :undoc-members: - :show-inheritance: - -GPy.likelihoods.noise_models.poisson_noise module -------------------------------------------------- - -.. automodule:: GPy.likelihoods.noise_models.poisson_noise - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: GPy.likelihoods.noise_models - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst index 5ee7e3a9..cb043afa 100644 --- a/doc/GPy.models.rst +++ b/doc/GPy.models.rst @@ -12,6 +12,14 @@ GPy.models.bayesian_gplvm module :undoc-members: :show-inheritance: +GPy.models.bayesian_gplvm_minibatch module +------------------------------------------ + +.. automodule:: GPy.models.bayesian_gplvm_minibatch + :members: + :undoc-members: + :show-inheritance: + GPy.models.bcgplvm module ------------------------- @@ -116,6 +124,14 @@ GPy.models.sparse_gp_coregionalized_regression module :undoc-members: :show-inheritance: +GPy.models.sparse_gp_minibatch module +------------------------------------- + +.. 
automodule:: GPy.models.sparse_gp_minibatch + :members: + :undoc-members: + :show-inheritance: + GPy.models.sparse_gp_multioutput_regression module -------------------------------------------------- diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst index 2d1132d7..45bb307f 100644 --- a/doc/GPy.testing.rst +++ b/doc/GPy.testing.rst @@ -28,6 +28,14 @@ GPy.testing.index_operations_tests module :undoc-members: :show-inheritance: +GPy.testing.inference_tests module +---------------------------------- + +.. automodule:: GPy.testing.inference_tests + :members: + :undoc-members: + :show-inheritance: + GPy.testing.kernel_tests module ------------------------------- diff --git a/doc/GPy.util.latent_space_visualizations.controllers.rst b/doc/GPy.util.latent_space_visualizations.controllers.rst deleted file mode 100644 index a88c1f5c..00000000 --- a/doc/GPy.util.latent_space_visualizations.controllers.rst +++ /dev/null @@ -1,30 +0,0 @@ -GPy.util.latent_space_visualizations.controllers package -======================================================== - -Submodules ----------- - -GPy.util.latent_space_visualizations.controllers.axis_event_controller module ------------------------------------------------------------------------------ - -.. automodule:: GPy.util.latent_space_visualizations.controllers.axis_event_controller - :members: - :undoc-members: - :show-inheritance: - -GPy.util.latent_space_visualizations.controllers.imshow_controller module -------------------------------------------------------------------------- - -.. automodule:: GPy.util.latent_space_visualizations.controllers.imshow_controller - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: GPy.util.latent_space_visualizations.controllers - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/GPy.util.latent_space_visualizations.rst b/doc/GPy.util.latent_space_visualizations.rst deleted file mode 100644 index d8cbd843..00000000 --- a/doc/GPy.util.latent_space_visualizations.rst +++ /dev/null @@ -1,17 +0,0 @@ -GPy.util.latent_space_visualizations package -============================================ - -Subpackages ------------ - -.. toctree:: - - GPy.util.latent_space_visualizations.controllers - -Module contents ---------------- - -.. automodule:: GPy.util.latent_space_visualizations - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/conf.py b/doc/conf.py index 7b71a897..f051c986 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,6 +11,9 @@ # All configuration values have a default; values that are commented out # serve to show the default. +autodoc_default_flags = ['members', 'show-inheritance', 'private-members', 'special-members'] +autodoc_member_order = "source" + import sys import os @@ -114,7 +117,7 @@ for mod_name in MOCK_MODULES: # ----------------------- READTHEDOCS ------------------ on_rtd = os.environ.get('READTHEDOCS', None) == 'True' -on_rtd = True +#on_rtd = True if on_rtd: sys.path.append(os.path.abspath('../GPy')) @@ -126,7 +129,8 @@ if on_rtd: proc = subprocess.Popen("ls ../", stdout=subprocess.PIPE, shell=True) (out, err) = proc.communicate() print "program output:", out - proc = subprocess.Popen("sphinx-apidoc -f -o . ../GPy", stdout=subprocess.PIPE, shell=True) + #proc = subprocess.Popen("sphinx-apidoc -f -o . 
../GPy", stdout=subprocess.PIPE, shell=True) + proc = subprocess.Popen("make html", stdout=subprocess.PIPE, shell=True) (out, err) = proc.communicate() print "program output:", out #proc = subprocess.Popen("whereis numpy", stdout=subprocess.PIPE, shell=True) @@ -397,5 +401,3 @@ epub_copyright = u'2013, Author' # Allow duplicate toc entries. #epub_tocdup = True - -autodoc_member_order = "source" From 71523427e4a2c805f97794b8fe915c72e1dec18c Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 3 Nov 2014 14:43:38 +0000 Subject: [PATCH 14/22] Removed ordinal.py (to Symbolic). --- GPy/likelihoods/ordinal.py | 47 -------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 GPy/likelihoods/ordinal.py diff --git a/GPy/likelihoods/ordinal.py b/GPy/likelihoods/ordinal.py deleted file mode 100644 index b6855e54..00000000 --- a/GPy/likelihoods/ordinal.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2014 The GPy authors (see AUTHORS.txt) -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import sympy as sym -from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix -import numpy as np -import link_functions -from symbolic import Symbolic -from scipy import stats - -class Ordinal(Symbolic): - """ - Ordinal - - .. math:: - p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i} - - .. Note:: - Y takes non zero integer values.. - link function should have a positive domain, e.g. log (default). - - .. See also:: - symbolic.py, for the parent class - """ - def __init__(self, categories=3, gp_link=None): - if gp_link is None: - gp_link = link_functions.Identity() - - dispersion = sym.Symbol('width', positive=True, real=True) - y_0 = sym.Symbol('y_0', nonnegative=True, integer=True) - f_0 = sym.Symbol('f_0', positive=True, real=True) - log_pdf = create_matrix('log_pdf', 1, categories) - log_pdf[0] = normcdfln(-f_0) - if categories>2: - w = create_matrix('w', 1, categories) - log_pdf[categories-1] = normcdfln(w.sum() + f_0) - for i in range(1, categories-1): - log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) ) - else: - log_pdf[1] = normcdfln(f_0) - log_pdf.index_var = y_0 - super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal') - - # TODO: Check this. 
- self.log_concave = True - From 1a8cae99a5bfbfb1153d46ec9a54455de211f63b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 3 Nov 2014 15:04:12 +0000 Subject: [PATCH 15/22] [MRD] running again, using missing_data classes, more details needed for missing data though --- GPy/examples/dimensionality_reduction.py | 9 +--- GPy/models/mrd.py | 64 +++++++++++------------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 9d39bc83..21d3804a 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -415,7 +415,6 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): from GPy import kern from GPy.models import MRD - from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim) @@ -429,12 +428,8 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim inanlist.append(inan) Y[inan] = _np.nan - imlist = [] - for inan in inanlist: - imlist.append(VarDTCMissingData(limit=1, inan=inan)) - m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, - kernel=k, inference_method=imlist, + kernel=k, inference_method=None, initx="random", initz='permute', **kw) if optimize: @@ -494,7 +489,7 @@ def olivetti_faces(optimize=True, verbose=True, plot=True): def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True): import GPy - import pods + import pods data = pods.datasets.osu_run1() # optimize diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 6e105842..bc39079d 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -13,11 +13,11 @@ from ..inference.latent_function_inference import InferenceMethodList from ..likelihoods import Gaussian from ..util.initialization import initialize_latent from ..core.sparse_gp import SparseGP, GP -from GPy.models.bayesian_gplvm import BayesianGPLVM from GPy.core.parameterization.variational import VariationalPosterior -from GPy.core.sparse_gp_mpi import SparseGP_MPI +from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch +from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch -class MRD(BayesianGPLVM): +class MRD(BayesianGPLVMMiniBatch): """ !WARNING: This is bleeding edge code and still in development. Functionality may change fundamentally during development! 
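Before the remaining MRD hunks, a rough sketch of how the reworked model is driven, following the mrd_simulation examples above. The data shapes, kernel template and iteration count are assumptions made purely for illustration; internally the model now copies the kernel once per output view and shares X and Z across the per-view sparse GPs, as the diff below shows.

import numpy as np
import GPy

# Two views generated from a shared latent signal (illustrative toy data).
np.random.seed(0)
N, D1, D2, Q, M = 60, 20, 15, 5, 6
X_true = np.random.randn(N, Q)
Ylist = [X_true.dot(np.random.randn(Q, D1)) + 0.05 * np.random.randn(N, D1),
         X_true.dot(np.random.randn(Q, D2)) + 0.05 * np.random.randn(N, D2)]

# One ARD kernel serves as the template that MRD copies for each view.
k = GPy.kern.Linear(Q, ARD=True)
m = GPy.models.MRD(Ylist, input_dim=Q, num_inducing=M, kernel=k,
                   initx="random", initz='permute')
m.optimize(max_iters=100)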
@@ -92,7 +92,8 @@ class MRD(BayesianGPLVM): else: fracs = [X.var(0)]*len(Ylist) - self.Z = Param('inducing inputs', self._init_Z(initz, X)) + Z = self._init_Z(initz, X) + self.Z = Param('inducing inputs', Z) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N # sort out the kernels @@ -104,6 +105,7 @@ class MRD(BayesianGPLVM): kernels = [] for i in range(len(Ylist)): k = kernel.copy() + print k is kernel, k.observers, k.constraints kernels.append(k) else: assert len(kernel) == len(Ylist), "need one kernel per output" @@ -114,7 +116,7 @@ class MRD(BayesianGPLVM): X_variance = np.random.uniform(0.1, 0.2, X.shape) self.variational_prior = NormalPrior() - self.X = NormalPosterior(X, X_variance) + #self.X = NormalPosterior(X, X_variance) if likelihoods is None: likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] @@ -123,48 +125,33 @@ class MRD(BayesianGPLVM): self.logger.info("adding X and Z") super(MRD, self).__init__(Y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing, Z=self.Z, kernel=None, inference_method=self.inference_method, likelihood=Gaussian(), - name='bayesian gplvm', mpi_comm=None, normalizer=None, + name='manifold relevance determination', normalizer=None, missing_data=False, stochastic=False, batchsize=1) - import GPy self._log_marginal_likelihood = 0 - print "------------" - print self.size - print self.constraints[GPy.constraints.Logexp()][-10:] - print "------------" self.unlink_parameter(self.likelihood) - print self.size - print self.constraints[GPy.constraints.Logexp()][-10:] - print "------------" self.unlink_parameter(self.kern) - print self.size - print self.constraints[GPy.constraints.Logexp()][-10:] - print "------------" - - print - print '=================' + del self.kern + del self.likelihood self.num_data = Ylist[0].shape[0] if isinstance(batchsize, int): batchsize = itertools.repeat(batchsize) - print self.size - print self.constraints[GPy.constraints.Logexp()][-10:] + self.bgplvms = [] for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize): assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another" md = np.isnan(Y).any() - spgp = SparseGP(self.X, Y, self.Z, k, l, im, n, None, normalizer, md, stochastic, bs) + spgp = SparseGPMiniBatch(self.X, Y, Z, k, l, im, n, None, normalizer, md, stochastic, bs) spgp.unlink_parameter(spgp.Z) + del spgp.Z + del spgp.X spgp.Z = self.Z + spgp.X = self.X self.link_parameter(spgp, i+2) - - print self.constraints[GPy.constraints.Logexp()][-10:] - self.link_parameter(self.Z, 2) - print self.size - print self.constraints[GPy.constraints.Logexp()][-10:] - print "===========" + self.bgplvms.append(spgp) self.posterior = None self.logger.info("init done") @@ -173,7 +160,9 @@ class MRD(BayesianGPLVM): self._log_marginal_likelihood = 0 self.Z.gradient[:] = 0. self.X.gradient[:] = 0. 
- for b, i in itertools.izip(self.parameters[3:], self.inference_method): + for b, i in itertools.izip(self.bgplvms, self.inference_method): + self._log_marginal_likelihood += b._log_marginal_likelihood + self.logger.info('working on im <{}>'.format(hex(id(i)))) self.Z.gradient[:] += b.full_values['Zgrad'] grad_dict = b.grad_dict @@ -195,6 +184,7 @@ class MRD(BayesianGPLVM): # update for the KL divergence self.variational_prior.update_gradients_KL(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) + pass def log_likelihood(self): return self._log_marginal_likelihood @@ -268,7 +258,7 @@ class MRD(BayesianGPLVM): Prediction for data set Yindex[default=0]. This predicts the output mean and variance for the dataset given in Ylist[Yindex] """ - b = self.parameters[Yindex+2] + b = self.bgplvms[Yindex] self.posterior = b.posterior self.kern = b.kern self.likelihood = b.likelihood @@ -317,16 +307,20 @@ class MRD(BayesianGPLVM): from ..plotting.matplot_dep import dim_reduction_plots if "Yindex" not in predict_kwargs: predict_kwargs['Yindex'] = 0 + + Yindex = predict_kwargs['Yindex'] if ax is None: fig = plt.figure(num=fignum) ax = fig.add_subplot(111) else: fig = ax.figure + self.kern = self.bgplvms[Yindex].kern + self.likelihood = self.bgplvms[Yindex].likelihood plot = dim_reduction_plots.plot_latent(self, labels, which_indices, resolution, ax, marker, s, fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) - ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name) + ax.set_title(self.bgplvms[Yindex].name) try: fig.tight_layout() except: @@ -336,8 +330,10 @@ class MRD(BayesianGPLVM): def __getstate__(self): state = super(MRD, self).__getstate__() - del state['kern'] - del state['likelihood'] + if state.has_key('kern'): + del state['kern'] + if state.has_key('likelihood'): + del state['likelihood'] return state def __setstate__(self, state): From 2eedd7f385686c84561e15a04e309f3025b2ad0a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 3 Nov 2014 15:12:24 +0000 Subject: [PATCH 16/22] [tests] pickle tests updated to new structure --- GPy/testing/pickle_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index b541d21e..d6d6f923 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -141,6 +141,7 @@ class Test(ListDictTestCase): pcopy.optimize('bfgs') par.optimize('bfgs') np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=1e-6) + par.randomize() with tempfile.TemporaryFile('w+b') as f: par.pickle(f) f.seek(0) From d263432dde0d3dcd00b92494a5f875f38a508a48 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 3 Nov 2014 15:21:08 +0000 Subject: [PATCH 17/22] [tests] for issue ##146 and #147, fixing parameters inside __init__ --- GPy/testing/model_tests.py | 1 + GPy/testing/observable_tests.py | 1 - GPy/testing/parameterized_tests.py | 25 +++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 55dfafec..fc78c55e 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -447,6 +447,7 @@ class GradientTests(np.testing.TestCase): m = GPy.models.GPHeteroscedasticRegression(X, Y, kern) self.assertTrue(m.checkgrad()) + def test_gp_kronecker_gaussian(self): N1, N2 = 30, 20 X1 = np.random.randn(N1, 1) diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py index fb9112f8..d8aad4c7 100644 --- 
a/GPy/testing/observable_tests.py +++ b/GPy/testing/observable_tests.py @@ -130,7 +130,6 @@ class Test(unittest.TestCase): self.assertEqual(self._first, self._trigger, 'priority should be second') self.assertEqual(self._second, self._trigger_priority, 'priority should be second') - if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.testName'] unittest.main() \ No newline at end of file diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 691aef79..7c4f4ce2 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -221,6 +221,31 @@ class ParameterizedTest(unittest.TestCase): np.testing.assert_equal(t.x.constraints[Logistic(0,1)], c[Logistic(0,1)]) np.testing.assert_equal(t.x.constraints['fixed'], c['fixed']) + def test_parameter_modify_in_init(self): + class TestLikelihood(Parameterized): + def __init__(self, param1 = 2., param2 = 3.): + super(TestLikelihood, self).__init__("TestLike") + self.p1 = Param('param1', param1) + self.p2 = Param('param2', param2) + + self.link_parameter(self.p1) + self.link_parameter(self.p2) + + self.p1.fix() + self.p1.unfix() + self.p2.constrain_negative() + self.p1.fix() + self.p2.constrain_positive() + self.p2.fix() + self.p2.constrain_positive() + + m = TestLikelihood() + print m + val = m.p1.values.copy() + self.assert_(m.p1.is_fixed) + self.assert_(m.constraints[GPy.constraints.Logexp()].tolist(), [1]) + m.randomize() + self.assertEqual(m.p1, val) def test_printing(self): print self.test1 From 63c23214691ef5ce2728959a5f75021220a5120e Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 3 Nov 2014 15:28:48 +0000 Subject: [PATCH 18/22] Added homepage to main GPy project page --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2c33f0d2..6a880209 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,11 @@ GPy A Gaussian processes framework in Python. +* [GPy homepage](http://sheffieldml.github.io/GPy/) * [User mailing list](https://lists.shef.ac.uk/sympa/subscribe/gpy-users) * [Online documentation](https://gpy.readthedocs.org/en/latest/) * [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) - Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) Citation @@ -20,6 +20,10 @@ Citation year = {2012--2014} } +Pronounciation +============== +We like to pronounce it 'Gee-pie'. + Getting started =============== Installing with pip From 1840b7e6b881a47a137b94c53cbfcfa41920b9fc Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 3 Nov 2014 16:04:15 +0000 Subject: [PATCH 19/22] extend inference X for all gp models --- GPy/core/gp.py | 16 +++++ .../latent_function_inference/inferenceX.py | 69 +++++++++++++------ GPy/models/bayesian_gplvm.py | 16 ----- GPy/models/sparse_gplvm.py | 3 +- GPy/testing/inference_tests.py | 11 +++ 5 files changed, 78 insertions(+), 37 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 0e93cd99..e0dfde0c 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -354,3 +354,19 @@ class GP(Model): print "KeyboardInterrupt caught, calling on_optimization_end() to round things up" self.inference_method.on_optimization_end() raise + + def infer_newX(self, Y_new, optimize=True, ): + """ + Infer the distribution of X for the new observed data *Y_new*. 
+ + :param model: the GPy model used in inference + :type model: GPy.core.Model + :param Y_new: the new observed data for inference + :type Y_new: numpy.ndarray + :param optimize: whether to optimize the location of new X (True by default) + :type optimize: boolean + :return: a tuple containing the estimated posterior distribution of X and the model that optimize X + :rtype: (GPy.core.parameterization.variational.VariationalPosterior, GPy.core.Model) + """ + from ..inference.latent_function_inference.inferenceX import infer_newX + return infer_newX(self, Y_new, optimize=optimize) diff --git a/GPy/inference/latent_function_inference/inferenceX.py b/GPy/inference/latent_function_inference/inferenceX.py index 7480c910..66fbcd4d 100644 --- a/GPy/inference/latent_function_inference/inferenceX.py +++ b/GPy/inference/latent_function_inference/inferenceX.py @@ -51,8 +51,18 @@ class InferenceX(Model): self.kern.GPU(True) from copy import deepcopy self.posterior = deepcopy(model.posterior) - self.variational_prior = model.variational_prior.copy() - self.Z = model.Z.copy() + if hasattr(model, 'variational_prior'): + self.uncertain_input = True + self.variational_prior = model.variational_prior.copy() + else: + self.uncertain_input = False + if hasattr(model, 'inducing_inputs'): + self.sparse_gp = True + self.Z = model.Z.copy() + else: + self.sparse_gp = False + self.uncertain_input = False + self.Z = model.X.copy() self.Y = Y self.X = self._init_X(model, Y, init=init) self.compute_dL() @@ -72,6 +82,8 @@ class InferenceX(Model): dist = -2.*Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:,None]+ np.square(Y).sum(axis=1)[None,:] elif init=='NCC': dist = Y_new.dot(Y.T) + elif init=='rand': + dist = np.random.rand(Y_new.shape[0],Y.shape[0]) idx = dist.argmin(axis=1) from ...models import SSGPLVM @@ -81,7 +93,11 @@ class InferenceX(Model): if model.group_spike: X.gamma.fix() else: - X = variational.NormalPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx])) + if self.uncertain_input and self.sparse_gp: + X = variational.NormalPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx])) + else: + from ...core import Param + X = Param('latent mean',param_to_array(model.X[idx]).copy()) return X @@ -99,29 +115,42 @@ class InferenceX(Model): else: self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2. 
self.dL_dpsi1 = beta*np.dot(self.Y, wv.T) - self.dL_dpsi0 = -beta/2.* np.ones(self.Y.shape[0]) #self.dL_dpsi0[:] = 0 + self.dL_dpsi0 = -beta/2.* np.ones(self.Y.shape[0]) def parameters_changed(self): - psi0 = self.kern.psi0(self.Z, self.X) - psi1 = self.kern.psi1(self.Z, self.X) - psi2 = self.kern.psi2(self.Z, self.X) + if self.uncertain_input: + psi0 = self.kern.psi0(self.Z, self.X) + psi1 = self.kern.psi1(self.Z, self.X) + psi2 = self.kern.psi2(self.Z, self.X) + else: + psi0 = self.kern.Kdiag(self.X) + psi1 = self.kern.K(self.X, self.Z) + psi2 = np.dot(psi1.T,psi1) self._log_marginal_likelihood = (self.dL_dpsi2*psi2).sum()+(self.dL_dpsi1*psi1).sum()+(self.dL_dpsi0*psi0).sum() - X_grad = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.dL_dpsi0, dL_dpsi1=self.dL_dpsi1, dL_dpsi2=self.dL_dpsi2) - self.X.set_gradients(X_grad) - from ...core.parameterization.variational import SpikeAndSlabPrior - if isinstance(self.variational_prior, SpikeAndSlabPrior): - # Update Log-likelihood - KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0]) - # update for the KL divergence - self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0]) + if self.uncertain_input: + X_grad = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.dL_dpsi0, dL_dpsi1=self.dL_dpsi1, dL_dpsi2=self.dL_dpsi2) + self.X.set_gradients(X_grad) else: - # Update Log-likelihood - KL_div = self.variational_prior.KL_divergence(self.X) - # update for the KL divergence - self.variational_prior.update_gradients_KL(self.X) - self._log_marginal_likelihood += -KL_div + dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(psi1,self.dL_dpsi2) + X_grad = self.kern.gradients_X_diag(self.dL_dpsi0, self.X) + X_grad += self.kern.gradients_X(dL_dpsi1, self.X, self.Z) + self.X.gradient = X_grad + + if self.uncertain_input: + from ...core.parameterization.variational import SpikeAndSlabPrior + if isinstance(self.variational_prior, SpikeAndSlabPrior): + # Update Log-likelihood + KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0]) + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0]) + else: + # Update Log-likelihood + KL_div = self.variational_prior.KL_divergence(self.X) + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.X) + self._log_marginal_likelihood += -KL_div def log_likelihood(self): return self._log_marginal_likelihood diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 629522d1..73fd6f8f 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -141,22 +141,6 @@ class BayesianGPLVM(SparseGP_MPI): resolution, ax, marker, s, fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) - - def infer_newX(self, Y_new, optimize=True, ): - """ - Infer the distribution of X for the new observed data *Y_new*. 
- - :param model: the GPy model used in inference - :type model: GPy.core.Model - :param Y_new: the new observed data for inference - :type Y_new: numpy.ndarray - :param optimize: whether to optimize the location of new X (True by default) - :type optimize: boolean - :return: a tuple containing the estimated posterior distribution of X and the model that optimize X - :rtype: (GPy.core.parameterization.variational.VariationalPosterior, GPy.core.Model) - """ - from ..inference.latent_function_inference.inferenceX import infer_newX - return infer_newX(self, Y_new, optimize=optimize) def do_test_latents(self, Y): """ diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index 251103f4..d1ad5884 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -26,7 +26,8 @@ class SparseGPLVM(SparseGPRegression): def parameters_changed(self): super(SparseGPLVM, self).parameters_changed() - self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z) + self.X.gradient = self.kern.gradients_X_diag(self.grad_dict['dL_dKdiag'], self.X) + self.X.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z) def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py index c7efa821..fd81022a 100644 --- a/GPy/testing/inference_tests.py +++ b/GPy/testing/inference_tests.py @@ -65,6 +65,17 @@ class InferenceXTestCase(unittest.TestCase): self.assertTrue(np.allclose(m.X.mean, mi.X.mean)) self.assertTrue(np.allclose(m.X.variance, mi.X.variance)) + + def test_inferenceX_GPLVM(self): + Ys = self.genData() + m = GPy.models.GPLVM(Ys[0],3,kernel=GPy.kern.RBF(3,ARD=True)) + + x,mi = m.infer_newX(m.Y, optimize=False) + self.assertTrue(mi.checkgrad()) + +# m.optimize(max_iters=10000) +# x,mi = m.infer_newX(m.Y) +# self.assertTrue(np.allclose(m.X, x)) if __name__ == "__main__": From 4160a7b4d8f9fb90f7966e688af2be8204cba61a Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 3 Nov 2014 16:48:08 +0000 Subject: [PATCH 20/22] Fixed prior error --- GPy/core/parameterization/parameter_core.py | 6 +- GPy/testing/prior_tests.py | 63 +++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 115acf3a..fb6b3a8c 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -518,7 +518,7 @@ class Indexable(Nameable, Observable): self.constrain_negative(warning) elif prior.domain is _REAL: rav_i = self._raveled_index() - assert all(all(c.domain is _REAL for c in con) for con in self.constraints.properties_for(rav_i)) + assert all(all(False if c is __fixed__ else c.domain is _REAL for c in con) for con in self.constraints.properties_for(rav_i)), 'Domain of prior and constraint have to match, please unconstrain if you REALLY wish to use this prior' def unset_priors(self, *priors): """ @@ -824,7 +824,7 @@ class OptimizationHandlable(Indexable): #=========================================================================== # Randomizeable #=========================================================================== - def randomize(self, rand_gen=np.random.normal, *args, **kwargs): + def randomize(self, rand_gen=None, *args, **kwargs): """ Randomize the model. 
Make this draw from the prior if one exists, else draw from given random generator @@ -834,6 +834,8 @@ class OptimizationHandlable(Indexable): :param float scale: scale parameter for random number generator :param args, kwargs: will be passed through to random number generator """ + if rand_gen is None: + rand_gen = np.random.normal # first take care of all parameters (from N(0,1)) x = rand_gen(size=self._size_transformed(), *args, **kwargs) updates = self.update_model() diff --git a/GPy/testing/prior_tests.py b/GPy/testing/prior_tests.py index db6cc685..6a61fbb5 100644 --- a/GPy/testing/prior_tests.py +++ b/GPy/testing/prior_tests.py @@ -45,6 +45,69 @@ class PriorTests(unittest.TestCase): # should raise an assertionerror. self.assertRaises(AssertionError, m.rbf.set_prior, gaussian) + def test_set_prior(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.GPRegression(X, y) + + gaussian = GPy.priors.Gaussian(1, 1) + #m.rbf.set_prior(gaussian) + # setting a Gaussian prior on non-negative parameters + # should raise an assertionerror. + self.assertRaises(AssertionError, m.rbf.set_prior, gaussian) + + def test_set_gaussian_for_reals(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.SparseGPRegression(X, y) + + gaussian = GPy.priors.Gaussian(1, 1) + m.Z.set_prior(gaussian) + # setting a Gaussian prior on non-negative parameters + # should raise an assertionerror. + #self.assertRaises(AssertionError, m.Z.set_prior, gaussian) + + + + def test_fixed_domain_check(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.GPRegression(X, y) + + m.rbf.fix() + gaussian = GPy.priors.Gaussian(1, 1) + # setting a Gaussian prior on non-negative parameters + # should raise an assertionerror. + self.assertRaises(AssertionError, m.rbf.set_prior, gaussian) + + def test_fixed_domain_check1(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.GPRegression(X, y) + + m.kern.lengthscale.fix() + gaussian = GPy.priors.Gaussian(1, 1) + # setting a Gaussian prior on non-negative parameters + # should raise an assertionerror. + self.assertRaises(AssertionError, m.rbf.set_prior, gaussian) + + if __name__ == "__main__": print "Running unit tests, please be (very) patient..." 
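Between this patch and the next, a short sketch of the behaviour the new prior tests pin down. The toy data follows the tests; treat the snippet as an illustration of the intended behaviour rather than as part of the patch itself.

import numpy as np
import GPy

X = np.linspace(1, 2.5 * np.pi, 500)[:, None]
y = X + np.sin(X) + 0.05 * np.random.randn(500, 1)

gaussian = GPy.priors.Gaussian(1, 1)

# A Gaussian prior lives on the whole real line, so attaching it to a
# positive-constrained parameter (e.g. the rbf variance/lengthscale) is
# rejected with an AssertionError, also when that parameter is fixed.
m = GPy.models.GPRegression(X, y)
try:
    m.rbf.set_prior(gaussian)
except AssertionError:
    print("rejected: prior domain does not match the positivity constraint")

# Unconstrained, real-valued parameters such as the inducing inputs of a
# sparse model accept the same prior.
m2 = GPy.models.SparseGPRegression(X, y)
m2.Z.set_prior(gaussian)

# randomize() draws from priors where they are set and otherwise from the
# given generator (np.random.normal by default, per the fix above).
m2.randomize()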
From c885af7fbb8a67ff9891ae56e634a9f82c4bc233 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Mon, 3 Nov 2014 17:26:31 +0000
Subject: [PATCH 21/22] update the set_XY function

---
 GPy/core/gp.py | 60 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index e0dfde0c..7121c539 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -93,21 +93,51 @@ class GP(Model):
         self.link_parameter(self.kern)
         self.link_parameter(self.likelihood)
 
+    def set_XY(self, X=None, Y=None):
+        """
+        Set the input / output of the model
+
+        :param X: input observations
+        :param Y: output observations
+        """
+        self.update_model(False)
+        if Y is not None:
+            if self.normalizer is not None:
+                self.normalizer.scale_by(Y)
+                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
+                self.Y = Y
+            else:
+                self.Y = ObsAr(Y)
+                self.Y_normalized = self.Y
+        if X is not None:
+            if self.X in self.parameters:
+                # LVM models
+                from ..core.parameterization.variational import VariationalPosterior
+                if isinstance(self.X, VariationalPosterior):
+                    assert isinstance(X, type(self.X), "The given X must have the same type as the X in the model!")
+                    self.unlink_parameter(self.X)
+                    self.X = X
+                    self.link_parameters(self.X)
+                else:
+                    self.unlink_parameter(self.X)
+                    from ..core import Param
+                    self.X = Param('latent mean',X)
+                    self.link_parameters(self.X)
+            else:
+                self.X = ObsAr(X)
+        self.update_model(True)
+
     def set_X(self,X):
-        # TODO: it does not work with BGPLVM
-        if isinstance(X, ObsAr):
-            self.X = X
-        else:
-            self.X = ObsAr(X)
+        """
+        Set the input of the model
+        """
+        self.set_XY(X=X)
 
     def set_Y(self,Y):
-        if self.normalizer is not None:
-            self.normalizer.scale_by(Y)
-            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
-            self.Y = Y
-        else:
-            self.Y = ObsAr(Y)
-            self.Y_normalized = self.Y
+        """
+        Set the output of the model
+        """
+        self.set_XY(Y=Y)
 
     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
@@ -359,14 +389,12 @@ class GP(Model):
         """
         Infer the distribution of X for the new observed data *Y_new*. 
-    :param model: the GPy model used in inference
-    :type model: GPy.core.Model
     :param Y_new: the new observed data for inference
    :type Y_new: numpy.ndarray
     :param optimize: whether to optimize the location of new X (True by default)
     :type optimize: boolean
-    :return: a tuple containing the estimated posterior distribution of X and the model that optimize X
-    :rtype: (GPy.core.parameterization.variational.VariationalPosterior, GPy.core.Model)
+    :return: a tuple containing the posterior estimate of X and the model that optimizes X
+    :rtype: (GPy.core.parameterization.variational.VariationalPosterior or numpy.ndarray, GPy.core.Model)
     """
     from ..inference.latent_function_inference.inferenceX import infer_newX
     return infer_newX(self, Y_new, optimize=optimize)

From 6f78a724fca7835b4c0dbf90f47be250ad1a2d2a Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Mon, 3 Nov 2014 17:37:33 +0000
Subject: [PATCH 22/22] a bug fix for set_XY

---
 GPy/core/gp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 7121c539..7835f8b9 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -114,7 +114,7 @@ class GP(Model):
             # LVM models
             from ..core.parameterization.variational import VariationalPosterior
             if isinstance(self.X, VariationalPosterior):
-                    assert isinstance(X, type(self.X), "The given X must have the same type as the X in the model!")
+                    assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
                 self.unlink_parameter(self.X)
                 self.X = X
                 self.link_parameters(self.X)
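
A minimal sketch of how the reworked set_XY / set_X / set_Y entry points are intended to be called after these two patches (assuming a plain GPRegression model as in the hunks above; the toy data, sizes and variable names are illustrative only):

    import numpy as np
    import GPy

    X = np.random.rand(100, 1)
    y = np.sin(6 * X) + 0.1 * np.random.randn(100, 1)
    m = GPy.models.GPRegression(X, y)

    # replace inputs and outputs in a single call; set_XY brackets the assignment
    # with update_model(False) / update_model(True) so inference reruns only once
    X2 = np.random.rand(100, 1)
    y2 = np.sin(6 * X2) + 0.1 * np.random.randn(100, 1)
    m.set_XY(X2, y2)

    # set_X and set_Y are now thin wrappers that delegate to set_XY
    m.set_Y(y2 + 0.01)
    m.set_X(X2)

For latent variable models, where X is a linked parameter, the replacement X has to be an instance of the same VariationalPosterior subclass as the existing m.X, which is exactly what the assert corrected in PATCH 22/22 enforces.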