From ef84339e46b0910bea2631dfc575e675c67dcfd5 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Wed, 5 Nov 2014 16:33:02 +0000
Subject: [PATCH 1/5] add test case for mpi

---
 GPy/models/bayesian_gplvm.py |  5 ++--
 GPy/testing/mpi_tests.py     | 57 ++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 GPy/testing/mpi_tests.py
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 0838b684..d363fb7a 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -84,6 +84,8 @@ class BayesianGPLVM(SparseGP_MPI):
 
     def parameters_changed(self):
         super(BayesianGPLVM,self).parameters_changed()
+        if isinstance(self.inference_method, VarDTC_minibatch):
+            return        
 
         kl_fctr = 1.
         self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
@@ -98,9 +100,6 @@ class BayesianGPLVM(SparseGP_MPI):
         self.variational_prior.update_gradients_KL(self.X)
 
 
-        if isinstance(self.inference_method, VarDTC_minibatch):
-            return
-
         #super(BayesianGPLVM, self).parameters_changed()
         #self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
 
diff --git a/GPy/testing/mpi_tests.py b/GPy/testing/mpi_tests.py
new file mode 100644
index 00000000..4848a6ec
--- /dev/null
+++ b/GPy/testing/mpi_tests.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+try:
+    from mpi4py import MPI
+    import subprocess
+
+    class MPITests(unittest.TestCase):
+        def test_BayesianGPLVM_MPI(self):
+            """The objective printed after the MPI run must match the one recomputed on rank 0 with MPI disabled."""
+            code = """
+import numpy as np
+import GPy
+from mpi4py import MPI
+np.random.seed(123456)
+comm = MPI.COMM_WORLD
+N = 100
+x = np.linspace(-6., 6., N)
+y = np.sin(x) + np.random.randn(N) * 0.05
+data = np.vstack([x,y])
+infr = GPy.inference.latent_function_inference.VarDTC_minibatch(mpi_comm=comm)
+m = GPy.models.BayesianGPLVM(data.T,1,mpi_comm=comm)
+m.optimize(max_iters=10)
+if comm.rank==0:
+    print float(m.objective_function())
+    m.inference_method.mpi_comm=None
+    m.mpi_comm=None
+    m._trigger_params_changed()
+    print float(m.objective_function())
+            """
+            import os
+            with open('mpi_test__.py','w') as f:
+                f.write(code)
+            try:
+                p = subprocess.Popen('mpirun -n 4 python mpi_test__.py',stdout=subprocess.PIPE,shell=True)
+                (stdout, stderr) = p.communicate()  # stderr is not piped, so it is always None here
+                lines = stdout.splitlines()  # the last two lines are the two objective values printed by rank 0
+                self.assertAlmostEqual(float(lines[-2]), float(lines[-1]))
+            finally:  # always remove the generated script, even when the run or the comparison fails
+                os.remove('mpi_test__.py')
+
+except ImportError:  # mpi4py is not installed: define no MPI tests
+    pass
+
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    try:
+        import mpi4py
+    except ImportError:  # nothing to run without mpi4py
+        pass
+    else:  # keep unittest.main() outside the try so its SystemExit (the test result) propagates
+        unittest.main()
\ No newline at end of file

From 1dbe3e34b036e8f45ed198461aacb8287537c58c Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Wed, 5 Nov 2014 16:42:28 +0000
Subject: [PATCH 2/5] update docstring for checkgrad

---
 GPy/core/model.py                           |  4 ++++
 GPy/core/parameterization/parameter_core.py | 14 ++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 28223429..355087ca 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -279,6 +279,10 @@ class Model(Parameterized):
         Note:-
            The gradient is considered correct if the ratio of the analytical
            and numerical gradients is within <tolerance> of unity.
+           
+           The *dF_ratio* indicates the limit of accuracy of the numerical gradients.
+           If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
+           not accurate enough for the tests (shown in blue).
         """
         x = self.optimizer_array.copy()
 
diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index b2f88687..f8f7f7cc 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -294,7 +294,7 @@ class Gradcheckable(Pickleable, Parentable):
     def __init__(self, *a, **kw):
         super(Gradcheckable, self).__init__(*a, **kw)
 
-    def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
+    def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3, df_tolerance=1e-12):
         """
         Check the gradient of this parameter with respect to the highest parent's
         objective function.
@@ -305,11 +305,17 @@ class Gradcheckable(Pickleable, Parentable):
 
         :param bool verbose: whether each parameter shall be checked individually.
         :param float step: the stepsize for the numerical three point gradient estimate.
-        :param flaot tolerance: the tolerance for the gradient ratio or difference.
+        :param float tolerance: the tolerance for the gradient ratio or difference.
+        :param float df_tolerance: the threshold on dF_ratio below which the numerical gradient is considered unreliable
+        
+        Note:-
+           The *dF_ratio* indicates the limit of accuracy of the numerical gradients.
+           If it is too small, e.g., smaller than 1e-12, the numerical gradients
+           are usually not accurate enough for the tests (shown in blue).
         """
         if self.has_parent():
-            return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
-        return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
+            return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance, df_tolerance=df_tolerance)
+        return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance, df_tolerance=df_tolerance)
 
     def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
         """

From 65a041cadb443261922d0f93322e47fa22bb92c6 Mon Sep 17 00:00:00 2001
From: Ricardo <acq11ra@sheffield.ac.uk>
Date: Wed, 5 Nov 2014 17:23:19 +0000
Subject: [PATCH 3/5] Redundant models deleted

---
 GPy/models/gp_multioutput_regression.py       | 171 ------------------
 .../sparse_gp_multioutput_regression.py       |  80 --------
 2 files changed, 251 deletions(-)
 delete mode 100644 GPy/models/gp_multioutput_regression.py
 delete mode 100644 GPy/models/sparse_gp_multioutput_regression.py

diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/models/gp_multioutput_regression.py
deleted file mode 100644
index 2286ff95..00000000
--- a/GPy/models/gp_multioutput_regression.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright (c) 2013, Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-from ..core import GP
-from .. import likelihoods
-from .. import kern
-
-class GPMultioutputRegression(GP):
-    """
-    Multiple output Gaussian process with Gaussian noise
-
-    This is a wrapper around the models.GP class, with a set of sensible defaults
-
-    :param X_list: input observations
-    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
-    :param Y_list: observed values
-    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
-    :param kernel_list: GPy kernels, defaults to rbf
-    :type kernel_list: list of GPy kernels
-    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
-    :type noise_variance_list: list of floats
-    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
-    :type normalize_X: False|True
-    :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
-    :type normalize_Y: False|True
-    :param rank: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
-    :type rank: integer
-    """
-
-    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,rank=1):
-
-        self.output_dim = len(Y_list)
-        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'
-
-        #Inputs indexing
-        i = 0
-        index = []
-        for x,y in zip(X_list,Y_list):
-            assert x.shape[0] == y.shape[0]
-            index.append(np.repeat(i,x.size)[:,None])
-            i += 1
-        index = np.vstack(index)
-        X = np.hstack([np.vstack(X_list),index])
-        original_dim = X.shape[1] - 1
-
-        #Mixed noise likelihood definition
-        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
-
-        #Coregionalization kernel definition
-        if kernel_list is None:
-            kernel_list = [kern.rbf(original_dim)]
-        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list = kernel_list, rank=rank)
-
-        self.multioutput = True
-        GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
-        self.ensure_default_constraints()
-
-    def _add_output_index(self,X,output):
-        """
-        In a multioutput model, appends an index column to X to specify the output it is related to.
-
-        :param X: Input data
-        :type X: np.ndarray, N x self.input_dim
-        :param output: output X is related to
-        :type output: integer in {0,..., output_dim-1}
-
-        .. Note:: For multiple non-independent outputs models only.
-        """
-
-        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
-
-        index = np.ones((X.shape[0],1))*output
-        return np.hstack((X,index))
-
-    def plot_single_output(self, X, output):
-        """
-        A simple wrapper around self.plot, with appropriate setting of the fixed_inputs argument
-        """
-        raise NotImplementedError
-
-    def _raw_predict_single_output(self, _Xnew, output, which_parts='all', full_cov=False,stop=False):
-        """
-        For a specific output, calls _raw_predict() at the new point(s) _Xnew.
-        This functions calls _add_output_index(), so _Xnew should not have an index column specifying the output.
-        ---------
-
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param output: output to predict
-        :type output: integer in {0,..., output_dim-1}
-        :param which_parts:  specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-
-        .. Note:: For multiple non-independent outputs models only.
-        """
-        _Xnew = self._add_output_index(_Xnew, output)
-        return self._raw_predict(_Xnew, which_parts=which_parts,full_cov=full_cov, stop=stop)
-
-    def predict_single_output(self, Xnew,output=0, which_parts='all', full_cov=False, likelihood_args=dict()):
-        """
-        For a specific output, calls predict() at the new point(s) Xnew.
-        This functions calls _add_output_index(), so Xnew should not have an index column specifying the output.
-
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param which_parts:  specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        :type full_cov: bool
-        :returns: mean: posterior mean,  a Numpy array, Nnew x self.input_dim
-        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
-
-        .. Note:: For multiple non-independent outputs models only.
-        """
-        Xnew = self._add_output_index(Xnew, output)
-        return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
-
-    def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
-        """
-        For a specific output, in a multioutput model, this function works just as plot_f on single output models.
-
-        :param output: which output to plot (for multiple output models only)
-        :type output: integer (first output is 0)
-        :param samples: the number of a posteriori samples to plot
-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :param which_data: which if the training data to plot (default all)
-        :type which_data: 'all' or a slice object to slice self.X, self.Y
-        :param which_parts: which of the kernel functions to plot (additively)
-        :type which_parts: 'all', or list of bools
-        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-        :type resolution: int
-        :param full_cov:
-        :type full_cov: bool
-                :param fignum: figure to plot on.
-        :type fignum: figure number
-        :param ax: axes to plot on.
-        :type ax: axes handle
-        """
-        assert output is not None, "An output must be specified."
-        assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
-
-        if which_data == 'all':
-            which_data = slice(None)
-
-        if ax is None:
-            fig = pb.figure(num=fignum)
-            ax = fig.add_subplot(111)
-
-        if self.X.shape[1] == 2:
-            Xu = self.X[self.X[:,-1]==output ,0:1]
-            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
-            Xnew_indexed = self._add_output_index(Xnew,output)
-
-            m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
-
-            if samples:
-                Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
-                for yi in Ysim.T:
-                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
-
-            gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
-            ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
-            ax.set_xlim(xmin, xmax)
-            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
-            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-            ax.set_ylim(ymin, ymax)
-
-
diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/models/sparse_gp_multioutput_regression.py
deleted file mode 100644
index d809610b..00000000
--- a/GPy/models/sparse_gp_multioutput_regression.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright (c) 2013, Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from ..core import SparseGP
-from .. import likelihoods
-from .. import kern
-from ..util import multioutput
-
-class SparseGPMultioutputRegression(SparseGP):
-    """
-    Sparse multiple output Gaussian process with Gaussian noise
-
-    This is a wrapper around the models.SparseGP class, with a set of sensible defaults
-
-    :param X_list: input observations
-    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
-    :param Y_list: observed values
-    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
-    :param kernel_list: GPy kernels, defaults to rbf
-    :type kernel_list: list of GPy kernels
-    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
-    :type noise_variance_list: list of floats
-    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
-    :type normalize_X: False|True
-    :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
-    :type normalize_Y: False|True
-    :param Z_list: inducing inputs (optional)
-    :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list
-    :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
-    :type num_inducing: integer
-    :param rank: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
-    :type rank: integer
-    """
-    #NOTE not tested with uncertain inputs
-    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,rank=1):
-
-        self.output_dim = len(Y_list)
-        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'
-
-        #Inducing inputs list
-        if len(Z_list):
-            assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
-        else:
-            if isinstance(num_inducing,np.int):
-                num_inducing = [num_inducing] * self.output_dim
-            num_inducing = np.asarray(num_inducing)
-            assert num_inducing.size == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
-            for ni,X in zip(num_inducing,X_list):
-                i = np.random.permutation(X.shape[0])[:ni]
-                Z_list.append(X[i].copy())
-
-        #Inputs and inducing inputs indexing
-        i = 0
-        index = []
-        index_z = []
-        for x,y,z in zip(X_list,Y_list,Z_list):
-            assert x.shape[0] == y.shape[0]
-            index.append(np.repeat(i,x.size)[:,None])
-            index_z.append(np.repeat(i,z.size)[:,None])
-            i += 1
-        index = np.vstack(index)
-        index_z = np.vstack(index_z)
-        X = np.hstack([np.vstack(X_list),index])
-        Z = np.hstack([np.vstack(Z_list),index_z])
-        original_dim = X.shape[1] - 1
-
-        #Mixed noise likelihood definition
-        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
-
-        #Coregionalization kernel definition
-        if kernel_list is None:
-            kernel_list = [kern.rbf(original_dim)]
-        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list = kernel_list, rank=rank)
-
-        self.multioutput = True
-        SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)
-        self.constrain_fixed('.*iip_\d+_1')
-        self.ensure_default_constraints()

From 4e541d854e40709e4651ca52c3d1d94c252c7539 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 5 Nov 2014 17:39:10 +0000
Subject: [PATCH 4/5] [examples] dim red bgplvm with missing data

---
 GPy/examples/dimensionality_reduction.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 0bac25a6..6b78f73f 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -368,7 +368,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                       max_iters=2e4,
                       ):
     from GPy import kern
-    from GPy.models import BayesianGPLVM
+    from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
 
     D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
     _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
@@ -379,7 +379,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     Ymissing = Y.copy()
     Ymissing[inan] = _np.nan
 
-    m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
+    m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
                       kernel=k, missing_data=True)
 
     m.Yreal = Y

From b45421c335ae79832078275e876198b9ba12bba7 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 5 Nov 2014 17:45:52 +0000
Subject: [PATCH 5/5] [dim red] cmu_mocap normalize

---
 GPy/examples/dimensionality_reduction.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 6b78f73f..1da855bc 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -624,7 +624,10 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
     if in_place:
         # Make figure move in place.
         data['Y'][:, 0:3] = 0.0
-    m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
+    Y = data['Y']
+    Y_std = Y.std(0)
+    Y_std[Y_std == 0] = 1.  # constant columns (e.g. those zeroed by in_place) have zero std; avoid 0/0 = NaN
+    m = GPy.models.GPLVM((Y-Y.mean(0))/Y_std, 2)
 
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)
     if plot: