From 3e9c266d0d9f591c56350a1eacf02c489fd81f62 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:35:46 +0100
Subject: [PATCH 01/12] structural changes for printing

---
 GPy/inference/SCG.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
index 83ea054f..d0a30f0d 100644
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@@ -39,11 +39,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
     function_eval number of fn evaluations
     status: string describing convergence status
     """
-    
-    if display:
-        print "  SCG"
-        print ' {0:{mi}s}   {1:11s}    {2:11s}    {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters)))
-
     if xtol is None:
         xtol = 1e-6
     if ftol is None:
@@ -69,6 +64,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 
     iteration = 0
 
+    if display:
+        print ' {0:{mi}s}   {1:11s}    {2:11s}    {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters)))
+
     # Main optimization loop.
     while iteration < maxiters:
 
@@ -129,10 +127,10 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 
             else:
                 # Update variables for new position
-                fold = fnew
-                gradold = gradnew
                 gradnew = gradf(x, *optargs)
                 current_grad = np.dot(gradnew, gradnew)
+                gradold = gradnew
+                fold = fnew
                 # If the gradient is zero then we are done.
                 if current_grad <= gtol:
                     status = 'converged'

From 26dd629181e559512d2aa6a62e093d9a8640ffe2 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:36:29 +0100
Subject: [PATCH 02/12] removed logexp_clipped for now

---
 GPy/examples/dimensionality_reduction.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 6230ef3a..1f30a67f 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -131,7 +131,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k)
     m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
     m.data_labels = data['Y'][:N].argmax(axis=1)
 
-    m.constrain('variance|leng', logexp_clipped())
+    # m.constrain('variance|leng', logexp_clipped())
     m['lengt'] = m.X.var(0).max() / m.X.var(0)
     m['noise'] = Yn.var() / 100.
 
@@ -246,7 +246,7 @@ def bgplvm_simulation_matlab_compare():
 def bgplvm_simulation(optimize='scg',
                       plot=True,
                       max_f_eval=2e4):
-    from GPy.core.transformations import logexp_clipped
+#     from GPy.core.transformations import logexp_clipped
     D1, D2, D3, N, M, Q = 15, 8, 8, 100, 3, 5
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot)
 
@@ -259,8 +259,8 @@ def bgplvm_simulation(optimize='scg',
 
     k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
-    m.constrain('variance|noise', logexp_clipped())
-#     m.ensure_default_constraints()
+    # m.constrain('variance|noise', logexp_clipped())
+    m.ensure_default_constraints()
     m['noise'] = Y.var() / 100.
     m['linear_variance'] = .01
 
@@ -273,8 +273,8 @@ def bgplvm_simulation(optimize='scg',
         pylab.figure(); pylab.axis(); m.kern.plot_ARD()
     return m
 
-def mrd_simulation(optimize=True, plot_sim=False):
-    D1, D2, D3, N, M, Q = 150, 250, 30, 300, 3, 7
+def mrd_simulation(optimize=True, plot_sim=False, **kw):
+    D1, D2, D3, N, M, Q = 150, 250, 30, 200, 3, 7
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
@@ -284,12 +284,12 @@ def mrd_simulation(optimize=True, plot_sim=False):
     reload(mrd); reload(kern)
 
     k = kern.linear(Q, [0.01] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
-    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute')
+    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', **kw)
 
     for i, Y in enumerate(Ylist):
         m['{}_noise'.format(i + 1)] = Y.var() / 100.
 
-    m.constrain('variance|noise', logexp_clipped())
+    # m.constrain('variance|noise', logexp_clipped())
     m.ensure_default_constraints()
 
     # DEBUG

From cbdb89ffe837986efb639469873b64419d68ce81 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:37:15 +0100
Subject: [PATCH 03/12] catching precision infinity exceptions

---
 GPy/likelihoods/Gaussian.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py
index e08fee90..98227b16 100644
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@@ -53,7 +53,7 @@ class Gaussian(likelihood):
     def _set_params(self, x):
         x = float(x)
         if self._variance != x:
-            self.precision = 1. / x
+            self.precision = 1. / max(x, 1e-6)
             self.covariance_matrix = np.eye(self.N) * x
             self.V = (self.precision) * self.Y
             self._variance = x

From b7de50b5b3c3b50ec424d57c0f243bc1ced4f47f Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:38:03 +0100
Subject: [PATCH 04/12] plotting labels are now in order as passed in and
 marker can be passed with as many markers as there are labels

---
 GPy/models/GPLVM.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 7543e111..ff2be732 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -60,7 +60,7 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
 
-    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
@@ -94,13 +94,23 @@ class GPLVM(GP):
         ax.imshow(var.reshape(resolution, resolution).T,
                   extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
 
-        for i,ul in enumerate(np.unique(labels)):
+        # make sure labels are in order of input:
+        ulabels = []
+        for lab in labels:
+            if not lab in ulabels:
+                ulabels.append(lab)
+
+        for i, ul in enumerate(ulabels):
             if type(ul) is np.string_:
                 this_label = ul
             elif type(ul) is np.int64:
                 this_label = 'class %i'%ul
             else:
                 this_label = 'class %i'%i
+            if len(marker) == len(ulabels):
+                m = marker[i]
+            else:
+                m = marker
 
             index = np.nonzero(labels==ul)[0]
             if self.Q==1:
@@ -109,7 +119,7 @@ class GPLVM(GP):
             else:
                 x = self.X[index,input_1]
                 y = self.X[index,input_2]
-            ax.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
+            ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), mew=1.3, label=this_label)
 
         ax.set_xlabel('latent dimension %i'%input_1)
         ax.set_ylabel('latent dimension %i'%input_2)

From 1f5a7d0053ca745939598c0b1c5fc1c78ef29390 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:39:49 +0100
Subject: [PATCH 05/12] psi_stat tests renamed

---
 GPy/testing/__init__.py                       |  2 +-
 ...tests.py => psi_stat_expactation_tests.py} | 52 ++++++++++++-------
 ...at_tests.py => psi_stat_gradient_tests.py} |  0
 3 files changed, 33 insertions(+), 21 deletions(-)
 rename GPy/testing/{kern_psi_stat_tests.py => psi_stat_expactation_tests.py} (67%)
 rename GPy/testing/{psi_stat_tests.py => psi_stat_gradient_tests.py} (100%)

diff --git a/GPy/testing/__init__.py b/GPy/testing/__init__.py
index b2e4d822..aad6a46d 100644
--- a/GPy/testing/__init__.py
+++ b/GPy/testing/__init__.py
@@ -7,6 +7,6 @@ import unittest
 import sys
 
 def deepTest(reason):
-    if 'deep' in sys.argv:
+    if 'deep' in reason:
         return lambda x:x
     return unittest.skip("Not deep scanning, enable deepscan by adding 'deep' argument")
diff --git a/GPy/testing/kern_psi_stat_tests.py b/GPy/testing/psi_stat_expactation_tests.py
similarity index 67%
rename from GPy/testing/kern_psi_stat_tests.py
rename to GPy/testing/psi_stat_expactation_tests.py
index dc4f040f..95f83fb5 100644
--- a/GPy/testing/kern_psi_stat_tests.py
+++ b/GPy/testing/psi_stat_expactation_tests.py
@@ -6,10 +6,9 @@ Created on 26 Apr 2013
 import unittest
 import GPy
 import numpy as np
-import sys
-from .. import testing
+from GPy import testing
 
-__test__ = True
+__test__ = False
 np.random.seed(0)
 
 def ard(p):
@@ -20,7 +19,7 @@ def ard(p):
         pass
     return ""
 
-@testing.deepTest
+@testing.deepTest(__test__)
 class Test(unittest.TestCase):
     D = 9
     M = 4
@@ -29,13 +28,22 @@ class Test(unittest.TestCase):
 
     def setUp(self):
         self.kerns = (
-                      GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
-                      GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
-                      GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
-                      GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
-                      GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
-                      GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
-                      GPy.kern.bias(self.D), GPy.kern.white(self.D),
+#                       (GPy.kern.rbf(self.D, ARD=True) +
+#                        GPy.kern.linear(self.D, ARD=True) +
+#                        GPy.kern.bias(self.D) +
+#                        GPy.kern.white(self.D)),
+                      (GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) +
+                       GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) +
+                       GPy.kern.linear(self.D, np.random.rand(self.D), ARD=True) +
+                       GPy.kern.bias(self.D) +
+                       GPy.kern.white(self.D)),
+#                       GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
+#                       GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
+#                       GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
+#                       GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
+#                       GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
+#                       GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
+#                       GPy.kern.bias(self.D), GPy.kern.white(self.D),
                       )
         self.q_x_mean = np.random.randn(self.D)
         self.q_x_variance = np.exp(np.random.randn(self.D))
@@ -64,8 +72,9 @@ class Test(unittest.TestCase):
             K_ /= self.Nsamples / Nsamples
             msg = "psi1: " + "+".join([p.name + ard(p) for p in kern.parts])
             try:
-#                 pylab.figure(msg)
-#                 pylab.plot(diffs)
+                import pylab
+                pylab.figure(msg)
+                pylab.plot(diffs)
                 self.assertTrue(np.allclose(psi1.squeeze(), K_,
                                             rtol=1e-1, atol=.1),
                                 msg=msg + ": not matching")
@@ -90,8 +99,9 @@ class Test(unittest.TestCase):
             K_ /= self.Nsamples / Nsamples
             msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
             try:
-#                 pylab.figure(msg)
-#                 pylab.plot(diffs)
+                import pylab
+                pylab.figure(msg)
+                pylab.plot(diffs)
                 self.assertTrue(np.allclose(psi2.squeeze(), K_,
                                             rtol=1e-1, atol=.1),
                                 msg=msg + ": not matching")
@@ -104,9 +114,11 @@ class Test(unittest.TestCase):
                 pass
 
 if __name__ == "__main__":
-    import sys;sys.argv = ['',
-                            'Test.test_psi0',
-                            'Test.test_psi1',
-                            'Test.test_psi2',
-                           ]
+    import sys
+    __test__ = 'deep' in sys.argv
+    sys.argv = ['',
+         'Test.test_psi0',
+         'Test.test_psi1',
+         'Test.test_psi2',
+         ]
     unittest.main()
diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_gradient_tests.py
similarity index 100%
rename from GPy/testing/psi_stat_tests.py
rename to GPy/testing/psi_stat_gradient_tests.py

From 03933f9604787a58b52b86530c0eed211285817a Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:41:44 +0100
Subject: [PATCH 06/12] nosetests do not test expextation of psi_statistics

---
 GPy/testing/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/testing/__init__.py b/GPy/testing/__init__.py
index aad6a46d..f5a4c54f 100644
--- a/GPy/testing/__init__.py
+++ b/GPy/testing/__init__.py
@@ -7,6 +7,6 @@ import unittest
 import sys
 
 def deepTest(reason):
-    if 'deep' in reason:
+    if reason:
         return lambda x:x
     return unittest.skip("Not deep scanning, enable deepscan by adding 'deep' argument")

From db582390634930d0d71198b36f19480ce1814bf4 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 12:57:19 +0100
Subject: [PATCH 07/12] pickling for Bayesian_GPLVM simplified

---
 GPy/models/Bayesian_GPLVM.py | 83 ++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 37 deletions(-)

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 05e9e255..e1e99af9 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -37,6 +37,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 
         if X == None:
             X = self.initialise_latent(init, Q, likelihood.Y)
+        self.init = init
 
         if X_variance is None:
             X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
@@ -200,21 +201,21 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         assert not self.likelihood.is_heteroscedastic
         N_test = Y.shape[0]
         Q = self.Z.shape[1]
-        means = np.zeros((N_test,Q))
-        covars = np.zeros((N_test,Q))
+        means = np.zeros((N_test, Q))
+        covars = np.zeros((N_test, Q))
 
-        dpsi0 = - 0.5 * self.D * self.likelihood.precision
-        dpsi2 = self.dL_dpsi2[0][None,:,:] # TODO: this may change if we ignore het. likelihoods
-        V = self.likelihood.precision*Y
-        dpsi1 = np.dot(self.Cpsi1V,V.T)
+        dpsi0 = -0.5 * self.D * self.likelihood.precision
+        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
+        V = self.likelihood.precision * Y
+        dpsi1 = np.dot(self.Cpsi1V, V.T)
 
-        start = np.zeros(self.Q*2)
+        start = np.zeros(self.Q * 2)
 
-        for n,dpsi1_n in enumerate(dpsi1.T[:,:,None]):
-            args = (self.kern,self.Z,dpsi0,dpsi1_n,dpsi2)
-            xopt,fopt,neval,status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display = False)
+        for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
+            args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2)
+            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
 
-            mu,log_S = xopt.reshape(2,1,-1)
+            mu, log_S = xopt.reshape(2, 1, -1)
             means[n] = mu[0].copy()
             covars[n] = np.exp(log_S[0]).copy()
 
@@ -262,6 +263,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
         return fig
 
+    def __getstate__(self):
+        return (self.likelihood, self.Q, self.X, self.X_variance,
+                self.init, self.M, self.Z, self.kern,
+                self.oldpsave, self._debug)
+
+    def __setstate__(self, state):
+        self.__init__(*state)
+
     def _debug_filter_params(self, x):
         start, end = 0, self.X.size,
         X = x[start:end].reshape(self.N, self.Q)
@@ -523,59 +532,59 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 
 
 
-def latent_cost_and_grad(mu_S, kern,Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
     """
     objective function for fitting the latent variables for test points
     (negative log-likelihood: should be minimised!)
     """
-    mu,log_S = mu_S.reshape(2,1,-1)
+    mu, log_S = mu_S.reshape(2, 1, -1)
     S = np.exp(log_S)
 
-    psi0 = kern.psi0(Z,mu,S)
-    psi1 = kern.psi1(Z,mu,S)
-    psi2 = kern.psi2(Z,mu,S)
+    psi0 = kern.psi0(Z, mu, S)
+    psi1 = kern.psi1(Z, mu, S)
+    psi2 = kern.psi2(Z, mu, S)
 
-    lik = dL_dpsi0*psi0 + np.dot(dL_dpsi1.flatten(),psi1.flatten()) + np.dot(dL_dpsi2.flatten(),psi2.flatten()) - 0.5*np.sum(np.square(mu) + S) + 0.5*np.sum(log_S)
+    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
 
-    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0,Z,mu,S)
-    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1,Z,mu,S)
-    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2,Z,mu,S)
+    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
+    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
+    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
 
     dmu = mu0 + mu1 + mu2 - mu
-    #dS = S0 + S1 + S2 -0.5 + .5/S
-    dlnS = S*(S0 + S1 + S2 -0.5) + .5
-    return -lik,-np.hstack((dmu.flatten(),dlnS.flatten()))
+    # dS = S0 + S1 + S2 -0.5 + .5/S
+    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
+    return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
 
-def latent_cost(mu_S, kern,Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
     """
     objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
     This is the same as latent_cost_and_grad but only for the objective
     """
-    mu,log_S = mu_S.reshape(2,1,-1)
+    mu, log_S = mu_S.reshape(2, 1, -1)
     S = np.exp(log_S)
 
-    psi0 = kern.psi0(Z,mu,S)
-    psi1 = kern.psi1(Z,mu,S)
-    psi2 = kern.psi2(Z,mu,S)
+    psi0 = kern.psi0(Z, mu, S)
+    psi1 = kern.psi1(Z, mu, S)
+    psi2 = kern.psi2(Z, mu, S)
 
-    lik = dL_dpsi0*psi0 + np.dot(dL_dpsi1.flatten(),psi1.flatten()) + np.dot(dL_dpsi2.flatten(),psi2.flatten()) - 0.5*np.sum(np.square(mu) + S) + 0.5*np.sum(log_S)
+    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
     return -float(lik)
 
-def latent_grad(mu_S, kern,Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
     """
     This is the same as latent_cost_and_grad but only for the grad
     """
-    mu,log_S = mu_S.reshape(2,1,-1)
+    mu, log_S = mu_S.reshape(2, 1, -1)
     S = np.exp(log_S)
 
-    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0,Z,mu,S)
-    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1,Z,mu,S)
-    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2,Z,mu,S)
+    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
+    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
+    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
 
     dmu = mu0 + mu1 + mu2 - mu
-    #dS = S0 + S1 + S2 -0.5 + .5/S
-    dlnS = S*(S0 + S1 + S2 -0.5) + .5
+    # dS = S0 + S1 + S2 -0.5 + .5/S
+    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
 
-    return -np.hstack((dmu.flatten(),dlnS.flatten()))
+    return -np.hstack((dmu.flatten(), dlnS.flatten()))
 
 

From 043e208bdb85713d735e34f7e3e68e6ddc4236a7 Mon Sep 17 00:00:00 2001
From: Andreas <andrasd.damianou@sheffield.ac.uk>
Date: Wed, 22 May 2013 14:20:50 +0100
Subject: [PATCH 08/12] Completed the automatic mocap dataset fetch from url

---
 GPy/util/mocap.py       | 70 +++++++++++++++++++++++++++++++++++++++++
 GPy/util/mocap_fetch.py | 13 --------
 2 files changed, 70 insertions(+), 13 deletions(-)
 delete mode 100644 GPy/util/mocap_fetch.py

diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py
index 174728bd..860c8782 100644
--- a/GPy/util/mocap.py
+++ b/GPy/util/mocap.py
@@ -1,6 +1,8 @@
 import os
 import numpy as np
 import math
+from GPy.util import datasets as dat
+import urllib2
 
 class vertex:
     def __init__(self, name, id, parents=[], children=[], meta = {}):
@@ -687,3 +689,71 @@ def read_connections(file_name, point_names):
     
   
 skel = acclaim_skeleton()
+
+
+
+    
+def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_dir = '.', motion_store_dir = '.', subj_motions = None, store_motions = True, return_motions = True, messages = True):
+    ''' 
+    Download and store the skel. and motions indicated in a tuple (A,B) where A is a list of skeletons and B
+    the corresponding 2-D list of motions, ie B_ij is the j-th motion to download for skeleton A_i
+    The method can optionally store the fetched data and / or return them as arrays.
+    If the data are already stored, they are not fetched but just retrieved.
+
+    e.g.
+    # Download the data, do not return anything
+    GPy.util.mocap.fetch_data(subj_motions = (['35'],[['01','02','03']]), return_motions = False)
+    # Fetch and return the data in a list. Do not store them anywhere
+    GPy.util.mocap.fetch_data(subj_motions = (['35'],[['01','02','03']]), return_motions = True, store_motions = False)
+
+    In both cases above, if the data do exist in the given skel_store_dir and motion_store_dir, they are just loaded from there.
+    '''
+    
+    subjects = subj_motions[0]
+    motions = subj_motions[1]
+    all_skels = []
+    
+    assert len(subjects) == len(motions)
+    
+    if return_motions:
+        all_motions = [list() for _ in range(len(subjects))]
+    else:
+        all_motions = []
+            
+    for i in range(len(subjects)):
+        cur_skel_suffix = '/' + subjects[i] + '/' 
+        cur_skel_dir = skel_store_dir + cur_skel_suffix
+        cur_skel_file = cur_skel_dir + subjects[i] + '.asf'
+        cur_skel_url = base_url + cur_skel_suffix + subjects[i] + '.asf'
+        
+        if os.path.isfile(cur_skel_file):
+            if return_motions:
+                with open(cur_skel_file, 'r') as f:
+                    cur_skel_data = f.read()
+        else:
+            if store_motions:
+                if not os.path.isdir(cur_skel_dir):
+                    os.mkdir(cur_skel_dir)
+                if not os.path.isdir(motion_store_dir + cur_skel_suffix):
+                    os.mkdir(motion_store_dir + cur_skel_suffix)
+            cur_skel_data = dat.fetch_dataset(cur_skel_url, cur_skel_file, store_motions, messages)
+        
+        if return_motions:
+            all_skels.append(cur_skel_data)
+        
+        for j in range(len(motions[i])):
+            cur_motion_url = base_url + cur_skel_suffix + subjects[i] + '_' + motions[i][j] + '.amc'
+            cur_motion_file = motion_store_dir + cur_skel_suffix  + subjects[i] + '_' + motions[i][j] + '.amc'
+            if os.path.isfile(cur_motion_file):
+                with open(cur_motion_file, 'r') as f:
+                    if return_motions:
+                        cur_motion_data = f.read()
+            else:
+                cur_motion_data = dat.fetch_dataset(cur_motion_url, cur_motion_file, store_motions, messages)
+
+            if return_motions:
+                all_motions[i].append(cur_motion_data)
+
+    return all_skels, all_motions
+
+
diff --git a/GPy/util/mocap_fetch.py b/GPy/util/mocap_fetch.py
deleted file mode 100644
index 323cc5d8..00000000
--- a/GPy/util/mocap_fetch.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import GPy
-import urllib2
-
-# TODO...
-class mocap_fetch(base_url = 'http://mocap.cs.cmu.edu:8080/subjects/', skel_store_dir = './', motion_store_dir = './'):
-    def __init__(self):
-        self.base_url = base_url
-        self.store_dir = store_dir
-        self.motion_dict = []
-    
-    def fetch_motions(self, motion_dict = None):
-        response = urllib2.urlopen(...)
-        html = response.read()

From 94bc40f11519b998d919fbad068790f0973dedf6 Mon Sep 17 00:00:00 2001
From: Teo de Campos <teo@compbio1.(none)>
Date: Wed, 22 May 2013 15:22:37 +0100
Subject: [PATCH 09/12] Minor changes to make the demo run faster

---
 GPy/examples/dimensionality_reduction.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 6230ef3a..9c96031c 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -305,12 +305,13 @@ def brendan_faces():
     from GPy import kern
     data = GPy.util.datasets.brendan_faces()
     Q = 2
-    # Y = data['Y'][0:-1:2, :]
-    Y = data['Y']
+    Y = data['Y'][0:-1:10, :]
+    # Y = data['Y']
     Yn = Y - Y.mean()
     Yn /= Yn.std()
 
-    m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4)
+    m = GPy.models.GPLVM(Yn, Q)
+    # m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100)
 
     # optimize
     m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
@@ -318,7 +319,7 @@ def brendan_faces():
     m.ensure_default_constraints()
     m.optimize('scg', messages=1, max_f_eval=10000)
 
-    ax = m.plot_latent()
+    ax = m.plot_latent(which_indices=(0,1))
     y = m.likelihood.Y[0, :]
     data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
     lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)

From 9a8caa70fb39da6a360616553ea0331c74f1a59b Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 22 May 2013 15:25:22 +0100
Subject: [PATCH 10/12] mew argument in plotting routine

---
 GPy/models/GPLVM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index ff2be732..2525ddf9 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -119,7 +119,7 @@ class GPLVM(GP):
             else:
                 x = self.X[index,input_1]
                 y = self.X[index,input_2]
-            ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), mew=1.3, label=this_label)
+            ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
 
         ax.set_xlabel('latent dimension %i'%input_1)
         ax.set_ylabel('latent dimension %i'%input_2)

From bd1e98f564abc31ff61fc94e591f50f8089be8ad Mon Sep 17 00:00:00 2001
From: Andreas <andrasd.damianou@sheffield.ac.uk>
Date: Wed, 22 May 2013 16:06:35 +0100
Subject: [PATCH 11/12] Implemented plot_latents as an external function in
 util

---
 GPy/models/Bayesian_GPLVM.py | 15 ++----
 GPy/models/GPLVM.py          | 76 ++----------------------------
 GPy/util/plot_latent.py      | 91 ++++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 84 deletions(-)
 create mode 100644 GPy/util/plot_latent.py

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index e1e99af9..1e045a5a 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -14,6 +14,7 @@ import itertools
 from matplotlib.colors import colorConverter
 from matplotlib.figure import SubplotParams
 from GPy.inference.optimization import SCG
+from GPy.util import plot_latent
 
 class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
@@ -178,18 +179,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
         return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
 
-    def plot_latent(self, which_indices=None, *args, **kwargs):
-
-        if which_indices is None:
-            try:
-                input_1, input_2 = np.argsort(self.input_sensitivity())[:2]
-            except:
-                raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
-        else:
-            input_1, input_2 = which_indices
-        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
-        ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
-        return ax
+    def plot_latent(self, *args, **kwargs):
+        util.plot_latent_indices(self, *args, **kwargs)
 
     def do_test_latents(self, Y):
         """
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 2525ddf9..7445d0ab 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -11,6 +11,8 @@ from ..util.linalg import pdinv, PCA
 from GP import GP
 from ..likelihoods import Gaussian
 from .. import util
+from GPy.util import plot_latent
+
 
 class GPLVM(GP):
     """
@@ -60,75 +62,5 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
 
-    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
-        """
-        :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
-        :param resolution: the resolution of the grid on which to evaluate the predictive variance
-        """
-        if ax is None:
-            ax = pb.gca()
-        util.plot.Tango.reset()
-
-        if labels is None:
-            labels = np.ones(self.N)
-        if which_indices is None:
-            if self.Q==1:
-                input_1 = 0
-                input_2 = None
-            if self.Q==2:
-                input_1, input_2 = 0,1
-            else:
-                try:
-                    input_1, input_2 = np.argsort(self.input_sensitivity())[:2]
-                except:
-                    raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
-        else:
-            input_1, input_2 = which_indices
-
-        #first, plot the output variance as a function of the latent space
-        Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
-        Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
-        Xtest_full[:, :2] = Xtest
-        mu, var, low, up = self.predict(Xtest_full)
-        var = var[:, :1]
-        ax.imshow(var.reshape(resolution, resolution).T,
-                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
-
-        # make sure labels are in order of input:
-        ulabels = []
-        for lab in labels:
-            if not lab in ulabels:
-                ulabels.append(lab)
-
-        for i, ul in enumerate(ulabels):
-            if type(ul) is np.string_:
-                this_label = ul
-            elif type(ul) is np.int64:
-                this_label = 'class %i'%ul
-            else:
-                this_label = 'class %i'%i
-            if len(marker) == len(ulabels):
-                m = marker[i]
-            else:
-                m = marker
-
-            index = np.nonzero(labels==ul)[0]
-            if self.Q==1:
-                x = self.X[index,input_1]
-                y = np.zeros(index.size)
-            else:
-                x = self.X[index,input_1]
-                y = self.X[index,input_2]
-            ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
-
-        ax.set_xlabel('latent dimension %i'%input_1)
-        ax.set_ylabel('latent dimension %i'%input_2)
-
-        if not np.all(labels==1.):
-            ax.legend(loc=0,numpoints=1)
-
-        ax.set_xlim(xmin[0],xmax[0])
-        ax.set_ylim(xmin[1],xmax[1])
-        ax.grid(b=False) # remove the grid if present, it doesn't look good
-        ax.set_aspect('auto') # set a nice aspect ratio
-        return ax
+    def plot_latent(self, *args, **kwargs):
+        util.plot_latent.plot_latent(self, *args, **kwargs)
\ No newline at end of file
diff --git a/GPy/util/plot_latent.py b/GPy/util/plot_latent.py
new file mode 100644
index 00000000..47896e48
--- /dev/null
+++ b/GPy/util/plot_latent.py
@@ -0,0 +1,91 @@
+import pylab as pb
+import numpy as np
+from .. import util
+
+def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
+    """
+    :param labels: a np.array of size model.N containing labels for the points (can be number, strings, etc)
+    :param resolution: the resolution of the grid on which to evaluate the predictive variance
+    """
+    if ax is None:
+        ax = pb.gca()
+    util.plot.Tango.reset()
+
+    if labels is None:
+        labels = np.ones(model.N)
+    if which_indices is None:
+        if model.Q==1:
+            input_1 = 0
+            input_2 = None
+        if model.Q==2:
+            input_1, input_2 = 0,1
+        else:
+            try:
+                input_1, input_2 = np.argsort(model.input_sensitivity())[:2]
+            except:
+                raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
+    else:
+        input_1, input_2 = which_indices
+
+    #first, plot the output variance as a function of the latent space
+    Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(model.X[:,[input_1, input_2]],resolution=resolution)
+    Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
+    Xtest_full[:, :2] = Xtest
+    mu, var, low, up = model.predict(Xtest_full)
+    var = var[:, :1]
+    ax.imshow(var.reshape(resolution, resolution).T,
+              extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
+
+    # make sure labels are in order of input:
+    ulabels = []
+    for lab in labels:
+        if not lab in ulabels:
+            ulabels.append(lab)
+
+    for i, ul in enumerate(ulabels):
+        if type(ul) is np.string_:
+            this_label = ul
+        elif type(ul) is np.int64:
+            this_label = 'class %i'%ul
+        else:
+            this_label = 'class %i'%i
+        if len(marker) == len(ulabels):
+            m = marker[i]
+        else:
+            m = marker
+
+        index = np.nonzero(labels==ul)[0]
+        if model.Q==1:
+            x = model.X[index,input_1]
+            y = np.zeros(index.size)
+        else:
+            x = model.X[index,input_1]
+            y = model.X[index,input_2]
+        ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
+
+    ax.set_xlabel('latent dimension %i'%input_1)
+    ax.set_ylabel('latent dimension %i'%input_2)
+
+    if not np.all(labels==1.):
+        ax.legend(loc=0,numpoints=1)
+
+    ax.set_xlim(xmin[0],xmax[0])
+    ax.set_ylim(xmin[1],xmax[1])
+    ax.grid(b=False) # remove the grid if present, it doesn't look good
+    ax.set_aspect('auto') # set a nice aspect ratio
+    return ax
+
+
+def plot_latent_indices(model, which_indices=None, *args, **kwargs):
+
+    if which_indices is None:
+        try:
+            input_1, input_2 = np.argsort(model.input_sensitivity())[:2]
+        except:
+            raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'"
+    else:
+        input_1, input_2 = which_indices
+    ax = plot_latent(model, which_indices=[input_1, input_2], *args, **kwargs)
+    # TODO: Here test if there are inducing points...
+    ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
+    return ax
\ No newline at end of file

From cc370c24368a578032f305910b4b87f017803729 Mon Sep 17 00:00:00 2001
From: Teo de Campos <teo@compbio1.(none)>
Date: Wed, 22 May 2013 16:29:54 +0100
Subject: [PATCH 12/12] Fixed bug in BGPLVM plot

---
 GPy/models/Bayesian_GPLVM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 1e045a5a..10729a6f 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -180,7 +180,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
 
     def plot_latent(self, *args, **kwargs):
-        util.plot_latent_indices(self, *args, **kwargs)
+        plot_latent.plot_latent_indices(self, *args, **kwargs)
 
     def do_test_latents(self, Y):
         """