From ab50dc7ceca6e79e88c6cc68771719863b074730 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 16 Apr 2013 12:36:15 +0100
Subject: [PATCH 01/54] a litle more stability in svigp

Another instance of dpotrs instead of dot
---
 GPy/models/sparse_GP.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 4d9edacc..16b22094 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -148,7 +148,10 @@ class sparse_GP(GP):
         #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
         self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
-        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
+        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
+        #tmp = np.dot(tmp,self.Kmmi)
+        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 - self.Kmmi) + self.E) + tmp # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From aaf51f0e19603e07849c4c743b2425e9d47ef55b Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 16 Apr 2013 12:37:31 +0100
Subject: [PATCH 02/54] simulation data changes

---
 GPy/examples/dimensionality_reduction.py | 34 +++++++++++++-----------
 GPy/models/mrd.py                        | 33 ++++++++---------------
 2 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 2c7d6bea..f3e40181 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -118,13 +118,13 @@ def mrd_simulation(plot_sim=False):
 #     Y2 -= Y2.mean(0)
 #     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
 
-    D1, D2, D3, N, M, Q = 50, 100, 8, 200, 2, 5
-    x = np.linspace(0, 8 * np.pi, N)[:, None]
+    D1, D2, D3, N, M, Q = 50, 100, 8, 300, 2, 6
+    x = np.linspace(0, 4 * np.pi, N)[:, None]
 
     s1 = np.vectorize(lambda x: np.sin(x))
-    s2 = np.vectorize(lambda x: np.cos(x))
-    s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
-    sS = np.vectorize(lambda x: x * np.sin(2 * x))
+    s2 = np.vectorize(lambda x: x * np.cos(x))
+    sS = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
+    s3 = np.vectorize(lambda x: np.sin(2 * x))
 
     s1 = s1(x)
     s2 = s2(x)
@@ -161,16 +161,16 @@ def mrd_simulation(plot_sim=False):
     Y2 += .5 * np.random.randn(*Y2.shape)
     Y3 += .5 * np.random.randn(*Y3.shape)
 
-#     Y1 -= Y1.mean(0)
-#     Y2 -= Y2.mean(0)
-#     Y3 -= Y3.mean(0)
+    Y1 -= Y1.mean(0)
+    Y2 -= Y2.mean(0)
+    Y3 -= Y3.mean(0)
 
-    # Y1 /= Y1.std(0)
-    # Y2 /= Y2.std(0)
-    # Y3 /= Y3.std(0)
+    Y1 /= Y1.std(0)
+    Y2 /= Y2.std(0)
+    Y3 /= Y3.std(0)
 
     Slist = [s1, s2, sS]
-    Ylist = [Y1, Y2]
+    Ylist = [Y1]
 
     if plot_sim:
         import pylab
@@ -190,20 +190,22 @@ def mrd_simulation(plot_sim=False):
         pylab.tight_layout()
 
     # k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
-    k = kern.linear(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
-    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", _debug=False)
+
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .1)
+    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False)
     m.ensure_default_constraints()
+    ardvar = 5. / (m.X.max(axis=0) - m.X.min(axis=0))
 
     for i, Y in enumerate(Ylist):
         m.set('{}_noise'.format(i + 1), Y.var() / 100.)
 
-#     import ipdb;ipdb.set_trace()
     cstr = "variance"
-    m.unconstrain(cstr); m.constrain_bounded(cstr, 1e-15, 1.)
+    m.unconstrain(cstr); m.constrain_bounded(cstr, 1e-12, 1.)
 
 #     print "initializing beta"
 #     cstr = "noise"
 #     m.unconstrain(cstr); m.constrain_fixed(cstr)
+#     import ipdb;ipdb.set_trace()
 #     m.optimize('scg', messages=1, max_f_eval=200)
 #
 #     print "releasing beta"
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 943db420..f5e56d08 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -273,39 +273,28 @@ class MRD(model):
 
     def plot_X_1d(self, colors=None):
         fig = pylab.figure(num="MRD X 1d", figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
-        fig.clf()
-        ax1 = fig.add_subplot(self.X.shape[1], 1, 1)
         if colors is None:
-            colors = ax1._get_lines.color_cycle
-        ax1.plot(self.X, c='k', alpha=.3)
-        plots = ax1.plot(self.X.T[0], c=colors.next())
-        ax1.fill_between(numpy.arange(self.X.shape[0]),
-                         self.X.T[0] - 2 * numpy.sqrt(self.gref.X_variance.T[0]),
-                         self.X.T[0] + 2 * numpy.sqrt(self.gref.X_variance.T[0]),
-                         facecolor=plots[-1].get_color(),
-                         alpha=.3)
-        ax1.text(1, 1, r"$\mathbf{{X_{}}}".format(1),
-                 horizontalalignment='right',
-                 verticalalignment='top',
-                 transform=ax1.transAxes)
-        for i in range(self.X.shape[1] - 1):
-            ax = fig.add_subplot(self.X.shape[1], 1, i + 2)
+            colors = pylab.gca()._get_lines.color_cycle
+            pylab.clf()
+        plots = []
+        for i in range(self.X.shape[1]):
+            ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
             ax.plot(self.X, c='k', alpha=.3)
-            plots.extend(ax.plot(self.X.T[i + 1], c=colors.next()))
+            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
             ax.fill_between(numpy.arange(self.X.shape[0]),
-                            self.X.T[i + 1] - 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
-                            self.X.T[i + 1] + 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
+                            self.X.T[i] - 2 * numpy.sqrt(self.gref.X_variance.T[i]),
+                            self.X.T[i] + 2 * numpy.sqrt(self.gref.X_variance.T[i]),
                             facecolor=plots[-1].get_color(),
                             alpha=.3)
-            if i < self.X.shape[1] - 2:
+            ax.legend(borderaxespad=0.)
+            if i < self.X.shape[1] - 1:
                 ax.set_xticklabels('')
-        ax1.set_xticklabels('')
 #         ax1.legend(plots, [r"$\mathbf{{X_{}}}$".format(i + 1) for i in range(self.X.shape[1])],
 #                    bbox_to_anchor=(0., 1 + .01 * self.X.shape[1],
 #                                    1., 1. + .01 * self.X.shape[1]), loc=3,
 #                    ncol=self.X.shape[1], mode="expand", borderaxespad=0.)
         pylab.draw()
-        fig.tight_layout(h_pad=.01, rect=(0, 0, 1, .95))
+        fig.tight_layout(h_pad=.01)  # , rect=(0, 0, 1, .95))
         return fig
 
     def plot_X(self):

From 009b7314bfea0c2bd37a86956d88a494964adcff Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 16 Apr 2013 15:04:25 +0100
Subject: [PATCH 03/54] added bgplvm_simulation on same simulation

---
 GPy/examples/dimensionality_reduction.py | 180 +++++++++++++----------
 GPy/models/mrd.py                        |  10 ++
 2 files changed, 114 insertions(+), 76 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 1ab7a771..04687a35 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -6,6 +6,7 @@ import pylab as pb
 from matplotlib import pyplot as plt, pyplot
 
 import GPy
+from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
 
 default_seed = np.random.seed(123344)
 
@@ -46,7 +47,7 @@ def GPLVM_oil_100(optimize=True):
     data = GPy.util.datasets.oil_100()
 
     # create simple GP model
-    kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
+    kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
     m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
     m.data_labels = data['Y'].argmax(axis=1)
 
@@ -99,6 +100,92 @@ def oil_100():
     # m.plot_latent(labels=data['Y'].argmax(axis=1))
     return m
 
+def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
+    x = np.linspace(0, 4 * np.pi, N)[:, None]
+    s1 = np.vectorize(lambda x: np.sin(x))
+    s2 = np.vectorize(lambda x: x * np.cos(x))
+    s3 = np.vectorize(lambda x: np.sin(2 * x))
+    sS = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
+
+    s1 = s1(x)
+    s2 = s2(x)
+    s3 = s3(x)
+    sS = sS(x)
+
+    s1 -= s1.mean()
+    s2 -= s2.mean()
+    s3 -= s3.mean()
+    sS -= sS.mean()
+    s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
+    s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
+    s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
+    sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
+
+    S1 = np.hstack([s1, sS])
+    S2 = np.hstack([s2, sS])
+    S3 = np.hstack([s3, sS])
+
+    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
+    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
+    Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
+
+    Y1 += .5 * np.random.randn(*Y1.shape)
+    Y2 += .5 * np.random.randn(*Y2.shape)
+    Y3 += .5 * np.random.randn(*Y3.shape)
+
+    Y1 -= Y1.mean(0)
+    Y2 -= Y2.mean(0)
+    Y3 -= Y3.mean(0)
+    Y1 /= Y1.std(0)
+    Y2 /= Y2.std(0)
+    Y3 /= Y3.std(0)
+
+    slist = [s1, s2, s3, sS]
+    Ylist = [Y1, Y2, Y3]
+
+    if plot_sim:
+        import pylab
+        import itertools
+        fig = pylab.figure("MRD Simulation", figsize=(8, 6))
+        fig.clf()
+        ax = fig.add_subplot(2, 1, 1)
+        labls = sorted(filter(lambda x: x.startswith("s"), locals()))
+        for S, lab in itertools.izip(slist, labls):
+            ax.plot(S, label=lab)
+        ax.legend()
+        for i, Y in enumerate(Ylist):
+            ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
+            ax.imshow(Y)
+            ax.set_title("Y{}".format(i + 1))
+        pylab.draw()
+        pylab.tight_layout()
+
+    return slist, [S1, S2, S3], Ylist
+
+def bgplvm_simulation(plot_sim=False):
+    D1, D2, D3, N, M, Q = 50, 34, 8, 100, 2, 6
+    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
+
+    from GPy.models import mrd
+    from GPy import kern
+    reload(mrd); reload(kern)
+
+    Y = Ylist[0]
+
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .1)
+    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
+    m.ensure_default_constraints()
+    m.set('noise', Y.var() / 100.)
+    m.auto_scale_factor = True
+
+    cstr = 'variance'
+    m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-20, 1.)
+
+    cstr = 'linear_variance'
+    m.unconstrain(cstr), m.constrain_positive(cstr)
+
+    return m
+
 def mrd_simulation(plot_sim=False):
     # num = 2
 #     ard1 = np.array([1., 1, 0, 0], dtype=float)
@@ -117,32 +204,8 @@ def mrd_simulation(plot_sim=False):
 #     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
 #     Y2 -= Y2.mean(0)
 #     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
-
-    D1, D2, D3, N, M, Q = 50, 100, 8, 300, 2, 6
-    x = np.linspace(0, 4 * np.pi, N)[:, None]
-
-    s1 = np.vectorize(lambda x: np.sin(x))
-    s2 = np.vectorize(lambda x: x * np.cos(x))
-    sS = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
-    s3 = np.vectorize(lambda x: np.sin(2 * x))
-
-    s1 = s1(x)
-    s2 = s2(x)
-    s3 = s3(x)
-    sS = sS(x)
-
-    s1 -= s1.mean()
-    s2 -= s2.mean()
-    s3 -= s3.mean()
-    sS -= sS.mean()
-    s1 /= np.abs(s1).max()
-    s2 /= np.abs(s2).max()
-    s3 /= np.abs(s3).max()
-    sS /= np.abs(sS).max()
-
-    S1 = np.hstack([s1, sS])
-    S2 = np.hstack([s2, sS])
-    S3 = np.hstack([s3, sS])
+    D1, D2, D3, N, M, Q = 50, 34, 8, 100, 2, 6
+    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
     from GPy import kern
@@ -153,41 +216,7 @@ def mrd_simulation(plot_sim=False):
 #     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
 #     Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T
 
-    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
-    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
-    Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
-
-    Y1 += .5 * np.random.randn(*Y1.shape)
-    Y2 += .5 * np.random.randn(*Y2.shape)
-    Y3 += .5 * np.random.randn(*Y3.shape)
-
-    Y1 -= Y1.mean(0)
-    Y2 -= Y2.mean(0)
-    Y3 -= Y3.mean(0)
-
-    Y1 /= Y1.std(0)
-    Y2 /= Y2.std(0)
-    Y3 /= Y3.std(0)
-
-    Slist = [s1, s2, sS]
-    Ylist = [Y1]
-
-    if plot_sim:
-        import pylab
-        import itertools
-        fig = pylab.figure("MRD Simulation", figsize=(8, 6))
-        fig.clf()
-        ax = fig.add_subplot(2, 1, 1)
-        labls = sorted(filter(lambda x: x.startswith("s"), locals()))
-        for S, lab in itertools.izip(Slist, labls):
-            ax.plot(x, S, label=lab)
-        ax.legend()
-        for i, Y in enumerate(Ylist):
-            ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
-            ax.imshow(Y)
-            ax.set_title("Y{}".format(i + 1))
-        pylab.draw()
-        pylab.tight_layout()
+    Ylist = [Ylist[0]]
 
     # k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
 
@@ -199,29 +228,28 @@ def mrd_simulation(plot_sim=False):
     for i, Y in enumerate(Ylist):
         m.set('{}_noise'.format(i + 1), Y.var() / 100.)
 
-    cstr = "variance"
-    m.unconstrain(cstr); m.constrain_bounded(cstr, 1e-12, 1.)
+
+    cstr = 'variance'
+    m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
+
+    cstr = 'linear_variance'
+    m.unconstrain(cstr), m.constrain_positive(cstr)
 
 #     print "initializing beta"
 #     cstr = "noise"
 #     m.unconstrain(cstr); m.constrain_fixed(cstr)
-#     import ipdb;ipdb.set_trace()
-#     m.optimize('scg', messages=1, max_f_eval=200)
-#
+#     m.optimize('scg', messages=1, max_f_eval=100)
+
 #     print "releasing beta"
 #     cstr = "noise"
 #     m.unconstrain(cstr);  m.constrain_positive(cstr)
 
+    np.seterr(all='call')
+    def ipdbonerr(errtype, flags):
+        import ipdb; ipdb.set_trace()
+    np.seterrcall(ipdbonerr)
 
-    m.auto_scale_factor = True
-
-#     fig = pyplot.figure("expected", figsize=(8, 3))
-#     ax = fig.add_subplot(121)
-#     ax.bar(np.arange(ard1.size) + .1, ard1)
-#     ax = fig.add_subplot(122)
-#     ax.bar(np.arange(ard2.size) + .1, ard2)
-
-    return m
+    return m  # , mtest
 
 def mrd_silhouette():
 
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index f5e56d08..31548d9a 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -345,6 +345,16 @@ class MRD(model):
 
     def _debug_optimize(self, opt='scg', maxiters=500, itersteps=10):
         iters = 0
+
+        import multiprocessing
+        class M(multiprocessing.Process):
+            def __init__(self, q, *args, **kw):
+                self.q = q
+                super(M, self).__init__(*args, **kw)
+                pass
+            def run(self):
+                pass
+
         optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
         self._debug_plot()
         raw_input("enter to start debug")

From 865e9df255d4e641a91ef433b0c979183a0ba9ce Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 17 Apr 2013 15:45:20 +0100
Subject: [PATCH 04/54] BGPLVM still failing, doesn't seem to be numerical : (

---
 GPy/examples/dimensionality_reduction.py | 75 +++++++++++++------
 GPy/models/Bayesian_GPLVM.py             | 34 ++++++---
 GPy/models/mrd.py                        | 92 ++++++++++--------------
 3 files changed, 117 insertions(+), 84 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 04687a35..1ee19e62 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -103,9 +103,9 @@ def oil_100():
 def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     x = np.linspace(0, 4 * np.pi, N)[:, None]
     s1 = np.vectorize(lambda x: np.sin(x))
-    s2 = np.vectorize(lambda x: x * np.cos(x))
-    s3 = np.vectorize(lambda x: np.sin(2 * x))
-    sS = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
+    s2 = np.vectorize(lambda x: np.cos(x))
+    s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
+    sS = np.vectorize(lambda x: np.sin(2 * x))
 
     s1 = s1(x)
     s2 = s2(x)
@@ -162,27 +162,57 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
 
     return slist, [S1, S2, S3], Ylist
 
-def bgplvm_simulation(plot_sim=False):
-    D1, D2, D3, N, M, Q = 50, 34, 8, 100, 2, 6
+def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
+    D1, D2, D3, N, M, Q = 2000, 8, 8, 500, 2, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)
 
-    Y = Ylist[0]
+    Y = Ylist[1]
 
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .1)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, .0001) + kern.white(Q, .1)
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
-    m.ensure_default_constraints()
     m.set('noise', Y.var() / 100.)
-    m.auto_scale_factor = True
+#     m.auto_scale_factor = True
+#     m.scale_factor = 1.
 
-    cstr = 'variance'
-    m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-20, 1.)
+    m.ensure_default_constraints()
+
+    if burnin:
+        print "initializing beta"
+        cstr = "noise"
+        m.unconstrain(cstr); m.constrain_fixed(cstr)
+        m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
+
+        print "releasing beta"
+        cstr = "noise"
+        m.unconstrain(cstr);  m.constrain_positive(cstr)
+
+
+# #     cstr = 'variance'
+# #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
+#     cstr = 'X_\d'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, -100., 100.)
+#
+#     cstr = 'noise'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
+#
+#     cstr = 'white'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-6, 1.)
+#
+#     cstr = 'linear_variance'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)  # m.constrain_positive(cstr)
+#
+#     cstr = 'X_variance'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)  # m.constrain_positive(cstr)
+
+#     np.seterr(all='call')
+#     def ipdbonerr(errtype, flags):
+#         import ipdb; ipdb.set_trace()
+#     np.seterrcall(ipdbonerr)
 
-    cstr = 'linear_variance'
-    m.unconstrain(cstr), m.constrain_positive(cstr)
 
     return m
 
@@ -204,7 +234,7 @@ def mrd_simulation(plot_sim=False):
 #     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
 #     Y2 -= Y2.mean(0)
 #     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
-    D1, D2, D3, N, M, Q = 50, 34, 8, 100, 2, 6
+    D1, D2, D3, N, M, Q = 2000, 34, 8, 500, 3, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
@@ -216,24 +246,23 @@ def mrd_simulation(plot_sim=False):
 #     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
 #     Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T
 
-    Ylist = [Ylist[0]]
+    Ylist = Ylist[0:2]
 
     # k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
 
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .1)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .001)
     m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False)
-    m.ensure_default_constraints()
-    ardvar = 5. / (m.X.max(axis=0) - m.X.min(axis=0))
 
     for i, Y in enumerate(Ylist):
         m.set('{}_noise'.format(i + 1), Y.var() / 100.)
 
+    m.ensure_default_constraints()
 
-    cstr = 'variance'
-    m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
-
-    cstr = 'linear_variance'
-    m.unconstrain(cstr), m.constrain_positive(cstr)
+#     cstr = 'variance'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
+#
+#     cstr = 'linear_variance'
+#     m.unconstrain(cstr), m.constrain_positive(cstr)
 
 #     print "initializing beta"
 #     cstr = "noise"
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index a99f7667..211d21c6 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -9,6 +9,7 @@ from sparse_GP import sparse_GP
 from GPy.util.linalg import pdinv
 from ..likelihoods import Gaussian
 from .. import kern
+from numpy.linalg.linalg import LinAlgError
 
 class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
@@ -22,7 +23,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, oldpsave=5, **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)
 
@@ -36,9 +37,21 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         if kernel is None:
             kernel = kern.rbf(Q) + kern.white(Q)
 
+        self.oldpsave = oldpsave
+        self._oldps = []
 
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
 
+    @property
+    def oldps(self):
+        return self._oldps
+    @oldps.setter
+    def oldps(self, p):
+        if len(self._oldps) == (self.oldpsave + 1):
+            self._oldps.pop()
+        # if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]):
+        self._oldps.insert(0, p.copy())
+
     def _get_param_names(self):
         X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
         S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
@@ -54,14 +67,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         ===============================================================
 
         """
-        return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
-
-    def _set_params(self, x):
-        N, Q = self.N, self.Q
-        self.X = x[:self.X.size].reshape(N, Q).copy()
-        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
-        sparse_GP._set_params(self, x[(2 * N * Q):])
+        x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
+        return x
 
+    def _set_params(self, x, save_old=True):
+        try:
+            N, Q = self.N, self.Q
+            self.X = x[:self.X.size].reshape(N, Q).copy()
+            self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
+            sparse_GP._set_params(self, x[(2 * N * Q):])
+            self.oldps = x
+        except (LinAlgError, FloatingPointError):
+            print "\rWARNING: Caught LinAlgError, reconstructing old state            "
+            self._set_params(self.oldps[-1], save_old=False)
 
     def dKL_dmuS(self):
         dKL_dS = (1. - (1. / self.X_variance)) * 0.5
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 31548d9a..096c9cb9 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -271,14 +271,31 @@ class MRD(model):
         self.Z = Z
         return Z
 
-    def plot_X_1d(self, colors=None):
-        fig = pylab.figure(num="MRD X 1d", figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
+    def _handle_plotting(self, fig_num, axes, plotf):
+        if axes is None:
+            fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms)))
+        for i, g in enumerate(self.bgplvms):
+            if axes is None:
+                ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            else:
+                ax = axes[i]
+            plotf(i, g, ax)
+        pylab.draw()
+        if axes is None:
+            fig.tight_layout()
+            return fig
+        else:
+            return pylab.gcf()
+
+    def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
+        fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
         if colors is None:
             colors = pylab.gca()._get_lines.color_cycle
             pylab.clf()
         plots = []
         for i in range(self.X.shape[1]):
-            ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
+            if axes is None:
+                ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
             ax.plot(self.X, c='k', alpha=.3)
             plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
             ax.fill_between(numpy.arange(self.X.shape[0]),
@@ -289,72 +306,41 @@ class MRD(model):
             ax.legend(borderaxespad=0.)
             if i < self.X.shape[1] - 1:
                 ax.set_xticklabels('')
-#         ax1.legend(plots, [r"$\mathbf{{X_{}}}$".format(i + 1) for i in range(self.X.shape[1])],
-#                    bbox_to_anchor=(0., 1 + .01 * self.X.shape[1],
-#                                    1., 1. + .01 * self.X.shape[1]), loc=3,
-#                    ncol=self.X.shape[1], mode="expand", borderaxespad=0.)
         pylab.draw()
         fig.tight_layout(h_pad=.01)  # , rect=(0, 0, 1, .95))
         return fig
 
-    def plot_X(self):
-        fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
-        fig.clf()
-        for i, g in enumerate(self.bgplvms):
-            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
-            ax.imshow(g.X)
-        pylab.draw()
-        fig.tight_layout()
+    def plot_X(self, fig_num="MRD Predictions", axes=None):
+        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
         return fig
 
-    def plot_predict(self):
-        fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
-        fig.clf()
-        for i, g in enumerate(self.bgplvms):
-            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
-            ax.imshow(g.predict(g.X)[0])
-        pylab.draw()
-        fig.tight_layout()
+    def plot_predict(self, fig_num="MRD Predictions", axes=None):
+        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0]))
         return fig
 
-    def plot_scales(self, *args, **kwargs):
-        fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
-        fig.clf()
-        for i, g in enumerate(self.bgplvms):
-            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
-            g.kern.plot_ARD(ax=ax, *args, **kwargs)
-        pylab.draw()
-        fig.tight_layout()
+    def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs):
+        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs))
         return fig
 
-    def plot_latent(self, *args, **kwargs):
-        fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
-        fig.clf()
-        for i, g in enumerate(self.bgplvms):
-            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
-            g.plot_latent(ax=ax, *args, **kwargs)
-        pylab.draw()
-        fig.tight_layout()
+    def plot_latent(self, fig_num="MRD Latent Spaces", axes=None, *args, **kwargs):
+        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
         return fig
 
     def _debug_plot(self):
-        self.plot_X()
         self.plot_X_1d()
-        self.plot_latent()
-        self.plot_scales()
+        fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
+        fig.clf()
+        axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
+        self.plot_X(axes=axes)
+        axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
+        self.plot_latent(axes=axes)
+        axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
+        self.plot_scales(axes=axes)
+        pylab.draw()
+        fig.tight_layout()
 
-    def _debug_optimize(self, opt='scg', maxiters=500, itersteps=10):
+    def _debug_optimize(self, opt='scg', maxiters=5000, itersteps=10):
         iters = 0
-
-        import multiprocessing
-        class M(multiprocessing.Process):
-            def __init__(self, q, *args, **kw):
-                self.q = q
-                super(M, self).__init__(*args, **kw)
-                pass
-            def run(self):
-                pass
-
         optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
         self._debug_plot()
         raw_input("enter to start debug")

From 25063fd53d1d7d74da479f129f2fbfe57bf37950 Mon Sep 17 00:00:00 2001
From: andreas <andreas@Hulk.(none)>
Date: Thu, 18 Apr 2013 16:39:55 +0100
Subject: [PATCH 05/54] pdinv passes extra args to jitchol

---
 GPy/util/linalg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index f88099a4..79025d4f 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -97,7 +97,7 @@ def jitchol_old(A,maxtries=5):
 
         raise linalg.LinAlgError,"not positive definite, even with jitter."
 
-def pdinv(A):
+def pdinv(A, *args):
     """
     :param A: A DxD pd numpy array
 
@@ -110,7 +110,7 @@ def pdinv(A):
     :rval logdet: the log of the determinant of A
     :rtype logdet: float64
     """
-    L = jitchol(A)
+    L = jitchol(A, *args)
     logdet = 2.*np.sum(np.log(np.diag(L)))
     Li = chol_inv(L)
     Ai = linalg.lapack.flapack.dpotri(L)[0]

From 10703e47746576aa8a8b11eacd9d1c0628553827 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Thu, 18 Apr 2013 17:59:01 +0100
Subject: [PATCH 06/54] demo changed, bgplvm still broken

---
 GPy/examples/dimensionality_reduction.py |  12 +-
 GPy/models/Bayesian_GPLVM.py             |  19 ++-
 GPy/models/GP.py                         | 174 ++++++++++++-----------
 3 files changed, 113 insertions(+), 92 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 1ee19e62..8c8e23fe 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -170,26 +170,30 @@ def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
     from GPy import kern
     reload(mrd); reload(kern)
 
+
     Y = Ylist[1]
 
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, .0001) + kern.white(Q, .1)
+    k = kern.linear(Q, ARD=True) + kern.white(Q, .00001)  # + kern.bias(Q)
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
-    m.set('noise', Y.var() / 100.)
+    # m.set('noise',)
 #     m.auto_scale_factor = True
 #     m.scale_factor = 1.
-
     m.ensure_default_constraints()
 
+
     if burnin:
         print "initializing beta"
         cstr = "noise"
-        m.unconstrain(cstr); m.constrain_fixed(cstr)
+        m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 100.)
         m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
 
         print "releasing beta"
         cstr = "noise"
         m.unconstrain(cstr);  m.constrain_positive(cstr)
 
+    true_X = np.hstack((slist[1], slist[3], 0. * np.ones((N, Q - 2))))
+    m.set('X_\d', true_X)
+    m.constrain_fixed("X_\d")
 
 # #     cstr = 'variance'
 # #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 211d21c6..a23368de 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -82,7 +82,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             self._set_params(self.oldps[-1], save_old=False)
 
     def dKL_dmuS(self):
-        dKL_dS = (1. - (1. / self.X_variance)) * 0.5
+        dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
         dKL_dmu = self.X
         return dKL_dmu, dKL_dS
 
@@ -101,13 +101,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
 
     def log_likelihood(self):
-        return sparse_GP.log_likelihood(self) - self.KL_divergence()
+        ll = sparse_GP.log_likelihood(self)
+        kl = self.KL_divergence()
+        return ll + kl
 
     def _log_likelihood_gradients(self):
         dKL_dmu, dKL_dS = self.dKL_dmuS()
         dL_dmu, dL_dS = self.dL_dmuS()
         # TODO: find way to make faster
-        dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+
+        d_dmu = (dL_dmu + dKL_dmu).flatten()
+        d_dS = (dL_dS + dKL_dS).flatten()
+        # TEST KL: ====================
+        # d_dmu = (dKL_dmu).flatten()
+        # d_dS = (dKL_dS).flatten()
+        # ========================
+        # TEST L: ====================
+#         d_dmu = (dL_dmu).flatten()
+#         d_dS = (dL_dS).flatten()
+        # ========================
+        dbound_dmuS = np.hstack((d_dmu, d_dS))
         return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
 
     def plot_latent(self, which_indices=None, *args, **kwargs):
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index cfda0cfe..74bb5915 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -6,8 +6,8 @@ import numpy as np
 import pylab as pb
 from .. import kern
 from ..core import model
-from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
+from ..util.linalg import pdinv, mdot
+from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
 from ..likelihoods import EP
 
 class GP(model):
@@ -35,25 +35,25 @@ class GP(model):
         # parse arguments
         self.Xslices = Xslices
         self.X = X
-        assert len(self.X.shape)==2
+        assert len(self.X.shape) == 2
         self.N, self.Q = self.X.shape
         assert isinstance(kernel, kern.kern)
         self.kern = kernel
 
-        #here's some simple normalization for the inputs
+        # here's some simple normalization for the inputs
         if normalize_X:
-            self._Xmean = X.mean(0)[None,:]
-            self._Xstd = X.std(0)[None,:]
+            self._Xmean = X.mean(0)[None, :]
+            self._Xstd = X.std(0)[None, :]
             self.X = (X.copy() - self._Xmean) / self._Xstd
-            if hasattr(self,'Z'):
+            if hasattr(self, 'Z'):
                 self.Z = (self.Z - self._Xmean) / self._Xstd
         else:
-            self._Xmean = np.zeros((1,self.X.shape[1]))
-            self._Xstd = np.ones((1,self.X.shape[1]))
+            self._Xmean = np.zeros((1, self.X.shape[1]))
+            self._Xstd = np.ones((1, self.X.shape[1]))
 
         self.likelihood = likelihood
-        #assert self.X.shape[0] == self.likelihood.Y.shape[0]
-        #self.N, self.D = self.likelihood.Y.shape
+        # assert self.X.shape[0] == self.likelihood.Y.shape[0]
+        # self.N, self.D = self.likelihood.Y.shape
         assert self.X.shape[0] == self.likelihood.data.shape[0]
         self.N, self.D = self.likelihood.data.shape
 
@@ -65,24 +65,24 @@ class GP(model):
         """
         return np.zeros_like(self.Z)
 
-    def _set_params(self,p):
+    def _set_params(self, p):
         self.kern._set_params_transformed(p[:self.kern.Nparam])
-        #self.likelihood._set_params(p[self.kern.Nparam:])               # test by Nicolas
-        self.likelihood._set_params(p[self.kern.Nparam_transformed():])    # test by Nicolas
+        # self.likelihood._set_params(p[self.kern.Nparam:])               # test by Nicolas
+        self.likelihood._set_params(p[self.kern.Nparam_transformed():])  # test by Nicolas
 
 
-        self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices)
+        self.K = self.kern.K(self.X, slices1=self.Xslices, slices2=self.Xslices)
         self.K += self.likelihood.covariance_matrix
 
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
 
-        #the gradient of the likelihood wrt the covariance matrix
+        # the gradient of the likelihood wrt the covariance matrix
         if self.likelihood.YYT is None:
-            alpha = np.dot(self.Ki,self.likelihood.Y)
-            self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
+            alpha = np.dot(self.Ki, self.likelihood.Y)
+            self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
         else:
             tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
-            self.dL_dK = 0.5*(tmp - self.D*self.Ki)
+            self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
 
     def _get_params(self):
         return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@@ -98,16 +98,16 @@ class GP(model):
         this function does nothing
         """
         self.likelihood.fit_full(self.kern.K(self.X))
-        self._set_params(self._get_params()) # update the GP
+        self._set_params(self._get_params())  # update the GP
 
     def _model_fit_term(self):
         """
         Computes the model fit using YYT if it's available
         """
         if self.likelihood.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
+            return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
         else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
+            return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
 
     def log_likelihood(self):
         """
@@ -117,7 +117,7 @@ class GP(model):
         model for a new variable Y* = v_tilde/tau_tilde, with a covariance
         matrix K* = K + diag(1./tau_tilde) plus a normalization term.
         """
-        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
+        return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
 
 
     def _log_likelihood_gradients(self):
@@ -128,27 +128,27 @@ class GP(model):
 
         For the likelihood parameters, pass in alpha = K^-1 y
         """
-        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X, slices1=self.Xslices, slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
 
-    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
+    def _raw_predict(self, _Xnew, slices=None, full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalization or likelihood
         """
-        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
-        KiKx = np.dot(self.Ki,Kx)
+        Kx = self.kern.K(self.X, _Xnew, slices1=self.Xslices, slices2=slices)
+        mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
+        KiKx = np.dot(self.Ki, Kx)
         if full_cov:
-            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
-            var = Kxx - np.dot(KiKx.T,Kx)
+            Kxx = self.kern.K(_Xnew, slices1=slices, slices2=slices)
+            var = Kxx - np.dot(KiKx.T, Kx)
         else:
             Kxx = self.kern.Kdiag(_Xnew, slices=slices)
-            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
-            var = var[:,None]
+            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
+            var = var[:, None]
         return mu, var
 
 
-    def predict(self,Xnew, slices=None, full_cov=False):
+    def predict(self, Xnew, slices=None, full_cov=False):
         """
         Predict the function(s) at the new point(s) Xnew.
 
@@ -174,11 +174,11 @@ class GP(model):
            This is to allow for different normalizations of the output dimensions.
 
         """
-        #normalize X values
+        # normalize X values
         Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
         mu, var = self._raw_predict(Xnew, slices, full_cov)
 
-        #now push through likelihood TODO
+        # now push through likelihood TODO
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
 
         return mean, var, _025pm, _975pm
@@ -204,86 +204,90 @@ class GP(model):
         Can plot only part of the data and part of the posterior functions using which_data and which_functions
         Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
         """
-        if which_functions=='all':
-            which_functions = [True]*self.kern.Nparts
-        if which_data=='all':
+        if which_functions == 'all':
+            which_functions = [True] * self.kern.Nparts
+        if which_data == 'all':
             which_data = slice(None)
 
         if self.X.shape[1] == 1:
             Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
             if samples == 0:
-                m,v = self._raw_predict(Xnew, slices=which_functions)
-                gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
-                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
+                m, v = self._raw_predict(Xnew, slices=which_functions)
+                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
+                pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
             else:
-                m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
-                Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
-                gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
+                m, v = self._raw_predict(Xnew, slices=which_functions, full_cov=True)
+                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
                 for i in range(samples):
-                    pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25)
-            pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
-            pb.xlim(xmin,xmax)
-            ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
-            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
-            pb.ylim(ymin,ymax)
-            if hasattr(self,'Z'):
-                pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
+                    pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+            pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+            pb.xlim(xmin, xmax)
+            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            pb.ylim(ymin, ymax)
+            if hasattr(self, 'Z'):
+                pb.plot(self.Z, self.Z * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
 
         elif self.X.shape[1] == 2:
             resolution = resolution or 50
-            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
-            m,v = self._raw_predict(Xnew, slices=which_functions)
-            m = m.reshape(resolution,resolution).T
-            pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
-            pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
+            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
+            m, v = self._raw_predict(Xnew, slices=which_functions)
+            m = m.reshape(resolution, resolution).T
+            pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
+            pb.scatter(Xorig[:, 0], Xorig[:, 1], 40, Yorig, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
+            pb.xlim(xmin[0], xmax[0])
+            pb.ylim(xmin[1], xmax[1])
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20):
+    def plot(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, levels=20):
         """
         TODO: Docstrings!
         :param levels: for 2D plotting, the number of contour levels to use
 
         """
         # TODO include samples
-        if which_functions=='all':
-            which_functions = [True]*self.kern.Nparts
-        if which_data=='all':
+        if which_functions == 'all':
+            which_functions = [True] * self.kern.Nparts
+        if which_data == 'all':
             which_data = slice(None)
 
         if self.X.shape[1] == 1:
 
-            Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
+            Xu = self.X * self._Xstd + self._Xmean  # NOTE self.X are the normalized values now
 
             Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
             m, var, lower, upper = self.predict(Xnew, slices=which_functions)
-            gpplot(Xnew,m, lower, upper)
-            pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
-            ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
-            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
-            pb.xlim(xmin,xmax)
-            pb.ylim(ymin,ymax)
-            if hasattr(self,'Z'):
-                Zu = self.Z*self._Xstd + self._Xmean
-                pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
-                if self.has_uncertain_inputs:
-                    pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
+            gpplot(Xnew, m, lower, upper)
+            pb.plot(Xu[which_data], self.likelihood.data[which_data], 'kx', mew=1.5)
+            if self.has_uncertain_inputs:
+                pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
 
-        elif self.X.shape[1]==2: #FIXME
+            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            pb.xlim(xmin, xmax)
+            pb.ylim(ymin, ymax)
+            if hasattr(self, 'Z'):
+                Zu = self.Z * self._Xstd + self._Xmean
+                pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
+                    # pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
+
+        elif self.X.shape[1] == 2:  # FIXME
             resolution = resolution or 50
-            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
-            x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
+            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
+            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
             m, var, lower, upper = self.predict(Xnew, slices=which_functions)
-            m = m.reshape(resolution,resolution).T
-            pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
+            m = m.reshape(resolution, resolution).T
+            pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
             Yf = self.likelihood.Y.flatten()
-            pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
-            if hasattr(self,'Z'):
-                pb.plot(self.Z[:,0],self.Z[:,1],'wo')
+            pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
+            pb.xlim(xmin[0], xmax[0])
+            pb.ylim(xmin[1], xmax[1])
+            if hasattr(self, 'Z'):
+                pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
 
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

From d0512f92b614d84cf521913aace14b6accee4182 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Sun, 21 Apr 2013 10:58:46 +0100
Subject: [PATCH 07/54] xticklabels improved

---
 GPy/kern/kern.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 414a911f..ca9d80ea 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -70,8 +70,8 @@ class kern(parameterised):
                     ard_params = 1./p.lengthscale
 
                 ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
-                ax.set_xticks(np.arange(len(ard_params)),
-                              ["${}$".format(i + 1) for i in range(len(ard_params))])
+                ax.set_xticks(np.arange(len(ard_params)))
+                ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
         return ax
 
     def _transform_gradients(self,g):

From 56ecd4782a576c4b471379b594aaa9639f90e799 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 22 Apr 2013 11:59:32 +0100
Subject: [PATCH 08/54] made the basic GP class use dtrtrs where possible

---
 GPy/models/GP.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index cfda0cfe..a46a35d0 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -9,6 +9,7 @@ from ..core import model
 from ..util.linalg import pdinv,mdot
 from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
 from ..likelihoods import EP
+from scipy import linalg
 
 class GP(model):
     """
@@ -78,10 +79,13 @@ class GP(model):
 
         #the gradient of the likelihood wrt the covariance matrix
         if self.likelihood.YYT is None:
-            alpha = np.dot(self.Ki,self.likelihood.Y)
+            #alpha = np.dot(self.Ki,self.likelihood.Y)
+            alpha,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(self.likelihood.Y),lower=1)
             self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
         else:
-            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
+            #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
+            tmp,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(self.likelihood.YYT),lower=1)
+            tmp,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(tmp.T),lower=1)
             self.dL_dK = 0.5*(tmp - self.D*self.Ki)
 
     def _get_params(self):
@@ -105,10 +109,13 @@ class GP(model):
         Computes the model fit using YYT if it's available
         """
         if self.likelihood.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
+            #return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
+            tmp,info = linalg.lapack.flapack.dtrtrs(self.L,np.asfortranarray(self.likelihood.Y),lower=1)
+            return -0.5*np.sum(np.square(tmp))
         else:
             return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
 
+
     def log_likelihood(self):
         """
         The log marginal likelihood of the GP.
@@ -136,8 +143,11 @@ class GP(model):
         for normalization or likelihood
         """
         Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
-        KiKx = np.dot(self.Ki,Kx)
+        #mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
+        tmp,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(self.likelihood.Y),lower=1)
+        mu = np.dot(Kx.T,tmp)
+        #KiKx = np.dot(self.Ki,Kx)
+        KiKx,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(Kx),lower=1)
         if full_cov:
             Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
             var = Kxx - np.dot(KiKx.T,Kx)

From 698f52e5e3cddb34c0524291fbb10165ffae858b Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 22 Apr 2013 13:15:39 +0100
Subject: [PATCH 09/54] GPy now fails silently if sympy is not present

---
 GPy/kern/__init__.py     |  6 ++++-
 GPy/kern/constructors.py | 58 ++++++++++++++++++++++------------------
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index f062ee56..93274ec5 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,9 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+try:
+    from constructors import rbf_sympy, sympykern # these depend on sympy
+except:
+    pass
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 6a968da4..e5743f47 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -165,34 +165,40 @@ def Brownian(D,variance=1.):
     part = Brownianpart(D,variance)
     return kern(D, [part])
 
-import sympy as sp
-from sympykern import spkern
-from sympy.parsing.sympy_parser import parse_expr
+try:
+    import sympy as sp
+    from sympykern import spkern
+    from sympy.parsing.sympy_parser import parse_expr
+    sympy_available = True
+except ImportError:
+    sympy_available = False
 
-def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
-    """
-    Radial Basis Function covariance.
-    """
-    X = [sp.var('x%i'%i) for i in range(D)]
-    Z = [sp.var('z%i'%i) for i in range(D)]
-    rbf_variance = sp.var('rbf_variance',positive=True)
-    if ARD:
-        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
-        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f =  rbf_variance*sp.exp(-dist/2.)
-    else:
-        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-    return kern(D,[spkern(D,f)])
+if sympy_available:
+    def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
+        """
+        Radial Basis Function covariance.
+        """
+        X = [sp.var('x%i'%i) for i in range(D)]
+        Z = [sp.var('z%i'%i) for i in range(D)]
+        rbf_variance = sp.var('rbf_variance',positive=True)
+        if ARD:
+            rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f =  rbf_variance*sp.exp(-dist/2.)
+        else:
+            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+            dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+        return kern(D,[spkern(D,f)])
 
-def sympykern(D,k):
-    """
-    A kernel from a symbolic sympy representation
-    """
-    return kern(D,[spkern(D,k)])
+    def sympykern(D,k):
+        """
+        A kernel from a symbolic sympy representation
+        """
+        return kern(D,[spkern(D,k)])
+del sympy_available
 
 def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
     """

From 8bd017466d7c14a45ae77be3eb309c819d7109ea Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 22 Apr 2013 13:37:59 +0100
Subject: [PATCH 10/54] Nparam_transformed work better now

Before, counted the number of fixes, which failed when a fix fixed more
than one parameter...
---
 GPy/core/parameterised.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index b5d880a3..c80926ce 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -103,10 +103,18 @@ class parameterised(object):
             return expr
 
     def Nparam_transformed(self):
-            ties = 0
-            for ar in self.tied_indices:
-                ties += ar.size - 1
-            return self.Nparam - len(self.constrained_fixed_indices) - ties
+        """
+        Compute the number of parameters after ties and fixing have been performed
+        """
+        ties = 0
+        for ti in self.tied_indices:
+            ties += ti.size - 1
+
+        fixes = 0
+        for fi in self.constrained_fixed_indices:
+            fixes += len(fi)
+
+        return self.Nparam - fixes - ties
 
     def constrain_positive(self, which):
         """

From f1451419232d78dc6fd8cdfc44a95c1ad7640d93 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 10:02:12 +0100
Subject: [PATCH 11/54] added a kernel for independent outputs

---
 GPy/kern/__init__.py            |  2 +-
 GPy/kern/constructors.py        | 12 ++++
 GPy/kern/independent_outputs.py | 97 +++++++++++++++++++++++++++++++++
 3 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/independent_outputs.py

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 93274ec5..327bf69c 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
 try:
     from constructors import rbf_sympy, sympykern # these depend on sympy
 except:
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index e5743f47..9c2464a7 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
 from rbfcos import rbfcos as rbfcospart
+from independent_outputs import independent_outputs as independent_output_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -324,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
     """
     part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
     return kern(D,[part])
+
+def independent_outputs(k):
+    """
+    Construct a kernel with independent outputs from an existing kernel
+    """
+    for sl in k.input_slices:
+        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
+    parts = [independent_output_part(p) for p in k.parts]
+    return kern(k.D+1,parts)
+
+
diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
new file mode 100644
index 00000000..214c542c
--- /dev/null
+++ b/GPy/kern/independent_outputs.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2012, James Hesnsman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import kernpart
+import numpy as np
+
+def index_to_slices(index):
+    """
+    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+
+    e.g.
+    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
+    returns
+    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
+
+    or, a more complicated example
+    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
+    returns
+    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
+    """
+
+    #contruct the return structure
+    ind = np.asarray(index,dtype=np.int64)
+    ret = [[] for i in range(ind.max()+1)]
+
+    #find the switchpoints
+    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
+    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
+
+    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
+    return ret
+
+class independent_outputs(kernpart):
+    """
+    A kernel part shich can reopresent several independent functions.
+    this kernel 'switches off' parts of the matrix where the output indexes are different.
+
+    The index of the functions is given by the last column in the input X
+    the rest of the columns of X are passed to the kernel for computation (in blocks).
+
+    """
+    def __init__(self,k):
+        self.D = k.D + 1
+        self.Nparam = k.Nparam
+        self.name = 'iops('+ k.name + ')'
+        self.k = k
+
+    def _get_params(self):
+        return self.k._get_params()
+
+    def _set_params(self,x):
+        self.k._set_params(x)
+        self.params = x
+
+    def _get_param_names(self):
+        return self.k._get_param_names()
+
+    def K(self,X,X2,target):
+        #Sort out the slices from the input data
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+
+        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def Kdiag(self,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
+
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dtheta(X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dX(X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(X[s],target) for s in slices_i] for slices_i in slices]

From f35578804a2dcbc9066d48a103bcaf4ed1d0fd5d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 10:56:10 +0100
Subject: [PATCH 12/54] prod_orthogonal now caches the K matrices

---
 GPy/kern/coregionalise.py   | 13 ++++++---
 GPy/kern/prod_orthogonal.py | 53 ++++++++++++++++++++-----------------
 2 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index a76bb31e..b1b69325 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -62,11 +62,16 @@ class coregionalise(kernpart):
         ii,jj = np.meshgrid(index,index2)
         ii,jj = ii.T, jj.T
 
+        #dL_dK_small = np.zeros_like(self.B)
+        #for i in range(self.Nout):
+            #for j in range(self.Nout):
+                #tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
+                #dL_dK_small[i,j] = tmp
+        #as above, but slightly faster
         dL_dK_small = np.zeros_like(self.B)
-        for i in range(self.Nout):
-            for j in range(self.Nout):
-                tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
-                dL_dK_small[i,j] = tmp
+        where_i = [ii==i for i in xrange(self.Nout)]
+        where_j = [jj==j for j in xrange(self.Nout)]
+        [[np.put(dL_dK_small,i+self.Nout*j,np.sum(dL_dK[np.logical_and(wi,wj)])) for i,wi in enumerate(where_i)] for j,wj in enumerate(where_j)]
 
         dkappa = np.diag(dL_dK_small)
         dL_dK_small += dL_dK_small.T
diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py
index fc349da8..2afafe25 100644
--- a/GPy/kern/prod_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -22,6 +22,7 @@ class prod_orthogonal(kernpart):
         self.k1 = k1
         self.k2 = k2
         self._set_params(np.hstack((k1._get_params(),k2._get_params())))
+        self._X, self._X2, self._params = np.empty(shape=(3,1)) # initialize cache
 
     def _get_params(self):
         """return the value of the parameters."""
@@ -39,23 +40,38 @@ class prod_orthogonal(kernpart):
 
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
-        if X2 is None: X2 = X
-        target1 = np.zeros_like(target)
-        target2 = np.zeros_like(target)
-        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
-        target += target1 * target2
+        self._K_computations(X,X2)
+        target += self._K1*self._K2
+
+    def _K_computations(self,X,X2):
+        """
+        Compute the two kernel matrices.
+        The computation is only done if needed: many times it will be the same as the previous call
+        """
+        if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
+            #store new values in cache
+            self._X = X.copy()
+            self._X2 = X2.copy()
+            self._params = self._get_params().copy()
+
+            #update self._K1, self._K2
+            if X2 is None: X2 = X
+            self._K1 = np.zeros((X.shape[0],X2.shape[0]))
+            self._K2 = np.zeros((X.shape[0],X2.shape[0]))
+            self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
+            self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
+        self._K_computations(X,X2)
+        self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+        self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
-        self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
-        self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
+    def dK_dX(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        self._K_computations(X,X2)
+        self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -73,17 +89,6 @@ class prod_orthogonal(kernpart):
         self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
         self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
 
-    def dK_dX(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
-
-        self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
-        self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
-
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
         K2 = np.zeros(X.shape[0])

From 9109d451abf6009270c43fc0b88c00bdbd0e6151 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 11:59:00 +0100
Subject: [PATCH 13/54] fixing small bug in independent outputs kern

---
 GPy/kern/independent_outputs.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
index 214c542c..cc7c0051 100644
--- a/GPy/kern/independent_outputs.py
+++ b/GPy/kern/independent_outputs.py
@@ -76,7 +76,7 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dtheta(X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dtheta(dL_dK,X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
 
     def dK_dX(self,dL_dK,X,X2,target):
@@ -85,13 +85,13 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dX(dL_dK,X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
 
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(X[s],target) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target) for s in slices_i] for slices_i in slices]

From d402047ff3274898e4ebf29bcc0149c123ef0495 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 12:01:10 +0100
Subject: [PATCH 14/54] more minor bugs

---
 GPy/kern/independent_outputs.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
index cc7c0051..b94202d7 100644
--- a/GPy/kern/independent_outputs.py
+++ b/GPy/kern/independent_outputs.py
@@ -76,7 +76,7 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dtheta(dL_dK,X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
 
     def dK_dX(self,dL_dK,X,X2,target):
@@ -85,13 +85,13 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(dL_dK,X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
 
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

From 2205c333b2913275218ffdf3156e46f190d3c09d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 12:19:41 +0100
Subject: [PATCH 15/54] fixed a weird regular expression bug in
 ensure_def_constraints

---
 GPy/core/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index f70125fd..e7b993e0 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -13,6 +13,7 @@ import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
+import re
 
 class model(parameterised):
     def __init__(self):
@@ -239,7 +240,7 @@ class model(parameterised):
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(param_names[i])
+                    to_make_positive.append(re.escape(param_names[i]))
                     if warn:
                         print "Warning! constraining %s postive"%name
         if len(to_make_positive):

From dc6faeb30355bf9c6f0f3694e8546bcdf26372a8 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 13:44:31 +0100
Subject: [PATCH 16/54] psi stat tests

---
 GPy/testing/psi_stat_tests.py | 102 ++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 GPy/testing/psi_stat_tests.py

diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
new file mode 100644
index 00000000..93f9867c
--- /dev/null
+++ b/GPy/testing/psi_stat_tests.py
@@ -0,0 +1,102 @@
+'''
+Created on 22 Apr 2013
+
+@author: maxz
+'''
+import unittest
+import numpy
+
+from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
+import GPy
+import itertools
+from GPy.core import model
+
+class PsiStatModel(model):
+    def __init__(self, which, X, X_variance, Z, M, kernel, mu_or_S, dL_=numpy.ones((1, 1))):
+        self.which = which
+        self.dL_ = dL_
+        self.X = X
+        self.X_variance = X_variance
+        self.Z = Z
+        self.N, self.Q = X.shape
+        self.M, Q = Z.shape
+        self.mu_or_S = mu_or_S
+        assert self.Q == Q, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
+        self.kern = kernel
+        super(PsiStatModel, self).__init__()
+    def _get_param_names(self):
+        Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.Q))]
+        Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.Q))]
+        return Xnames + Znames + self.kern._get_param_names()
+    def _get_params(self):
+        return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
+    def _set_params(self, x, save_old=True, save_count=0):
+        start, end = 0, self.X.size
+        self.X = x[start:end].reshape(self.N, self.Q)
+        start, end = end, end + self.X_variance.size
+        self.X_variance = x[start: end].reshape(self.N, self.Q)
+        start, end = end, end + self.Z.size
+        self.Z = x[start: end].reshape(self.M, self.Q)
+        self.kern._set_params(x[end:])
+    def log_likelihood(self):
+#         if '2' in self.which:
+#             norm = self.N ** 2
+#         else:  # '0', '1' in self.which:
+#             norm = self.N
+        return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
+    def _log_likelihood_gradients(self):
+        psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
+        psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(psi_), self.Z, self.X, self.X_variance)
+        try:
+            psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(psi_), self.Z, self.X, self.X_variance)
+        except AttributeError:
+            psiZ = numpy.zeros(self.M * self.Q)
+        thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(psi_), self.Z, self.X, self.X_variance).flatten()
+        return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
+
+class Test(unittest.TestCase):
+    Q = 5
+    N = 50
+    M = 10
+    D = 10
+    X = numpy.random.randn(N, Q)
+    X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
+    Z = numpy.random.permutation(X)[:M]
+    Y = X.dot(numpy.random.randn(Q, D))
+
+    def testPsi0(self):
+        kernel = GPy.kern.linear(Q)
+        m = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1)))
+        assert m.checkgrad(), "linear x psi0"
+
+    def testPsi1(self):
+        kernel = GPy.kern.linear(Q)
+        m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1, 1)))
+        assert(m.checkgrad())
+
+    def testPsi2(self):
+        kernel = GPy.kern.linear(Q)
+        m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1, 1, 1)))
+        assert(m.checkgrad())
+
+
+if __name__ == "__main__":
+    Q = 5
+    N = 50
+    M = 10
+    D = 10
+    X = numpy.random.randn(N, Q)
+    X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
+    Z = numpy.random.permutation(X)[:M]
+    Y = X.dot(numpy.random.randn(Q, D))
+    kernel = GPy.kern.linear(Q)  # GPy.kern.bias(Q)  # GPy.kern.linear(Q) + GPy.kern.rbf(Q)
+    m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=kernel, mu_or_S=0, dL_=numpy.ones((1)))
+    m1 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=kernel, mu_or_S=0, dL_=numpy.ones((1)))
+    m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=kernel, mu_or_S=0, dL_=numpy.ones((1, 1, 1)))
+

From 0c8b83454f5ca772d2d87180ccbe891a295fcf8b Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 14:02:15 +0100
Subject: [PATCH 17/54] Revert "merge devel mrd"

This reverts commit 3f625a9347fde47625f14898c0a3a6ed4f49b55a, reversing
changes made to dc6faeb30355bf9c6f0f3694e8546bcdf26372a8.
---
 GPy/core/model.py               |  3 +-
 GPy/core/parameterised.py       | 16 ++----
 GPy/kern/__init__.py            |  6 +-
 GPy/kern/constructors.py        | 70 +++++++++---------------
 GPy/kern/coregionalise.py       | 13 ++---
 GPy/kern/independent_outputs.py | 97 ---------------------------------
 GPy/kern/prod_orthogonal.py     | 53 ++++++++----------
 GPy/models/sparse_GP.py         |  5 +-
 8 files changed, 61 insertions(+), 202 deletions(-)
 delete mode 100644 GPy/kern/independent_outputs.py

diff --git a/GPy/core/model.py b/GPy/core/model.py
index e7b993e0..f70125fd 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -13,7 +13,6 @@ import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
-import re
 
 class model(parameterised):
     def __init__(self):
@@ -240,7 +239,7 @@ class model(parameterised):
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(re.escape(param_names[i]))
+                    to_make_positive.append(param_names[i])
                     if warn:
                         print "Warning! constraining %s postive"%name
         if len(to_make_positive):
diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index c80926ce..b5d880a3 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -103,18 +103,10 @@ class parameterised(object):
             return expr
 
     def Nparam_transformed(self):
-        """
-        Compute the number of parameters after ties and fixing have been performed
-        """
-        ties = 0
-        for ti in self.tied_indices:
-            ties += ti.size - 1
-
-        fixes = 0
-        for fi in self.constrained_fixed_indices:
-            fixes += len(fi)
-
-        return self.Nparam - fixes - ties
+            ties = 0
+            for ar in self.tied_indices:
+                ties += ar.size - 1
+            return self.Nparam - len(self.constrained_fixed_indices) - ties
 
     def constrain_positive(self, which):
         """
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 327bf69c..f062ee56 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,9 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
-try:
-    from constructors import rbf_sympy, sympykern # these depend on sympy
-except:
-    pass
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 9c2464a7..6a968da4 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -25,7 +25,6 @@ from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
 from rbfcos import rbfcos as rbfcospart
-from independent_outputs import independent_outputs as independent_output_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -166,40 +165,34 @@ def Brownian(D,variance=1.):
     part = Brownianpart(D,variance)
     return kern(D, [part])
 
-try:
-    import sympy as sp
-    from sympykern import spkern
-    from sympy.parsing.sympy_parser import parse_expr
-    sympy_available = True
-except ImportError:
-    sympy_available = False
+import sympy as sp
+from sympykern import spkern
+from sympy.parsing.sympy_parser import parse_expr
 
-if sympy_available:
-    def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
-        """
-        Radial Basis Function covariance.
-        """
-        X = [sp.var('x%i'%i) for i in range(D)]
-        Z = [sp.var('z%i'%i) for i in range(D)]
-        rbf_variance = sp.var('rbf_variance',positive=True)
-        if ARD:
-            rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
-            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
-            dist = parse_expr(dist_string)
-            f =  rbf_variance*sp.exp(-dist/2.)
-        else:
-            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-            dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-            dist = parse_expr(dist_string)
-            f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-        return kern(D,[spkern(D,f)])
+def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
+    """
+    Radial Basis Function covariance.
+    """
+    X = [sp.var('x%i'%i) for i in range(D)]
+    Z = [sp.var('z%i'%i) for i in range(D)]
+    rbf_variance = sp.var('rbf_variance',positive=True)
+    if ARD:
+        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+        dist = parse_expr(dist_string)
+        f =  rbf_variance*sp.exp(-dist/2.)
+    else:
+        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+        dist = parse_expr(dist_string)
+        f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+    return kern(D,[spkern(D,f)])
 
-    def sympykern(D,k):
-        """
-        A kernel from a symbolic sympy representation
-        """
-        return kern(D,[spkern(D,k)])
-del sympy_available
+def sympykern(D,k):
+    """
+    A kernel from a symbolic sympy representation
+    """
+    return kern(D,[spkern(D,k)])
 
 def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
     """
@@ -325,14 +318,3 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
     """
     part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
     return kern(D,[part])
-
-def independent_outputs(k):
-    """
-    Construct a kernel with independent outputs from an existing kernel
-    """
-    for sl in k.input_slices:
-        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
-    parts = [independent_output_part(p) for p in k.parts]
-    return kern(k.D+1,parts)
-
-
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index b1b69325..a76bb31e 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -62,16 +62,11 @@ class coregionalise(kernpart):
         ii,jj = np.meshgrid(index,index2)
         ii,jj = ii.T, jj.T
 
-        #dL_dK_small = np.zeros_like(self.B)
-        #for i in range(self.Nout):
-            #for j in range(self.Nout):
-                #tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
-                #dL_dK_small[i,j] = tmp
-        #as above, but slightly faster
         dL_dK_small = np.zeros_like(self.B)
-        where_i = [ii==i for i in xrange(self.Nout)]
-        where_j = [jj==j for j in xrange(self.Nout)]
-        [[np.put(dL_dK_small,i+self.Nout*j,np.sum(dL_dK[np.logical_and(wi,wj)])) for i,wi in enumerate(where_i)] for j,wj in enumerate(where_j)]
+        for i in range(self.Nout):
+            for j in range(self.Nout):
+                tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
+                dL_dK_small[i,j] = tmp
 
         dkappa = np.diag(dL_dK_small)
         dL_dK_small += dL_dK_small.T
diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
deleted file mode 100644
index b94202d7..00000000
--- a/GPy/kern/independent_outputs.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2012, James Hesnsman
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-from kernpart import kernpart
-import numpy as np
-
-def index_to_slices(index):
-    """
-    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
-
-    e.g.
-    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
-    returns
-    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
-
-    or, a more complicated example
-    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
-    returns
-    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
-    """
-
-    #contruct the return structure
-    ind = np.asarray(index,dtype=np.int64)
-    ret = [[] for i in range(ind.max()+1)]
-
-    #find the switchpoints
-    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
-    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
-
-    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
-    return ret
-
-class independent_outputs(kernpart):
-    """
-    A kernel part shich can reopresent several independent functions.
-    this kernel 'switches off' parts of the matrix where the output indexes are different.
-
-    The index of the functions is given by the last column in the input X
-    the rest of the columns of X are passed to the kernel for computation (in blocks).
-
-    """
-    def __init__(self,k):
-        self.D = k.D + 1
-        self.Nparam = k.Nparam
-        self.name = 'iops('+ k.name + ')'
-        self.k = k
-
-    def _get_params(self):
-        return self.k._get_params()
-
-    def _set_params(self,x):
-        self.k._set_params(x)
-        self.params = x
-
-    def _get_param_names(self):
-        return self.k._get_param_names()
-
-    def K(self,X,X2,target):
-        #Sort out the slices from the input data
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        if X2 is None:
-            X2,slices2 = X,slices
-        else:
-            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-
-        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
-    def Kdiag(self,X,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
-
-    def dK_dtheta(self,dL_dK,X,X2,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        if X2 is None:
-            X2,slices2 = X,slices
-        else:
-            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
-
-    def dK_dX(self,dL_dK,X,X2,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        if X2 is None:
-            X2,slices2 = X,slices
-        else:
-            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
-    def dKdiag_dX(self,dL_dKdiag,X,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
-
-
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py
index 2afafe25..fc349da8 100644
--- a/GPy/kern/prod_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -22,7 +22,6 @@ class prod_orthogonal(kernpart):
         self.k1 = k1
         self.k2 = k2
         self._set_params(np.hstack((k1._get_params(),k2._get_params())))
-        self._X, self._X2, self._params = np.empty(shape=(3,1)) # initialize cache
 
     def _get_params(self):
         """return the value of the parameters."""
@@ -40,38 +39,23 @@ class prod_orthogonal(kernpart):
 
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
-        self._K_computations(X,X2)
-        target += self._K1*self._K2
-
-    def _K_computations(self,X,X2):
-        """
-        Compute the two kernel matrices.
-        The computation is only done if needed: many times it will be the same as the previous call
-        """
-        if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
-            #store new values in cache
-            self._X = X.copy()
-            self._X2 = X2.copy()
-            self._params = self._get_params().copy()
-
-            #update self._K1, self._K2
-            if X2 is None: X2 = X
-            self._K1 = np.zeros((X.shape[0],X2.shape[0]))
-            self._K2 = np.zeros((X.shape[0],X2.shape[0]))
-            self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
-            self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
+        if X2 is None: X2 = X
+        target1 = np.zeros_like(target)
+        target2 = np.zeros_like(target)
+        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
+        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
+        target += target1 * target2
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
-        self._K_computations(X,X2)
-        self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
-        self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
+        if X2 is None: X2 = X
+        K1 = np.zeros((X.shape[0],X2.shape[0]))
+        K2 = np.zeros((X.shape[0],X2.shape[0]))
+        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
+        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
 
-    def dK_dX(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X,X2)
-        self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
-        self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
+        self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+        self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -89,6 +73,17 @@ class prod_orthogonal(kernpart):
         self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
         self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
 
+    def dK_dX(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        if X2 is None: X2 = X
+        K1 = np.zeros((X.shape[0],X2.shape[0]))
+        K2 = np.zeros((X.shape[0],X2.shape[0]))
+        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
+        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
+
+        self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
+
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
         K2 = np.zeros(X.shape[0])
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 16b22094..4d9edacc 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -148,10 +148,7 @@ class sparse_GP(GP):
         #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
         self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
-        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
-        #tmp = np.dot(tmp,self.Kmmi)
-        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
-        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 - self.Kmmi) + self.E) + tmp # d(C+D)
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From 2c3a53b1740bfbb85a55d827788b1995176bb0b3 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 14:10:38 +0100
Subject: [PATCH 18/54] psi stat tests done and failing gracefully

---
 GPy/testing/psi_stat_tests.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
index 93f9867c..22737ca1 100644
--- a/GPy/testing/psi_stat_tests.py
+++ b/GPy/testing/psi_stat_tests.py
@@ -39,10 +39,6 @@ class PsiStatModel(model):
         self.Z = x[start: end].reshape(self.M, self.Q)
         self.kern._set_params(x[end:])
     def log_likelihood(self):
-#         if '2' in self.which:
-#             norm = self.N ** 2
-#         else:  # '0', '1' in self.which:
-#             norm = self.N
         return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
     def _log_likelihood_gradients(self):
         psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
@@ -64,23 +60,27 @@ class Test(unittest.TestCase):
     Z = numpy.random.permutation(X)[:M]
     Y = X.dot(numpy.random.randn(Q, D))
 
+    kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
+               GPy.kern.linear(Q) + GPy.kern.bias(Q),
+               GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
+
     def testPsi0(self):
-        kernel = GPy.kern.linear(Q)
-        m = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1)))
-        assert m.checkgrad(), "linear x psi0"
+        for k in self.kernels:
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+                         M=self.M, kernel=k)
+            assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
 
     def testPsi1(self):
-        kernel = GPy.kern.linear(Q)
-        m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1, 1)))
-        assert(m.checkgrad())
+        for k in self.kernels:
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+                         M=self.M, kernel=k)
+            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 
     def testPsi2(self):
-        kernel = GPy.kern.linear(Q)
-        m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1, 1, 1)))
-        assert(m.checkgrad())
+        for k in self.kernels:
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+                         M=self.M, kernel=k)
+            assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
 
 
 if __name__ == "__main__":

From f01be172beee0e6df3b0447cccbfc4099cf34fdb Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 15:22:30 +0100
Subject: [PATCH 19/54] moved *2. of psi2 statistics into kern and corrected
 bias+linear cross term

---
 GPy/kern/kern.py              | 483 +++++++++++++++++-----------------
 GPy/models/sparse_GP.py       |   4 +-
 GPy/testing/psi_stat_tests.py |  54 ++--
 3 files changed, 282 insertions(+), 259 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 414a911f..d1350be5 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -11,7 +11,7 @@ from prod_orthogonal import prod_orthogonal
 from prod import prod
 
 class kern(parameterised):
-    def __init__(self,D,parts=[], input_slices=None):
+    def __init__(self, D, parts=[], input_slices=None):
         """
         This kernel does 'compound' structures.
 
@@ -37,15 +37,15 @@ class kern(parameterised):
 
         self.D = D
 
-        #deal with input_slices
+        # deal with input_slices
         if input_slices is None:
             self.input_slices = [slice(None) for p in self.parts]
         else:
-            assert len(input_slices)==len(self.parts)
+            assert len(input_slices) == len(self.parts)
             self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
 
         for p in self.parts:
-            assert isinstance(p,kernpart), "bad kernel part"
+            assert isinstance(p, kernpart), "bad kernel part"
 
         self.compute_param_slices()
 
@@ -67,22 +67,22 @@ class kern(parameterised):
                 if p.name == 'linear':
                     ard_params = p.variances
                 else:
-                    ard_params = 1./p.lengthscale
+                    ard_params = 1. / p.lengthscale
 
                 ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
                 ax.set_xticks(np.arange(len(ard_params)),
                               ["${}$".format(i + 1) for i in range(len(ard_params))])
         return ax
 
-    def _transform_gradients(self,g):
+    def _transform_gradients(self, g):
         x = self._get_params()
-        g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
-        g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
-        [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
-        [np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
+        g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
+        g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
+        [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
         if len(self.tied_indices) or len(self.constrained_fixed_indices):
-            to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
-            return np.delete(g,to_remove)
+            to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
+            return np.delete(g, to_remove)
         else:
             return g
 
@@ -91,10 +91,10 @@ class kern(parameterised):
         self.param_slices = []
         count = 0
         for p in self.parts:
-            self.param_slices.append(slice(count,count+p.Nparam))
+            self.param_slices.append(slice(count, count + p.Nparam))
             count += p.Nparam
 
-    def _process_slices(self,slices1=None,slices2=None):
+    def _process_slices(self, slices1=None, slices2=None):
         """
         Format the slices so that they can easily be used.
         Both slices can be any of three things:
@@ -107,13 +107,13 @@ class kern(parameterised):
         returns actual lists of slice objects
         """
         if slices1 is None:
-            slices1 = [slice(None)]*self.Nparts
+            slices1 = [slice(None)] * self.Nparts
         elif all([type(s_i) is bool for s_i in slices1]):
             slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
         else:
             assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
         if slices2 is None:
-            slices2 = [slice(None)]*self.Nparts
+            slices2 = [slice(None)] * self.Nparts
         elif slices2 is False:
             return slices1
         elif all([type(s_i) is bool for s_i in slices2]):
@@ -122,10 +122,10 @@ class kern(parameterised):
             assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
         return slices1, slices2
 
-    def __add__(self,other):
+    def __add__(self, other):
         assert self.D == other.D
-        newkern =  kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices)
-        #transfer constraints:
+        newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
+        # transfer constraints:
         newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
         newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
         newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
@@ -136,29 +136,29 @@ class kern(parameterised):
         newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
         return newkern
 
-    def add(self,other):
+    def add(self, other):
         """
         Add another kernel to this one. Both kernels are defined on the same _space_
         :param other: the other kernel to be added
         :type other: GPy.kern
         """
-        return self + other
+        return self +other
 
-    def add_orthogonal(self,other):
+    def add_orthogonal(self, other):
         """
         Add another kernel to this one. Both kernels are defined on separate spaces
         :param other: the other kernel to be added
         :type other: GPy.kern
         """
-        #deal with input slices
+        # deal with input slices
         D = self.D + other.D
         self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
         other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
-        other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices]
+        other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
 
         newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
 
-        #transfer constraints:
+        # transfer constraints:
         newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
         newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
         newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
@@ -169,13 +169,13 @@ class kern(parameterised):
         newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
         return newkern
 
-    def __mul__(self,other):
+    def __mul__(self, other):
         """
         Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
         """
         return self.prod(other)
 
-    def prod(self,other):
+    def prod(self, other):
         """
         multiply two kernels defined on the same spaces.
         :param other: the other kernel to be added
@@ -184,20 +184,20 @@ class kern(parameterised):
         K1 = self.copy()
         K2 = other.copy()
 
-        newkernparts = [prod(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
+        newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
 
         slices = []
-        for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
-            s1, s2 = [False]*K1.D, [False]*K2.D
+        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
+            s1, s2 = [False] * K1.D, [False] * K2.D
             s1[sl1], s2[sl2] = [True], [True]
-            slices += [s1+s2]
+            slices += [s1 + s2]
 
         newkern = kern(K1.D, newkernparts, slices)
-        newkern._follow_constrains(K1,K2)
+        newkern._follow_constrains(K1, K2)
 
         return newkern
 
-    def prod_orthogonal(self,other):
+    def prod_orthogonal(self, other):
         """
         multiply two kernels. Both kernels are defined on separate spaces.
         :param other: the other kernel to be added
@@ -206,31 +206,31 @@ class kern(parameterised):
         K1 = self.copy()
         K2 = other.copy()
 
-        newkernparts = [prod_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
+        newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
 
         slices = []
-        for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
-            s1, s2 = [False]*K1.D, [False]*K2.D
+        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
+            s1, s2 = [False] * K1.D, [False] * K2.D
             s1[sl1], s2[sl2] = [True], [True]
-            slices += [s1+s2]
+            slices += [s1 + s2]
 
         newkern = kern(K1.D + K2.D, newkernparts, slices)
-        newkern._follow_constrains(K1,K2)
+        newkern._follow_constrains(K1, K2)
 
         return newkern
 
-    def _follow_constrains(self,K1,K2):
+    def _follow_constrains(self, K1, K2):
 
         # Build the array that allows to go from the initial indices of the param to the new ones
         K1_param = []
         n = 0
         for k1 in K1.parts:
-            K1_param += [range(n,n+k1.Nparam)]
+            K1_param += [range(n, n + k1.Nparam)]
             n += k1.Nparam
         n = 0
         K2_param = []
         for k2 in K2.parts:
-            K2_param += [range(K1.Nparam+n,K1.Nparam+n+k2.Nparam)]
+            K2_param += [range(K1.Nparam + n, K1.Nparam + n + k2.Nparam)]
             n += k2.Nparam
         index_param = []
         for p1 in K1_param:
@@ -254,47 +254,47 @@ class kern(parameterised):
         # follow the previous ties
         for arr in prev_ties:
             for j in arr:
-                index_param[np.where(index_param==j)[0]] = arr[0]
+                index_param[np.where(index_param == j)[0]] = arr[0]
 
         # ties and constrains
         for i in range(K1.Nparam + K2.Nparam):
-            index = np.where(index_param==i)[0]
+            index = np.where(index_param == i)[0]
             if index.size > 1:
                 self.tie_params(index)
         for i in prev_constr_pos:
-            self.constrain_positive(np.where(index_param==i)[0])
+            self.constrain_positive(np.where(index_param == i)[0])
         for i in prev_constr_neg:
-            self.constrain_neg(np.where(index_param==i)[0])
+            self.constrain_neg(np.where(index_param == i)[0])
         for j, i in enumerate(prev_constr_fix):
-            self.constrain_fixed(np.where(index_param==i)[0],prev_constr_fix_values[j])
+            self.constrain_fixed(np.where(index_param == i)[0], prev_constr_fix_values[j])
         for j, i in enumerate(prev_constr_bou):
-            self.constrain_bounded(np.where(index_param==i)[0],prev_constr_bou_low[j],prev_constr_bou_upp[j])
+            self.constrain_bounded(np.where(index_param == i)[0], prev_constr_bou_low[j], prev_constr_bou_upp[j])
 
     def _get_params(self):
         return np.hstack([p._get_params() for p in self.parts])
 
-    def _set_params(self,x):
+    def _set_params(self, x):
         [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
 
     def _get_param_names(self):
-        #this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
-        part_names = np.array([k.name for k in self.parts],dtype=np.str)
-        counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)]
-        cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)]
-        names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)]
+        # this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
+        part_names = np.array([k.name for k in self.parts], dtype=np.str)
+        counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
+        cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
+        names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
 
-        return sum([[name+'_'+n for n in k._get_param_names()] for name,k in zip(names,self.parts)],[])
+        return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
 
-    def K(self,X,X2=None,slices1=None,slices2=None):
-        assert X.shape[1]==self.D
-        slices1, slices2 = self._process_slices(slices1,slices2)
+    def K(self, X, X2=None, slices1=None, slices2=None):
+        assert X.shape[1] == self.D
+        slices1, slices2 = self._process_slices(slices1, slices2)
         if X2 is None:
             X2 = X
-        target = np.zeros((X.shape[0],X2.shape[0]))
-        [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        target = np.zeros((X.shape[0], X2.shape[0]))
+        [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
         return target
 
-    def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None):
+    def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
         """
         :param dL_dK: An array of dL_dK derivaties, dL_dK
         :type dL_dK: Np.ndarray (N x M)
@@ -306,282 +306,283 @@ class kern(parameterised):
         :type slices1: list of slice objects, or list of booleans
         :param slices2: slices for X2
         """
-        assert X.shape[1]==self.D
-        slices1, slices2 = self._process_slices(slices1,slices2)
+        assert X.shape[1] == self.D
+        slices1, slices2 = self._process_slices(slices1, slices2)
         if X2 is None:
             X2 = X
         target = np.zeros(self.Nparam)
-        [p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
+        [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
 
         return self._transform_gradients(target)
 
-    def dK_dX(self,dL_dK,X,X2=None,slices1=None,slices2=None):
+    def dK_dX(self, dL_dK, X, X2=None, slices1=None, slices2=None):
         if X2 is None:
             X2 = X
-        slices1, slices2 = self._process_slices(slices1,slices2)
+        slices1, slices2 = self._process_slices(slices1, slices2)
         target = np.zeros_like(X)
-        [p.dK_dX(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.dK_dX(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
         return target
 
-    def Kdiag(self,X,slices=None):
-        assert X.shape[1]==self.D
-        slices = self._process_slices(slices,False)
+    def Kdiag(self, X, slices=None):
+        assert X.shape[1] == self.D
+        slices = self._process_slices(slices, False)
         target = np.zeros(X.shape[0])
-        [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
+        [p.Kdiag(X[s, i_s], target=target[s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
         return target
 
-    def dKdiag_dtheta(self,dL_dKdiag,X,slices=None):
-        assert X.shape[1]==self.D
-        assert len(dL_dKdiag.shape)==1
-        assert dL_dKdiag.size==X.shape[0]
-        slices = self._process_slices(slices,False)
+    def dKdiag_dtheta(self, dL_dKdiag, X, slices=None):
+        assert X.shape[1] == self.D
+        assert len(dL_dKdiag.shape) == 1
+        assert dL_dKdiag.size == X.shape[0]
+        slices = self._process_slices(slices, False)
         target = np.zeros(self.Nparam)
-        [p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
+        [p.dKdiag_dtheta(dL_dKdiag[s], X[s, i_s], target[ps]) for p, i_s, s, ps in zip(self.parts, self.input_slices, slices, self.param_slices)]
         return self._transform_gradients(target)
 
     def dKdiag_dX(self, dL_dKdiag, X, slices=None):
-        assert X.shape[1]==self.D
-        slices = self._process_slices(slices,False)
+        assert X.shape[1] == self.D
+        slices = self._process_slices(slices, False)
         target = np.zeros_like(X)
-        [p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
+        [p.dKdiag_dX(dL_dKdiag[s], X[s, i_s], target[s, i_s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
         return target
 
-    def psi0(self,Z,mu,S,slices=None):
-        slices = self._process_slices(slices,False)
+    def psi0(self, Z, mu, S, slices=None):
+        slices = self._process_slices(slices, False)
         target = np.zeros(mu.shape[0])
-        [p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)]
+        [p.psi0(Z, mu[s], S[s], target[s]) for p, s in zip(self.parts, slices)]
         return target
 
-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None):
-        slices = self._process_slices(slices,False)
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, slices=None):
+        slices = self._process_slices(slices, False)
         target = np.zeros(self.Nparam)
-        [p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
+        [p.dpsi0_dtheta(dL_dpsi0[s], Z, mu[s], S[s], target[ps]) for p, ps, s in zip(self.parts, self.param_slices, slices)]
         return self._transform_gradients(target)
 
-    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None):
-        slices = self._process_slices(slices,False)
-        target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
-        [p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
-        return target_mu,target_S
-
-    def psi1(self,Z,mu,S,slices1=None,slices2=None):
-        """Think N,M,Q """
-        slices1, slices2 = self._process_slices(slices1,slices2)
-        target = np.zeros((mu.shape[0],Z.shape[0]))
-        [p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)]
-        return target
-
-    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
-        """N,M,(Ntheta)"""
-        slices1, slices2 = self._process_slices(slices1,slices2)
-        target = np.zeros((self.Nparam))
-        [p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
-        return self._transform_gradients(target)
-
-    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
-        """N,M,Q"""
-        slices1, slices2 = self._process_slices(slices1,slices2)
-        target = np.zeros_like(Z)
-        [p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
-        return target
-
-    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
-        """return shapes are N,M,Q"""
-        slices1, slices2 = self._process_slices(slices1,slices2)
-        target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
-        [p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, slices=None):
+        slices = self._process_slices(slices, False)
+        target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
+        [p.dpsi0_dmuS(dL_dpsi0, Z, mu[s], S[s], target_mu[s], target_S[s]) for p, s in zip(self.parts, slices)]
         return target_mu, target_S
 
-    def psi2(self,Z,mu,S,slices1=None,slices2=None):
+    def psi1(self, Z, mu, S, slices1=None, slices2=None):
+        """Think N,M,Q """
+        slices1, slices2 = self._process_slices(slices1, slices2)
+        target = np.zeros((mu.shape[0], Z.shape[0]))
+        [p.psi1(Z[s2], mu[s1], S[s1], target[s1, s2]) for p, s1, s2 in zip(self.parts, slices1, slices2)]
+        return target
+
+    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
+        """N,M,(Ntheta)"""
+        slices1, slices2 = self._process_slices(slices1, slices2)
+        target = np.zeros((self.Nparam))
+        [p.dpsi1_dtheta(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, ps, s1, s2, i_s in zip(self.parts, self.param_slices, slices1, slices2, self.input_slices)]
+        return self._transform_gradients(target)
+
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
+        """N,M,Q"""
+        slices1, slices2 = self._process_slices(slices1, slices2)
+        target = np.zeros_like(Z)
+        [p.dpsi1_dZ(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        return target
+
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
+        """return shapes are N,M,Q"""
+        slices1, slices2 = self._process_slices(slices1, slices2)
+        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
+        [p.dpsi1_dmuS(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        return target_mu, target_S
+
+    def psi2(self, Z, mu, S, slices1=None, slices2=None):
         """
         :param Z: np.ndarray of inducing inputs (M x Q)
         :param mu, S: np.ndarrays of means and variances (each N x Q)
         :returns psi2: np.ndarray (N,M,M)
         """
-        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
-        slices1, slices2 = self._process_slices(slices1,slices2)
-        [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
+        slices1, slices2 = self._process_slices(slices1, slices2)
+        [p.psi2(Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s1, s2, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
 
-        #compute the "cross" terms
-        for p1, p2 in itertools.combinations(self.parts,2):
-            #white doesn;t combine with anything
-            if p1.name=='white' or p2.name=='white':
+        # compute the "cross" terms
+        for p1, p2 in itertools.combinations(self.parts, 2):
+            # white doesn;t combine with anything
+            if p1.name == 'white' or p2.name == 'white':
                 pass
-            #rbf X bias
-            elif p1.name=='bias' and p2.name=='rbf':
-                target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
-            elif p2.name=='bias' and p1.name=='rbf':
-                target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
-            #linear X bias
-            elif p1.name=='bias' and p2.name=='linear':
-                tmp = np.zeros((mu.shape[0],Z.shape[0]))
-                p2.psi1(Z,mu,S,tmp)
-                target += p1.variance*(tmp[:,:,None] + tmp[:,None,:])
-            elif p2.name=='bias' and p1.name=='linear':
-                tmp = np.zeros((mu.shape[0],Z.shape[0]))
-                p1.psi1(Z,mu,S,tmp)
-                target += p2.variance*(tmp[:,:,None] + tmp[:,None,:])
-            #rbf X linear
-            elif p1.name=='linear' and p2.name=='rbf':
-                raise NotImplementedError #TODO
-            elif p2.name=='linear' and p1.name=='rbf':
-                raise NotImplementedError #TODO
+            # rbf X bias
+            elif p1.name == 'bias' and p2.name == 'rbf':
+                target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
+            elif p2.name == 'bias' and p1.name == 'rbf':
+                target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
+            # linear X bias
+            elif p1.name == 'bias' and p2.name == 'linear':
+                tmp = np.zeros((mu.shape[0], Z.shape[0]))
+                p2.psi1(Z, mu, S, tmp)
+                target += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
+            elif p2.name == 'bias' and p1.name == 'linear':
+                tmp = np.zeros((mu.shape[0], Z.shape[0]))
+                p1.psi1(Z, mu, S, tmp)
+                target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            # rbf X linear
+            elif p1.name == 'linear' and p2.name == 'rbf':
+                raise NotImplementedError  # TODO
+            elif p2.name == 'linear' and p1.name == 'rbf':
+                raise NotImplementedError  # TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return target
 
-    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
-        slices1, slices2 = self._process_slices(slices1,slices2)
+        slices1, slices2 = self._process_slices(slices1, slices2)
         target = np.zeros(self.Nparam)
-        [p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
+        [p.dpsi2_dtheta(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, i_s, s1, s2, ps in zip(self.parts, self.input_slices, slices1, slices2, self.param_slices)]
 
-        #compute the "cross" terms
-        #TODO: better looping
-        for i1, i2 in itertools.combinations(range(len(self.parts)),2):
-            p1,p2 = self.parts[i1], self.parts[i2]
-            ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
+        # compute the "cross" terms
+        # TODO: better looping
+        for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
+            p1, p2 = self.parts[i1], self.parts[i2]
+#             ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
             ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
 
-            #white doesn;t combine with anything
-            if p1.name=='white' or p2.name=='white':
+            # white doesn;t combine with anything
+            if p1.name == 'white' or p2.name == 'white':
                 pass
-            #rbf X bias
-            elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2.,Z,mu,S,target[ps2])
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1*2.,Z,mu,S,target[ps1])
-            elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
-            #linear X bias
-            elif p1.name=='bias' and p2.name=='linear':
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2., Z, mu, S, target[ps1])
-            elif p2.name=='bias' and p1.name=='linear':
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2., Z, mu, S, target[ps1])
-            #rbf X linear
-            elif p1.name=='linear' and p2.name=='rbf':
-                raise NotImplementedError #TODO
-            elif p2.name=='linear' and p1.name=='rbf':
-                raise NotImplementedError #TODO
+            # rbf X bias
+            elif p1.name == 'bias' and p2.name == 'rbf':
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1])
+            elif p2.name == 'bias' and p1.name == 'rbf':
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
+            # linear X bias
+            elif p1.name == 'bias' and p2.name == 'linear':
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target)
+            elif p2.name == 'bias' and p1.name == 'linear':
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target)
+                pass
+            # rbf X linear
+            elif p1.name == 'linear' and p2.name == 'rbf':
+                raise NotImplementedError  # TODO
+            elif p2.name == 'linear' and p1.name == 'rbf':
+                raise NotImplementedError  # TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
         return self._transform_gradients(target)
 
-    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
-        slices1, slices2 = self._process_slices(slices1,slices2)
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
+        slices1, slices2 = self._process_slices(slices1, slices2)
         target = np.zeros_like(Z)
-        [p.dpsi2_dZ(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dZ(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
 
-        #compute the "cross" terms
-        for p1, p2 in itertools.combinations(self.parts,2):
-            #white doesn;t combine with anything
-            if p1.name=='white' or p2.name=='white':
+        # compute the "cross" terms
+        for p1, p2 in itertools.combinations(self.parts, 2):
+            # white doesn;t combine with anything
+            if p1.name == 'white' or p2.name == 'white':
                 pass
-            #rbf X bias
-            elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dX(dL_dpsi2.sum(1).T*p1.variance,Z,mu,S,target)
-            elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance,Z,mu,S,target)
-            #linear X bias
-            elif p1.name=='bias' and p2.name=='linear':
-                p2.dpsi1_dZ(dL_dpsi2.sum(1).T*p1.variance, Z, mu, S, target)
-            elif p2.name=='bias' and p1.name=='linear':
-                p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance, Z, mu, S, target)
-            #rbf X linear
-            elif p1.name=='linear' and p2.name=='rbf':
-                raise NotImplementedError #TODO
-            elif p2.name=='linear' and p1.name=='rbf':
-                raise NotImplementedError #TODO
+            # rbf X bias
+            elif p1.name == 'bias' and p2.name == 'rbf':
+                p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
+            elif p2.name == 'bias' and p1.name == 'rbf':
+                p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
+            # linear X bias
+            elif p1.name == 'bias' and p2.name == 'linear':
+                p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
+            elif p2.name == 'bias' and p1.name == 'linear':
+                p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
+            # rbf X linear
+            elif p1.name == 'linear' and p2.name == 'rbf':
+                raise NotImplementedError  # TODO
+            elif p2.name == 'linear' and p1.name == 'rbf':
+                raise NotImplementedError  # TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
 
-        return target
+        return target * 2.
 
-    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
         """return shapes are N,M,M,Q"""
-        slices1, slices2 = self._process_slices(slices1,slices2)
-        target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
-        [p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        slices1, slices2 = self._process_slices(slices1, slices2)
+        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
+        [p.dpsi2_dmuS(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
 
-        #compute the "cross" terms
-        for p1, p2 in itertools.combinations(self.parts,2):
-            #white doesn;t combine with anything
-            if p1.name=='white' or p2.name=='white':
+        # compute the "cross" terms
+        for p1, p2 in itertools.combinations(self.parts, 2):
+            # white doesn;t combine with anything
+            if p1.name == 'white' or p2.name == 'white':
                 pass
-            #rbf X bias
-            elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
-            elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
-            #linear X bias
-            elif p1.name=='bias' and p2.name=='linear':
-                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2., Z, mu, S, target_mu, target_S)
-            elif p2.name=='bias' and p1.name=='linear':
-                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2., Z, mu, S, target_mu, target_S)
-            #rbf X linear
-            elif p1.name=='linear' and p2.name=='rbf':
-                raise NotImplementedError #TODO
-            elif p2.name=='linear' and p1.name=='rbf':
-                raise NotImplementedError #TODO
+            # rbf X bias
+            elif p1.name == 'bias' and p2.name == 'rbf':
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
+            elif p2.name == 'bias' and p1.name == 'rbf':
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
+            # linear X bias
+            elif p1.name == 'bias' and p2.name == 'linear':
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
+            elif p2.name == 'bias' and p1.name == 'linear':
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
+            # rbf X linear
+            elif p1.name == 'linear' and p2.name == 'rbf':
+                raise NotImplementedError  # TODO
+            elif p2.name == 'linear' and p1.name == 'rbf':
+                raise NotImplementedError  # TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
         return target_mu, target_S
 
-    def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None,*args,**kwargs):
-        if which_functions=='all':
-            which_functions = [True]*self.Nparts
+    def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
+        if which_functions == 'all':
+            which_functions = [True] * self.Nparts
         if self.D == 1:
             if x is None:
-                x = np.zeros((1,1))
+                x = np.zeros((1, 1))
             else:
                 x = np.asarray(x)
                 assert x.size == 1, "The size of the fixed variable x is not 1"
-                x = x.reshape((1,1))
+                x = x.reshape((1, 1))
 
             if plot_limits == None:
-                xmin, xmax = (x-5).flatten(), (x+5).flatten()
+                xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
             elif len(plot_limits) == 2:
                 xmin, xmax = plot_limits
             else:
                 raise ValueError, "Bad limits for plotting"
 
-            Xnew = np.linspace(xmin,xmax,resolution or 201)[:,None]
-            Kx = self.K(Xnew,x,slices2=which_functions)
-            pb.plot(Xnew,Kx,*args,**kwargs)
-            pb.xlim(xmin,xmax)
+            Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
+            Kx = self.K(Xnew, x, slices2=which_functions)
+            pb.plot(Xnew, Kx, *args, **kwargs)
+            pb.xlim(xmin, xmax)
             pb.xlabel("x")
-            pb.ylabel("k(x,%0.1f)" %x)
+            pb.ylabel("k(x,%0.1f)" % x)
 
         elif self.D == 2:
             if x is None:
-                x = np.zeros((1,2))
+                x = np.zeros((1, 2))
             else:
                 x = np.asarray(x)
                 assert x.size == 2, "The size of the fixed variable x is not 2"
-                x = x.reshape((1,2))
+                x = x.reshape((1, 2))
 
             if plot_limits == None:
-                xmin, xmax = (x-5).flatten(), (x+5).flatten()
+                xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
             elif len(plot_limits) == 2:
                 xmin, xmax = plot_limits
             else:
                 raise ValueError, "Bad limits for plotting"
 
             resolution = resolution or 51
-            xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-            xg = np.linspace(xmin[0],xmax[0],resolution)
-            yg = np.linspace(xmin[1],xmax[1],resolution)
-            Xnew = np.vstack((xx.flatten(),yy.flatten())).T
-            Kx = self.K(Xnew,x,slices2=which_functions)
-            Kx = Kx.reshape(resolution,resolution).T
-            pb.contour(xg,yg,Kx,vmin=Kx.min(),vmax=Kx.max(),cmap=pb.cm.jet,*args,**kwargs)
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
+            xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution]
+            xg = np.linspace(xmin[0], xmax[0], resolution)
+            yg = np.linspace(xmin[1], xmax[1], resolution)
+            Xnew = np.vstack((xx.flatten(), yy.flatten())).T
+            Kx = self.K(Xnew, x, slices2=which_functions)
+            Kx = Kx.reshape(resolution, resolution).T
+            pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
+            pb.xlim(xmin[0], xmax[0])
+            pb.ylim(xmin[1], xmax[1])
             pb.xlabel("x1")
             pb.ylabel("x2")
-            pb.title("k(x1,x2 ; %0.1f,%0.1f)" %(x[0,0],x[0,1]) )
+            pb.title("k(x1,x2 ; %0.1f,%0.1f)" % (x[0, 0], x[0, 1]))
         else:
             raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 4d9edacc..a6bd6b74 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -239,10 +239,10 @@ class sparse_GP(GP):
         """
         The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z)  # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
             dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
-            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_variance) # 'stripes'
+            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
             dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
         return dL_dZ
diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
index 22737ca1..c500f5d6 100644
--- a/GPy/testing/psi_stat_tests.py
+++ b/GPy/testing/psi_stat_tests.py
@@ -12,18 +12,17 @@ import itertools
 from GPy.core import model
 
 class PsiStatModel(model):
-    def __init__(self, which, X, X_variance, Z, M, kernel, mu_or_S, dL_=numpy.ones((1, 1))):
+    def __init__(self, which, X, X_variance, Z, M, kernel):
         self.which = which
-        self.dL_ = dL_
         self.X = X
         self.X_variance = X_variance
         self.Z = Z
         self.N, self.Q = X.shape
         self.M, Q = Z.shape
-        self.mu_or_S = mu_or_S
         assert self.Q == Q, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
         self.kern = kernel
         super(PsiStatModel, self).__init__()
+        self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
     def _get_param_names(self):
         Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.Q))]
         Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.Q))]
@@ -41,13 +40,12 @@ class PsiStatModel(model):
     def log_likelihood(self):
         return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
     def _log_likelihood_gradients(self):
-        psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
-        psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(psi_), self.Z, self.X, self.X_variance)
+        psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
         try:
-            psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(psi_), self.Z, self.X, self.X_variance)
+            psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
         except AttributeError:
             psiZ = numpy.zeros(self.M * self.Q)
-        thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(psi_), self.Z, self.X, self.X_variance).flatten()
+        thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
         return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
 
 class Test(unittest.TestCase):
@@ -72,15 +70,35 @@ class Test(unittest.TestCase):
 
     def testPsi1(self):
         for k in self.kernels:
-            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+            m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
                          M=self.M, kernel=k)
             assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 
-    def testPsi2(self):
-        for k in self.kernels:
-            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
-                         M=self.M, kernel=k)
-            assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
+    def testPsi2_lin(self):
+        k = self.kernels[0]
+        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
+                     M=self.M, kernel=k)
+        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
+    def testPsi2_lin_bia(self):
+        k = self.kernels[3]
+        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
+                     M=self.M, kernel=k)
+        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
+    def testPsi2_rbf(self):
+        k = self.kernels[1]
+        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
+                     M=self.M, kernel=k)
+        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
+    def testPsi2_rbf_bia(self):
+        k = self.kernels[-1]
+        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
+                     M=self.M, kernel=k)
+        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
+    def testPsi2_bia(self):
+        k = self.kernels[2]
+        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
+                     M=self.M, kernel=k)
+        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
 
 
 if __name__ == "__main__":
@@ -94,9 +112,13 @@ if __name__ == "__main__":
     Y = X.dot(numpy.random.randn(Q, D))
     kernel = GPy.kern.linear(Q)  # GPy.kern.bias(Q)  # GPy.kern.linear(Q) + GPy.kern.rbf(Q)
     m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL_=numpy.ones((1)))
+                     M=M, kernel=GPy.kern.linear(Q))
     m1 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL_=numpy.ones((1)))
+                     M=M, kernel=GPy.kern.bias(Q))
     m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL_=numpy.ones((1, 1, 1)))
+                     M=M, kernel=GPy.kern.rbf(Q))
+    m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
+    m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
 

From 743112c448f9753bb8e76654928aa09ffa852ad9 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 15:52:43 +0100
Subject: [PATCH 20/54] psi1 not working (strange transposes)

---
 GPy/testing/psi_stat_tests.py | 64 ++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
index c500f5d6..6aeea60c 100644
--- a/GPy/testing/psi_stat_tests.py
+++ b/GPy/testing/psi_stat_tests.py
@@ -68,11 +68,11 @@ class Test(unittest.TestCase):
                          M=self.M, kernel=k)
             assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
 
-    def testPsi1(self):
-        for k in self.kernels:
-            m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
-                         M=self.M, kernel=k)
-            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
+#     def testPsi1(self):
+#         for k in self.kernels:
+#             m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
+#                      M=self.M, kernel=k)
+#             assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 
     def testPsi2_lin(self):
         k = self.kernels[0]
@@ -102,23 +102,39 @@ class Test(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    Q = 5
-    N = 50
-    M = 10
-    D = 10
-    X = numpy.random.randn(N, Q)
-    X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
-    Z = numpy.random.permutation(X)[:M]
-    Y = X.dot(numpy.random.randn(Q, D))
-    kernel = GPy.kern.linear(Q)  # GPy.kern.bias(Q)  # GPy.kern.linear(Q) + GPy.kern.rbf(Q)
-    m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=GPy.kern.linear(Q))
-    m1 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=GPy.kern.bias(Q))
-    m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=GPy.kern.rbf(Q))
-    m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
-    m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
+    import sys
+    interactive = 'i' in sys.argv
+    if interactive:
+        Q = 5
+        N = 50
+        M = 10
+        D = 10
+        X = numpy.random.randn(N, Q)
+        X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
+        Z = numpy.random.permutation(X)[:M]
+        Y = X.dot(numpy.random.randn(Q, D))
+        kernel = GPy.kern.bias(Q)
 
+        kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
+               GPy.kern.linear(Q) + GPy.kern.bias(Q),
+               GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
+
+        for k in kernels:
+            m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
+                     M=M, kernel=k)
+            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
+#
+#         m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
+#                          M=M, kernel=GPy.kern.linear(Q))
+#         m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
+#                          M=M, kernel=kernel)
+        m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
+                         M=M, kernel=kernel)
+        m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                         M=M, kernel=GPy.kern.rbf(Q))
+        m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                         M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
+        m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                         M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
+    else:
+        unittest.main()

From 389a04d2b55d46747d1cb3e10464a33deb351349 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 16:21:41 +0100
Subject: [PATCH 21/54] bugfix: cross term psi1   bias + linear

---
 GPy/kern/kern.py              | 11 ++++++++---
 GPy/kern/linear.py            |  2 +-
 GPy/testing/bgplvm_tests.py   |  2 +-
 GPy/testing/psi_stat_tests.py | 24 ++++++++++++++++++------
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index d1350be5..a65c2aa3 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -455,10 +455,15 @@ class kern(parameterised):
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
             # linear X bias
             elif p1.name == 'bias' and p2.name == 'linear':
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target)
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])  # [ps1])
+                psi1 = np.zeros((mu.shape[0], Z.shape[0]))
+                p2.psi1(Z, mu, S, psi1)
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
             elif p2.name == 'bias' and p1.name == 'linear':
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target)
-                pass
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
+                psi1 = np.zeros((mu.shape[0], Z.shape[0]))
+                p1.psi1(Z, mu, S, psi1)
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
             # rbf X linear
             elif p1.name == 'linear' and p2.name == 'rbf':
                 raise NotImplementedError  # TODO
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 6d2a3e48..78a8732a 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -114,7 +114,7 @@ class linear(kernpart):
 
     def psi1(self,Z,mu,S,target):
         """the variance, it does nothing"""
-        self.K(mu,Z,target)
+        self._psi1 = self.K(mu, Z, target)
 
     def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
         """the variance, it does nothing"""
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index b11b4532..5396e175 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -60,7 +60,7 @@ class BGPLVMTests(unittest.TestCase):
 
     #@unittest.skip('psi2 cross terms are NotImplemented for this combination')
     def test_linear_bias_kern(self):
-        N, M, Q, D = 10, 3, 2, 4
+        N, M, Q, D = 30, 5, 4, 30
         X = np.random.rand(N, Q)
         k = GPy.kern.linear(Q) +  GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
         K = k.K(X)
diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
index 6aeea60c..1a14e088 100644
--- a/GPy/testing/psi_stat_tests.py
+++ b/GPy/testing/psi_stat_tests.py
@@ -105,6 +105,18 @@ if __name__ == "__main__":
     import sys
     interactive = 'i' in sys.argv
     if interactive:
+        N, M, Q, D = 30, 5, 4, 30
+        X = numpy.random.rand(N, Q)
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
+        Y -= Y.mean(axis=0)
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
+        m.ensure_default_constraints()
+        m.randomize()
+#         self.assertTrue(m.checkgrad())
+
         Q = 5
         N = 50
         M = 10
@@ -119,17 +131,17 @@ if __name__ == "__main__":
                GPy.kern.linear(Q) + GPy.kern.bias(Q),
                GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
 
-        for k in kernels:
-            m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=k)
-            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
+#         for k in kernels:
+#             m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
+#                      M=M, kernel=k)
+#             assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 #
 #         m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
 #                          M=M, kernel=GPy.kern.linear(Q))
 #         m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
 #                          M=M, kernel=kernel)
-        m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
-                         M=M, kernel=kernel)
+#         m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
+#                          M=M, kernel=kernel)
         m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
                          M=M, kernel=GPy.kern.rbf(Q))
         m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,

From ebc3b8756e9ba8bc8b9f94413ee2b818f699060d Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 16:34:01 +0100
Subject: [PATCH 22/54] psi_stat_test stash

---
 GPy/testing/psi_stat_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
index 1a14e088..044f7fca 100644
--- a/GPy/testing/psi_stat_tests.py
+++ b/GPy/testing/psi_stat_tests.py
@@ -57,6 +57,7 @@ class Test(unittest.TestCase):
     X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
     Z = numpy.random.permutation(X)[:M]
     Y = X.dot(numpy.random.randn(Q, D))
+    kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q)]
 
     kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
                GPy.kern.linear(Q) + GPy.kern.bias(Q),

From 264f0d21b61ec0ec964fa4df9e33171af40dcfac Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 16:34:31 +0100
Subject: [PATCH 23/54] kern stash conflict

---
 GPy/examples/dimensionality_reduction.py |  84 +++++++----
 GPy/inference/natural_gradient_scg.py    | 146 +++++++++++++++++++
 GPy/models/Bayesian_GPLVM.py             | 178 ++++++++++++++++++++++-
 GPy/models/mrd.py                        |  23 ---
 4 files changed, 370 insertions(+), 61 deletions(-)
 create mode 100644 GPy/inference/natural_gradient_scg.py

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 8c8e23fe..e5f50237 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -112,14 +112,14 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     s3 = s3(x)
     sS = sS(x)
 
-    s1 -= s1.mean()
-    s2 -= s2.mean()
-    s3 -= s3.mean()
-    sS -= sS.mean()
-    s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
-    s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
-    s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
-    sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
+#     s1 -= s1.mean()
+#     s2 -= s2.mean()
+#     s3 -= s3.mean()
+#     sS -= sS.mean()
+#     s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
+#     s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
+#     s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
+#     sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
 
     S1 = np.hstack([s1, sS])
     S2 = np.hstack([s2, sS])
@@ -129,9 +129,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
     Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
 
-    Y1 += .5 * np.random.randn(*Y1.shape)
-    Y2 += .5 * np.random.randn(*Y2.shape)
-    Y3 += .5 * np.random.randn(*Y3.shape)
+    Y1 += .3 * np.random.randn(*Y1.shape)
+    Y2 += .3 * np.random.randn(*Y2.shape)
+    Y3 += .3 * np.random.randn(*Y3.shape)
 
     Y1 -= Y1.mean(0)
     Y2 -= Y2.mean(0)
@@ -162,8 +162,11 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
 
     return slist, [S1, S2, S3], Ylist
 
-def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
-    D1, D2, D3, N, M, Q = 2000, 8, 8, 500, 2, 6
+def bgplvm_simulation(burnin='scg', plot_sim=False,
+                      max_burnin=100, true_X=False,
+                      do_opt=True,
+                      max_f_eval=1000):
+    D1, D2, D3, N, M, Q = 10, 8, 8, 50, 30, 5
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
@@ -171,53 +174,73 @@ def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
     reload(mrd); reload(kern)
 
 
-    Y = Ylist[1]
+    Y = Ylist[0]
 
     k = kern.linear(Q, ARD=True) + kern.white(Q, .00001)  # + kern.bias(Q)
-    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
+#     k = kern.white(Q, .00001) + kern.bias(Q)
+    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
     # m.set('noise',)
+    m.ensure_default_constraints()
 #     m.auto_scale_factor = True
 #     m.scale_factor = 1.
-    m.ensure_default_constraints()
 
 
     if burnin:
         print "initializing beta"
         cstr = "noise"
-        m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 100.)
-        m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
+        m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 70.)
+        m.optimize(burnin, messages=1, max_f_eval=max_burnin)
 
         print "releasing beta"
         cstr = "noise"
         m.unconstrain(cstr);  m.constrain_positive(cstr)
 
-    true_X = np.hstack((slist[1], slist[3], 0. * np.ones((N, Q - 2))))
-    m.set('X_\d', true_X)
-    m.constrain_fixed("X_\d")
+    if true_X:
+        true_X = np.hstack((slist[0], slist[3], 0. * np.ones((N, Q - 2))))
+        m.set('X_\d', true_X)
+        m.constrain_fixed("X_\d")
 
-# #     cstr = 'variance'
-# #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
+        cstr = 'X_variance'
+#         m.unconstrain(cstr), m.constrain_fixed(cstr, .0001)
+        m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-7, .1)
+
+#     cstr = 'X_variance'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
+
+    m.set('X_var', np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01)
+
+#     cstr = "iip"
+#     m.unconstrain(cstr); m.constrain_fixed(cstr)
+
+#     cstr = 'variance'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
 #     cstr = 'X_\d'
-#     m.unconstrain(cstr), m.constrain_bounded(cstr, -100., 100.)
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, -10., 10.)
 #
 #     cstr = 'noise'
-#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-5, 1.)
 #
 #     cstr = 'white'
 #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-6, 1.)
 #
 #     cstr = 'linear_variance'
-#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)  # m.constrain_positive(cstr)
-#
-#     cstr = 'X_variance'
-#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)  # m.constrain_positive(cstr)
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
+
+#     cstr = 'variance'
+#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
 
 #     np.seterr(all='call')
 #     def ipdbonerr(errtype, flags):
 #         import ipdb; ipdb.set_trace()
 #     np.seterrcall(ipdbonerr)
 
-
+    if do_opt and burnin:
+        try:
+            m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
+        except:
+            pass
+        finally:
+            return m
     return m
 
 def mrd_simulation(plot_sim=False):
@@ -261,6 +284,7 @@ def mrd_simulation(plot_sim=False):
         m.set('{}_noise'.format(i + 1), Y.var() / 100.)
 
     m.ensure_default_constraints()
+    m.auto_scale_factor = True
 
 #     cstr = 'variance'
 #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
diff --git a/GPy/inference/natural_gradient_scg.py b/GPy/inference/natural_gradient_scg.py
new file mode 100644
index 00000000..ca42acfe
--- /dev/null
+++ b/GPy/inference/natural_gradient_scg.py
@@ -0,0 +1,146 @@
+#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
+
+#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
+
+#      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+#      HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+#      EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+#      NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+#      MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+#      PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+#      REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+#      DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#      EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#      (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+#      OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#      DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+#      HOWEVER CAUSED AND ON ANY THEORY OF
+#      LIABILITY, WHETHER IN CONTRACT, STRICT
+#      LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#      OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#      OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#      POSSIBILITY OF SUCH DAMAGE.
+
+
+import numpy as np
+import sys
+
+def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
+    """
+    Optimisation through Scaled Conjugate Gradients (SCG)
+
+    f: the objective function
+    gradf : the gradient function (should return a 1D np.ndarray)
+    x : the initial condition
+
+    Returns
+    x the optimal value for x
+    flog : a list of all the objective values
+
+    """
+
+    sigma0 = 1.0e-4
+    fold = f(x, *optargs)	# Initial function value.
+    function_eval = 1
+    fnow = fold
+    gradnew = gradf(x, *optargs)	# Initial gradient.
+    gradold = gradnew.copy()
+    d = -gradnew				# Initial search direction.
+    success = True				# Force calculation of directional derivs.
+    nsuccess = 0				# nsuccess counts number of successes.
+    beta = 1.0				# Initial scale parameter.
+    betamin = 1.0e-15 			# Lower bound on scale.
+    betamax = 1.0e100			# Upper bound on scale.
+    status = "Not converged"
+
+    flog = [fold]
+
+    iteration = 0
+
+    # Main optimization loop.
+    while iteration < maxiters:
+
+        # Calculate first and second directional derivatives.
+        if success:
+            mu = np.dot(d, gradnew)
+            if mu >= 0:
+                d = -gradnew
+                mu = np.dot(d, gradnew)
+            kappa = np.dot(d, d)
+            sigma = sigma0/np.sqrt(kappa)
+            xplus = x + sigma*d
+            gplus = gradf(xplus, *optargs)
+            theta = np.dot(d, (gplus - gradnew))/sigma
+
+        # Increase effective curvature and evaluate step size alpha.
+        delta = theta + beta*kappa
+        if delta <= 0:
+            delta = beta*kappa
+            beta = beta - theta/kappa
+
+        alpha = - mu/delta
+
+        # Calculate the comparison ratio.
+        xnew = x + alpha*d
+        fnew = f(xnew, *optargs)
+        function_eval += 1
+
+        if function_eval >= max_f_eval:
+            status = "Maximum number of function evaluations exceeded"
+            return x, flog, function_eval, status
+
+        Delta = 2.*(fnew - fold)/(alpha*mu)
+        if Delta  >= 0.:
+            success = True
+            nsuccess += 1
+            x = xnew
+            fnow = fnew
+        else:
+            success = False
+            fnow = fold
+
+        # Store relevant variables
+        flog.append(fnow)		# Current function value
+
+        iteration += 1
+        if display:
+            print '\r',
+            print 'Iteration: {0:>5g}  Objective:{1:> 12e}  Scale:{2:> 12e}'.format(iteration, fnow, beta),
+            # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
+            sys.stdout.flush()
+
+        if success:
+            # Test for termination
+            if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
+                status='converged'
+                return x, flog, function_eval, status
+
+            else:
+                # Update variables for new position
+                fold = fnew
+                gradold = gradnew
+                gradnew = gradf(x, *optargs)
+                # If the gradient is zero then we are done.
+                if np.dot(gradnew,gradnew) == 0:
+                    return x, flog, function_eval, status
+
+        # Adjust beta according to comparison ratio.
+        if Delta < 0.25:
+            beta = min(4.0*beta, betamax)
+        if Delta > 0.75:
+            beta = max(0.5*beta, betamin)
+
+        # Update search direction using Polak-Ribiere formula, or re-start
+        # in direction of negative gradient after nparams steps.
+        if nsuccess == x.size:
+            d = -gradnew
+            nsuccess = 0
+        elif success:
+            gamma = np.dot(gradold - gradnew,gradnew)/(mu)
+            d = gamma*d - gradnew
+
+    # If we get here, then we haven't terminated in the given number of
+    # iterations.
+    status = "maxiter exceeded"
+
+    return x, flog, function_eval, status
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index a23368de..0646b25f 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -10,6 +10,7 @@ from GPy.util.linalg import pdinv
 from ..likelihoods import Gaussian
 from .. import kern
 from numpy.linalg.linalg import LinAlgError
+import itertools
 
 class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
@@ -23,7 +24,9 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, oldpsave=5, **kwargs):
+    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
+                 Z=None, kernel=None, oldpsave=5, _debug=False,
+                 **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)
 
@@ -39,6 +42,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 
         self.oldpsave = oldpsave
         self._oldps = []
+        self._debug = _debug
+
+        if self._debug:
+            self._count = itertools.count()
+            self._savedklll = []
+            self._savedparams = []
 
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
 
@@ -70,16 +79,18 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
         return x
 
-    def _set_params(self, x, save_old=True):
+    def _set_params(self, x, save_old=True, save_count=0):
         try:
             N, Q = self.N, self.Q
             self.X = x[:self.X.size].reshape(N, Q).copy()
             self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
             sparse_GP._set_params(self, x[(2 * N * Q):])
             self.oldps = x
-        except (LinAlgError, FloatingPointError):
-            print "\rWARNING: Caught LinAlgError, reconstructing old state            "
-            self._set_params(self.oldps[-1], save_old=False)
+        except (LinAlgError, FloatingPointError, ZeroDivisionError):
+            print "\rWARNING: Caught LinAlgError, continueing without setting            "
+#             if save_count > 10:
+#                 raise
+#             self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
 
     def dKL_dmuS(self):
         dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@@ -103,15 +114,29 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     def log_likelihood(self):
         ll = sparse_GP.log_likelihood(self)
         kl = self.KL_divergence()
-        return ll + kl
+
+#         if ll < -2E4:
+#             ll = -2E4 + np.random.randn()
+#         if kl > 5E4:
+#             kl = 5E4 + np.random.randn()
+
+        if self._debug:
+            f_call = self._count.next()
+            self._savedklll.append([f_call, ll, kl])
+            if f_call % 1 == 0:
+                self._savedparams.append([f_call, self._get_params()])
+
+
+        # print "\nkl:", kl, "ll:", ll
+        return ll - kl
 
     def _log_likelihood_gradients(self):
         dKL_dmu, dKL_dS = self.dKL_dmuS()
         dL_dmu, dL_dS = self.dL_dmuS()
         # TODO: find way to make faster
 
-        d_dmu = (dL_dmu + dKL_dmu).flatten()
-        d_dS = (dL_dS + dKL_dS).flatten()
+        d_dmu = (dL_dmu - dKL_dmu).flatten()
+        d_dS = (dL_dS - dKL_dS).flatten()
         # TEST KL: ====================
         # d_dmu = (dKL_dmu).flatten()
         # d_dS = (dKL_dS).flatten()
@@ -135,3 +160,140 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
         return ax
+
+    def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
+        import pylab
+
+        fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
+        if colors is None:
+            colors = pylab.gca()._get_lines.color_cycle
+            pylab.clf()
+        plots = []
+        for i in range(self.X.shape[1]):
+            if axes is None:
+                ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
+            else:
+                ax = axes[i]
+            ax.plot(self.X, c='k', alpha=.3)
+            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
+            ax.fill_between(np.arange(self.X.shape[0]),
+                            self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
+                            self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
+                            facecolor=plots[-1].get_color(),
+                            alpha=.3)
+            ax.legend(borderaxespad=0.)
+            if i < self.X.shape[1] - 1:
+                ax.set_xticklabels('')
+        pylab.draw()
+        fig.tight_layout(h_pad=.01)  # , rect=(0, 0, 1, .95))
+        return fig
+
+    def _debug_filter_params(self, x):
+        start, end = 0, self.X.size,
+        X = x[start:end].reshape(self.N, self.Q)
+        start, end = end, end + self.X_variance.size
+        X_v = x[start:end].reshape(self.N, self.Q)
+        start, end = end, end + (self.M * self.Q)
+        Z = x[start:end].reshape(self.M, self.Q)
+        start, end = end, end + self.Q
+        theta = x[start:]
+        return X, X_v, Z, theta
+
+    def _debug_plot(self):
+        assert self._debug, "must enable _debug, to debug-plot"
+        import pylab
+        from mpl_toolkits.mplot3d import Axes3D
+        fig = pylab.figure('BGPLVM DEBUG', figsize=(12, 10))
+        fig.clf()
+
+        # log like
+        splotshape = (6, 4)
+        ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
+        ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
+                 ha='center', va='center')
+        kllls = np.array(self._savedklll)
+        LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], label=r'$\log p(\mathbf{Y})$', mew=1.5)
+        KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
+        L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5)  # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
+
+        drawn = dict(self._savedparams)
+        iters = np.array(drawn.keys())
+        self.showing = 0
+
+        ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
+        ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
+                 ha='center', va='center')
+        ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
+        ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
+                 ha='center', va='center')
+        ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
+        ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
+                 ha='center', va='center')
+        ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
+        ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
+                 ha='center', va='center')
+
+        X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
+        Xlatentplts = ax2.plot(X, ls="-", marker="x")
+        Slatentplts = ax3.plot(S, ls="-", marker="x")
+        Zplts = ax4.plot(Z, ls="-", marker="x")
+        thetaplts = ax5.bar(np.arange(len(theta)) - .4, theta)
+        ax5.set_xticks(np.arange(len(theta)))
+        ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
+
+        Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
+                   borderaxespad=0, mode="expand")
+        Lleg = ax1.legend()
+        Lleg.draggable()
+        ax1.add_artist(Qleg)
+
+        indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
+        indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
+        indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
+
+        try:
+            pylab.draw()
+            pylab.tight_layout(box=(0, .1, 1, .9))
+        except:
+            pass
+
+        # parameter changes
+        # ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
+        def onclick(event):
+            if event.inaxes is ax1 and event.button == 1:
+#               event.button, event.x, event.y, event.xdata, event.ydata)
+                tmp = np.abs(iters - event.xdata)
+                closest_hit = iters[tmp == tmp.min()][0]
+
+                if closest_hit != self.showing:
+                    self.showing = closest_hit
+                    # print closest_hit, iters, event.xdata
+
+                    indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
+                    indicatorKL.set_data(self.showing, kllls[self.showing, 2])
+                    indicatorL.set_data(self.showing, kllls[self.showing, 1])
+
+                    X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
+                    for i, Xlatent in enumerate(Xlatentplts):
+                        Xlatent.set_ydata(X[:, i])
+                    for i, Slatent in enumerate(Slatentplts):
+                        Slatent.set_ydata(S[:, i])
+                    for i, Zlatent in enumerate(Zplts):
+                        Zlatent.set_ydata(Z[:, i])
+                    for p, t in zip(thetaplts, theta):
+                        p.set_height(t)
+
+                    ax2.relim()
+                    ax3.relim()
+                    ax4.relim()
+                    ax5.relim()
+                    ax2.autoscale()
+                    ax3.autoscale()
+                    ax4.autoscale()
+                    ax5.autoscale()
+                    fig.canvas.draw()
+
+        cid = fig.canvas.mpl_connect('button_press_event', onclick)
+
+        return ax1, ax2, ax3, ax4, ax5
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 096c9cb9..4e0487b2 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -287,29 +287,6 @@ class MRD(model):
         else:
             return pylab.gcf()
 
-    def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
-        fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
-        if colors is None:
-            colors = pylab.gca()._get_lines.color_cycle
-            pylab.clf()
-        plots = []
-        for i in range(self.X.shape[1]):
-            if axes is None:
-                ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
-            ax.plot(self.X, c='k', alpha=.3)
-            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
-            ax.fill_between(numpy.arange(self.X.shape[0]),
-                            self.X.T[i] - 2 * numpy.sqrt(self.gref.X_variance.T[i]),
-                            self.X.T[i] + 2 * numpy.sqrt(self.gref.X_variance.T[i]),
-                            facecolor=plots[-1].get_color(),
-                            alpha=.3)
-            ax.legend(borderaxespad=0.)
-            if i < self.X.shape[1] - 1:
-                ax.set_xticklabels('')
-        pylab.draw()
-        fig.tight_layout(h_pad=.01)  # , rect=(0, 0, 1, .95))
-        return fig
-
     def plot_X(self, fig_num="MRD Predictions", axes=None):
         fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
         return fig

From 2b3b7350cd95db50efa5a59f6641572b4d15a115 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 16:37:13 +0100
Subject: [PATCH 24/54] kern conflict

---
 GPy/kern/kern.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 2ef07fa5..5b140193 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -440,7 +440,11 @@ class kern(parameterised):
         # TODO: better looping
         for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
             p1, p2 = self.parts[i1], self.parts[i2]
+<<<<<<< Updated upstream
 #             ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
+=======
+            ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
+>>>>>>> Stashed changes
             ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
 
             # white doesn;t combine with anything
@@ -455,6 +459,7 @@ class kern(parameterised):
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
             # linear X bias
             elif p1.name == 'bias' and p2.name == 'linear':
+<<<<<<< Updated upstream
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])  # [ps1])
                 psi1 = np.zeros((mu.shape[0], Z.shape[0]))
                 p2.psi1(Z, mu, S, psi1)
@@ -464,6 +469,11 @@ class kern(parameterised):
                 psi1 = np.zeros((mu.shape[0], Z.shape[0]))
                 p1.psi1(Z, mu, S, psi1)
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
+=======
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps1])
+            elif p2.name == 'bias' and p1.name == 'linear':
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
+>>>>>>> Stashed changes
             # rbf X linear
             elif p1.name == 'linear' and p2.name == 'rbf':
                 raise NotImplementedError  # TODO

From d789e5548100c9b871cddec5fc6d281d74d08534 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 16:40:09 +0100
Subject: [PATCH 25/54] stupid kern stash merge

---
 GPy/kern/kern.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 5b140193..2ef07fa5 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -440,11 +440,7 @@ class kern(parameterised):
         # TODO: better looping
         for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
             p1, p2 = self.parts[i1], self.parts[i2]
-<<<<<<< Updated upstream
 #             ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
-=======
-            ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
->>>>>>> Stashed changes
             ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
 
             # white doesn;t combine with anything
@@ -459,7 +455,6 @@ class kern(parameterised):
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
             # linear X bias
             elif p1.name == 'bias' and p2.name == 'linear':
-<<<<<<< Updated upstream
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])  # [ps1])
                 psi1 = np.zeros((mu.shape[0], Z.shape[0]))
                 p2.psi1(Z, mu, S, psi1)
@@ -469,11 +464,6 @@ class kern(parameterised):
                 psi1 = np.zeros((mu.shape[0], Z.shape[0]))
                 p1.psi1(Z, mu, S, psi1)
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
-=======
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps1])
-            elif p2.name == 'bias' and p1.name == 'linear':
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
->>>>>>> Stashed changes
             # rbf X linear
             elif p1.name == 'linear' and p2.name == 'rbf':
                 raise NotImplementedError  # TODO

From e6165e6b35060f04f86c5dffaac3addcfb429fff Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@sheffield.ac.uk>
Date: Tue, 23 Apr 2013 17:09:52 +0100
Subject: [PATCH 26/54] re-added indepenent_output kern

---
 GPy/kern/independent_outputs.py | 97 +++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 GPy/kern/independent_outputs.py

diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
new file mode 100644
index 00000000..b94202d7
--- /dev/null
+++ b/GPy/kern/independent_outputs.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2012, James Hesnsman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import kernpart
+import numpy as np
+
+def index_to_slices(index):
+    """
+    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+
+    e.g.
+    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
+    returns
+    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
+
+    or, a more complicated example
+    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
+    returns
+    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
+    """
+
+    #contruct the return structure
+    ind = np.asarray(index,dtype=np.int64)
+    ret = [[] for i in range(ind.max()+1)]
+
+    #find the switchpoints
+    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
+    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
+
+    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
+    return ret
+
+class independent_outputs(kernpart):
+    """
+    A kernel part shich can reopresent several independent functions.
+    this kernel 'switches off' parts of the matrix where the output indexes are different.
+
+    The index of the functions is given by the last column in the input X
+    the rest of the columns of X are passed to the kernel for computation (in blocks).
+
+    """
+    def __init__(self,k):
+        self.D = k.D + 1
+        self.Nparam = k.Nparam
+        self.name = 'iops('+ k.name + ')'
+        self.k = k
+
+    def _get_params(self):
+        return self.k._get_params()
+
+    def _set_params(self,x):
+        self.k._set_params(x)
+        self.params = x
+
+    def _get_param_names(self):
+        return self.k._get_param_names()
+
+    def K(self,X,X2,target):
+        #Sort out the slices from the input data
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+
+        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def Kdiag(self,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
+
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

From 43b9eacd8a67cb19915e249d8c51dcb420d45760 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@sheffield.ac.uk>
Date: Tue, 23 Apr 2013 17:11:04 +0100
Subject: [PATCH 27/54] re-enabled a previous bugfix which was lost in a merge

---
 GPy/core/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index f70125fd..e7b993e0 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -13,6 +13,7 @@ import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
+import re
 
 class model(parameterised):
     def __init__(self):
@@ -239,7 +240,7 @@ class model(parameterised):
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(param_names[i])
+                    to_make_positive.append(re.escape(param_names[i]))
                     if warn:
                         print "Warning! constraining %s postive"%name
         if len(to_make_positive):

From 29115b0ec19ac3aa3c290d7fa5d8609ced60f72b Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@sheffield.ac.uk>
Date: Tue, 23 Apr 2013 17:13:43 +0100
Subject: [PATCH 28/54] more re-enstating of some preiov commits.

---
 GPy/kern/__init__.py     |  6 +++-
 GPy/kern/constructors.py | 70 +++++++++++++++++++++++++---------------
 2 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index f062ee56..327bf69c 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,9 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
+try:
+    from constructors import rbf_sympy, sympykern # these depend on sympy
+except:
+    pass
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 6a968da4..9c2464a7 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
 from rbfcos import rbfcos as rbfcospart
+from independent_outputs import independent_outputs as independent_output_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -165,34 +166,40 @@ def Brownian(D,variance=1.):
     part = Brownianpart(D,variance)
     return kern(D, [part])
 
-import sympy as sp
-from sympykern import spkern
-from sympy.parsing.sympy_parser import parse_expr
+try:
+    import sympy as sp
+    from sympykern import spkern
+    from sympy.parsing.sympy_parser import parse_expr
+    sympy_available = True
+except ImportError:
+    sympy_available = False
 
-def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
-    """
-    Radial Basis Function covariance.
-    """
-    X = [sp.var('x%i'%i) for i in range(D)]
-    Z = [sp.var('z%i'%i) for i in range(D)]
-    rbf_variance = sp.var('rbf_variance',positive=True)
-    if ARD:
-        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
-        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f =  rbf_variance*sp.exp(-dist/2.)
-    else:
-        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-    return kern(D,[spkern(D,f)])
+if sympy_available:
+    def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
+        """
+        Radial Basis Function covariance.
+        """
+        X = [sp.var('x%i'%i) for i in range(D)]
+        Z = [sp.var('z%i'%i) for i in range(D)]
+        rbf_variance = sp.var('rbf_variance',positive=True)
+        if ARD:
+            rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f =  rbf_variance*sp.exp(-dist/2.)
+        else:
+            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+            dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+        return kern(D,[spkern(D,f)])
 
-def sympykern(D,k):
-    """
-    A kernel from a symbolic sympy representation
-    """
-    return kern(D,[spkern(D,k)])
+    def sympykern(D,k):
+        """
+        A kernel from a symbolic sympy representation
+        """
+        return kern(D,[spkern(D,k)])
+del sympy_available
 
 def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
     """
@@ -318,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
     """
     part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
     return kern(D,[part])
+
+def independent_outputs(k):
+    """
+    Construct a kernel with independent outputs from an existing kernel
+    """
+    for sl in k.input_slices:
+        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
+    parts = [independent_output_part(p) for p in k.parts]
+    return kern(k.D+1,parts)
+
+

From 70beeb5fe981165f2b966c2522bc3505186d34a1 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 24 Apr 2013 10:08:41 +0100
Subject: [PATCH 29/54] added m['ard'] gives all parameters matching 'ard', as
 well as setting m['ard'] = x to set all mrd parameters

---
 GPy/core/model.py | 182 ++++++++++++++++++++++++----------------------
 GPy/models/mrd.py |  23 ------
 2 files changed, 94 insertions(+), 111 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index f70125fd..83baecfe 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -7,7 +7,7 @@ from scipy import optimize
 import sys, pdb
 import multiprocessing as mp
 from GPy.util.misc import opt_wrapper
-#import numdifftools as ndt
+# import numdifftools as ndt
 from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
@@ -24,14 +24,14 @@ class model(parameterised):
         self.preferred_optimizer = 'tnc'
     def _get_params(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
-    def _set_params(self,x):
+    def _set_params(self, x):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def _log_likelihood_gradients(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
 
-    def set_prior(self,which,what):
+    def set_prior(self, which, what):
         """
         Sets priors on the model parameters.
 
@@ -52,38 +52,44 @@ class model(parameterised):
 
         which = self.grep_param_names(which)
 
-        #check tied situation
-        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
+        # check tied situation
+        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
         if len(tie_partial_matches):
             raise ValueError, "cannot place prior across partial ties"
-        tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
-        if len(tie_matches)>1:
+        tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
+        if len(tie_matches) > 1:
             raise ValueError, "cannot place prior across multiple ties"
-        elif len(tie_matches)==1:
-            which = which[:1]# just place a prior object on the first parameter
+        elif len(tie_matches) == 1:
+            which = which[:1]  # just place a prior object on the first parameter
 
 
-        #check constraints are okay
+        # check constraints are okay
         if isinstance(what, (priors.gamma, priors.log_Gaussian)):
-            assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
-            assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
+            assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
+            assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
             unconst = np.setdiff1d(which, self.constrained_positive_indices)
             if len(unconst):
                 print "Warning: constraining parameters to be positive:"
-                print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
+                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
                 print '\n'
                 self.constrain_positive(unconst)
-        elif isinstance(what,priors.Gaussian):
-            assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
+        elif isinstance(what, priors.Gaussian):
+            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
         else:
             raise ValueError, "prior not recognised"
 
 
-        #store the prior in a local list
+        # store the prior in a local list
         for w in which:
             self.priors[w] = what
 
-    def get(self,name, return_names=False):
+    def __getitem__(self, name):
+        return self.get(name)
+
+    def __setitem(self, name, val):
+        return self.set(name, val)
+
+    def get(self, name, return_names=False):
         """
         Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
@@ -94,9 +100,9 @@ class model(parameterised):
             else:
                 return self._get_params()[matches]
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name
 
-    def set(self,name,val):
+    def set(self, name, val):
         """
         Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
         """
@@ -106,30 +112,30 @@ class model(parameterised):
             x[matches] = val
             self._set_params(x)
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name
 
-    def get_gradient(self,name, return_names=False):
+    def get_gradient(self, name, return_names=False):
         """
         Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
             if return_names:
-                return self._log_likelihood_gradients()[matches],  np.asarray(self._get_param_names())[matches].tolist()
+                return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
             else:
                 return self._log_likelihood_gradients()[matches]
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name
 
     def log_prior(self):
         """evaluate the prior"""
-        return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
+        return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
 
     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         x = self._get_params()
         ret = np.zeros(x.size)
-        [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
+        [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
         return ret
 
     def _transform_gradients(self, g):
@@ -138,13 +144,13 @@ class model(parameterised):
         """
 
         x = self._get_params()
-        g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
-        g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
-        [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
-        [np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
+        g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
+        g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
+        [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
         if len(self.tied_indices) or len(self.constrained_fixed_indices):
-            to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
-            return np.delete(g,to_remove)
+            to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
+            return np.delete(g, to_remove)
         else:
             return g
 
@@ -154,15 +160,15 @@ class model(parameterised):
         Randomize the model.
         Make this draw from the prior if one exists, else draw from N(0,1)
         """
-        #first take care of all parameters (from N(0,1))
+        # first take care of all parameters (from N(0,1))
         x = self._get_params_transformed()
         x = np.random.randn(x.size)
         self._set_params_transformed(x)
-        #now draw from prior where possible
+        # now draw from prior where possible
         x = self._get_params()
-        [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
+        [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
         self._set_params(x)
-        self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self._set_params_transformed(self._get_params_transformed())  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
 
 
     def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@@ -196,10 +202,10 @@ class model(parameterised):
                 pool = mp.Pool(processes=num_processes)
                 for i in range(Nrestarts):
                     self.randomize()
-                    job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
+                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                     jobs.append(job)
 
-                pool.close() # signal that no more data coming in
+                pool.close()  # signal that no more data coming in
                 pool.join()  # wait for all the tasks to complete
             except KeyboardInterrupt:
                 print "Ctrl+c received, terminating and joining pool."
@@ -215,10 +221,10 @@ class model(parameterised):
                     self.optimization_runs.append(jobs[i].get())
 
                 if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
+                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
             except Exception as e:
                 if robust:
-                    print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
+                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
                 else:
                     raise e
 
@@ -228,11 +234,11 @@ class model(parameterised):
         else:
             self._set_params_transformed(initial_parameters)
 
-    def ensure_default_constraints(self,warn=False):
+    def ensure_default_constraints(self, warn=False):
         """
         Ensure that any variables which should clearly be positive have been constrained somehow.
         """
-        positive_strings = ['variance','lengthscale', 'precision']
+        positive_strings = ['variance', 'lengthscale', 'precision']
         param_names = self._get_param_names()
         currently_constrained = self.all_constrained_indices()
         to_make_positive = []
@@ -241,9 +247,9 @@ class model(parameterised):
                 if not (i in currently_constrained):
                     to_make_positive.append(param_names[i])
                     if warn:
-                        print "Warning! constraining %s postive"%name
+                        print "Warning! constraining %s postive" % name
         if len(to_make_positive):
-            self.constrain_positive('('+'|'.join(to_make_positive)+')')
+            self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
 
 
 
@@ -261,14 +267,14 @@ class model(parameterised):
         self._set_params_transformed(x)
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        return - LL_gradients - prior_gradients
+        return -LL_gradients - prior_gradients
 
     def objective_and_gradients(self, x):
         self._set_params_transformed(x)
-        obj_f =  -self.log_likelihood() - self.log_prior()
+        obj_f = -self.log_likelihood() - self.log_prior()
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        obj_grads = - LL_gradients - prior_gradients
+        obj_grads = -LL_gradients - prior_gradients
         return obj_f, obj_grads
 
     def optimize(self, optimizer=None, start=None, **kwargs):
@@ -288,13 +294,13 @@ class model(parameterised):
             start = self._get_params_transformed()
 
         optimizer = optimization.get_optimizer(optimizer)
-        opt = optimizer(start, model = self, **kwargs)
+        opt = optimizer(start, model=self, **kwargs)
         opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
         self.optimization_runs.append(opt)
 
         self._set_params_transformed(opt.x_opt)
 
-    def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
+    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
         sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
         sgd.run()
@@ -302,8 +308,8 @@ class model(parameterised):
 
     def Laplace_covariance(self):
         """return the covariance matric of a Laplace approximatino at the current (stationary) point"""
-        #TODO add in the prior contributions for MAP estimation
-        #TODO fix the hessian for tied, constrained and fixed components
+        # TODO add in the prior contributions for MAP estimation
+        # TODO fix the hessian for tied, constrained and fixed components
         if hasattr(self, 'log_likelihood_hessian'):
             A = -self.log_likelihood_hessian()
 
@@ -317,8 +323,8 @@ class model(parameterised):
             A = -h(x)
             self._set_params(x)
         # check for almost zero components on the diagonal which screw up the cholesky
-        aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
-        A[aa,aa] = 0.
+        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
+        A[aa, aa] = 0.
         return A
 
     def Laplace_evidence(self):
@@ -329,11 +335,11 @@ class model(parameterised):
             hld = np.sum(np.log(np.diag(jitchol(A)[0])))
         except:
             return np.nan
-        return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
+        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
 
     def __str__(self):
         s = parameterised.__str__(self).split('\n')
-        #add priors to the string
+        # add priors to the string
         strs = [str(p) if p is not None else '' for p in self.priors]
         width = np.array(max([len(p) for p in strs] + [5])) + 4
 
@@ -344,16 +350,16 @@ class model(parameterised):
             obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
         obj_funct += '\n\n'
         s[0] = obj_funct + s[0]
-        s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
-        s[1] += '-'*(width + 1)
+        s[0] += "|{h:^{col}}".format(h='Prior', col=width)
+        s[1] += '-' * (width + 1)
 
-        for p in range(2, len(strs)+2):
-            s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
+        for p in range(2, len(strs) + 2):
+            s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
 
         return '\n'.join(s)
 
 
-    def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
         If the verbose flag is passed, invividual components are tested (and printed)
@@ -373,27 +379,27 @@ class model(parameterised):
         x = self._get_params_transformed().copy()
 
         if not verbose:
-            #just check the global ratio
-            dx = step*np.sign(np.random.uniform(-1,1,x.size))
+            # just check the global ratio
+            dx = step * np.sign(np.random.uniform(-1, 1, x.size))
 
-            #evaulate around the point x
-            f1, g1 = self.objective_and_gradients(x+dx)
-            f2, g2 = self.objective_and_gradients(x-dx)
+            # evaulate around the point x
+            f1, g1 = self.objective_and_gradients(x + dx)
+            f2, g2 = self.objective_and_gradients(x - dx)
             gradient = self.objective_function_gradients(x)
 
-            numerical_gradient = (f1-f2)/(2*dx)
-            global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
+            numerical_gradient = (f1 - f2) / (2 * dx)
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
 
-            if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
+            if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
                 return True
             else:
                 return False
         else:
-            #check the gradient of each parameter individually, and do some pretty printing
+            # check the gradient of each parameter individually, and do some pretty printing
             try:
                 names = self._get_param_names_transformed()
             except NotImplementedError:
-                names = ['Variable %i'%i for i in range(len(x))]
+                names = ['Variable %i' % i for i in range(len(x))]
 
             # Prepare for pretty-printing
             header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
@@ -402,9 +408,9 @@ class model(parameterised):
             cols = [max_names]
             cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
             cols = np.array(cols) + 5
-            header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
+            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
             header_string = map(lambda x: '|'.join(x), [header_string])
-            separator = '-'*len(header_string[0])
+            separator = '-' * len(header_string[0])
             print '\n'.join([header_string[0], separator])
 
             if target_param is None:
@@ -420,11 +426,11 @@ class model(parameterised):
                 f2, g2 = self.objective_and_gradients(xx)
                 gradient = self.objective_function_gradients(x)[i]
 
-                numerical_gradient = (f1-f2)/(2*step)
-                ratio = (f1-f2)/(2*step*gradient)
-                difference = np.abs((f1-f2)/2/step - gradient)
+                numerical_gradient = (f1 - f2) / (2 * step)
+                ratio = (f1 - f2) / (2 * step * gradient)
+                difference = np.abs((f1 - f2) / 2 / step - gradient)
 
-                if (np.abs(ratio-1)<tolerance):
+                if (np.abs(ratio - 1) < tolerance):
                     formatted_name = "\033[92m {0} \033[0m".format(names[i])
                 else:
                     formatted_name = "\033[91m {0} \033[0m".format(names[i])
@@ -432,7 +438,7 @@ class model(parameterised):
                 d = '%.6f' % float(difference)
                 g = '%.6f' % gradient
                 ng = '%.6f' % float(numerical_gradient)
-                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
+                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                 print grad_string
 
     def input_sensitivity(self):
@@ -443,21 +449,21 @@ class model(parameterised):
         TODO: proper sensitivity analysis
         """
 
-        if not hasattr(self,'kern'):
+        if not hasattr(self, 'kern'):
             raise ValueError, "this model has no kernel"
 
-        k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
-        if (not len(k)==1) or (not k[0].ARD):
+        k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
+        if (not len(k) == 1) or (not k[0].ARD):
             raise ValueError, "cannot determine sensitivity for this kernel"
         k = k[0]
 
-        if k.name=='rbf':
+        if k.name == 'rbf':
             return k.lengthscale
-        elif k.name=='linear':
-            return 1./k.variances
+        elif k.name == 'linear':
+            return 1. / k.variances
 
 
-    def pseudo_EM(self,epsilon=.1,**kwargs):
+    def pseudo_EM(self, epsilon=.1, **kwargs):
         """
         TODO: Should this not bein the GP class?
         EM - like algorithm  for Expectation Propagation and Laplace approximation
@@ -471,7 +477,7 @@ class model(parameterised):
         :type optimzer: string TODO: valid strings?
 
         """
-        assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.exp(1000)
@@ -491,9 +497,9 @@ class model(parameterised):
             ll_change = new_ll - last_ll
 
             if ll_change < 0:
-                self.likelihood = last_approximation #restore previous likelihood approximation
-                self._set_params(last_params) #restore model parameters
-                print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
+                self.likelihood = last_approximation  # restore previous likelihood approximation
+                self._set_params(last_params)  # restore model parameters
+                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                 stop = True
             else:
                 self.optimize(**kwargs)
@@ -502,5 +508,5 @@ class model(parameterised):
                     stop = True
             iteration += 1
             if stop:
-                print "%s iterations." %iteration
+                print "%s iterations." % iteration
 
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 096c9cb9..4e0487b2 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -287,29 +287,6 @@ class MRD(model):
         else:
             return pylab.gcf()
 
-    def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
-        fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
-        if colors is None:
-            colors = pylab.gca()._get_lines.color_cycle
-            pylab.clf()
-        plots = []
-        for i in range(self.X.shape[1]):
-            if axes is None:
-                ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
-            ax.plot(self.X, c='k', alpha=.3)
-            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
-            ax.fill_between(numpy.arange(self.X.shape[0]),
-                            self.X.T[i] - 2 * numpy.sqrt(self.gref.X_variance.T[i]),
-                            self.X.T[i] + 2 * numpy.sqrt(self.gref.X_variance.T[i]),
-                            facecolor=plots[-1].get_color(),
-                            alpha=.3)
-            ax.legend(borderaxespad=0.)
-            if i < self.X.shape[1] - 1:
-                ax.set_xticklabels('')
-        pylab.draw()
-        fig.tight_layout(h_pad=.01)  # , rect=(0, 0, 1, .95))
-        return fig
-
     def plot_X(self, fig_num="MRD Predictions", axes=None):
         fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
         return fig

From 000cd5dbb82e92106af8cebd392aa1f9599e46d9 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 24 Apr 2013 11:16:33 +0100
Subject: [PATCH 30/54] new getters and setters for self.params, added m['var']
 getter and setter

---
 GPy/core/model.py         |  31 -----
 GPy/core/parameterised.py | 252 ++++++++++++++++++++++++--------------
 2 files changed, 160 insertions(+), 123 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index f4a79a28..3e771e9d 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -84,37 +84,6 @@ class model(parameterised):
         for w in which:
             self.priors[w] = what
 
-    def __getitem__(self, name):
-        return self.get(name)
-
-    def __setitem(self, name, val):
-        return self.set(name, val)
-
-    def get(self, name, return_names=False):
-        """
-        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
-        """
-        matches = self.grep_param_names(name)
-        if len(matches):
-            if return_names:
-                return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
-            else:
-                return self._get_params()[matches]
-        else:
-            raise AttributeError, "no parameter matches %s" % name
-
-    def set(self, name, val):
-        """
-        Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
-        """
-        matches = self.grep_param_names(name)
-        if len(matches):
-            x = self._get_params()
-            x[matches] = val
-            self._set_params(x)
-        else:
-            raise AttributeError, "no parameter matches %s" % name
-
     def get_gradient(self, name, return_names=False):
         """
         Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index b5d880a3..4d1d6992 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -8,24 +8,25 @@ import copy
 import cPickle
 import os
 from ..util.squashers import sigmoid
+import warnings
 
-def truncate_pad(string,width,align='m'):
+def truncate_pad(string, width, align='m'):
     """
     A helper function to make aligned strings for parameterised.__str__
     """
-    width=max(width,4)
-    if len(string)>width:
-        return string[:width-3]+'...'
-    elif len(string)==width:
+    width = max(width, 4)
+    if len(string) > width:
+        return string[:width - 3] + '...'
+    elif len(string) == width:
         return string
-    elif len(string)<width:
-        diff = width-len(string)
-        if align=='m':
-            return ' '*np.floor(diff/2.) + string + ' '*np.ceil(diff/2.)
-        elif align=='l':
-            return string + ' '*diff
-        elif align=='r':
-            return ' '*diff + string
+    elif len(string) < width:
+        diff = width - len(string)
+        if align == 'm':
+            return ' ' * np.floor(diff / 2.) + string + ' ' * np.ceil(diff / 2.)
+        elif align == 'l':
+            return string + ' ' * diff
+        elif align == 'r':
+            return ' ' * diff + string
         else:
             raise ValueError
 
@@ -37,15 +38,15 @@ class parameterised(object):
         self.tied_indices = []
         self.constrained_fixed_indices = []
         self.constrained_fixed_values = []
-        self.constrained_positive_indices = np.empty(shape=(0,),dtype=np.int64)
-        self.constrained_negative_indices = np.empty(shape=(0,),dtype=np.int64)
+        self.constrained_positive_indices = np.empty(shape=(0,), dtype=np.int64)
+        self.constrained_negative_indices = np.empty(shape=(0,), dtype=np.int64)
         self.constrained_bounded_indices = []
         self.constrained_bounded_uppers = []
         self.constrained_bounded_lowers = []
 
-    def pickle(self,filename,protocol=-1):
-        f = file(filename,'w')
-        cPickle.dump(self,f,protocol)
+    def pickle(self, filename, protocol= -1):
+        f = file(filename, 'w')
+        cPickle.dump(self, f, protocol)
         f.close()
 
     def copy(self):
@@ -55,18 +56,85 @@ class parameterised(object):
 
         return copy.deepcopy(self)
 
+    @property
+    def params(self):
+        """
+        Returns a **copy** of parameters in non transformed space
+        
+        :see_also: :py:func:`GPy.core.parameterised.params_transformed` 
+        """
+        return self._get_params()
+    @params.setter
+    def params(self, params):
+        self._set_params(params)
+
+    @property
+    def params_transformed(self):
+        """
+        Returns a **copy** of parameters in transformed space
+        
+        :see_also: :py:func:`GPy.core.parameterised.params` 
+        """
+        return self._get_params_transformed()
+    @params_transformed.setter
+    def params_transformed(self, params):
+        self._set_params_transformed(params)
+
+    _get_set_deprecation = """get and set methods wont be available at next minor release
+        in the next releases you will get and set with following syntax:
+        Assume m is a model class:
+        print m['var']          # > prints all parameters matching 'var'
+        m['var'] = 2.           # > sets all parameters matching 'var' to 2.
+        m['var'] = <array-like> # > sets parameters matching 'var' to <array-like>        
+        """
+    def get(self, name):
+        warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
+        return self[name]
+
+    def set(self, name, val):
+        warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
+        self[name] = val
+
+    def __getitem__(self, name, return_names=False):
+        """
+        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
+        """
+        matches = self.grep_param_names(name)
+        if len(matches):
+            if return_names:
+                return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
+            else:
+                return self._get_params()[matches]
+        else:
+            raise AttributeError, "no parameter matches %s" % name
+
+    def __setitem__(self, name, val):
+        """
+        Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
+        """
+        matches = self.grep_param_names(name)
+        if len(matches):
+            val = np.array(val)
+            assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
+            x = self.params
+            x[matches] = val
+            self.params = x
+#             import ipdb;ipdb.set_trace()
+#             self.params[matches] = val
+        else:
+            raise AttributeError, "no parameter matches %s" % name
 
     def tie_params(self, which):
         matches = self.grep_param_names(which)
         assert matches.size > 0, "need at least something to tie together"
         if len(self.tied_indices):
-            assert not np.any(matches[:,None]==np.hstack(self.tied_indices)), "Some indices are already tied!"
+            assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"
         self.tied_indices.append(matches)
-        #TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
-        if hasattr(self,'prior'):
+        # TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
+        if hasattr(self, 'prior'):
             pass
 
-        self._set_params_transformed(self._get_params_transformed())# sets tied parameters to single value
+        self._set_params_transformed(self._get_params_transformed())  # sets tied parameters to single value
 
     def untie_everything(self):
         """Unties all parameters by setting tied_indices to an empty list."""
@@ -74,7 +142,7 @@ class parameterised(object):
 
     def all_constrained_indices(self):
         """Return a np array of all the constrained indices"""
-        ret =  [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
+        ret = [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
         if len(ret):
             return np.hstack(ret)
         else:
@@ -117,44 +185,44 @@ class parameterised(object):
         which -- np.array(dtype=int), or regular expression object or string
         """
         matches = self.grep_param_names(which)
-        assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
+        assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
         self.constrained_positive_indices = np.hstack((self.constrained_positive_indices, matches))
-        #check to ensure constraint is in place
+        # check to ensure constraint is in place
         x = self._get_params()
-        for i,xx in enumerate(x):
-            if (xx<0) & (i in matches):
+        for i, xx in enumerate(x):
+            if (xx < 0) & (i in matches):
                 x[i] = -xx
         self._set_params(x)
 
 
-    def unconstrain(self,which):
+    def unconstrain(self, which):
         """Unconstrain matching parameters.  does not untie parameters"""
         matches = self.grep_param_names(which)
-        #positive/negative
-        self.constrained_positive_indices = np.delete(self.constrained_positive_indices,np.nonzero(np.sum(self.constrained_positive_indices[:,None]==matches[None,:],1))[0])
-        self.constrained_negative_indices = np.delete(self.constrained_negative_indices,np.nonzero(np.sum(self.constrained_negative_indices[:,None]==matches[None,:],1))[0])
-        #bounded
+        # positive/negative
+        self.constrained_positive_indices = np.delete(self.constrained_positive_indices, np.nonzero(np.sum(self.constrained_positive_indices[:, None] == matches[None, :], 1))[0])
+        self.constrained_negative_indices = np.delete(self.constrained_negative_indices, np.nonzero(np.sum(self.constrained_negative_indices[:, None] == matches[None, :], 1))[0])
+        # bounded
         if len(self.constrained_bounded_indices):
-            self.constrained_bounded_indices = [np.delete(a,np.nonzero(np.sum(a[:,None]==matches[None,:],1))[0]) for a in self.constrained_bounded_indices]
+            self.constrained_bounded_indices = [np.delete(a, np.nonzero(np.sum(a[:, None] == matches[None, :], 1))[0]) for a in self.constrained_bounded_indices]
             if np.hstack(self.constrained_bounded_indices).size:
-                self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u,l,i) for u,l,i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
+                self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u, l, i) for u, l, i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
                 self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = list(self.constrained_bounded_uppers), list(self.constrained_bounded_lowers), list(self.constrained_bounded_indices)
             else:
-                self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [],[],[]
-        #fixed:
-        for i,indices in enumerate(self.constrained_fixed_indices):
-            self.constrained_fixed_indices[i] = np.delete(indices,np.nonzero(np.sum(indices[:,None]==matches[None,:],1))[0])
-        #remove empty elements
-        tmp = [(i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
+                self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [], [], []
+        # fixed:
+        for i, indices in enumerate(self.constrained_fixed_indices):
+            self.constrained_fixed_indices[i] = np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0])
+        # remove empty elements
+        tmp = [(i, v) for i, v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
         if tmp:
             self.constrained_fixed_indices, self.constrained_fixed_values = zip(*tmp)
             self.constrained_fixed_indices, self.constrained_fixed_values = list(self.constrained_fixed_indices), list(self.constrained_fixed_values)
         else:
-            self.constrained_fixed_indices, self.constrained_fixed_values = [],[]
+            self.constrained_fixed_indices, self.constrained_fixed_values = [], []
 
 
 
-    def constrain_negative(self,which):
+    def constrain_negative(self, which):
         """
         Set negative constraints.
 
@@ -163,12 +231,12 @@ class parameterised(object):
 
         """
         matches = self.grep_param_names(which)
-        assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
+        assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
         self.constrained_negative_indices = np.hstack((self.constrained_negative_indices, matches))
-        #check to ensure constraint is in place
+        # check to ensure constraint is in place
         x = self._get_params()
-        for i,xx in enumerate(x):
-            if (xx>0.) and (i in matches):
+        for i, xx in enumerate(x):
+            if (xx > 0.) and (i in matches):
                 x[i] = -xx
         self._set_params(x)
 
@@ -184,20 +252,20 @@ class parameterised(object):
         lower -- (float) the lower bound on the constraint
         """
         matches = self.grep_param_names(which)
-        assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
+        assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
         assert lower < upper, "lower bound must be smaller than upper bound!"
         self.constrained_bounded_indices.append(matches)
         self.constrained_bounded_uppers.append(upper)
         self.constrained_bounded_lowers.append(lower)
-        #check to ensure constraint is in place
+        # check to ensure constraint is in place
         x = self._get_params()
-        for i,xx in enumerate(x):
-            if ((xx<=lower)|(xx>=upper)) & (i in matches):
-                x[i] = sigmoid(xx)*(upper-lower) + lower
+        for i, xx in enumerate(x):
+            if ((xx <= lower) | (xx >= upper)) & (i in matches):
+                x[i] = sigmoid(xx) * (upper - lower) + lower
         self._set_params(x)
 
 
-    def constrain_fixed(self, which, value = None):
+    def constrain_fixed(self, which, value=None):
         """
         Arguments
         ---------
@@ -211,14 +279,14 @@ class parameterised(object):
         To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
         """
         matches = self.grep_param_names(which)
-        assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
+        assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
         self.constrained_fixed_indices.append(matches)
         if value != None:
             self.constrained_fixed_values.append(value)
         else:
             self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]])
 
-        #self.constrained_fixed_values.append(value)
+        # self.constrained_fixed_values.append(value)
         self._set_params_transformed(self._get_params_transformed())
 
     def _get_params_transformed(self):
@@ -226,40 +294,40 @@ class parameterised(object):
         x = self._get_params()
         x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices])
         x[self.constrained_negative_indices] = np.log(-x[self.constrained_negative_indices])
-        [np.put(x,i,np.log(np.clip(x[i]-l,1e-10,np.inf)/np.clip(h-x[i],1e-10,np.inf))) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(x, i, np.log(np.clip(x[i] - l, 1e-10, np.inf) / np.clip(h - x[i], 1e-10, np.inf))) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
 
-        to_remove = self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]
+        to_remove = self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]
         if len(to_remove):
-            return np.delete(x,np.hstack(to_remove))
+            return np.delete(x, np.hstack(to_remove))
         else:
             return x
 
 
-    def _set_params_transformed(self,x):
+    def _set_params_transformed(self, x):
         """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
 
-        #work out how many places are fixed, and where they are. tricky logic!
+        # work out how many places are fixed, and where they are. tricky logic!
         Nfix_places = 0.
         if len(self.tied_indices):
-            Nfix_places += np.hstack(self.tied_indices).size-len(self.tied_indices)
+            Nfix_places += np.hstack(self.tied_indices).size - len(self.tied_indices)
         if len(self.constrained_fixed_indices):
             Nfix_places += np.hstack(self.constrained_fixed_indices).size
         if Nfix_places:
-            fix_places = np.hstack(self.constrained_fixed_indices+[t[1:] for t in self.tied_indices])
+            fix_places = np.hstack(self.constrained_fixed_indices + [t[1:] for t in self.tied_indices])
         else:
             fix_places = []
 
-        free_places = np.setdiff1d(np.arange(Nfix_places+x.size,dtype=np.int),fix_places)
+        free_places = np.setdiff1d(np.arange(Nfix_places + x.size, dtype=np.int), fix_places)
 
-        #put the models values in the vector xx
-        xx = np.zeros(Nfix_places+free_places.size,dtype=np.float64)
+        # put the models values in the vector xx
+        xx = np.zeros(Nfix_places + free_places.size, dtype=np.float64)
 
         xx[free_places] = x
-        [np.put(xx,i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
-        [np.put(xx,i,v) for i,v in [(t[1:],xx[t[0]]) for t in self.tied_indices] ]
+        [np.put(xx, i, v) for i, v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
+        [np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
         xx[self.constrained_positive_indices] = np.exp(xx[self.constrained_positive_indices])
         xx[self.constrained_negative_indices] = -np.exp(xx[self.constrained_negative_indices])
-        [np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(xx, i, low + sigmoid(xx[i]) * (high - low)) for i, low, high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
         self._set_params(xx)
 
     def _get_param_names_transformed(self):
@@ -267,33 +335,33 @@ class parameterised(object):
         Returns the parameter names as propagated after constraining,
         tying or fixing, i.e. a list of the same length as _get_params_transformed()
         """
-        n =  self._get_param_names()
+        n = self._get_param_names()
 
-        #remove/concatenate the tied parameter names
+        # remove/concatenate the tied parameter names
         if len(self.tied_indices):
             for t in self.tied_indices:
                 n[t[0]] = "<tie>".join([n[tt] for tt in t])
             remove = np.hstack([t[1:] for t in self.tied_indices])
         else:
-            remove=np.empty(shape=(0,),dtype=np.int)
+            remove = np.empty(shape=(0,), dtype=np.int)
 
-        #also remove the fixed params
+        # also remove the fixed params
         if len(self.constrained_fixed_indices):
             remove = np.hstack((remove, np.hstack(self.constrained_fixed_indices)))
 
-        #add markers to show that some variables are constrained
+        # add markers to show that some variables are constrained
         for i in self.constrained_positive_indices:
-            n[i] = n[i]+'(+ve)'
+            n[i] = n[i] + '(+ve)'
         for i in self.constrained_negative_indices:
-            n[i] = n[i]+'(-ve)'
-        for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
+            n[i] = n[i] + '(-ve)'
+        for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
             for ii in i:
-                n[ii] = n[ii]+'(bounded)'
+                n[ii] = n[ii] + '(bounded)'
 
-        n = [nn for i,nn in enumerate(n) if not i in remove]
+        n = [nn for i, nn in enumerate(n) if not i in remove]
         return n
 
-    def __str__(self,nw=30):
+    def __str__(self, nw=30):
         """
         Return a string describing the parameter names and their ties and constraints
         """
@@ -302,10 +370,10 @@ class parameterised(object):
 
         if not N:
             return "This object has no free parameters."
-        header = ['Name','Value','Constraints','Ties']
-        values = self._get_params() #map(str,self._get_params())
-        #sort out the constraints
-        constraints = ['']*len(names)
+        header = ['Name', 'Value', 'Constraints', 'Ties']
+        values = self._get_params()  # map(str,self._get_params())
+        # sort out the constraints
+        constraints = [''] * len(names)
         for i in self.constrained_positive_indices:
             constraints[i] = '(+ve)'
         for i in self.constrained_negative_indices:
@@ -313,14 +381,14 @@ class parameterised(object):
         for i in self.constrained_fixed_indices:
             for ii in i:
                 constraints[ii] = 'Fixed'
-        for i,u,l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
+        for i, u, l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
             for ii in i:
-                constraints[ii] = '('+str(l)+', '+str(u)+')'
-        #sort out the ties
-        ties = ['']*len(names)
-        for i,tie in enumerate(self.tied_indices):
+                constraints[ii] = '(' + str(l) + ', ' + str(u) + ')'
+        # sort out the ties
+        ties = [''] * len(names)
+        for i, tie in enumerate(self.tied_indices):
             for j in tie:
-                ties[j] = '('+str(i)+')'
+                ties[j] = '(' + str(i) + ')'
 
         values = ['%.4f' % float(v) for v in values]
         max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
@@ -330,10 +398,10 @@ class parameterised(object):
         cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
         columns = cols.sum()
 
-        header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
+        header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
         header_string = map(lambda x: '|'.join(x), [header_string])
-        separator = '-'*len(header_string[0])
-        param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n = names[i], v = values[i], c = constraints[i], t = ties[i], c0 = cols[0], c1 = cols[1], c2 = cols[2], c3 = cols[3]) for i in range(len(values))]
+        separator = '-' * len(header_string[0])
+        param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
 
 
-        return ('\n'.join([header_string[0], separator]+param_string)) + '\n'
+        return ('\n'.join([header_string[0], separator] + param_string)) + '\n'

From f8c3cd669da9a7fe127245b4e606296192c208b0 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 24 Apr 2013 11:17:13 +0100
Subject: [PATCH 31/54] rewritten dim_reduction demo to match new style of
 getters and setters

---
 GPy/examples/dimensionality_reduction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index e5f50237..be60b5f4 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -207,7 +207,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
 #     cstr = 'X_variance'
 #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
 
-    m.set('X_var', np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01)
+    m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
 
 #     cstr = "iip"
 #     m.unconstrain(cstr); m.constrain_fixed(cstr)

From 992a35b614848cd944a8f0f408cdbfd8ef412ed5 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 24 Apr 2013 16:38:40 +0100
Subject: [PATCH 32/54] baysian gplvm and example changes

---
 GPy/core/model.py            | 169 +++++++++++++++++------------------
 GPy/models/Bayesian_GPLVM.py |  19 +++-
 2 files changed, 100 insertions(+), 88 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 3e771e9d..f3542ce8 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -7,13 +7,12 @@ from scipy import optimize
 import sys, pdb
 import multiprocessing as mp
 from GPy.util.misc import opt_wrapper
-# import numdifftools as ndt
+#import numdifftools as ndt
 from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
-import re
 
 class model(parameterised):
     def __init__(self):
@@ -25,14 +24,14 @@ class model(parameterised):
         self.preferred_optimizer = 'tnc'
     def _get_params(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
-    def _set_params(self, x):
+    def _set_params(self,x):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def _log_likelihood_gradients(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
 
-    def set_prior(self, which, what):
+    def set_prior(self,which,what):
         """
         Sets priors on the model parameters.
 
@@ -53,59 +52,59 @@ class model(parameterised):
 
         which = self.grep_param_names(which)
 
-        # check tied situation
-        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
+        #check tied situation
+        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
         if len(tie_partial_matches):
             raise ValueError, "cannot place prior across partial ties"
-        tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
-        if len(tie_matches) > 1:
+        tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
+        if len(tie_matches)>1:
             raise ValueError, "cannot place prior across multiple ties"
-        elif len(tie_matches) == 1:
-            which = which[:1]  # just place a prior object on the first parameter
+        elif len(tie_matches)==1:
+            which = which[:1]# just place a prior object on the first parameter
 
 
-        # check constraints are okay
+        #check constraints are okay
         if isinstance(what, (priors.gamma, priors.log_Gaussian)):
-            assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
-            assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
+            assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
+            assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
             unconst = np.setdiff1d(which, self.constrained_positive_indices)
             if len(unconst):
                 print "Warning: constraining parameters to be positive:"
-                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
+                print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
                 print '\n'
                 self.constrain_positive(unconst)
-        elif isinstance(what, priors.Gaussian):
-            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
+        elif isinstance(what,priors.Gaussian):
+            assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
         else:
             raise ValueError, "prior not recognised"
 
 
-        # store the prior in a local list
+        #store the prior in a local list
         for w in which:
             self.priors[w] = what
 
-    def get_gradient(self, name, return_names=False):
+    def get_gradient(self,name, return_names=False):
         """
         Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
             if return_names:
-                return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
+                return self._log_likelihood_gradients()[matches],  np.asarray(self._get_param_names())[matches].tolist()
             else:
                 return self._log_likelihood_gradients()[matches]
         else:
-            raise AttributeError, "no parameter matches %s" % name
+            raise AttributeError, "no parameter matches %s"%name
 
     def log_prior(self):
         """evaluate the prior"""
-        return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
+        return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
 
     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         x = self._get_params()
         ret = np.zeros(x.size)
-        [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
+        [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
         return ret
 
     def _transform_gradients(self, g):
@@ -114,13 +113,13 @@ class model(parameterised):
         """
 
         x = self._get_params()
-        g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
-        g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
-        [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
-        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
+        g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
+        g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
+        [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
         if len(self.tied_indices) or len(self.constrained_fixed_indices):
-            to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
-            return np.delete(g, to_remove)
+            to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
+            return np.delete(g,to_remove)
         else:
             return g
 
@@ -130,15 +129,15 @@ class model(parameterised):
         Randomize the model.
         Make this draw from the prior if one exists, else draw from N(0,1)
         """
-        # first take care of all parameters (from N(0,1))
+        #first take care of all parameters (from N(0,1))
         x = self._get_params_transformed()
         x = np.random.randn(x.size)
         self._set_params_transformed(x)
-        # now draw from prior where possible
+        #now draw from prior where possible
         x = self._get_params()
-        [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
+        [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
         self._set_params(x)
-        self._set_params_transformed(self._get_params_transformed())  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
 
 
     def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@@ -172,10 +171,10 @@ class model(parameterised):
                 pool = mp.Pool(processes=num_processes)
                 for i in range(Nrestarts):
                     self.randomize()
-                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
+                    job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
                     jobs.append(job)
 
-                pool.close()  # signal that no more data coming in
+                pool.close() # signal that no more data coming in
                 pool.join()  # wait for all the tasks to complete
             except KeyboardInterrupt:
                 print "Ctrl+c received, terminating and joining pool."
@@ -191,10 +190,10 @@ class model(parameterised):
                     self.optimization_runs.append(jobs[i].get())
 
                 if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
+                    print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
             except Exception as e:
                 if robust:
-                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
+                    print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
                 else:
                     raise e
 
@@ -204,22 +203,22 @@ class model(parameterised):
         else:
             self._set_params_transformed(initial_parameters)
 
-    def ensure_default_constraints(self, warn=False):
+    def ensure_default_constraints(self,warn=False):
         """
         Ensure that any variables which should clearly be positive have been constrained somehow.
         """
-        positive_strings = ['variance', 'lengthscale', 'precision']
+        positive_strings = ['variance','lengthscale', 'precision']
         param_names = self._get_param_names()
         currently_constrained = self.all_constrained_indices()
         to_make_positive = []
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(re.escape(param_names[i]))
+                    to_make_positive.append(param_names[i])
                     if warn:
-                        print "Warning! constraining %s postive" % name
+                        print "Warning! constraining %s postive"%name
         if len(to_make_positive):
-            self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
+            self.constrain_positive('('+'|'.join(to_make_positive)+')')
 
 
 
@@ -237,14 +236,14 @@ class model(parameterised):
         self._set_params_transformed(x)
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        return -LL_gradients - prior_gradients
+        return - LL_gradients - prior_gradients
 
     def objective_and_gradients(self, x):
         self._set_params_transformed(x)
-        obj_f = -self.log_likelihood() - self.log_prior()
+        obj_f =  -self.log_likelihood() - self.log_prior()
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        obj_grads = -LL_gradients - prior_gradients
+        obj_grads = - LL_gradients - prior_gradients
         return obj_f, obj_grads
 
     def optimize(self, optimizer=None, start=None, **kwargs):
@@ -270,7 +269,7 @@ class model(parameterised):
 
         self._set_params_transformed(opt.x_opt)
 
-    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
+    def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
         sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
         sgd.run()
@@ -278,8 +277,8 @@ class model(parameterised):
 
     def Laplace_covariance(self):
         """return the covariance matric of a Laplace approximatino at the current (stationary) point"""
-        # TODO add in the prior contributions for MAP estimation
-        # TODO fix the hessian for tied, constrained and fixed components
+        #TODO add in the prior contributions for MAP estimation
+        #TODO fix the hessian for tied, constrained and fixed components
         if hasattr(self, 'log_likelihood_hessian'):
             A = -self.log_likelihood_hessian()
 
@@ -293,8 +292,8 @@ class model(parameterised):
             A = -h(x)
             self._set_params(x)
         # check for almost zero components on the diagonal which screw up the cholesky
-        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
-        A[aa, aa] = 0.
+        aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
+        A[aa,aa] = 0.
         return A
 
     def Laplace_evidence(self):
@@ -305,11 +304,11 @@ class model(parameterised):
             hld = np.sum(np.log(np.diag(jitchol(A)[0])))
         except:
             return np.nan
-        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
+        return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
 
     def __str__(self):
         s = parameterised.__str__(self).split('\n')
-        # add priors to the string
+        #add priors to the string
         strs = [str(p) if p is not None else '' for p in self.priors]
         width = np.array(max([len(p) for p in strs] + [5])) + 4
 
@@ -320,16 +319,16 @@ class model(parameterised):
             obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
         obj_funct += '\n\n'
         s[0] = obj_funct + s[0]
-        s[0] += "|{h:^{col}}".format(h='Prior', col=width)
-        s[1] += '-' * (width + 1)
+        s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
+        s[1] += '-'*(width + 1)
 
-        for p in range(2, len(strs) + 2):
-            s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
+        for p in range(2, len(strs)+2):
+            s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
 
         return '\n'.join(s)
 
 
-    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
+    def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
         If the verbose flag is passed, invividual components are tested (and printed)
@@ -349,27 +348,27 @@ class model(parameterised):
         x = self._get_params_transformed().copy()
 
         if not verbose:
-            # just check the global ratio
-            dx = step * np.sign(np.random.uniform(-1, 1, x.size))
+            #just check the global ratio
+            dx = step*np.sign(np.random.uniform(-1,1,x.size))
 
-            # evaulate around the point x
-            f1, g1 = self.objective_and_gradients(x + dx)
-            f2, g2 = self.objective_and_gradients(x - dx)
+            #evaulate around the point x
+            f1, g1 = self.objective_and_gradients(x+dx)
+            f2, g2 = self.objective_and_gradients(x-dx)
             gradient = self.objective_function_gradients(x)
 
-            numerical_gradient = (f1 - f2) / (2 * dx)
-            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
+            numerical_gradient = (f1-f2)/(2*dx)
+            global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
 
-            if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
+            if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
                 return True
             else:
                 return False
         else:
-            # check the gradient of each parameter individually, and do some pretty printing
+            #check the gradient of each parameter individually, and do some pretty printing
             try:
                 names = self._get_param_names_transformed()
             except NotImplementedError:
-                names = ['Variable %i' % i for i in range(len(x))]
+                names = ['Variable %i'%i for i in range(len(x))]
 
             # Prepare for pretty-printing
             header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
@@ -378,9 +377,9 @@ class model(parameterised):
             cols = [max_names]
             cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
             cols = np.array(cols) + 5
-            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
+            header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
             header_string = map(lambda x: '|'.join(x), [header_string])
-            separator = '-' * len(header_string[0])
+            separator = '-'*len(header_string[0])
             print '\n'.join([header_string[0], separator])
 
             if target_param is None:
@@ -396,11 +395,11 @@ class model(parameterised):
                 f2, g2 = self.objective_and_gradients(xx)
                 gradient = self.objective_function_gradients(x)[i]
 
-                numerical_gradient = (f1 - f2) / (2 * step)
-                ratio = (f1 - f2) / (2 * step * gradient)
-                difference = np.abs((f1 - f2) / 2 / step - gradient)
+                numerical_gradient = (f1-f2)/(2*step)
+                ratio = (f1-f2)/(2*step*gradient)
+                difference = np.abs((f1-f2)/2/step - gradient)
 
-                if (np.abs(ratio - 1) < tolerance):
+                if (np.abs(ratio-1)<tolerance):
                     formatted_name = "\033[92m {0} \033[0m".format(names[i])
                 else:
                     formatted_name = "\033[91m {0} \033[0m".format(names[i])
@@ -408,7 +407,7 @@ class model(parameterised):
                 d = '%.6f' % float(difference)
                 g = '%.6f' % gradient
                 ng = '%.6f' % float(numerical_gradient)
-                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
+                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
                 print grad_string
 
     def input_sensitivity(self):
@@ -419,21 +418,21 @@ class model(parameterised):
         TODO: proper sensitivity analysis
         """
 
-        if not hasattr(self, 'kern'):
+        if not hasattr(self,'kern'):
             raise ValueError, "this model has no kernel"
 
-        k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
-        if (not len(k) == 1) or (not k[0].ARD):
+        k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
+        if (not len(k)==1) or (not k[0].ARD):
             raise ValueError, "cannot determine sensitivity for this kernel"
         k = k[0]
 
-        if k.name == 'rbf':
+        if k.name=='rbf':
             return k.lengthscale
-        elif k.name == 'linear':
-            return 1. / k.variances
+        elif k.name=='linear':
+            return 1./k.variances
 
 
-    def pseudo_EM(self, epsilon=.1, **kwargs):
+    def pseudo_EM(self,epsilon=.1,**kwargs):
         """
         TODO: Should this not bein the GP class?
         EM - like algorithm  for Expectation Propagation and Laplace approximation
@@ -447,7 +446,7 @@ class model(parameterised):
         :type optimzer: string TODO: valid strings?
 
         """
-        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.exp(1000)
@@ -467,9 +466,9 @@ class model(parameterised):
             ll_change = new_ll - last_ll
 
             if ll_change < 0:
-                self.likelihood = last_approximation  # restore previous likelihood approximation
-                self._set_params(last_params)  # restore model parameters
-                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
+                self.likelihood = last_approximation #restore previous likelihood approximation
+                self._set_params(last_params) #restore model parameters
+                print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
                 stop = True
             else:
                 self.optimize(**kwargs)
@@ -478,5 +477,5 @@ class model(parameterised):
                     stop = True
             iteration += 1
             if stop:
-                print "%s iterations." % iteration
+                print "%s iterations." %iteration
 
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 0646b25f..30488dc9 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -161,13 +161,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
         return ax
 
-    def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
+    def plot_X_1d(self, fig=None, axes=None, fig_num="MRD X 1d", colors=None):
+        """
+        Plot latent space X in 1D:
+        
+            -if fig is given, create Q subplots in fig and plot in these
+            -if axes is given plot Q 1D latent space plots of X into each `axis`
+            -if neither fig nor axes is given create a figure with fig_num and plot in there
+            
+        colors:
+            
+            colors of different latent space dimensions Q
+        """
         import pylab
-
-        fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
+        if fig is None and axes is None:
+            fig = pylab.figure(num=fig_num, figsize=(8, min(12, (2 * self.X.shape[1]))))
         if colors is None:
             colors = pylab.gca()._get_lines.color_cycle
             pylab.clf()
+        else:
+            colors = iter(colors)
         plots = []
         for i in range(self.X.shape[1]):
             if axes is None:

From 8191a5d24f88358dc2d313879c56bb018210547d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 25 Apr 2013 12:51:51 +0100
Subject: [PATCH 33/54] one more instance of dpotrs instead of dot in sparse GP

---
 GPy/models/sparse_GP.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index a6bd6b74..3e148b77 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -148,7 +148,9 @@ class sparse_GP(GP):
         #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
         self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
-        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
+        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
+        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) +tmp # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From e0f94d6d9c605132438d524690866cc094dc8921 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Thu, 25 Apr 2013 14:57:23 +0100
Subject: [PATCH 34/54] BGPLVM updates and debug helper

---
 GPy/examples/dimensionality_reduction.py |  35 +++-
 GPy/models/Bayesian_GPLVM.py             | 211 +++++++++++++++++++----
 GPy/util/datasets.py                     |  49 +++---
 3 files changed, 234 insertions(+), 61 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index be60b5f4..15fe9265 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -7,6 +7,7 @@ from matplotlib import pyplot as plt, pyplot
 
 import GPy
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
+from GPy.util.datasets import simulation_BGPLVM
 
 default_seed = np.random.seed(123344)
 
@@ -129,9 +130,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
     Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
 
-    Y1 += .3 * np.random.randn(*Y1.shape)
-    Y2 += .3 * np.random.randn(*Y2.shape)
-    Y3 += .3 * np.random.randn(*Y3.shape)
+    Y1 += .2 * np.random.randn(*Y1.shape)
+    Y2 += .2 * np.random.randn(*Y2.shape)
+    Y3 += .2 * np.random.randn(*Y3.shape)
 
     Y1 -= Y1.mean(0)
     Y2 -= Y2.mean(0)
@@ -162,11 +163,31 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
 
     return slist, [S1, S2, S3], Ylist
 
+def bgplvm_simulation_matlab_compare():
+    sim_data = simulation_BGPLVM()
+    Y = sim_data['Y']
+    S = sim_data['S']
+    mu = sim_data['mu']
+    M, [_, Q] = 20, mu.shape
+
+    from GPy.models import mrd
+    from GPy import kern
+    reload(mrd); reload(kern)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
+                       # X=mu,
+                       # X_variance=S,
+                       _debug=True)
+    m.ensure_default_constraints()
+    m['noise'] = .01  # Y.var() / 100.
+    m['linear_variance'] = .01
+    return m
+
 def bgplvm_simulation(burnin='scg', plot_sim=False,
                       max_burnin=100, true_X=False,
                       do_opt=True,
                       max_f_eval=1000):
-    D1, D2, D3, N, M, Q = 10, 8, 8, 50, 30, 5
+    D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
@@ -176,11 +197,13 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
 
     Y = Ylist[0]
 
-    k = kern.linear(Q, ARD=True) + kern.white(Q, .00001)  # + kern.bias(Q)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))  # + kern.bias(Q)
 #     k = kern.white(Q, .00001) + kern.bias(Q)
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
     # m.set('noise',)
     m.ensure_default_constraints()
+    m['noise'] = Y.var() / 100.
+    m['linear_variance'] = .001
 #     m.auto_scale_factor = True
 #     m.scale_factor = 1.
 
@@ -207,7 +230,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
 #     cstr = 'X_variance'
 #     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
 
-    m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
+    # m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
 
 #     cstr = "iip"
 #     m.unconstrain(cstr); m.constrain_fixed(cstr)
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 30488dc9..59b6bb15 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -11,6 +11,8 @@ from ..likelihoods import Gaussian
 from .. import kern
 from numpy.linalg.linalg import LinAlgError
 import itertools
+from matplotlib.colors import colorConverter
+from matplotlib.figure import SubplotParams
 
 class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
@@ -31,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             X = self.initialise_latent(init, Q, Y)
 
         if X_variance is None:
-            X_variance = np.ones_like(X) * 0.5
+            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
 
         if Z is None:
             Z = np.random.permutation(X.copy())[:M]
@@ -45,10 +47,13 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         self._debug = _debug
 
         if self._debug:
+            self.fcall = 0
             self._count = itertools.count()
             self._savedklll = []
             self._savedparams = []
-
+            self._savedgradients = []
+            self._savederrors = []
+            self._savedpsiKmm = []
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
 
     @property
@@ -88,6 +93,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             self.oldps = x
         except (LinAlgError, FloatingPointError, ZeroDivisionError):
             print "\rWARNING: Caught LinAlgError, continueing without setting            "
+            if self._debug:
+                self._savederrors.append(self.fcall)
 #             if save_count > 10:
 #                 raise
 #             self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
@@ -121,12 +128,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 #             kl = 5E4 + np.random.randn()
 
         if self._debug:
-            f_call = self._count.next()
-            self._savedklll.append([f_call, ll, kl])
-            if f_call % 1 == 0:
-                self._savedparams.append([f_call, self._get_params()])
-
-
+            self.f_call = self._count.next()
+            if self.f_call % 1 == 0:
+                self._savedklll.append([self.f_call, ll, kl])
+                self._savedparams.append([self.f_call, self._get_params()])
+                self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
+                self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
         # print "\nkl:", kl, "ll:", ll
         return ll - kl
 
@@ -212,16 +219,27 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         theta = x[start:]
         return X, X_v, Z, theta
 
+
+    def _debug_get_axis(self, figs):
+        if figs[-1].axes:
+            ax1 = figs[-1].axes[0]
+            ax1.cla()
+        else:
+            ax1 = figs[-1].add_subplot(111)
+        return ax1
+
     def _debug_plot(self):
         assert self._debug, "must enable _debug, to debug-plot"
         import pylab
-        from mpl_toolkits.mplot3d import Axes3D
-        fig = pylab.figure('BGPLVM DEBUG', figsize=(12, 10))
-        fig.clf()
+#         from mpl_toolkits.mplot3d import Axes3D
+        figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4),
+                             tight_layout=True)]
+#         fig.clf()
 
         # log like
-        splotshape = (6, 4)
-        ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
+#         splotshape = (6, 4)
+#         ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
+        ax1 = self._debug_get_axis(figs)
         ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
                  ha='center', va='center')
         kllls = np.array(self._savedklll)
@@ -229,52 +247,141 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
         L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5)  # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
 
-        drawn = dict(self._savedparams)
-        iters = np.array(drawn.keys())
+        param_dict = dict(self._savedparams)
+        gradient_dict = dict(self._savedgradients)
+        kmm_dict = dict(self._savedpsiKmm)
+        iters = np.array(param_dict.keys())
         self.showing = 0
 
-        ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
+#         ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
+        figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
+        ax2 = self._debug_get_axis(figs)
         ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
                  ha='center', va='center')
-        ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
+        figs[-1].canvas.draw()
+        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+#         ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
+        figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
+        ax3 = self._debug_get_axis(figs)
         ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
                  ha='center', va='center')
-        ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
+        figs[-1].canvas.draw()
+        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+#         ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
+        figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
+        ax4 = self._debug_get_axis(figs)
         ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
                  ha='center', va='center')
-        ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
+        figs[-1].canvas.draw()
+        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+#         ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
+        figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
+        ax5 = self._debug_get_axis(figs)
         ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
                  ha='center', va='center')
+        figs[-1].canvas.draw()
+        figs[-1].tight_layout(rect=(0, 0, 1, .9))
+        figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
+        fig = figs[-1]
+        ax6 = fig.add_subplot(121)
+        ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
+                 ha='center', va='center')
+        ax7 = fig.add_subplot(122)
+        ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
+                 ha='center', va='center')
 
-        X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
+        X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
+        Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
+#         Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
+
+        quiver_units = 'xy'
+        quiver_scale = 1
+        quiver_scale_units = 'xy'
         Xlatentplts = ax2.plot(X, ls="-", marker="x")
+        colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
+        Ulatent = np.zeros_like(X)
+        xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
+        Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
+                                  units=quiver_units, scale_units=quiver_scale_units,
+                                  scale=quiver_scale)
+
         Slatentplts = ax3.plot(S, ls="-", marker="x")
+        Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
+                                  units=quiver_units, scale_units=quiver_scale_units,
+                                  scale=quiver_scale)
+
+        xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
+        UZ = np.zeros_like(Z)
         Zplts = ax4.plot(Z, ls="-", marker="x")
-        thetaplts = ax5.bar(np.arange(len(theta)) - .4, theta)
+        Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
+                                  units=quiver_units, scale_units=quiver_scale_units,
+                                  scale=quiver_scale)
+
+        xtheta = np.arange(len(theta))
+        Utheta = np.zeros_like(theta)
+        thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
+        thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
+                                  units=quiver_units, scale_units=quiver_scale_units,
+                                  scale=quiver_scale,
+                                  edgecolors=('k',), linewidths=[1])
+        pylab.setp(thetaplts, zorder=0)
+        pylab.setp(thetagrads, zorder=10)
         ax5.set_xticks(np.arange(len(theta)))
         ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
 
-        Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
-                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
+        imkmm = ax6.imshow(kmm_dict[self.showing][0])
+        from mpl_toolkits.axes_grid1 import make_axes_locatable
+        divider = make_axes_locatable(ax6)
+        caxkmm = divider.append_axes("right", "5%", pad="1%")
+        cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
+
+        imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
+        divider = make_axes_locatable(ax7)
+        caxkmmdl = divider.append_axes("right", "5%", pad="1%")
+        cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
+
+#         Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+#                    loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
+#                    borderaxespad=0, mode="expand")
+        ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   borderaxespad=0, mode="expand")
+        ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   borderaxespad=0, mode="expand")
+        ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
+                   borderaxespad=0, mode="expand")
+        ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
+                   loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
                    borderaxespad=0, mode="expand")
         Lleg = ax1.legend()
         Lleg.draggable()
-        ax1.add_artist(Qleg)
+#         ax1.add_artist(Qleg)
 
         indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
         indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
         indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
+        for err in self._savederrors:
+            ax1.plot(kllls[err, 0], kllls[err, 2], "*", c=KL.get_color())
+            ax1.plot(kllls[err, 0], kllls[err, 1] - kllls[err, 2], "*", c=LL.get_color())
+            ax1.plot(kllls[err, 0], kllls[err, 1], "*", c=L.get_color())
 
-        try:
-            pylab.draw()
-            pylab.tight_layout(box=(0, .1, 1, .9))
-        except:
-            pass
+#         try:
+#             for f in figs:
+#                 f.canvas.draw()
+#                 f.tight_layout(box=(0, .15, 1, .9))
+# #             pylab.draw()
+# #             pylab.tight_layout(box=(0, .1, 1, .9))
+#         except:
+#             pass
 
         # parameter changes
         # ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
-        def onclick(event):
-            if event.inaxes is ax1 and event.button == 1:
+        button_options = [0, 0]  # [0]: clicked -- [1]: dragged
+
+        def update_plots(event):
+            if button_options[0] and not button_options[1]:
 #               event.button, event.x, event.y, event.xdata, event.ydata)
                 tmp = np.abs(iters - event.xdata)
                 closest_hit = iters[tmp == tmp.min()][0]
@@ -287,15 +394,37 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                     indicatorKL.set_data(self.showing, kllls[self.showing, 2])
                     indicatorL.set_data(self.showing, kllls[self.showing, 1])
 
-                    X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
+                    X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
+                    Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
+#                     Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
+
                     for i, Xlatent in enumerate(Xlatentplts):
                         Xlatent.set_ydata(X[:, i])
+                    Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
+                    Xlatentgrads.set_UVC(Ulatent, Xg)
+
                     for i, Slatent in enumerate(Slatentplts):
                         Slatent.set_ydata(S[:, i])
+                    Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
+                    Slatentgrads.set_UVC(Ulatent, Sg)
+
                     for i, Zlatent in enumerate(Zplts):
                         Zlatent.set_ydata(Z[:, i])
+                    Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
+                    Zgrads.set_UVC(UZ, Zg)
+
                     for p, t in zip(thetaplts, theta):
                         p.set_height(t)
+                    thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
+                    thetagrads.set_UVC(Utheta, thetag)
+
+                    imkmm.set_data(kmm_dict[self.showing][0])
+                    imkmm.autoscale()
+                    cbarkmm.update_normal(imkmm)
+
+                    imkmmdl.set_data(kmm_dict[self.showing][1])
+                    imkmmdl.autoscale()
+                    cbarkmmdl.update_normal(imkmmdl)
 
                     ax2.relim()
                     ax3.relim()
@@ -305,8 +434,20 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                     ax3.autoscale()
                     ax4.autoscale()
                     ax5.autoscale()
-                    fig.canvas.draw()
 
-        cid = fig.canvas.mpl_connect('button_press_event', onclick)
+                    [fig.canvas.draw() for fig in figs]
+            button_options[0] = 0
+            button_options[1] = 0
 
-        return ax1, ax2, ax3, ax4, ax5
+        def onclick(event):
+            if event.inaxes is ax1 and event.button == 1:
+                button_options[0] = 1
+        def motion(event):
+            if button_options[0]:
+                button_options[1] = 1
+
+        cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
+        cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
+        cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
+
+        return ax1, ax2, ax3, ax4, ax5, ax6, ax7
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 932690ec..0e0929c7 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -4,14 +4,14 @@ import numpy as np
 import GPy
 import scipy.sparse
 import scipy.io
-data_path = os.path.join(os.path.dirname(__file__),'datasets')
-default_seed =10000
+data_path = os.path.join(os.path.dirname(__file__), 'datasets')
+default_seed = 10000
 
 # Some general utilities.
 def sample_class(f):
-    p = 1./(1.+np.exp(-f))
-    c = np.random.binomial(1,p)
-    c = np.where(c,1,-1)
+    p = 1. / (1. + np.exp(-f))
+    c = np.random.binomial(1, p)
+    c = np.where(c, 1, -1)
     return c
 
 def della_gatta_TRP63_gene_expression(gene_number=None):
@@ -25,6 +25,15 @@ def della_gatta_TRP63_gene_expression(gene_number=None):
             Y = Y[:, None]
     return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."}
 
+def simulation_BGPLVM():
+    mat_data = scipy.io.loadmat(os.path.join(data_path, 'BGPLVMSimulation.mat'))
+    Y = np.array(mat_data['Y'], dtype=float)
+    S = np.array(mat_data['initS'], dtype=float)
+    mu = np.array(mat_data['initMu'], dtype=float)
+    return {'Y': Y, 'S': S,
+            'mu' : mu,
+            'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
+
 
 # The data sets
 def oil():
@@ -32,7 +41,7 @@ def oil():
     X = np.fromfile(fid, sep='\t').reshape((-1, 12))
     fid.close()
     fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt'))
-    Y = np.fromfile(fid, sep='\t').reshape((-1, 3))*2.-1.
+    Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1.
     fid.close()
     return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."}
 
@@ -74,9 +83,9 @@ def silhouette():
     inMean = np.mean(mat_data['Y'])
     inScales = np.sqrt(np.var(mat_data['Y']))
     X = mat_data['Y'] - inMean
-    X = X/inScales
+    X = X / inScales
     Xtest = mat_data['Y_test'] - inMean
-    Xtest = Xtest/inScales
+    Xtest = Xtest / inScales
     Y = mat_data['Z']
     Ytest = mat_data['Z_test']
     return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."}
@@ -102,13 +111,13 @@ def toy_rbf_1d(seed=default_seed):
     np.random.seed(seed=seed)
     numIn = 1
     N = 500
-    X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn))
+    X = np.random.uniform(low= -1.0, high=1.0, size=(N, numIn))
     X.sort(axis=0)
     rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
     white = GPy.kern.white(numIn, variance=1e-2)
     kernel = rbf + white
     K = kernel.K(X)
-    y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N,1))
+    y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N, 1))
     return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
 
 def toy_rbf_1d_50(seed=default_seed):
@@ -124,15 +133,15 @@ def toy_rbf_1d_50(seed=default_seed):
 
 def toy_linear_1d_classification(seed=default_seed):
     np.random.seed(seed=seed)
-    x1 = np.random.normal(-3,5,20)
-    x2 = np.random.normal(3,5,20)
-    X = (np.r_[x1,x2])[:,None]
+    x1 = np.random.normal(-3, 5, 20)
+    x2 = np.random.normal(3, 5, 20)
+    X = (np.r_[x1, x2])[:, None]
     return {'X': X, 'Y':  sample_class(2.*X), 'F': 2.*X}
 
 def rogers_girolami_olympics():
     olympic_data = scipy.io.loadmat(os.path.join(data_path, 'olympics.mat'))['male100']
     X = olympic_data[:, 0][:, None]
-    Y= olympic_data[:, 1][:, None]
+    Y = olympic_data[:, 1][:, None]
     return {'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}
 # def movielens_small(partNo=1,seed=default_seed):
 #     np.random.seed(seed=seed)
@@ -169,7 +178,7 @@ def rogers_girolami_olympics():
 
 
 
-def crescent_data(num_data=200,seed=default_seed):
+def crescent_data(num_data=200, seed=default_seed):
     """Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
     :param num_data_part: number of data to be sampled (default is 200).
     :type num_data: int
@@ -178,7 +187,7 @@ def crescent_data(num_data=200,seed=default_seed):
     np.random.seed(seed=seed)
     sqrt2 = np.sqrt(2)
     # Rotation matrix
-    R = np.array([[sqrt2/2, -sqrt2/2], [sqrt2/2, sqrt2/2]])
+    R = np.array([[sqrt2 / 2, -sqrt2 / 2], [sqrt2 / 2, sqrt2 / 2]])
     # Scaling matrices
     scales = []
     scales.append(np.array([[3, 0], [0, 1]]))
@@ -195,9 +204,9 @@ def crescent_data(num_data=200,seed=default_seed):
     num_data_part = []
     num_data_total = 0
     for i in range(0, 4):
-        num_data_part.append(round(((i+1)*num_data)/4.))
+        num_data_part.append(round(((i + 1) * num_data) / 4.))
         num_data_part[i] -= num_data_total
-        #print num_data_part[i]
+        # print num_data_part[i]
         part = np.random.normal(size=(num_data_part[i], 2))
         part = np.dot(np.dot(part, scales[i]), R) + means[i]
         Xparts.append(part)
@@ -205,7 +214,7 @@ def crescent_data(num_data=200,seed=default_seed):
     X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
 
 
-    Y = np.vstack((np.ones((num_data_part[0]+num_data_part[1], 1)), -np.ones((num_data_part[2]+num_data_part[3], 1))))
+    Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
     return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
 
 
@@ -214,6 +223,6 @@ def creep_data():
     y = all_data[:, 1:2].copy()
     features = [0]
     features.extend(range(2, 31))
-    X = all_data[:,features].copy()
+    X = all_data[:, features].copy()
     return {'X': X, 'y' : y}
 

From de3101fef535e052654c07e5228f30dca200dc0f Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Thu, 25 Apr 2013 15:02:22 +0100
Subject: [PATCH 35/54] old amatplotlib

---
 GPy/models/Bayesian_GPLVM.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 59b6bb15..dc5dc0d4 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -232,8 +232,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         assert self._debug, "must enable _debug, to debug-plot"
         import pylab
 #         from mpl_toolkits.mplot3d import Axes3D
-        figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4),
-                             tight_layout=True)]
+        figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))]
 #         fig.clf()
 
         # log like

From 4bd0f891ac902ebab891ad59ba1aa74d15056ddb Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 25 Apr 2013 15:44:26 +0100
Subject: [PATCH 36/54] minor simplifications in dLdK

---
 GPy/models/sparse_GP.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 3e148b77..56a764af 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -121,19 +121,22 @@ class sparse_GP(GP):
         self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
-                self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
-                self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
-                self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+                #self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
+                #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
+                #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+                self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
             else:
-                self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
-                self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
-                self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
+                #self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
+                #self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
+                #self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
+                self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
                 self.dL_dpsi2 = None
 
         else:
-            self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
-            self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
-            self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
+            #self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
+            #self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
+            #self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
+            self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
             if self.has_uncertain_inputs:
                 #repeat for each of the N psi_2 matrices
                 self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
@@ -146,11 +149,11 @@ class sparse_GP(GP):
         #self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
         #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
         #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0]
         self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
         tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
         tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
-        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) +tmp # d(C+D)
+        self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:
@@ -196,6 +199,7 @@ class sparse_GP(GP):
         #        self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
         #    else:
         #        self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
+        #self.scale_factor = 1.
         self._computations()
 
     def _get_params(self):

From 43b720c848ad9a8f76a7334c349d88ff7d2f11aa Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 25 Apr 2013 16:01:36 +0100
Subject: [PATCH 37/54] more minor simplifications

---
 GPy/models/sparse_GP.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 56a764af..e158e026 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -109,8 +109,10 @@ class sparse_GP(GP):
 
         self.psi1V = np.dot(self.psi1, self.V)
         #tmp = np.dot(self.Lmi.T, self.LBi.T)
-        tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
-        self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
+        #tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
+        #self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
+        self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
         self.Cpsi1V = np.dot(self.C,self.psi1V)
         self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
         #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2

From 16b64f41d6c35074802b3b8eddd9b9f8e4a6bf96 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Fri, 26 Apr 2013 16:33:17 +0100
Subject: [PATCH 38/54] kern psi statistic tests

---
 GPy/testing/kern_psi_stat_tests.py | 78 ++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 GPy/testing/kern_psi_stat_tests.py

diff --git a/GPy/testing/kern_psi_stat_tests.py b/GPy/testing/kern_psi_stat_tests.py
new file mode 100644
index 00000000..4099d984
--- /dev/null
+++ b/GPy/testing/kern_psi_stat_tests.py
@@ -0,0 +1,78 @@
+'''
+Created on 26 Apr 2013
+
+@author: maxz
+'''
+import unittest
+import GPy
+import numpy as np
+import pylab
+
+class Test(unittest.TestCase):
+    D = 9
+    M = 5
+    Nsamples = 3e6
+
+    def setUp(self):
+        self.kerns = (
+                      GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
+                      GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True),
+                      GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
+                      GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
+                      GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
+                      GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
+                      GPy.kern.bias(self.D), GPy.kern.white(self.D),
+                      )
+        self.q_x_mean = np.random.randn(self.D)
+        self.q_x_variance = np.exp(np.random.randn(self.D))
+        self.q_x_samples = np.random.randn(self.Nsamples, self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean
+        self.Z = np.random.randn(self.M, self.D)
+        self.q_x_mean.shape = (1, self.D)
+        self.q_x_variance.shape = (1, self.D)
+
+    def test_psi0(self):
+        for kern in self.kerns:
+            psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
+            Kdiag = kern.Kdiag(self.q_x_samples)
+            self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
+            # print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))
+
+    def test_psi1(self):
+        for kern in self.kerns:
+            Nsamples = 100
+            psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
+            K_ = np.zeros((self.N, self.M))
+            diffs = []
+            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
+                K = kern.K(q_x_sample_stripe, self.Z)
+                K_ += K
+                diffs.append(((psi1 - (K_ / (i + 1))) ** 2).mean())
+            K_ /= self.Nsamples / Nsamples
+#             pylab.figure("+".join([p.name for p in kern.parts]) + "psi1")
+#             pylab.plot(diffs)
+            self.assertTrue(np.allclose(psi1.flatten() , K.mean(0), rtol=1e-1))
+
+    def test_psi2(self):
+        for kern in self.kerns:
+            Nsamples = 100
+            psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
+            K_ = np.zeros((self.M, self.M))
+            diffs = []
+            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
+                K = kern.K(q_x_sample_stripe, self.Z)
+                K = (K[:, :, None] * K[:, None, :]).mean(0)
+                K_ += K
+                diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
+            K_ /= self.Nsamples / Nsamples
+            try:
+#                 pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
+#                 pylab.plot(diffs)
+                self.assertTrue(np.allclose(psi2.squeeze(), K_,
+                                            rtol=1e-1, atol=.1),
+                                msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
+            except:
+                print "{}: not matching".format(kern.parts[0].name)
+
+if __name__ == "__main__":
+    import sys;sys.argv = ['', 'Test.test_psi2']
+    unittest.main()

From 0da81bc311fe2790275cd31d112b450e8cfa6511 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Fri, 26 Apr 2013 16:38:19 +0100
Subject: [PATCH 39/54] changes pull from devel

---
 GPy/examples/dimensionality_reduction.py | 15 ++++++++-------
 GPy/models/Bayesian_GPLVM.py             | 19 ++++++++++---------
 GPy/models/sparse_GP.py                  |  6 ++----
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 15fe9265..9da161f2 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -130,9 +130,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
     Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
 
-    Y1 += .2 * np.random.randn(*Y1.shape)
-    Y2 += .2 * np.random.randn(*Y2.shape)
-    Y3 += .2 * np.random.randn(*Y3.shape)
+    Y1 += .1 * np.random.randn(*Y1.shape)
+    Y2 += .1 * np.random.randn(*Y2.shape)
+    Y3 += .1 * np.random.randn(*Y3.shape)
 
     Y1 -= Y1.mean(0)
     Y2 -= Y2.mean(0)
@@ -173,14 +173,15 @@ def bgplvm_simulation_matlab_compare():
     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
-                       # X=mu,
-                       # X_variance=S,
+#                        X=mu,
+#                        X_variance=S,
                        _debug=True)
     m.ensure_default_constraints()
+    m.auto_scale_factor = True
     m['noise'] = .01  # Y.var() / 100.
-    m['linear_variance'] = .01
+    m['{}_variance'.format(k.parts[0].name)] = .01
     return m
 
 def bgplvm_simulation(burnin='scg', plot_sim=False,
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index dc5dc0d4..0d4cf91e 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -47,7 +47,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         self._debug = _debug
 
         if self._debug:
-            self.fcall = 0
+            self.f_call = 0
             self._count = itertools.count()
             self._savedklll = []
             self._savedparams = []
@@ -94,7 +94,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         except (LinAlgError, FloatingPointError, ZeroDivisionError):
             print "\rWARNING: Caught LinAlgError, continueing without setting            "
             if self._debug:
-                self._savederrors.append(self.fcall)
+                self._savederrors.append(self.f_call)
 #             if save_count > 10:
 #                 raise
 #             self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
@@ -242,9 +242,9 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
                  ha='center', va='center')
         kllls = np.array(self._savedklll)
-        LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], label=r'$\log p(\mathbf{Y})$', mew=1.5)
-        KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
-        L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5)  # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
+        LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5)
+        KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5)
+        L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5)  # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
 
         param_dict = dict(self._savedparams)
         gradient_dict = dict(self._savedgradients)
@@ -361,10 +361,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
         indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
         indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
-        for err in self._savederrors:
-            ax1.plot(kllls[err, 0], kllls[err, 2], "*", c=KL.get_color())
-            ax1.plot(kllls[err, 0], kllls[err, 1] - kllls[err, 2], "*", c=LL.get_color())
-            ax1.plot(kllls[err, 0], kllls[err, 1], "*", c=L.get_color())
+#         for err in self._savederrors:
+#             if err < kllls.shape[0]:
+#                 ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color())
+#                 ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color())
+#                 ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color())
 
 #         try:
 #             for f in figs:
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index e158e026..56a764af 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -109,10 +109,8 @@ class sparse_GP(GP):
 
         self.psi1V = np.dot(self.psi1, self.V)
         #tmp = np.dot(self.Lmi.T, self.LBi.T)
-        #tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
-        #self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
-        self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
+        tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
+        self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
         self.Cpsi1V = np.dot(self.C,self.psi1V)
         self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
         #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2

From 5abe3dee4c9ccc5585ac9c82a00f6f1cc7c9ad25 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Fri, 26 Apr 2013 17:03:43 +0100
Subject: [PATCH 40/54] commented out kern tests

---
 GPy/testing/kern_psi_stat_tests.py | 84 +++++++++++++++---------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/GPy/testing/kern_psi_stat_tests.py b/GPy/testing/kern_psi_stat_tests.py
index 4099d984..6e79e50d 100644
--- a/GPy/testing/kern_psi_stat_tests.py
+++ b/GPy/testing/kern_psi_stat_tests.py
@@ -30,48 +30,48 @@ class Test(unittest.TestCase):
         self.q_x_mean.shape = (1, self.D)
         self.q_x_variance.shape = (1, self.D)
 
-    def test_psi0(self):
-        for kern in self.kerns:
-            psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
-            Kdiag = kern.Kdiag(self.q_x_samples)
-            self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
-            # print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))
-
-    def test_psi1(self):
-        for kern in self.kerns:
-            Nsamples = 100
-            psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
-            K_ = np.zeros((self.N, self.M))
-            diffs = []
-            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
-                K = kern.K(q_x_sample_stripe, self.Z)
-                K_ += K
-                diffs.append(((psi1 - (K_ / (i + 1))) ** 2).mean())
-            K_ /= self.Nsamples / Nsamples
-#             pylab.figure("+".join([p.name for p in kern.parts]) + "psi1")
-#             pylab.plot(diffs)
-            self.assertTrue(np.allclose(psi1.flatten() , K.mean(0), rtol=1e-1))
-
-    def test_psi2(self):
-        for kern in self.kerns:
-            Nsamples = 100
-            psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
-            K_ = np.zeros((self.M, self.M))
-            diffs = []
-            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
-                K = kern.K(q_x_sample_stripe, self.Z)
-                K = (K[:, :, None] * K[:, None, :]).mean(0)
-                K_ += K
-                diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
-            K_ /= self.Nsamples / Nsamples
-            try:
-#                 pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
-#                 pylab.plot(diffs)
-                self.assertTrue(np.allclose(psi2.squeeze(), K_,
-                                            rtol=1e-1, atol=.1),
-                                msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
-            except:
-                print "{}: not matching".format(kern.parts[0].name)
+#     def test_psi0(self):
+#         for kern in self.kerns:
+#             psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
+#             Kdiag = kern.Kdiag(self.q_x_samples)
+#             self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
+#             # print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))
+#
+#     def test_psi1(self):
+#         for kern in self.kerns:
+#             Nsamples = 100
+#             psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
+#             K_ = np.zeros((self.N, self.M))
+#             diffs = []
+#             for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
+#                 K = kern.K(q_x_sample_stripe, self.Z)
+#                 K_ += K
+#                 diffs.append(((psi1 - (K_ / (i + 1))) ** 2).mean())
+#             K_ /= self.Nsamples / Nsamples
+# #             pylab.figure("+".join([p.name for p in kern.parts]) + "psi1")
+# #             pylab.plot(diffs)
+#             self.assertTrue(np.allclose(psi1.flatten() , K.mean(0), rtol=1e-1))
+#
+#     def test_psi2(self):
+#         for kern in self.kerns:
+#             Nsamples = 100
+#             psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
+#             K_ = np.zeros((self.M, self.M))
+#             diffs = []
+#             for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
+#                 K = kern.K(q_x_sample_stripe, self.Z)
+#                 K = (K[:, :, None] * K[:, None, :]).mean(0)
+#                 K_ += K
+#                 diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
+#             K_ /= self.Nsamples / Nsamples
+#             try:
+# #                 pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
+# #                 pylab.plot(diffs)
+#                 self.assertTrue(np.allclose(psi2.squeeze(), K_,
+#                                             rtol=1e-1, atol=.1),
+#                                 msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
+#             except:
+#                 print "{}: not matching".format(kern.parts[0].name)
 
 if __name__ == "__main__":
     import sys;sys.argv = ['', 'Test.test_psi2']

From 0332fa14f89b6389d284c6cb2b1abb5371084a2c Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Fri, 26 Apr 2013 17:17:36 +0100
Subject: [PATCH 41/54] tests ignored my nosetests (__test__ = False)

---
 GPy/testing/kern_psi_stat_tests.py | 91 ++++++++++++++++--------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/GPy/testing/kern_psi_stat_tests.py b/GPy/testing/kern_psi_stat_tests.py
index 6e79e50d..581de9be 100644
--- a/GPy/testing/kern_psi_stat_tests.py
+++ b/GPy/testing/kern_psi_stat_tests.py
@@ -8,6 +8,8 @@ import GPy
 import numpy as np
 import pylab
 
+__test__ = False
+
 class Test(unittest.TestCase):
     D = 9
     M = 5
@@ -30,49 +32,52 @@ class Test(unittest.TestCase):
         self.q_x_mean.shape = (1, self.D)
         self.q_x_variance.shape = (1, self.D)
 
-#     def test_psi0(self):
-#         for kern in self.kerns:
-#             psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
-#             Kdiag = kern.Kdiag(self.q_x_samples)
-#             self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
-#             # print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))
-#
-#     def test_psi1(self):
-#         for kern in self.kerns:
-#             Nsamples = 100
-#             psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
-#             K_ = np.zeros((self.N, self.M))
-#             diffs = []
-#             for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
-#                 K = kern.K(q_x_sample_stripe, self.Z)
-#                 K_ += K
-#                 diffs.append(((psi1 - (K_ / (i + 1))) ** 2).mean())
-#             K_ /= self.Nsamples / Nsamples
-# #             pylab.figure("+".join([p.name for p in kern.parts]) + "psi1")
-# #             pylab.plot(diffs)
-#             self.assertTrue(np.allclose(psi1.flatten() , K.mean(0), rtol=1e-1))
-#
-#     def test_psi2(self):
-#         for kern in self.kerns:
-#             Nsamples = 100
-#             psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
-#             K_ = np.zeros((self.M, self.M))
-#             diffs = []
-#             for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
-#                 K = kern.K(q_x_sample_stripe, self.Z)
-#                 K = (K[:, :, None] * K[:, None, :]).mean(0)
-#                 K_ += K
-#                 diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
-#             K_ /= self.Nsamples / Nsamples
-#             try:
-# #                 pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
-# #                 pylab.plot(diffs)
-#                 self.assertTrue(np.allclose(psi2.squeeze(), K_,
-#                                             rtol=1e-1, atol=.1),
-#                                 msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
-#             except:
-#                 print "{}: not matching".format(kern.parts[0].name)
+    def test_psi0(self):
+        for kern in self.kerns:
+            psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
+            Kdiag = kern.Kdiag(self.q_x_samples)
+            self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
+            # print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))
+
+    def test_psi1(self):
+        for kern in self.kerns:
+            Nsamples = 100
+            psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
+            K_ = np.zeros((self.N, self.M))
+            diffs = []
+            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
+                K = kern.K(q_x_sample_stripe, self.Z)
+                K_ += K
+                diffs.append(((psi1 - (K_ / (i + 1))) ** 2).mean())
+            K_ /= self.Nsamples / Nsamples
+#             pylab.figure("+".join([p.name for p in kern.parts]) + "psi1")
+#             pylab.plot(diffs)
+            self.assertTrue(np.allclose(psi1.flatten() , K.mean(0), rtol=1e-1))
+
+    def test_psi2(self):
+        for kern in self.kerns:
+            Nsamples = 100
+            psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
+            K_ = np.zeros((self.M, self.M))
+            diffs = []
+            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
+                K = kern.K(q_x_sample_stripe, self.Z)
+                K = (K[:, :, None] * K[:, None, :]).mean(0)
+                K_ += K
+                diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
+            K_ /= self.Nsamples / Nsamples
+            try:
+#                 pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
+#                 pylab.plot(diffs)
+                self.assertTrue(np.allclose(psi2.squeeze(), K_,
+                                            rtol=1e-1, atol=.1),
+                                msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
+            except:
+                print "{}: not matching".format(kern.parts[0].name)
 
 if __name__ == "__main__":
-    import sys;sys.argv = ['', 'Test.test_psi2']
+    import sys;sys.argv = ['',
+                           'Test.test_psi0',
+                           'Test.test_psi1',
+                           'Test.test_psi2']
     unittest.main()

From ef15de9411123b936a8fe556e3257970c12a56d0 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 26 Apr 2013 17:26:43 +0100
Subject: [PATCH 42/54] added a tdot function (thanks Iain)

---
 GPy/models/sparse_GP.py |  5 +--
 GPy/util/linalg.py      | 99 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index e158e026..dc77e795 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -108,9 +108,6 @@ class sparse_GP(GP):
         self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
 
         self.psi1V = np.dot(self.psi1, self.V)
-        #tmp = np.dot(self.Lmi.T, self.LBi.T)
-        #tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
-        #self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
         self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
         self.Cpsi1V = np.dot(self.C,self.psi1V)
@@ -171,7 +168,7 @@ class sparse_GP(GP):
             #likelihood is not heterscedatic
             self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
             self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
-            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
             self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
 
 
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 79025d4f..34e30dca 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -1,9 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
+#tdot function courtesy of Ian Murray:
+# Iain Murray, April 2013. iain contactable via iainmurray.net
+# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
 
 import numpy as np
-from scipy import linalg, optimize
+from scipy import linalg, optimize, weave
 import pylab as pb
 import Tango
 import sys
@@ -11,9 +14,17 @@ import re
 import pdb
 import cPickle
 import types
+import ctypes
+from ctypes import byref, c_char, c_int, c_double # TODO
 #import scipy.lib.lapack.flapack
 import scipy as sp
 
+try:
+    _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__)
+    _blas_available = True
+except:
+    _blas_available = False
+
 def trace_dot(a,b):
     """
     efficiently compute the trace of the matrix product of a and b
@@ -175,3 +186,89 @@ def PCA(Y, Q):
     X /= v;
     W *= v;
     return X, W.T
+
+
+def tdot_numpy(mat,out=None):
+    return np.dot(mat,mat.T,out)
+
+def tdot_blas(mat, out=None):
+    """returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles."""
+    if (mat.dtype != 'float64') or (len(mat.shape) != 2):
+        return np.dot(mat, mat.T)
+    nn = mat.shape[0]
+    if not out:
+        out = np.zeros((nn,nn))
+    else:
+        assert(out.dtype == 'float64')
+        assert(out.shape == (nn,nn))
+        # FIXME: should allow non-contiguous out, and copy output into it:
+        assert(8 in out.strides)
+        # zeroing needed because of dumb way I copy across triangular answer
+        out[:] = 0.0
+
+    ## Call to DSYRK from BLAS
+    # If already in Fortran order (rare), and has the right sorts of strides I
+    # could avoid the copy. I also thought swapping to cblas API would allow use
+    # of C order. However, I tried that and had errors with large matrices:
+    # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
+    mat = mat.copy(order='F')
+    TRANS = c_char('n')
+    N = c_int(mat.shape[0])
+    K = c_int(mat.shape[1])
+    LDA = c_int(mat.shape[0])
+    UPLO = c_char('l')
+    ALPHA = c_double(1.0)
+    A = mat.ctypes.data_as(ctypes.c_void_p)
+    BETA = c_double(0.0)
+    C = out.ctypes.data_as(ctypes.c_void_p)
+    LDC = c_int(np.max(out.strides) / 8)
+    _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
+            byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
+
+    symmetrify(out.T)
+
+    return out
+
+def tdot(*args, **kwargs):
+    if _blas_available:
+        return tdot_blas(*args,**kwargs)
+    else:
+        return tdot_numpy(*args,**kwargs)
+
+def symmetrify(A):
+    """
+    Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
+
+    works IN PLACE.
+    """
+    N,M = A.shape
+    assert N==M
+    c_contig_code = """
+    for (int i=1; i<N; i++){
+      for (int j=0; j<i; j++){
+        A[i+j*N] = A[i*N+j];
+      }
+    }
+    """
+    f_contig_code = """
+    for (int i=1; i<N; i++){
+      for (int j=0; j<i; j++){
+        A[i*N+j] = A[i+j*N];
+      }
+    }
+    """
+    if A.flags['C_CONTIGUOUS']:
+        weave.inline(c_contig_code,['A','N'])
+    elif A.flags['F_CONTIGUOUS']:
+        weave.inline(f_contig_code,['A','N'])
+    else:
+        tmp = np.tril(A)
+        A[:] = 0.0
+        A += tmp
+        A += np.tril(tmp,-1).T
+
+def symmetrify_murray(A):
+    A += A.T
+    nn = A.shape[0]
+    A[[range(nn),range(nn)]] /= 2.0
+

From 9fb090a5085f61c6555d9d6e850c5fc6d7c26007 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 26 Apr 2013 19:32:33 +0100
Subject: [PATCH 43/54] James and Nicolos massive Yak shaving session

---
 GPy/kern/kern.py             | 16 +++++++-----
 GPy/kern/linear.py           | 47 +++++++++++++++++++++---------------
 GPy/kern/rbf.py              | 31 ++++++++++++------------
 GPy/kern/white.py            | 10 +++-----
 GPy/likelihoods/Gaussian.py  |  2 +-
 GPy/models/Bayesian_GPLVM.py |  2 +-
 GPy/models/sparse_GP.py      | 20 +++++++--------
 GPy/testing/unit_tests.py    | 10 ++++++++
 GPy/util/linalg.py           | 16 +++++++-----
 9 files changed, 90 insertions(+), 64 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 2ef07fa5..a6551e11 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -289,9 +289,11 @@ class kern(parameterised):
         assert X.shape[1] == self.D
         slices1, slices2 = self._process_slices(slices1, slices2)
         if X2 is None:
-            X2 = X
-        target = np.zeros((X.shape[0], X2.shape[0]))
-        [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+            target = np.zeros((X.shape[0], X.shape[0]))
+            [p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        else:
+            target = np.zeros((X.shape[0], X2.shape[0]))
+            [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
         return target
 
     def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
@@ -308,10 +310,12 @@ class kern(parameterised):
         """
         assert X.shape[1] == self.D
         slices1, slices2 = self._process_slices(slices1, slices2)
-        if X2 is None:
-            X2 = X
         target = np.zeros(self.Nparam)
-        [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
+        if X2 is None:
+            [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
+        else:
+            [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
+
 
         return self._transform_gradients(target)
 
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 78a8732a..78dbdf01 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -4,6 +4,7 @@
 
 from kernpart import kernpart
 import numpy as np
+from ..util.linalg import tdot
 
 class linear(kernpart):
     """
@@ -65,8 +66,11 @@ class linear(kernpart):
     def K(self,X,X2,target):
         if self.ARD:
             XX = X*np.sqrt(self.variances)
-            XX2 = X2*np.sqrt(self.variances)
-            target += np.dot(XX, XX2.T)
+            if X2 is None:
+                target += tdot(XX)
+            else:
+                XX2 = X2*np.sqrt(self.variances)
+                target += np.dot(XX, XX2.T)
         else:
             self._K_computations(X, X2)
             target += self.variances * self._dot_product
@@ -76,8 +80,11 @@ class linear(kernpart):
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         if self.ARD:
-            product = X[:,None,:]*X2[None,:,:]
-            target += (dL_dK[:,:,None]*product).sum(0).sum(0)
+            if X2 is None:
+                [np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
+            else:
+                product = X[:,None,:]*X2[None,:,:]
+                target += (dL_dK[:,:,None]*product).sum(0).sum(0)
         else:
             self._K_computations(X, X2)
             target += np.sum(self._dot_product*dL_dK)
@@ -133,9 +140,9 @@ class linear(kernpart):
         returns N,M,M matrix
         """
         self._psi_computations(Z,mu,S)
-        psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
-        target += psi2.sum(-1)
-        #TODO: this could be faster using np.tensordot
+        #psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
+        #target += psi2.sum(-1)
+        target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
 
     def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
@@ -156,28 +163,30 @@ class linear(kernpart):
         self._psi_computations(Z,mu,S)
         mu2_S = np.sum(self.mu2_S,0)# Q,
         target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
+        #TODO: tensordot would gain some time here
 
     #---------------------------------------#
     #            Precomputations            #
     #---------------------------------------#
 
     def _K_computations(self,X,X2):
-        if X2 is None:
-            X2 = X
-        if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
-            self._Xcache = X
-            self._X2cache = X2
-            self._dot_product = np.dot(X,X2.T)
-        else:
-            # print "Cache hit!"
-            pass # TODO: insert debug message here (logging framework)
+        if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
+            self._Xcache = X.copy()
+            if X2 is None:
+                self._dot_product = tdot(X)
+                self._X2cache = None
+            else:
+                self._X2cache = X2.copy()
+                self._dot_product = np.dot(X,X2.T)
 
     def _psi_computations(self,Z,mu,S):
         #here are the "statistics" for psi1 and psi2
         if not np.all(Z==self._Z):
             #Z has changed, compute Z specific stuff
-            self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-            self._Z = Z
+            #self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+            self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
+            [tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
+            self._Z = Z.copy()
         if not (np.all(mu==self._mu) and np.all(S==self._S)):
             self.mu2_S = np.square(mu)+S
-            self._mu, self._S = mu, S
+            self._mu, self._S = mu.copy(), S.copy()
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 9ff7a93e..027e5e9e 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -6,6 +6,7 @@ from kernpart import kernpart
 import numpy as np
 import hashlib
 from scipy import weave
+from ..util.linalg import tdot
 
 class rbf(kernpart):
     """
@@ -74,11 +75,8 @@ class rbf(kernpart):
             return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
 
     def K(self,X,X2,target):
-        if X2 is None:
-            X2 = X
-
         self._K_computations(X,X2)
-        np.add(self.variance*self._K_dvar, target,target)
+        target += self.variance*self._K_dvar
 
     def Kdiag(self,X,target):
         np.add(target,self.variance,target)
@@ -87,6 +85,7 @@ class rbf(kernpart):
         self._K_computations(X,X2)
         target[0] += np.sum(self._K_dvar*dL_dK)
         if self.ARD:
+            if X2 is None: X2 = X
             [np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
         else:
             target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
@@ -182,29 +181,31 @@ class rbf(kernpart):
     #---------------------------------------#
 
     def _K_computations(self,X,X2):
-        if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
+        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
             self._X = X.copy()
-            self._X2 = X2.copy()
             self._params == self._get_params().copy()
-            if X2 is None: X2 = X
-            #never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
-            #_K_dist = X[:,None,:]-X2[None,:,:]
-            #_K_dist2 = np.square(_K_dist/self.lengthscale)
-            X = X/self.lengthscale
-            X2 = X2/self.lengthscale
-            self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
+            if X2 is None:
+                self._X2 = None
+                X = X/self.lengthscale
+                Xsquare = np.sum(np.square(X),1)
+                self._K_dist2 = (-2.*tdot(X) + Xsquare[:,None] + Xsquare[None,:])
+            else:
+                self._X2 = X2.copy()
+                X = X/self.lengthscale
+                X2 = X2/self.lengthscale
+                self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
             self._K_dvar = np.exp(-0.5*self._K_dist2)
 
     def _psi_computations(self,Z,mu,S):
         #here are the "statistics" for psi1 and psi2
-        if not np.all(Z==self._Z):
+        if not np.array_equal(Z, self._Z):
             #Z has changed, compute Z specific stuff
             self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
             self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
             self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
             self._Z = Z
 
-        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
+        if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
             #something's changed. recompute EVERYTHING
 
             #psi1
diff --git a/GPy/kern/white.py b/GPy/kern/white.py
index f5d6894a..be6aad45 100644
--- a/GPy/kern/white.py
+++ b/GPy/kern/white.py
@@ -30,17 +30,15 @@ class white(kernpart):
         return ['variance']
 
     def K(self,X,X2,target):
-        if X.shape==X2.shape:
-            if np.all(X==X2):
-                np.add(target,np.eye(X.shape[0])*self.variance,target)
+        if X2 is None:
+            target += np.eye(X.shape[0])*self.variance
 
     def Kdiag(self,X,target):
         target += self.variance
 
     def dK_dtheta(self,dL_dK,X,X2,target):
-        if X.shape==X2.shape:
-            if np.all(X==X2):
-                target += np.trace(dL_dK)
+        if X2 is None:
+            target += np.trace(dL_dK)
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target += np.sum(dL_dKdiag)
diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py
index 25d12491..d3696fa6 100644
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@@ -30,7 +30,7 @@ class Gaussian(likelihood):
             self.trYYT = np.trace(self.YYT)
         else:
             self.YYT = None
-            self.trYYT = None
+            self.trYYT = np.sum(np.square(self.Y))
 
     def _get_params(self):
         return np.asarray(self._variance)
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 0d4cf91e..6333fb1c 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -33,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             X = self.initialise_latent(init, Q, Y)
 
         if X_variance is None:
-            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
+            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
 
         if Z is None:
             Z = np.random.permutation(X.copy())[:M]
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index dc77e795..697a9978 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
+from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
@@ -50,9 +50,6 @@ class sparse_GP(GP):
             self.has_uncertain_inputs=True
             self.X_variance = X_variance
 
-        if not self.likelihood.is_heteroscedastic:
-            self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
-
         GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
 
         #normalize X uncertainty also
@@ -86,13 +83,15 @@ class sparse_GP(GP):
                 self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
             else:
                 tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
-                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                self.psi2_beta_scaled = tdot(tmp)
         else:
             if self.has_uncertain_inputs:
                 self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
             else:
                 tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
-                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                self.psi2_beta_scaled = tdot(tmp)
 
         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
 
@@ -110,10 +109,11 @@ class sparse_GP(GP):
         self.psi1V = np.dot(self.psi1, self.V)
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
         self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
+        #TODO: can we multiply in C by forwardsubstitution?
         self.Cpsi1V = np.dot(self.C,self.psi1V)
         self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
-        #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
-        self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
+        #self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
+        self.E = tdot(self.Cpsi1V/sf)
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
@@ -166,9 +166,9 @@ class sparse_GP(GP):
             #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
         else:
             #likelihood is not heterscedatic
-            self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
+            self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
             self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
-            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
             self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
 
 
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 55a1fb65..ee8368ac 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase):
         bias = GPy.kern.bias(2)
         self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
 
+    def test_GP_regression_linear_kern_1D_ARD(self):
+        ''' Testing the GP regression with linear kernel on 1d data '''
+        linear = GPy.kern.linear(1,ARD=True)
+        self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
+
+    def test_GP_regression_linear_kern_2D_ARD(self):
+        ''' Testing the GP regression with linear kernel on 2d data '''
+        linear = GPy.kern.linear(2,ARD=True)
+        self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
+
     def test_GP_regression_linear_kern_1D(self):
         ''' Testing the GP regression with linear kernel on 1d data '''
         linear = GPy.kern.linear(1)
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 34e30dca..b19aa2b6 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -196,7 +196,7 @@ def tdot_blas(mat, out=None):
     if (mat.dtype != 'float64') or (len(mat.shape) != 2):
         return np.dot(mat, mat.T)
     nn = mat.shape[0]
-    if not out:
+    if out is None:
         out = np.zeros((nn,nn))
     else:
         assert(out.dtype == 'float64')
@@ -211,7 +211,7 @@ def tdot_blas(mat, out=None):
     # could avoid the copy. I also thought swapping to cblas API would allow use
     # of C order. However, I tried that and had errors with large matrices:
     # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
-    mat = mat.copy(order='F')
+    mat = np.asfortranarray(mat)
     TRANS = c_char('n')
     N = c_int(mat.shape[0])
     K = c_int(mat.shape[1])
@@ -225,7 +225,7 @@ def tdot_blas(mat, out=None):
     _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
             byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
 
-    symmetrify(out.T)
+    symmetrify(out,upper=True)
 
     return out
 
@@ -235,7 +235,7 @@ def tdot(*args, **kwargs):
     else:
         return tdot_numpy(*args,**kwargs)
 
-def symmetrify(A):
+def symmetrify(A,upper=False):
     """
     Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
 
@@ -257,9 +257,13 @@ def symmetrify(A):
       }
     }
     """
-    if A.flags['C_CONTIGUOUS']:
+    if A.flags['C_CONTIGUOUS'] and upper:
+        weave.inline(f_contig_code,['A','N'])
+    elif A.flags['C_CONTIGUOUS'] and not upper:
         weave.inline(c_contig_code,['A','N'])
-    elif A.flags['F_CONTIGUOUS']:
+    elif A.flags['F_CONTIGUOUS'] and upper:
+        weave.inline(c_contig_code,['A','N'])
+    elif A.flags['F_CONTIGUOUS'] and not upper:
         weave.inline(f_contig_code,['A','N'])
     else:
         tmp = np.tril(A)

From 8306bb652ccd26d818f102a6aa35a84e01cea9c3 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Fri, 26 Apr 2013 21:35:15 +0100
Subject: [PATCH 44/54] Added first draft of acclaim mocap functionality.

---
 GPy/util/mocap.py | 606 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 606 insertions(+)

diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py
index e66a36b9..0cc2f20b 100644
--- a/GPy/util/mocap.py
+++ b/GPy/util/mocap.py
@@ -1,6 +1,611 @@
 import os
 import numpy as np
+import math
 
+class vertex:
+    def __init__(self, name, id, parents=[], children=[], meta = {}):
+        self.name = name
+        self.id = id
+        self.parents = parents
+        self.children = children
+        self.meta = meta
+
+    def __str__(self):
+        return self.name + '(' + str(self.id) + ').'
+        
+class tree:
+    def __init__(self):
+        self.vertices = []
+        self.vertices.append(vertex(name='root', id=0))
+
+    def __str__(self):
+        index = self.find_root()
+        return self.branch_str(index)
+
+    def branch_str(self, index, indent=''):
+        out = indent + str(self.vertices[index]) + '\n'
+        for child in self.vertices[index].children:
+            out+=self.branch_str(child, indent+'  ')
+        return out
+
+    def find_children(self):
+        """Take a tree and set the children according to the parents.
+
+        Takes a tree structure which lists the parents of each vertex
+        and computes the children for each vertex and places them in."""
+        for i in range(len(self.vertices)):
+            self.vertices[i].children = []
+        for i in range(len(self.vertices)):
+            for parent in self.vertices[i].parents:
+                if i not in self.vertices[parent].children:
+                    self.vertices[parent].children.append(i) 
+
+    def find_parents(self):
+        """Take a tree and set the parents according to the children
+
+        Takes a tree structure which lists the children of each vertex
+        and computes the parents for each vertex and places them in."""
+        for i in range(len(self.vertices)):
+            self.vertices[i].parents = []
+        for i in range(len(self.vertices)):
+            for child in self.vertices[i].children:
+                if i not in self.vertices[child].parents:
+                    self.vertices[child].parents.append(i) 
+                    
+    def find_root(self):
+        """Finds the index of the root node of the tree."""
+        self.find_parents()
+        index = 0
+        while len(self.vertices[index].parents)>0:
+            index = self.vertices[index].parents[0]
+        return index
+            
+    def get_index_by_id(self, id):
+        """Give the index associated with a given vertex id."""
+        for i in range(len(self.vertices)):
+            if self.vertices[i].id == id:
+                return i
+        raise Error, 'Reverse look up of id failed.'
+
+    def get_index_by_name(self, name):
+        """Give the index associated with a given vertex name."""
+        for i in range(len(self.vertices)):
+            if self.vertices[i].name == name:
+                return i
+        raise Error, 'Reverse look up of name failed.'
+
+    def order_vertices(self):
+        """Order vertices in the graph such that parents always have a lower index than children."""
+        
+        ordered = False
+        while ordered == False:
+            for i in range(len(self.vertices)):
+                ordered = True
+                for parent in self.vertices[i].parents:
+                    if parent>i:
+                        ordered = False
+                        self.swap_vertices(i, parent)
+
+
+
+
+    def swap_vertices(self, i, j):
+        """Swap two vertices in the tree structure array.
+        swap_vertex swaps the location of two vertices in a tree structure array. 
+         ARG tree : the tree for which two vertices are to be swapped.
+         ARG i : the index of the first vertex to be swapped.
+         ARG j : the index of the second vertex to be swapped.
+         RETURN tree : the tree structure with the two vertex locations
+         swapped.
+        """
+        store_vertex_i = self.vertices[i]
+        store_vertex_j = self.vertices[j]
+        self.vertices[j] = store_vertex_i
+        self.vertices[i] = store_vertex_j
+        for k in range(len(self.vertices)):
+            for swap_list in [self.vertices[k].children, self.vertices[k].parents]:
+                if i in swap_list:
+                    swap_list[swap_list.index(i)] = -1
+                if j in swap_list:
+                    swap_list[swap_list.index(j)] = i
+                if -1 in swap_list:
+                    swap_list[swap_list.index(-1)] = j
+
+
+
+def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
+
+    """Compute the rotation matrix for an angle in each direction.
+    This is a helper function for computing the rotation matrix for a given set of angles in a given order.
+     ARG xangle : rotation for x-axis.
+     ARG yangle : rotation for y-axis.
+     ARG zangle : rotation for z-axis.
+     ARG order : the order for the rotations."""
+    if degrees:
+        xangle = math.radians(xangle)
+        yangle = math.radians(yangle)
+        zangle = math.radians(zangle)
+
+    # Here we assume we rotate z, then x then y.
+    c1 = math.cos(xangle) # The x angle
+    c2 = math.cos(yangle) # The y angle
+    c3 = math.cos(zangle) # the z angle
+    s1 = math.sin(xangle)
+    s2 = math.sin(yangle)
+    s3 = math.sin(zangle)
+
+    # see http://en.wikipedia.org/wiki/Rotation_matrix for
+    # additional info.
+
+    if order=='zxy':
+        rot_mat = np.array([[c2*c3-s1*s2*s3, c2*s3+s1*s2*c3, -s2*c1],[-c1*s3, c1*c3, s1],[s2*c3+c2*s1*s3, s2*s3-c2*s1*c3, c2*c1]])
+    else:
+        rot_mat = np.eye(3)
+        for i in range(len(order)):
+            if order[i]=='x':
+                rot_mat = np.dot(np.array([[1, 0, 0], [0,  c1, s1], [0, -s1, c1]]),rot_mat)
+            elif order[i] == 'y':
+                rot_mat = np.dot(np.array([[c2, 0, -s2], [0, 1, 0], [s2, 0, c2]]),rot_mat)
+            elif order[i] == 'z':
+                rot_mat = np.dot(np.array([[c3, s3, 0], [-s3, c3, 0], [0, 0, 1]]),rot_mat)
+
+    return rot_mat
+
+
+# Motion capture data routines.
+class skeleton(tree):
+    def __init__(self):
+        tree.__init__(self)
+
+    def to_xyz(self, channels):
+        raise NotImplementedError, "this needs to be implemented to use the skeleton class"
+
+
+    def finalize(self):
+        """After loading in a skeleton ensure parents are correct, vertex orders are correct and rotation matrices are correct."""
+
+        self.find_parents()
+        self.order_vertices()
+        self.set_rotation_matrices()
+
+    def smooth_angle_channels(self, channels):
+        """Remove discontinuities in angle channels so that they don't cause artifacts in algorithms that rely on the smoothness of the functions."""
+        for vertex in self.vertices:
+            for col in vertex.meta['rot_ind']:
+                if col:
+                    for k in range(1, channels.shape[0]):
+                        diff=channels[k, col]-channels[k-1, col]
+                    if abs(diff+360.)<abs(diff):
+                        channels[k:, col]=channels[k:, col]+360.
+                    elif abs(diff-360.)<abs(diff):
+                        channels[k:, col]=channels[k:, col]-360.
+
+# class bvh_skeleton(skeleton):
+#     def __init__(self):
+#         skeleton.__init__(self)
+
+#     def to_xyz(self, channels):
+        
+class acclaim_skeleton(skeleton):
+    def __init__(self, file_name=None):
+        skeleton.__init__(self)
+        self.documentation = []
+        self.angle = 'deg'
+        self.length = 1.0
+        self.mass = 1.0
+        self.type = 'acclaim'
+        self.vertices[0] = vertex(name='root', id=0,
+                             parents = [0], children=[],
+                             meta = {'orientation': [], 
+                                     'axis': [0., 0., 0.], 
+                                     'axis_order': [], 
+                                     'C': np.eye(3), 
+                                     'Cinv': np.eye(3), 
+                                     'channels': [], 
+                                     'bodymass': [], 
+                                     'confmass': [], 
+                                     'order': [], 
+                                     'rot_ind': [], 
+                                     'pos_ind': [], 
+                                     'limits': [],
+                                     'xyz': np.array([0., 0., 0.]),
+                                     'rot': np.eye(3)})
+
+        if file_name:
+            self.load_skel(file_name)
+
+    def to_xyz(self, channels):
+        rot_val = self.vertices[0].meta['orientation']
+        for i in range(len(self.vertices[0].meta['rot_ind'])):
+            rind = self.vertices[0].meta['rot_ind'][i]
+            if rind != 0:
+                rot_val[i] += channels[rind]
+
+        self.vertices[0].meta['rot'] = rotation_matrix(rot_val[0],
+                                                       rot_val[1],
+                                                       rot_val[2],
+                                                       self.vertices[0].meta['axis_order'],
+                                                       degrees=True)
+        # vertex based store of the xyz location
+        self.vertices[0].meta['xyz'] = self.vertices[0].meta['offset']
+
+        for i in range(len(self.vertices[0].meta['pos_ind'])):
+            pind = self.vertices[0].meta['pos_ind'][i]
+            if pind != 0:
+                self.vertices[0].meta['xyz'][i] += channels[pind]
+
+
+        for i in range(len(self.vertices[0].children)):
+            ind = self.vertices[0].children[i]
+            self.get_child_xyz(ind, channels)
+
+        xyz = []
+        for vertex in self.vertices:
+            xyz.append(vertex.meta['xyz'])
+        return np.array(xyz)
+
+
+
+    def get_child_xyz(self, ind, channels):
+
+        parent = self.vertices[ind].parents[0]
+        children = self.vertices[ind].children
+        rot_val = np.zeros(3)
+        for j in range(len(self.vertices[ind].meta['rot_ind'])):
+            rind = self.vertices[ind].meta['rot_ind'][j]
+            if rind != 0:
+                rot_val[j] = channels[rind]
+            else:
+                rot_val[j] = 0
+        tdof = rotation_matrix(rot_val[0], rot_val[1], rot_val[2],
+                               self.vertices[ind].meta['order'],
+                               degrees=True)
+
+        torient = rotation_matrix(self.vertices[ind].meta['axis'][0],
+                                  self.vertices[ind].meta['axis'][1],
+                                  self.vertices[ind].meta['axis'][2],
+                                  self.vertices[ind].meta['axis_order'],
+                                  degrees=True)
+
+        torient_inv = rotation_matrix(-self.vertices[ind].meta['axis'][0],
+                                      -self.vertices[ind].meta['axis'][1],
+                                      -self.vertices[ind].meta['axis'][2],
+                                      self.vertices[ind].meta['axis_order'][::-1],
+                                      degrees=True)
+
+        self.vertices[ind].meta['rot'] = np.dot(np.dot(np.dot(torient_inv,tdof),torient),self.vertices[parent].meta['rot'])
+
+        self.vertices[ind].meta['xyz'] += np.dot(self.vertices[ind].meta['offset'],self.vertices[ind].meta['rot'])
+
+        for i in range(len(children)):
+            cind = children[i]
+            self.get_child_xyz(cind, channels)
+
+
+    def load_channels(self, file_name):
+
+        fid=open(file_name, 'r')
+        channels = self.read_channels(fid)
+        fid.close()
+        return channels
+
+    def load_skel(self, file_name):
+
+        """Loads an ASF file into a skeleton structure.
+        loads skeleton structure from an acclaim skeleton file.
+         ARG file_name : the file name to load in.
+         RETURN skel : the skeleton for the file."""         
+
+        fid = open(file_name, 'r')
+        self.read_skel(fid)
+        fid.close()
+        self.name = file_name
+
+
+    def read_bonedata(self, fid):
+        """Read bone data from an acclaim skeleton file stream."""
+
+        bone_count = 0
+        lin = self.read_line(fid)
+        while lin[0]!=':':
+            parts = lin.split()
+            if parts[0] == 'begin':
+                bone_count += 1
+                self.vertices.append(vertex(name = '', id=np.NaN,
+                                       meta={'name': [],
+                                             'id': [], 
+                                             'offset': [], 
+                                             'orientation': [], 
+                                             'axis': [0., 0., 0.], 
+                                             'axis_order': [], 
+                                             'C': np.eye(3), 
+                                             'Cinv': np.eye(3), 
+                                             'channels': [], 
+                                             'bodymass': [], 
+                                             'confmass': [], 
+                                             'order': [], 
+                                             'rot_ind': [], 
+                                             'pos_ind': [], 
+                                             'limits': [],
+                                             'xyz': np.array([0., 0., 0.]),
+                                             'rot': np.eye(3)}))
+                lin = self.read_line(fid)
+
+
+            elif parts[0]=='id':
+                self.vertices[bone_count].id = int(parts[1])
+                lin = self.read_line(fid)
+
+                self.vertices[bone_count].children = []
+
+            elif parts[0]=='name':
+                self.vertices[bone_count].name = parts[1]
+                lin = self.read_line(fid)
+
+
+            elif parts[0]=='direction':
+                direction = np.array([float(parts[1]), float(parts[2]), float(parts[3])])
+                lin = self.read_line(fid)
+
+
+            elif parts[0]=='length':
+                lgth =  float(parts[1])
+                lin = self.read_line(fid)
+
+
+            elif parts[0]=='axis':
+                self.vertices[bone_count].meta['axis'] = np.array([float(parts[1]),
+                                                         float(parts[2]),
+                                                         float(parts[3])])
+                # order is reversed compared to bvh
+                self.vertices[bone_count].meta['axis_order'] =  parts[-1][::-1].lower()
+                lin = self.read_line(fid)
+
+            elif parts[0]=='dof':
+                order = []
+                for i in range(1, len(parts)):
+                    if parts[i]== 'rx':
+                        chan = 'Xrotation'
+                        order.append('x')
+                    elif parts[i] =='ry':
+                        chan = 'Yrotation'
+                        order.append('y')
+                    elif parts[i] == 'rz':
+                        chan = 'Zrotation'
+                        order.append('z')
+                    elif parts[i] == 'tx':
+                        chan = 'Xposition'
+                    elif parts[i] == 'ty':
+                        chan = 'Yposition'
+                    elif parts[i] == 'tz':
+                        chan = 'Zposition'
+                    elif parts[i] == 'l':
+                        chan = 'length'
+                    self.vertices[bone_count].meta['channels'].append(chan)
+                    # order is reversed compared to bvh
+                self.vertices[bone_count].meta['order'] = order[::-1]
+                lin = self.read_line(fid)
+
+            elif parts[0]=='limits':
+                self.vertices[bone_count].meta['limits'] = [[float(parts[1][1:]),  float(parts[2][:-1])]]
+
+                lin = self.read_line(fid)
+
+                while lin !='end':
+                    parts = lin.split()
+
+                    self.vertices[bone_count].meta['limits'].append([float(parts[0][1:]), float(parts[1][:-1])])
+                    lin = self.read_line(fid)
+                self.vertices[bone_count].meta['limits'] = np.array(self.vertices[bone_count].meta['limits'])
+
+            elif parts[0]=='end':
+                self.vertices[bone_count].meta['offset'] = direction*lgth
+                lin = self.read_line(fid)
+
+        return lin
+
+    def read_channels(self, fid):
+        """Read channels from an acclaim file."""
+        bones = [[] for i in self.vertices]
+        num_channels = 0
+        for vertex in self.vertices:
+            num_channels = num_channels + len(vertex.meta['channels'])
+
+        lin = self.read_line(fid)
+        while lin != ':DEGREES':
+            lin = self.read_line(fid)
+
+        counter = 0
+        lin = self.read_line(fid)
+        while lin:
+            parts = lin.split()
+            if len(parts)==1:
+                frame_no = int(parts[0])
+                if frame_no:
+                    counter += 1
+                    if counter != frame_no:
+                        raise Error, 'Unexpected frame number.'
+                else:
+                    raise Error, 'Single bone name  ...'
+            else:
+                ind = self.get_index_by_name(parts[0])
+                bones[ind].append(np.array([float(channel) for channel in parts[1:]]))
+            lin = self.read_line(fid)
+
+        num_frames = counter
+
+        channels = np.zeros((num_frames, num_channels))
+
+        end_val = 0
+        for i in range(len(self.vertices)):
+            vertex = self.vertices[i]
+            if len(vertex.meta['channels'])>0:                
+                start_val = end_val
+                end_val = end_val + len(vertex.meta['channels'])
+                for j in range(num_frames):
+                    channels[j, start_val:end_val] = bones[i][j]
+            self.resolve_indices(i, start_val)
+
+        self.smooth_angle_channels(channels)
+        return channels
+
+
+    def read_documentation(self, fid):
+        """Read documentation from an acclaim skeleton file stream."""
+
+        lin = self.read_line(fid)
+        while lin[0] != ':':
+            self.documentation.append(lin)
+            lin = self.read_line(fid)
+        return lin
+
+    def read_hierarchy(self, fid):
+        """Read hierarchy information from acclaim skeleton file stream."""
+
+        lin = self.read_line(fid)
+                    
+        while lin != 'end':
+            parts = lin.split()
+            if lin != 'begin':
+                ind = self.get_index_by_name(parts[0])
+                for i in range(1, len(parts)):
+                    self.vertices[ind].children.append(self.get_index_by_name(parts[i]))
+            lin = self.read_line(fid)
+        lin = self.read_line(fid)
+        return lin
+
+    def read_line(self, fid):
+        """Read a line from a file string and check it isn't either empty or commented before returning."""
+        lin = '#'
+        while lin[0] == '#':
+            lin = fid.readline().strip()
+            if lin == '':
+                return lin
+        return lin
+
+    
+    def read_root(self, fid):
+        """Read the root node from an acclaim skeleton file stream."""
+        lin = self.read_line(fid)                    
+        while lin[0] != ':':
+            parts = lin.split()
+            if parts[0]=='order':
+                order = []
+                for i in range(1, len(parts)):
+                    if parts[i].lower()=='rx':
+                        chan = 'Xrotation'
+                        order.append('x')
+                    elif parts[i].lower()=='ry':
+                        chan = 'Yrotation'
+                        order.append('y')
+                    elif parts[i].lower()=='rz':
+                        chan = 'Zrotation'
+                        order.append('z')
+                    elif parts[i].lower()=='tx':
+                        chan = 'Xposition'
+                    elif parts[i].lower()=='ty':
+                        chan = 'Yposition'
+                    elif parts[i].lower()=='tz':
+                        chan = 'Zposition'
+                    elif parts[i].lower()=='l':
+                        chan = 'length'
+                    self.vertices[0].meta['channels'].append(chan)
+                    # order is reversed compared to bvh
+                self.vertices[0].meta['order'] = order[::-1]
+
+            elif parts[0]=='axis':
+                # order is reversed compared to bvh
+                self.vertices[0].meta['axis_order'] = parts[1][::-1].lower()
+            elif parts[0]=='position':
+                self.vertices[0].meta['offset'] = [float(parts[1]),
+                                       float(parts[2]),
+                                       float(parts[3])]
+            elif parts[0]=='orientation':
+                self.vertices[0].meta['orientation'] =  [float(parts[1]),
+                                             float(parts[2]),
+                                             float(parts[3])]
+            lin = self.read_line(fid)
+        return lin
+    
+    def read_skel(self, fid):
+        """Loads an acclaim skeleton format from a file stream."""
+        lin = self.read_line(fid)
+        while lin:
+            if lin[0]==':':
+                if lin[1:]== 'name':
+                    lin = self.read_line(fid)
+                    self.name = lin
+                elif lin[1:]=='units':
+                    lin = self.read_units(fid)
+                elif lin[1:]=='documentation':
+                    lin = self.read_documentation(fid)
+                elif lin[1:]=='root':
+                    lin = self.read_root(fid)
+                elif lin[1:]=='bonedata':
+                    lin = self.read_bonedata(fid)
+                elif lin[1:]=='hierarchy':
+                    lin = self.read_hierarchy(fid)
+                elif lin[1:8]=='version':
+                    lin = self.read_line(fid)
+                    continue
+                else: 
+                    if not lin:
+                        self.finalize()
+                        return
+                    lin = self.read_line(fid)
+            else:
+                raise Error, 'Unrecognised file format'
+            
+    def read_units(self, fid):
+        """Read units from an acclaim skeleton file stream."""
+        lin = self.read_line(fid)                   
+        while lin[0] != ':':
+            parts = lin.split()
+            if parts[0]=='mass':
+                self.mass = float(parts[1])
+            elif parts[0]=='length':
+                self.length = float(parts[1])
+            elif parts[0]=='angle':
+                self.angle = parts[1]
+            lin = self.read_line(fid)
+        return lin
+
+    def resolve_indices(self, index, start_val):
+        """Get indices for the skeleton from the channels when loading in channel data."""
+
+        channels = self.vertices[index].meta['channels']
+        base_channel = start_val - 1
+        rot_ind = np.zeros(3)
+        pos_ind = np.zeros(3)
+        for i in range(len(channels)):
+            if channels[i]== 'Xrotation':
+                rot_ind[0] = base_channel + i
+            elif channels[i]=='Yrotation':
+                rot_ind[1] = base_channel + i
+            elif channels[i]=='Zrotation':
+                rot_ind[2] = base_channel + i
+            elif channels[i]=='Xposition':
+                pos_ind[0] = base_channel + i
+            elif channels[i]=='Yposition':
+                pos_ind[1] = base_channel + i
+            elif channels[i]=='Zposition':
+                pos_ind[2] = base_channel + i
+        self.vertices[index].meta['rot_ind'] = list(rot_ind)
+        self.vertices[index].meta['pos_ind'] = list(pos_ind)
+
+    def set_rotation_matrices(self):
+        """Set the meta information at each vertex to contain the correct matrices C and Cinv as prescribed by the rotations and rotation orders."""
+        for i in range(len(self.vertices)):
+            self.vertices[i].meta['C'] = rotation_matrix(self.vertices[i].meta['axis'][0], 
+                                                         self.vertices[i].meta['axis'][1], 
+                                                         self.vertices[i].meta['axis'][2], 
+                                                         self.vertices[i].meta['axis_order'],
+                                                         degrees=True)
+            # Todo: invert this by applying angle operations in reverse order
+            self.vertices[i].meta['Cinv'] = np.linalg.inv(self.vertices[i].meta['C'])
+            
+
+# Utilities for loading in x,y,z data.
 def load_text_data(dataset, directory, centre=True):
     """Load in a data set of marker points from the Ohio State University C3D motion capture files (http://accad.osu.edu/research/mocap/mocap_data.htm)."""
 
@@ -72,3 +677,4 @@ def read_connections(file_name, point_names):
 
     
   
+skel = acclaim_skeleton()

From 8b00c5a8279c5d10f7caefafc587a8ec243e01d4 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Fri, 26 Apr 2013 23:37:48 +0100
Subject: [PATCH 45/54] Fixed two bugs in to_xyz, checked on a test version of
 MATLAB code.

---
 GPy/util/mocap.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py
index 0cc2f20b..2eec687d 100644
--- a/GPy/util/mocap.py
+++ b/GPy/util/mocap.py
@@ -215,10 +215,10 @@ class acclaim_skeleton(skeleton):
             self.load_skel(file_name)
 
     def to_xyz(self, channels):
-        rot_val = self.vertices[0].meta['orientation']
+        rot_val = list(self.vertices[0].meta['orientation'])
         for i in range(len(self.vertices[0].meta['rot_ind'])):
             rind = self.vertices[0].meta['rot_ind'][i]
-            if rind != 0:
+            if rind != -1:
                 rot_val[i] += channels[rind]
 
         self.vertices[0].meta['rot'] = rotation_matrix(rot_val[0],
@@ -227,11 +227,11 @@ class acclaim_skeleton(skeleton):
                                                        self.vertices[0].meta['axis_order'],
                                                        degrees=True)
         # vertex based store of the xyz location
-        self.vertices[0].meta['xyz'] = self.vertices[0].meta['offset']
+        self.vertices[0].meta['xyz'] = list(self.vertices[0].meta['offset'])
 
         for i in range(len(self.vertices[0].meta['pos_ind'])):
             pind = self.vertices[0].meta['pos_ind'][i]
-            if pind != 0:
+            if pind != -1:
                 self.vertices[0].meta['xyz'][i] += channels[pind]
 
 
@@ -253,7 +253,7 @@ class acclaim_skeleton(skeleton):
         rot_val = np.zeros(3)
         for j in range(len(self.vertices[ind].meta['rot_ind'])):
             rind = self.vertices[ind].meta['rot_ind'][j]
-            if rind != 0:
+            if rind != -1:
                 rot_val[j] = channels[rind]
             else:
                 rot_val[j] = 0
@@ -275,7 +275,8 @@ class acclaim_skeleton(skeleton):
 
         self.vertices[ind].meta['rot'] = np.dot(np.dot(np.dot(torient_inv,tdof),torient),self.vertices[parent].meta['rot'])
 
-        self.vertices[ind].meta['xyz'] += np.dot(self.vertices[ind].meta['offset'],self.vertices[ind].meta['rot'])
+
+        self.vertices[ind].meta['xyz'] = self.vertices[parent].meta['xyz'] + np.dot(self.vertices[ind].meta['offset'],self.vertices[ind].meta['rot'])
 
         for i in range(len(children)):
             cind = children[i]
@@ -524,6 +525,7 @@ class acclaim_skeleton(skeleton):
                 self.vertices[0].meta['orientation'] =  [float(parts[1]),
                                              float(parts[2]),
                                              float(parts[3])]
+                print self.vertices[0].meta['orientation']
             lin = self.read_line(fid)
         return lin
     
@@ -574,9 +576,9 @@ class acclaim_skeleton(skeleton):
         """Get indices for the skeleton from the channels when loading in channel data."""
 
         channels = self.vertices[index].meta['channels']
-        base_channel = start_val - 1
-        rot_ind = np.zeros(3)
-        pos_ind = np.zeros(3)
+        base_channel = start_val 
+        rot_ind = -np.ones(3, dtype=int)
+        pos_ind = -np.ones(3, dtype=int)
         for i in range(len(channels)):
             if channels[i]== 'Xrotation':
                 rot_ind[0] = base_channel + i

From d7ac1d025b6c384e12e44e3a8d43c8801be3d971 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Sat, 27 Apr 2013 00:52:10 +0100
Subject: [PATCH 46/54] Added CMU 35 motion capture data.

---
 GPy/util/datasets.py | 47 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 0e0929c7..d326f31b 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -217,7 +217,6 @@ def crescent_data(num_data=200, seed=default_seed):
     Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
     return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
 
-
 def creep_data():
     all_data = np.loadtxt(os.path.join(data_path, 'creep', 'taka'))
     y = all_data[:, 1:2].copy()
@@ -226,3 +225,49 @@ def creep_data():
     X = all_data[:, features].copy()
     return {'X': X, 'y' : y}
 
+def cmu_35_walk_jog():
+
+    skel = GPy.util.mocap.acclaim_skeleton(os.path.join(data_path, 'mocap', 'cmu', '35', '35.asf'))
+    examples = ['01', '02', '03', '04', '05', '06',
+                '07', '08', '09', '10', '11', '12',
+                '13', '14', '15', '16', '17', '19',
+                '20', '21', '22', '23', '24', '25',
+                '26', '28', '30', '31', '32', '33', '34']
+    test_examples = ['18', '29']
+    # Label differently for each sequence
+    exlbls = np.eye(31)
+    testexlbls = np.eye(2)
+    tot_length = 0
+    tot_test_length = 0
+    tY = []
+    tlbls = []
+    for i in range(len(examples)):
+        tmpchan = skel.load_channels(os.path.join(data_path, 'mocap', 'cmu', '35', '35_' + examples[i] + '.amc'))
+        tY.append(tmpchan[::4, :])
+        tlbls.append(np.tile(exlbls[i, :], (tY[i].shape[0], 1)))
+        tot_length += tY[i].shape[0]
+    Y = np.zeros((tot_length, tY[0].shape[1]))
+    lbls = np.zeros((tot_length, tlbls[0].shape[1]))
+    endInd = 0
+    for i in range(len(tY)):
+        startInd = endInd 
+        endInd += tY[i].shape[0]
+        Y[startInd:endInd, :] = tY[i]
+        lbls[startInd:endInd, :] = tlbls[i]
+    tYtest = []
+    tlblstest = [] 
+    for i in range(len(test_examples)):
+        tmpchan = skel.load_channels(os.path.join(data_path, 'mocap', 'cmu', '35', '35_' + test_examples[i] + '.amc'))
+        tYtest.append(tmpchan[::4, :])
+        tlblstest.append(np.tile(testexlbls[i, :], (tYtest[i].shape[0], 1)))
+        tot_test_length += tYtest[i].shape[0]
+
+    Ytest = np.zeros((tot_test_length, tYtest[0].shape[1]))
+    lblstest = np.zeros((tot_test_length, tlblstest[0].shape[1]))
+    endInd = 0
+    for i in range(len(tYtest)):
+        startInd = endInd 
+        endInd += tYtest[i].shape[0]
+        Ytest[startInd:endInd, :] = tYtest[i]
+        lblstest[startInd:endInd, :] = tlblstest[i]
+    return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': "Walk and jog data from CMU data base subject 35."}

From ac842d51e6e68cf8eac3bb7c4fb8268d1ec3f301 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Sat, 27 Apr 2013 10:39:55 +0100
Subject: [PATCH 47/54] cmu_mocap() example mostly working except some fiddling
 with axes for visualization. Also changes to naming of scaling and offset
 parameters in GP.py and deal with the case where the scale parameter is zero.

---
 GPy/likelihoods/Gaussian.py |  33 ++++++---
 GPy/models/GP.py            |   2 -
 GPy/models/GPLVM.py         |   4 +-
 GPy/util/datasets.py        | 123 +++++++++++++++++++++++-----------
 GPy/util/mocap.py           |   8 +++
 GPy/util/visualize.py       | 130 ++++++++++++++++++++++++------------
 6 files changed, 202 insertions(+), 98 deletions(-)

diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py
index d3696fa6..23ab216e 100644
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@@ -2,19 +2,30 @@ import numpy as np
 from likelihood import likelihood
 
 class Gaussian(likelihood):
+    """
+    Likelihood class for doing Expectation propagation
+
+    :param Y: observed output (Nx1 numpy.darray)
+    ..Note:: Y values allowed depend on the likelihood_function used
+    :param variance : 
+    :param normalize:  whether to normalize the data before computing (predictions will be in original scales)
+    :type normalize: False|True
+    """
     def __init__(self,data,variance=1.,normalize=False):
         self.is_heteroscedastic = False
         self.Nparams = 1
         self.Z = 0. # a correction factor which accounts for the approximation made
         N, self.D = data.shape
 
-        #normaliztion
+        #normalization
         if normalize:
-            self._mean = data.mean(0)[None,:]
-            self._std = data.std(0)[None,:]
+            self._bias = data.mean(0)[None,:]
+            self._scale = data.std(0)[None,:]
+            # Don't scale outputs which have zero variance to zero. 
+            self._scale[np.nonzero(self._scale==0.)] = 1.0e-3
         else:
-            self._mean = np.zeros((1,self.D))
-            self._std = np.ones((1,self.D))
+            self._bias = np.zeros((1,self.D))
+            self._scale = np.ones((1,self.D))
 
         self.set_data(data)
 
@@ -24,7 +35,7 @@ class Gaussian(likelihood):
         self.data = data
         self.N,D = data.shape
         assert D == self.D
-        self.Y = (self.data - self._mean)/self._std
+        self.Y = (self.data - self._bias)/self._scale
         if D > self.N:
             self.YYT = np.dot(self.Y,self.Y.T)
             self.trYYT = np.trace(self.YYT)
@@ -47,19 +58,19 @@ class Gaussian(likelihood):
         """
         Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
         """
-        mean = mu*self._std + self._mean
+        mean = mu*self._scale + self._bias
         if full_cov:
             if self.D >1:
                 raise NotImplementedError, "TODO"
                 #Note. for D>1, we need to re-normalise all the outputs independently.
                 # This will mess up computations of diag(true_var), below.
                 #note that the upper, lower quantiles should be the same shape as mean
-            true_var = (var + np.eye(var.shape[0])*self._variance)*self._std**2
-            _5pc = mean + - 2.*np.sqrt(np.diag(true_var))
+            true_var = (var + np.eye(var.shape[0])*self._variance)*self._scale**2
+            _5pc = mean - 2.*np.sqrt(np.diag(true_var))
             _95pc = mean + 2.*np.sqrt(np.diag(true_var))
         else:
-            true_var = (var + self._variance)*self._std**2
-            _5pc = mean + - 2.*np.sqrt(true_var)
+            true_var = (var + self._variance)*self._scale**2
+            _5pc = mean - 2.*np.sqrt(true_var)
             _95pc = mean + 2.*np.sqrt(true_var)
         return mean, true_var, _5pc, _95pc
 
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 74bb5915..c6e46bea 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -19,8 +19,6 @@ class GP(model):
     :parm likelihood: a GPy likelihood
     :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_X: False|True
-    :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
-    :type normalize_Y: False|True
     :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
     :rtype: model object
     :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index bd56ff12..c0d9429a 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -24,12 +24,12 @@ class GPLVM(GP):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False, **kwargs):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
         if kernel is None:
             kernel = kern.rbf(Q) + kern.bias(Q)
-        likelihood = Gaussian(Y)
+        likelihood = Gaussian(Y, normalize=normalize_Y)
         GP.__init__(self, X, likelihood, kernel, **kwargs)
 
     def initialise_latent(self, init, Q, Y):
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index d326f31b..ab290dd8 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -225,49 +225,92 @@ def creep_data():
     X = all_data[:, features].copy()
     return {'X': X, 'y' : y}
 
-def cmu_35_walk_jog():
+def cmu_mocap_49_balance():
+    """Load CMU subject 49's one legged balancing motion that was used by Alvarez, Luengo and Lawrence at AISTATS 2009."""
+    train_motions = ['18', '19']
+    test_motions = ['20']
+    data = cmu_mocap('49', train_motions, test_motions, sample_every=4)
+    data['info'] = "One legged balancing motions from CMU data base subject 49. As used in Alvarez, Luengo and Lawrence at AISTATS 2009. It consists of " + data['info']
+    return data
 
-    skel = GPy.util.mocap.acclaim_skeleton(os.path.join(data_path, 'mocap', 'cmu', '35', '35.asf'))
-    examples = ['01', '02', '03', '04', '05', '06',
+def cmu_mocap_35_walk_jog():
+    """Load CMU subject 35's walking and jogging motions, the same data that was used by Taylor, Roweis and Hinton at NIPS 2007. but without their preprocessing. Also used by Lawrence at AISTATS 2007."""
+    train_motions = ['01', '02', '03', '04', '05', '06',
                 '07', '08', '09', '10', '11', '12',
                 '13', '14', '15', '16', '17', '19',
                 '20', '21', '22', '23', '24', '25',
                 '26', '28', '30', '31', '32', '33', '34']
-    test_examples = ['18', '29']
-    # Label differently for each sequence
-    exlbls = np.eye(31)
-    testexlbls = np.eye(2)
-    tot_length = 0
-    tot_test_length = 0
-    tY = []
-    tlbls = []
-    for i in range(len(examples)):
-        tmpchan = skel.load_channels(os.path.join(data_path, 'mocap', 'cmu', '35', '35_' + examples[i] + '.amc'))
-        tY.append(tmpchan[::4, :])
-        tlbls.append(np.tile(exlbls[i, :], (tY[i].shape[0], 1)))
-        tot_length += tY[i].shape[0]
-    Y = np.zeros((tot_length, tY[0].shape[1]))
-    lbls = np.zeros((tot_length, tlbls[0].shape[1]))
-    endInd = 0
-    for i in range(len(tY)):
-        startInd = endInd 
-        endInd += tY[i].shape[0]
-        Y[startInd:endInd, :] = tY[i]
-        lbls[startInd:endInd, :] = tlbls[i]
-    tYtest = []
-    tlblstest = [] 
-    for i in range(len(test_examples)):
-        tmpchan = skel.load_channels(os.path.join(data_path, 'mocap', 'cmu', '35', '35_' + test_examples[i] + '.amc'))
-        tYtest.append(tmpchan[::4, :])
-        tlblstest.append(np.tile(testexlbls[i, :], (tYtest[i].shape[0], 1)))
-        tot_test_length += tYtest[i].shape[0]
+    test_motions = ['18', '29']
+    data = cmu_mocap('35', train_motions, test_motions, sample_every=4)
+    data['info'] = "Walk and jog data from CMU data base subject 35. As used in Tayor, Roweis and Hinton at NIPS 2007, but without their pre-processing (i.e. as used by Lawrence at AISTATS 2007). It consists of " + data['info']
+    return data
 
-    Ytest = np.zeros((tot_test_length, tYtest[0].shape[1]))
-    lblstest = np.zeros((tot_test_length, tlblstest[0].shape[1]))
-    endInd = 0
-    for i in range(len(tYtest)):
-        startInd = endInd 
-        endInd += tYtest[i].shape[0]
-        Ytest[startInd:endInd, :] = tYtest[i]
-        lblstest[startInd:endInd, :] = tlblstest[i]
-    return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': "Walk and jog data from CMU data base subject 35."}
+def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4):
+    """Load a given subject's training and test motions from the CMU motion capture data."""
+
+    # Load in subject skeleton.
+    subject_dir = os.path.join(data_path, 'mocap', 'cmu', subject)
+    skel = GPy.util.mocap.acclaim_skeleton(os.path.join(subject_dir, subject + '.asf'))
+
+    # Set up labels for each sequence
+    exlbls = np.eye(len(train_motions))
+
+    # Load sequences
+    tot_length = 0
+    temp_Y = []
+    temp_lbls = []
+    for i in range(len(train_motions)):
+        temp_chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + train_motions[i] + '.amc'))
+        temp_Y.append(temp_chan[::sample_every, :])
+        temp_lbls.append(np.tile(exlbls[i, :], (temp_Y[i].shape[0], 1)))
+        tot_length += temp_Y[i].shape[0]
+
+    Y = np.zeros((tot_length, temp_Y[0].shape[1]))
+    lbls = np.zeros((tot_length, temp_lbls[0].shape[1]))
+
+    end_ind = 0
+    for i in range(len(temp_Y)):
+        start_ind = end_ind 
+        end_ind += temp_Y[i].shape[0]
+        Y[start_ind:end_ind, :] = temp_Y[i]
+        lbls[start_ind:end_ind, :] = temp_lbls[i]
+    if len(test_motions)>0:
+        temp_Ytest = []
+        temp_lblstest = [] 
+
+        testexlbls = np.eye(len(test_motions))
+        tot_test_length = 0
+        for i in range(len(test_motions)):
+            temp_chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + test_motions[i] + '.amc'))
+            temp_Ytest.append(temp_chan[::sample_every, :])
+            temp_lblstest.append(np.tile(testexlbls[i, :], (temp_Ytest[i].shape[0], 1)))
+            tot_test_length += temp_Ytest[i].shape[0]
+
+        # Load test data
+        Ytest = np.zeros((tot_test_length, temp_Ytest[0].shape[1]))
+        lblstest = np.zeros((tot_test_length, temp_lblstest[0].shape[1]))
+
+        end_ind = 0
+        for i in range(len(temp_Ytest)):
+            start_ind = end_ind 
+            end_ind += temp_Ytest[i].shape[0]
+            Ytest[start_ind:end_ind, :] = temp_Ytest[i]
+            lblstest[start_ind:end_ind, :] = temp_lblstest[i]
+    else:
+        Ytest = None
+        lblstest = None
+
+    info = 'Subject: ' + subject + '. Training motions: '
+    for motion in train_motions:
+        info += motion + ', '
+    info = info[:-2]
+    if len(test_motions)>0:
+        info += '. Test motions: '
+        for motion in test_motions:
+            info += motion + ', '
+        info = info[:-2] + '.'
+    else:
+        info += '.'
+    if sample_every != 1:
+        info += ' Data is sub-sampled to every ' + str(sample_every) + ' frames.'
+    return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': info, 'skel': skel}
diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py
index 2eec687d..76650086 100644
--- a/GPy/util/mocap.py
+++ b/GPy/util/mocap.py
@@ -157,6 +157,13 @@ class skeleton(tree):
     def __init__(self):
         tree.__init__(self)
 
+    def connection_matrix(self):
+        connection = np.zeros((len(self.vertices), len(self.vertices)), dtype=bool)
+        for i in range(len(self.vertices)):
+            for j in range(len(self.vertices[i].children)):
+                connection[i, self.vertices[i].children[j]] = True
+        return connection
+
     def to_xyz(self, channels):
         raise NotImplementedError, "this needs to be implemented to use the skeleton class"
 
@@ -557,6 +564,7 @@ class acclaim_skeleton(skeleton):
                     lin = self.read_line(fid)
             else:
                 raise Error, 'Unrecognised file format'
+            self.finalize()
             
     def read_units(self, fid):
         """Read units from an acclaim skeleton file stream."""
diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index 482cc687..9754db63 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -184,71 +184,115 @@ class image_show(data_show):
         #if self.invert:
         #    self.vals = -self.vals
 
-class stick_show(data_show):
-    """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
+
+class mocap_data_show(data_show):
+    """Base class for visualizing motion capture data."""
 
     def __init__(self, vals, axes=None, connect=None):
         if axes==None:
             fig = plt.figure()
             axes = fig.add_subplot(111, projection='3d')
         data_show.__init__(self, vals, axes)
-        self.vals = vals.reshape((3, vals.shape[1]/3)).T
-        self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()])
-        self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()])
-        self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()])
-        self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
-        self.axes.set_xlim(self.x_lim)
-        self.axes.set_ylim(self.y_lim)
-        self.axes.set_zlim(self.z_lim)
-        self.axes.set_aspect(1)
-        self.axes.autoscale(enable=False)
 
         self.connect = connect
-        if not self.connect==None:
-            x = []
-            y = []
-            z = []
-            self.I, self.J = np.nonzero(self.connect)
-            for i in range(len(self.I)):
-                x.append(self.vals[self.I[i], 0])
-                x.append(self.vals[self.J[i], 0])
-                x.append(np.NaN)
-                y.append(self.vals[self.I[i], 1])
-                y.append(self.vals[self.J[i], 1])
-                y.append(np.NaN)
-                z.append(self.vals[self.I[i], 2])
-                z.append(self.vals[self.J[i], 2])
-                z.append(np.NaN)
-            self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
+        self.process_values(vals)
+        self.initialize_axes()
+        self.draw_vertices()
+        self.finalize_axes()
+        self.draw_edges()
         self.axes.figure.canvas.draw()
 
-    def modify(self, vals):
-        self.points_handle.remove()
-        self.line_handle[0].remove()
-        self.vals = vals.reshape((3, vals.shape[1]/3)).T
+    def draw_vertices(self):
         self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
-        self.axes.set_xlim(self.x_lim)
-        self.axes.set_ylim(self.y_lim)
-        self.axes.set_zlim(self.z_lim)
+        
+    def draw_edges(self):
         self.line_handle = []
         if not self.connect==None:
             x = []
             y = []
             z = []
             self.I, self.J = np.nonzero(self.connect)
-            for i in range(len(self.I)):
-                x.append(self.vals[self.I[i], 0])
-                x.append(self.vals[self.J[i], 0])
+            for i, j in zip(self.I, self.J):
+                x.append(self.vals[i, 0])
+                x.append(self.vals[j, 0])
                 x.append(np.NaN)
-                y.append(self.vals[self.I[i], 1])
-                y.append(self.vals[self.J[i], 1])
+                y.append(self.vals[i, 1])
+                y.append(self.vals[j, 1])
                 y.append(np.NaN)
-                z.append(self.vals[self.I[i], 2])
-                z.append(self.vals[self.J[i], 2])
+                z.append(self.vals[i, 2])
+                z.append(self.vals[j, 2])
                 z.append(np.NaN)
             self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
-
+            
+    def modify(self, vals):
+        self.process_values(vals)
+        self.initialize_axes_modify()
+        self.draw_vertices()
+        self.finalize_axes_modify()
+        self.draw_edges()
         self.axes.figure.canvas.draw()
 
+    def process_values(self, vals):
+        raise NotImplementedError, "this needs to be implemented to use the data_show class"
+
+    def initialize_axes(self):
+        """Set up the axes with the right limits and scaling."""
+        self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()])
+        self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()])
+        self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()])
+
+    def initialize_axes_modify(self):
+        self.points_handle.remove()
+        self.line_handle[0].remove()
+
+    def finalize_axes(self):
+        self.axes.set_xlim(self.x_lim)
+        self.axes.set_ylim(self.y_lim)
+        self.axes.set_zlim(self.z_lim)
+        self.axes.set_aspect(1)
+        self.axes.autoscale(enable=False)
+
+    def finalize_axes_modify(self):
+        self.axes.set_xlim(self.x_lim)
+        self.axes.set_ylim(self.y_lim)
+        self.axes.set_zlim(self.z_lim)
 
 
+class stick_show(mocap_data_show):
+    """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
+    def __init__(self, vals, axes=None, connect=None):
+        mocap_data_show.__init__(self, vals, axes, connect)
+
+    def process_values(self, vals):
+        self.vals = vals.reshape((3, vals.shape[1]/3)).T
+    
+class skeleton_show(mocap_data_show):
+    """data_show class for visualizing motion capture data encoded as a skeleton with angles."""
+    def __init__(self, vals, skel, padding=0, axes=None):
+        self.skel = skel
+        self.padding = padding
+        connect = skel.connection_matrix()
+        mocap_data_show.__init__(self, vals, axes, connect)
+
+    def process_values(self, vals):
+        if self.padding>0:
+            channels = np.zeros((vals.shape[0], vals.shape[1]+self.padding))
+            channels[:, 0:vals.shape[0]] = vals
+        else:
+            channels = vals
+        vals_mat = self.skel.to_xyz(channels.flatten())
+        self.vals = vals_mat
+        # Flip the Y and Z axes
+        self.vals[:, 0] = vals_mat[:, 0]
+        self.vals[:, 1] = vals_mat[:, 2]
+        self.vals[:, 2] = vals_mat[:, 1]
+        
+    def wrap_around(vals, lim, connect):
+        quot = lim[1] - lim[0]
+        vals = rem(vals, quot)+lim[0]
+        nVals = floor(vals/quot)
+        for i in range(connect.shape[0]):
+            for j in find(connect[i, :]):
+                if nVals[i] != nVals[j]:
+                    connect[i, j] = False
+        return vals, connect

From 52ba8e4ba36fdfbcb0f0e643c7e1a366065fe250 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Sun, 28 Apr 2013 17:22:04 +0100
Subject: [PATCH 48/54] remo0ved slices from models

slices are now handles by special indexing kern parts, such as
coregionalisation, independent_outputs. The old slicing functionality
has been removed simply to clean up the code a little.

Now that input_slices still exist (and will continue to be useful) in
kern.py. They do need a little work though, for the psi-statistics
---
 GPy/kern/kern.py                   | 152 ++++++++++-------------------
 GPy/models/GP.py                   |  63 +++++-------
 GPy/models/GP_regression.py        |  10 +-
 GPy/models/generalized_FITC.py     |  17 ++--
 GPy/models/sparse_GP.py            |  20 ++--
 GPy/models/sparse_GP_regression.py |  12 +--
 GPy/models/warped_GP.py            |   4 +-
 7 files changed, 103 insertions(+), 175 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index a6551e11..4547fadc 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -13,15 +13,9 @@ from prod import prod
 class kern(parameterised):
     def __init__(self, D, parts=[], input_slices=None):
         """
-        This kernel does 'compound' structures.
+        This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
 
-        The compund structure enables many features of GPy, including
-         - Hierarchical models
-         - Correleated output models
-         - multi-view learning
-
-        Hadamard product and outer-product kernels will require a new class.
-        This feature is currently WONTFIX. for small number sof inputs, you can use the sympy kernel for this.
+        The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
 
         :param D: The dimensioality of the kernel's input space
         :type D: int
@@ -94,34 +88,6 @@ class kern(parameterised):
             self.param_slices.append(slice(count, count + p.Nparam))
             count += p.Nparam
 
-    def _process_slices(self, slices1=None, slices2=None):
-        """
-        Format the slices so that they can easily be used.
-        Both slices can be any of three things:
-         - If None, the new points covary through every kernel part (default)
-         - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
-         - If a list of booleans, specifying which kernel parts are active
-
-        if the second arg is False, return only slices1
-
-        returns actual lists of slice objects
-        """
-        if slices1 is None:
-            slices1 = [slice(None)] * self.Nparts
-        elif all([type(s_i) is bool for s_i in slices1]):
-            slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
-        else:
-            assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
-        if slices2 is None:
-            slices2 = [slice(None)] * self.Nparts
-        elif slices2 is False:
-            return slices1
-        elif all([type(s_i) is bool for s_i in slices2]):
-            slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
-        else:
-            assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
-        return slices1, slices2
-
     def __add__(self, other):
         assert self.D == other.D
         newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
@@ -142,7 +108,7 @@ class kern(parameterised):
         :param other: the other kernel to be added
         :type other: GPy.kern
         """
-        return self +other
+        return self + other
 
     def add_orthogonal(self, other):
         """
@@ -285,18 +251,19 @@ class kern(parameterised):
 
         return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
 
-    def K(self, X, X2=None, slices1=None, slices2=None):
+    def K(self, X, X2=None, which_parts='all'):
+        if which_parts=='all':
+            which_parts = [True]*self.Nparts
         assert X.shape[1] == self.D
-        slices1, slices2 = self._process_slices(slices1, slices2)
         if X2 is None:
             target = np.zeros((X.shape[0], X.shape[0]))
-            [p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+            [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
         else:
             target = np.zeros((X.shape[0], X2.shape[0]))
-            [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+            [p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
         return target
 
-    def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
+    def dK_dtheta(self, dL_dK, X, X2=None):
         """
         :param dL_dK: An array of dL_dK derivaties, dL_dK
         :type dL_dK: Np.ndarray (N x M)
@@ -304,109 +271,94 @@ class kern(parameterised):
         :type X: np.ndarray (N x D)
         :param X2: Observed dara inputs (optional, defaults to X)
         :type X2: np.ndarray (M x D)
-        :param slices1: a slice object for each kernel part, describing which data are affected by each kernel part
-        :type slices1: list of slice objects, or list of booleans
-        :param slices2: slices for X2
         """
         assert X.shape[1] == self.D
-        slices1, slices2 = self._process_slices(slices1, slices2)
         target = np.zeros(self.Nparam)
         if X2 is None:
-            [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
+            [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
         else:
-            [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
-
+            [p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
 
         return self._transform_gradients(target)
 
-    def dK_dX(self, dL_dK, X, X2=None, slices1=None, slices2=None):
+    def dK_dX(self, dL_dK, X, X2=None):
         if X2 is None:
             X2 = X
-        slices1, slices2 = self._process_slices(slices1, slices2)
         target = np.zeros_like(X)
-        [p.dK_dX(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        if X2 is None:
+            [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
+        else:
+            [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         return target
 
-    def Kdiag(self, X, slices=None):
+    def Kdiag(self, X, which_parts='all'):
+        if which_parts=='all':
+            which_parts = [True]*self.Nparts
         assert X.shape[1] == self.D
-        slices = self._process_slices(slices, False)
         target = np.zeros(X.shape[0])
-        [p.Kdiag(X[s, i_s], target=target[s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
+        [p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)]
         return target
 
-    def dKdiag_dtheta(self, dL_dKdiag, X, slices=None):
+    def dKdiag_dtheta(self, dL_dKdiag, X):
         assert X.shape[1] == self.D
-        assert len(dL_dKdiag.shape) == 1
         assert dL_dKdiag.size == X.shape[0]
-        slices = self._process_slices(slices, False)
         target = np.zeros(self.Nparam)
-        [p.dKdiag_dtheta(dL_dKdiag[s], X[s, i_s], target[ps]) for p, i_s, s, ps in zip(self.parts, self.input_slices, slices, self.param_slices)]
+        [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
         return self._transform_gradients(target)
 
-    def dKdiag_dX(self, dL_dKdiag, X, slices=None):
+    def dKdiag_dX(self, dL_dKdiag, X):
         assert X.shape[1] == self.D
-        slices = self._process_slices(slices, False)
         target = np.zeros_like(X)
-        [p.dKdiag_dX(dL_dKdiag[s], X[s, i_s], target[s, i_s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
+        [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         return target
 
-    def psi0(self, Z, mu, S, slices=None):
-        slices = self._process_slices(slices, False)
+    def psi0(self, Z, mu, S):
         target = np.zeros(mu.shape[0])
-        [p.psi0(Z, mu[s], S[s], target[s]) for p, s in zip(self.parts, slices)]
+        [p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
         return target
 
-    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, slices=None):
-        slices = self._process_slices(slices, False)
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
         target = np.zeros(self.Nparam)
-        [p.dpsi0_dtheta(dL_dpsi0[s], Z, mu[s], S[s], target[ps]) for p, ps, s in zip(self.parts, self.param_slices, slices)]
+        [p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
         return self._transform_gradients(target)
 
-    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, slices=None):
-        slices = self._process_slices(slices, False)
+    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
         target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
-        [p.dpsi0_dmuS(dL_dpsi0, Z, mu[s], S[s], target_mu[s], target_S[s]) for p, s in zip(self.parts, slices)]
+        [p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         return target_mu, target_S
 
-    def psi1(self, Z, mu, S, slices1=None, slices2=None):
-        """Think N,M,Q """
-        slices1, slices2 = self._process_slices(slices1, slices2)
+    def psi1(self, Z, mu, S):
         target = np.zeros((mu.shape[0], Z.shape[0]))
-        [p.psi1(Z[s2], mu[s1], S[s1], target[s1, s2]) for p, s1, s2 in zip(self.parts, slices1, slices2)]
+        [p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
         return target
 
-    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
-        """N,M,(Ntheta)"""
-        slices1, slices2 = self._process_slices(slices1, slices2)
+    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
         target = np.zeros((self.Nparam))
-        [p.dpsi1_dtheta(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, ps, s1, s2, i_s in zip(self.parts, self.param_slices, slices1, slices2, self.input_slices)]
+        [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
         return self._transform_gradients(target)
 
-    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
-        """N,M,Q"""
-        slices1, slices2 = self._process_slices(slices1, slices2)
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
         target = np.zeros_like(Z)
-        [p.dpsi1_dZ(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         return target
 
-    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
         """return shapes are N,M,Q"""
-        slices1, slices2 = self._process_slices(slices1, slices2)
         target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
-        [p.dpsi1_dmuS(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         return target_mu, target_S
 
-    def psi2(self, Z, mu, S, slices1=None, slices2=None):
+    def psi2(self, Z, mu, S):
         """
         :param Z: np.ndarray of inducing inputs (M x Q)
         :param mu, S: np.ndarrays of means and variances (each N x Q)
         :returns psi2: np.ndarray (N,M,M)
         """
         target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
-        slices1, slices2 = self._process_slices(slices1, slices2)
-        [p.psi2(Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s1, s2, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
 
         # compute the "cross" terms
+        #TODO: input_slices needed
         for p1, p2 in itertools.combinations(self.parts, 2):
             # white doesn;t combine with anything
             if p1.name == 'white' or p2.name == 'white':
@@ -434,14 +386,12 @@ class kern(parameterised):
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return target
 
-    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
-        """Returns shape (N,M,M,Ntheta)"""
-        slices1, slices2 = self._process_slices(slices1, slices2)
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
         target = np.zeros(self.Nparam)
-        [p.dpsi2_dtheta(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, i_s, s1, s2, ps in zip(self.parts, self.input_slices, slices1, slices2, self.param_slices)]
+        [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
 
         # compute the "cross" terms
-        # TODO: better looping
+        # TODO: better looping, input_slices
         for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
             p1, p2 = self.parts[i1], self.parts[i2]
 #             ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
@@ -478,12 +428,12 @@ class kern(parameterised):
 
         return self._transform_gradients(target)
 
-    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
-        slices1, slices2 = self._process_slices(slices1, slices2)
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
         target = np.zeros_like(Z)
-        [p.dpsi2_dZ(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
 
         # compute the "cross" terms
+        #TODO: we need input_slices here.
         for p1, p2 in itertools.combinations(self.parts, 2):
             # white doesn;t combine with anything
             if p1.name == 'white' or p2.name == 'white':
@@ -506,16 +456,14 @@ class kern(parameterised):
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
-
         return target * 2.
 
-    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
-        """return shapes are N,M,M,Q"""
-        slices1, slices2 = self._process_slices(slices1, slices2)
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
         target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
-        [p.dpsi2_dmuS(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
 
         # compute the "cross" terms
+        #TODO: we need input_slices here.
         for p1, p2 in itertools.combinations(self.parts, 2):
             # white doesn;t combine with anything
             if p1.name == 'white' or p2.name == 'white':
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index c6e46bea..45ed61ca 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -19,7 +19,6 @@ class GP(model):
     :parm likelihood: a GPy likelihood
     :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_X: False|True
-    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
     :rtype: model object
     :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
     :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
@@ -28,10 +27,9 @@ class GP(model):
     .. Note:: Multiple independent outputs are allowed using columns of Y
 
     """
-    def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
+    def __init__(self, X, likelihood, kernel, normalize_X=False):
 
         # parse arguments
-        self.Xslices = Xslices
         self.X = X
         assert len(self.X.shape) == 2
         self.N, self.Q = self.X.shape
@@ -64,12 +62,12 @@ class GP(model):
         return np.zeros_like(self.Z)
 
     def _set_params(self, p):
-        self.kern._set_params_transformed(p[:self.kern.Nparam])
+        self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
         # self.likelihood._set_params(p[self.kern.Nparam:])               # test by Nicolas
         self.likelihood._set_params(p[self.kern.Nparam_transformed():])  # test by Nicolas
 
 
-        self.K = self.kern.K(self.X, slices1=self.Xslices, slices2=self.Xslices)
+        self.K = self.kern.K(self.X)
         self.K += self.likelihood.covariance_matrix
 
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
@@ -92,7 +90,7 @@ class GP(model):
         """
         Approximates a non-gaussian likelihood using Expectation Propagation
 
-        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
+        For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
         self.likelihood.fit_full(self.kern.K(self.X))
@@ -122,31 +120,33 @@ class GP(model):
         """
         The gradient of all parameters.
 
-        For the kernel parameters, use the chain rule via dL_dK
-
-        For the likelihood parameters, pass in alpha = K^-1 y
+        Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
         """
-        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X, slices1=self.Xslices, slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
 
-    def _raw_predict(self, _Xnew, slices=None, full_cov=False):
+    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalization or likelihood
+
+         #TODO: which_parts does nothing
+
+
         """
-        Kx = self.kern.K(self.X, _Xnew, slices1=self.Xslices, slices2=slices)
+        Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts)
         mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
         KiKx = np.dot(self.Ki, Kx)
         if full_cov:
-            Kxx = self.kern.K(_Xnew, slices1=slices, slices2=slices)
+            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
             var = Kxx - np.dot(KiKx.T, Kx)
         else:
-            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
+            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
             var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
             var = var[:, None]
         return mu, var
 
 
-    def predict(self, Xnew, slices=None, full_cov=False):
+    def predict(self, Xnew, which_parts='all', full_cov=False):
         """
         Predict the function(s) at the new point(s) Xnew.
 
@@ -154,19 +154,14 @@ class GP(model):
         ---------
         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.Q
-        :param slices:  specifies which outputs kernel(s) the Xnew correspond to (see below)
-        :type slices: (None, list of slice objects, list of ints)
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
         :param full_cov: whether to return the folll covariance matrix, or just the diagonal
         :type full_cov: bool
         :rtype: posterior mean,  a Numpy array, Nnew x self.D
         :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
         :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.D
 
-        .. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
-
-             - If None, the new points covary throigh every kernel part (default)
-             - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
-             - If a list of booleans, specifying which kernel parts are active
 
            If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
            This is to allow for different normalizations of the output dimensions.
@@ -174,15 +169,15 @@ class GP(model):
         """
         # normalize X values
         Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
-        mu, var = self._raw_predict(Xnew, slices, full_cov)
+        mu, var = self._raw_predict(Xnew, which_parts, full_cov)
 
-        # now push through likelihood TODO
+        # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
 
         return mean, var, _025pm, _975pm
 
 
-    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
+    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
         """
         Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
 
@@ -190,8 +185,8 @@ class GP(model):
         :param which_data: which if the training data to plot (default all)
         :type which_data: 'all' or a slice object to slice self.X, self.Y
         :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :param which_functions: which of the kernel functions to plot (additively)
-        :type which_functions: list of bools
+        :param which_parts: which of the kernel functions to plot (additively)
+        :type which_parts: 'all', or list of bools
         :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
 
         Plot the posterior of the GP.
@@ -202,19 +197,17 @@ class GP(model):
         Can plot only part of the data and part of the posterior functions using which_data and which_functions
         Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
         """
-        if which_functions == 'all':
-            which_functions = [True] * self.kern.Nparts
         if which_data == 'all':
             which_data = slice(None)
 
         if self.X.shape[1] == 1:
             Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
             if samples == 0:
-                m, v = self._raw_predict(Xnew, slices=which_functions)
+                m, v = self._raw_predict(Xnew, which_parts=which_parts)
                 gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
                 pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
             else:
-                m, v = self._raw_predict(Xnew, slices=which_functions, full_cov=True)
+                m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
                 Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
                 gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
                 for i in range(samples):
@@ -230,7 +223,7 @@ class GP(model):
         elif self.X.shape[1] == 2:
             resolution = resolution or 50
             Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
-            m, v = self._raw_predict(Xnew, slices=which_functions)
+            m, v = self._raw_predict(Xnew, which_parts=which_parts)
             m = m.reshape(resolution, resolution).T
             pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
             pb.scatter(Xorig[:, 0], Xorig[:, 1], 40, Yorig, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
@@ -246,8 +239,6 @@ class GP(model):
 
         """
         # TODO include samples
-        if which_functions == 'all':
-            which_functions = [True] * self.kern.Nparts
         if which_data == 'all':
             which_data = slice(None)
 
@@ -256,7 +247,7 @@ class GP(model):
             Xu = self.X * self._Xstd + self._Xmean  # NOTE self.X are the normalized values now
 
             Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
-            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
+            m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
             gpplot(Xnew, m, lower, upper)
             pb.plot(Xu[which_data], self.likelihood.data[which_data], 'kx', mew=1.5)
             if self.has_uncertain_inputs:
@@ -277,7 +268,7 @@ class GP(model):
             resolution = resolution or 50
             Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
             x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
-            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
+            m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
             m = m.reshape(resolution, resolution).T
             pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
             Yf = self.likelihood.Y.flatten()
diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py
index 5f9f9f3e..7f2673a6 100644
--- a/GPy/models/GP_regression.py
+++ b/GPy/models/GP_regression.py
@@ -11,26 +11,24 @@ class GP_regression(GP):
     """
     Gaussian Process model for regression
 
-    This is a thin wrapper around the GP class, with a set of sensible defalts
+    This is a thin wrapper around the models.GP class, with a set of sensible defalts
 
     :param X: input observations
     :param Y: observed values
-    :param kernel: a GPy kernel, defaults to rbf+white
+    :param kernel: a GPy kernel, defaults to rbf
     :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_X: False|True
     :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_Y: False|True
-    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
-    :rtype: model object
 
     .. Note:: Multiple independent outputs are allowed using columns of Y
 
     """
 
-    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
+    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False):
         if kernel is None:
             kernel = kern.rbf(X.shape[1])
 
         likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
 
-        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
+        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py
index 26875f64..25b6c18f 100644
--- a/GPy/models/generalized_FITC.py
+++ b/GPy/models/generalized_FITC.py
@@ -23,20 +23,19 @@ class generalized_FITC(sparse_GP):
     :type X_variance: np.ndarray (N x Q) | None
     :param Z: inducing inputs (optional, see note)
     :type Z: np.ndarray (M x Q) | None
-    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
     :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
     :type M: int
     :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
     :type normalize_(X|Y): bool
     """
 
-    def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
+    def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
 
         self.Z = Z
         self.M = self.Z.shape[0]
         self._precision = likelihood.precision
 
-        sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False)
+        sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
 
     def _set_params(self, p):
         self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
@@ -145,7 +144,7 @@ class generalized_FITC(sparse_GP):
         D = 0.5*np.trace(self.Cpsi1VVpsi1)
         return A+C+D
 
-    def _raw_predict(self, Xnew, slices, full_cov=False):
+    def _raw_predict(self, Xnew, which_parts, full_cov=False):
         if self.likelihood.is_heteroscedastic:
             """
             Make a prediction for the generalized FITC model
@@ -174,16 +173,16 @@ class generalized_FITC(sparse_GP):
             self.mu_H = mu_H
             Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
             # q(f_star|y) = N(f_star|mu_star,sigma2_star)
-            Kx = self.kern.K(self.Z, Xnew)
+            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
             KR0T = np.dot(Kx.T,self.Lmi.T)
             mu_star = np.dot(KR0T,mu_H)
             if full_cov:
-                Kxx = self.kern.K(Xnew)
+                Kxx = self.kern.K(Xnew,which_parts=which_parts)
                 var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
             else:
-                Kxx = self.kern.Kdiag(Xnew)
-                Kxx_ = self.kern.K(Xnew)
-                var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
+                Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
+                Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
+                var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
                 var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
             return mu_star[:,None],var
         else:
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 697a9978..20caa1a8 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -9,10 +9,6 @@ from .. import kern
 from GP import GP
 from scipy import linalg
 
-#Still TODO:
-# make use of slices properly (kernel can now do this)
-# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
-
 class sparse_GP(GP):
     """
     Variational sparse GP model
@@ -27,19 +23,16 @@ class sparse_GP(GP):
     :type X_variance: np.ndarray (N x Q) | None
     :param Z: inducing inputs (optional, see note)
     :type Z: np.ndarray (M x Q) | None
-    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
     :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
     :type M: int
     :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
     :type normalize_(X|Y): bool
     """
 
-    def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
+    def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
         self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
         self.auto_scale_factor = False
         self.Z = Z
-        self.Zslices = Zslices
-        self.Xslices = Xslices
         self.M = Z.shape[0]
         self.likelihood = likelihood
 
@@ -50,7 +43,7 @@ class sparse_GP(GP):
             self.has_uncertain_inputs=True
             self.X_variance = X_variance
 
-        GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
+        GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
 
         #normalize X uncertainty also
         if self.has_uncertain_inputs:
@@ -65,13 +58,12 @@ class sparse_GP(GP):
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
         else:
-            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
+            self.psi0 = self.kern.Kdiag(self.X)
             self.psi1 = self.kern.K(self.Z,self.X)
             self.psi2 = None
 
     def _computations(self):
         #TODO: find routine to multiply triangular matrices
-        #TODO: slices for psi statistics (easy enough)
 
         sf = self.scale_factor
         sf2 = sf**2
@@ -252,16 +244,16 @@ class sparse_GP(GP):
             dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
         return dL_dZ
 
-    def _raw_predict(self, Xnew, slices, full_cov=False):
+    def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
         """Internal helper function for making predictions, does not account for normalization"""
 
         Kx = self.kern.K(self.Z, Xnew)
         mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
         if full_cov:
-            Kxx = self.kern.K(Xnew)
+            Kxx = self.kern.K(Xnew,which_parts=which_parts)
             var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
         else:
-            Kxx = self.kern.Kdiag(Xnew)
+            Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
             var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
 
         return mu,var[:,None]
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 0ef78c32..84a5d37c 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -13,7 +13,7 @@ class sparse_GP_regression(sparse_GP):
     """
     Gaussian Process model for regression
 
-    This is a thin wrapper around the GP class, with a set of sensible defalts
+    This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
 
     :param X: input observations
     :param Y: observed values
@@ -22,25 +22,25 @@ class sparse_GP_regression(sparse_GP):
     :type normalize_X: False|True
     :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_Y: False|True
-    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
     :rtype: model object
 
     .. Note:: Multiple independent outputs are allowed using columns of Y
 
     """
 
-    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
-        #kern defaults to rbf
+    def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
+        #kern defaults to rbf (plus white for stability)
         if kernel is None:
             kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
 
         #Z defaults to a subset of the data
         if Z is None:
-            Z = np.random.permutation(X.copy())[:M]
+            i = np.random.permutation(X.shape[0])[:M]
+            Z = X[i].copy()
         else:
             assert Z.shape[1]==X.shape[1]
 
         #likelihood defaults to Gaussian
         likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
 
-        sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
+        sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X)
diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_GP.py
index 052f8d8e..9c3ce401 100644
--- a/GPy/models/warped_GP.py
+++ b/GPy/models/warped_GP.py
@@ -14,7 +14,7 @@ from .. import likelihoods
 from .. import kern
 
 class warpedGP(GP):
-    def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False, Xslices=None):
+    def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):
 
         if kernel is None:
             kernel = kern.rbf(X.shape[1])
@@ -28,7 +28,7 @@ class warpedGP(GP):
         self.predict_in_warped_space = False
         likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
 
-        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
+        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
 
     def _set_params(self, x):
         self.warping_params = x[:self.warping_function.num_parameters]

From 7d9352c7330d9c826c21c9e8f8cb4aee930037b5 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Sun, 28 Apr 2013 21:37:36 +0100
Subject: [PATCH 49/54] weaved coregionalise. much performance gained

---
 GPy/kern/coregionalise.py   | 62 ++++++++++++++++++++++++++++++++++---
 GPy/kern/kern.py            |  1 +
 GPy/kern/prod.py            |  9 ++++--
 GPy/kern/prod_orthogonal.py |  9 ++++--
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index a76bb31e..a4d22c2d 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -5,10 +5,11 @@ from kernpart import kernpart
 import numpy as np
 from GPy.util.linalg import mdot, pdinv
 import pdb
+from scipy import weave
 
 class coregionalise(kernpart):
     """
-    Kernel for Intrisec Corregionalization Models
+    Kernel for Intrinsic Corregionalization Models
     """
     def __init__(self,Nout,R=1, W=None, kappa=None):
         self.D = 1
@@ -42,19 +43,70 @@ class coregionalise(kernpart):
 
     def K(self,index,index2,target):
         index = np.asarray(index,dtype=np.int)
+
+        #here's the old code (numpy)
+        #if index2 is None:
+            #index2 = index
+        #else:
+            #index2 = np.asarray(index2,dtype=np.int)
+        #false_target = target.copy()
+        #ii,jj = np.meshgrid(index,index2)
+        #ii,jj = ii.T, jj.T
+        #false_target += self.B[ii,jj]
+
         if index2 is None:
-            index2 = index
+            code="""
+            for(int i=0;i<N; i++){
+              target[i+i*N] += B[index[i]+Nout*index[i]];
+              for(int j=0; j<i; j++){
+                  target[j+i*N] += B[index[i]+Nout*index[j]];
+                  target[i+j*N] += target[j+i*N];
+                }
+              }
+            """
+            N,B,Nout = index.size, self.B, self.Nout
+            weave.inline(code,['target','index','N','B','Nout'])
         else:
             index2 = np.asarray(index2,dtype=np.int)
-        ii,jj = np.meshgrid(index,index2)
-        ii,jj = ii.T, jj.T
-        target += self.B[ii,jj]
+            code="""
+            for(int i=0;i<M; i++){
+              for(int j=0; j<N; j++){
+                  target[i+j*M] += B[Nout*index[j]+index2[i]];
+                }
+              }
+            """
+            N,M,B,Nout = index.size,index2.size, self.B, self.Nout
+            weave.inline(code,['target','index','index2','N','M','B','Nout'])
+
 
     def Kdiag(self,index,target):
         target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
 
     def dK_dtheta(self,dL_dK,index,index2,target):
         index = np.asarray(index,dtype=np.int)
+        dL_dK_small = np.zeros_like(self.B)
+        if index2 is None:
+            index2 = index
+        else:
+            index2 = np.asarray(index2,dtype=np.int)
+
+        code="""
+        for(int i=0; i<M; i++){
+          for(int j=0; j<N; j++){
+            dL_dK_small[index[j] + Nout*index2[i]] += dL_dK[i+j*M];
+          }
+        }
+        """
+        N, M, Nout = index.size, index2.size, self.Nout
+        weave.inline(code, ['N','M','Nout','dL_dK','dL_dK_small','index','index2'])
+
+        dkappa = np.diag(dL_dK_small)
+        dL_dK_small += dL_dK_small.T
+        dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
+
+        target += np.hstack([dW.flatten(),dkappa])
+
+    def dK_dtheta_old(self,dL_dK,index,index2,target):
         if index2 is None:
             index2 = index
         else:
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 4547fadc..67333765 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -9,6 +9,7 @@ from kernpart import kernpart
 import itertools
 from prod_orthogonal import prod_orthogonal
 from prod import prod
+from ..util.linalg import symmetrify
 
 class kern(parameterised):
     def __init__(self, D, parts=[], input_slices=None):
diff --git a/GPy/kern/prod.py b/GPy/kern/prod.py
index 6a59c220..c16d6034 100644
--- a/GPy/kern/prod.py
+++ b/GPy/kern/prod.py
@@ -40,9 +40,12 @@ class prod(kernpart):
 
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
-        if X2 is None: X2 = X
-        target1 = np.zeros((X.shape[0],X2.shape[0]))
-        target2 = np.zeros((X.shape[0],X2.shape[0]))
+        if X2 is None:
+            target1 = np.zeros((X.shape[0],X2.shape[0]))
+            target2 = np.zeros((X.shape[0],X2.shape[0]))
+        else:
+            target1 = np.zeros((X.shape[0],X.shape[0]))
+            target2 = np.zeros((X.shape[0],X.shape[0]))
         self.k1.K(X,X2,target1)
         self.k2.K(X,X2,target2)
         target += target1 * target2
diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py
index fc349da8..6ba9965f 100644
--- a/GPy/kern/prod_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -39,11 +39,14 @@ class prod_orthogonal(kernpart):
 
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
-        if X2 is None: X2 = X
         target1 = np.zeros_like(target)
         target2 = np.zeros_like(target)
-        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
+        if X2 is None:
+            self.k1.K(X[:,:self.k1.D],None,target1)
+            self.k2.K(X[:,self.k1.D:],None,target2)
+        else:
+            self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
+            self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
         target += target1 * target2
 
     def dK_dtheta(self,dL_dK,X,X2,target):

From 6dced9ac39485c1d9bfe9ccc3b1e1c032521fee7 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Sun, 28 Apr 2013 22:32:37 +0100
Subject: [PATCH 50/54] reimplemented caching in prod_orthogonal...

---
 GPy/kern/prod_orthogonal.py | 59 +++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py
index 6ba9965f..cc15a94e 100644
--- a/GPy/kern/prod_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -21,44 +21,35 @@ class prod_orthogonal(kernpart):
         self.name = k1.name + '<times>' + k2.name
         self.k1 = k1
         self.k2 = k2
+        self._X, self._X2, self._params = np.empty(shape=(3,1))
         self._set_params(np.hstack((k1._get_params(),k2._get_params())))
 
     def _get_params(self):
         """return the value of the parameters."""
-        return self.params
+        return np.hstack((self.k1._get_params(), self.k2._get_params()))
 
     def _set_params(self,x):
         """set the value of the parameters."""
         self.k1._set_params(x[:self.k1.Nparam])
         self.k2._set_params(x[self.k1.Nparam:])
-        self.params = x
 
     def _get_param_names(self):
         """return parameter names."""
         return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
 
     def K(self,X,X2,target):
-        """Compute the covariance matrix between X and X2."""
-        target1 = np.zeros_like(target)
-        target2 = np.zeros_like(target)
-        if X2 is None:
-            self.k1.K(X[:,:self.k1.D],None,target1)
-            self.k2.K(X[:,self.k1.D:],None,target2)
-        else:
-            self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
-            self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
-        target += target1 * target2
+        self._K_computations(X,X2)
+        target += self._K1 * self._K2
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
-
-        self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
-        self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
+        self._K_computations(X,X2)
+        if X2 is None:
+            self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], None, target[:self.k1.Nparam])
+            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], None, target[self.k1.Nparam:])
+        else:
+            self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -78,14 +69,9 @@ class prod_orthogonal(kernpart):
 
     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
-
-        self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
-        self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
+        self._K_computations(X,X2)
+        self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
@@ -96,3 +82,20 @@ class prod_orthogonal(kernpart):
         self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
         self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)
 
+    def _K_computations(self,X,X2):
+        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
+            self._X = X.copy()
+            self._params == self._get_params().copy()
+            if X2 is None:
+                self._X2 = None
+                self._K1 = np.zeros((X.shape[0],X.shape[0]))
+                self._K2 = np.zeros((X.shape[0],X.shape[0]))
+                self.k1.K(X[:,:self.k1.D],None,self._K1)
+                self.k2.K(X[:,self.k1.D:],None,self._K2)
+            else:
+                self._X2 = X2.copy()
+                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
+                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
+                self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
+                self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
+

From 23bde6f3ddd56b938279451f1fcb55a84e00ced5 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 29 Apr 2013 09:11:36 +0100
Subject: [PATCH 51/54] removed uncollapsed sparse GP. superceeded by the
 forthcoming svigp package

---
 GPy/models/uncollapsed_sparse_GP.py | 151 ----------------------------
 1 file changed, 151 deletions(-)
 delete mode 100644 GPy/models/uncollapsed_sparse_GP.py

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
deleted file mode 100644
index d2638784..00000000
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright (c) 2012 James Hensman
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
-from .. import kern
-from ..likelihoods import likelihood
-from sparse_GP import sparse_GP
-
-class uncollapsed_sparse_GP(sparse_GP):
-    """
-    Variational sparse GP model (Regression), where the approximating distribution q(u) is represented explicitly
-
-    :param X: inputs
-    :type X: np.ndarray (N x Q)
-    :param likelihood: GPy likelihood class, containing observed data
-    :param q_u: canonical parameters of the distribution squasehd into a 1D array
-    :type q_u: np.ndarray
-    :param kernel : the kernel/covariance function. See link kernels
-    :type kernel: a GPy kernel
-    :param Z: inducing inputs (optional, see note)
-    :type Z: np.ndarray (M x Q) | None
-    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
-    :param normalize_X : whether to normalize the data before computing (predictions will be in original scales)
-    :type normalize_X: bool
-    """
-
-    def __init__(self, X, likelihood, kernel, Z, q_u=None, **kwargs):
-        self.M = Z.shape[0]
-        if q_u is None:
-            q_u = np.hstack((np.random.randn(self.M*likelihood.D),-0.5*np.eye(self.M).flatten()))
-            self.likelihood = likelihood
-        self.set_vb_param(q_u)
-        sparse_GP.__init__(self, X, likelihood, kernel, Z, **kwargs)
-
-    def _computations(self):
-        # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
-        if self.has_uncertain_inputs:
-            raise NotImplementedError
-        else:
-            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
-            self.psi1 = self.kern.K(self.Z,self.X)
-            if self.likelihood.is_heteroscedastic:
-                raise NotImplementedError
-            else:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
-            self.psi2_beta_scaled = np.dot(tmp,tmp.T)
-            self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
-
-
-        self.V = self.likelihood.precision*self.Y
-        self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
-        self.psi1V = np.dot(self.psi1, self.V)
-        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
-        self.B = np.eye(self.M) + self.A
-        self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
-        self.trace_K = self.psi0 - np.trace(self.A)/self.beta
-        self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])
-
-        # Compute dL_dpsi
-        self.dL_dpsi0 = - 0.5 * self.likelihood.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T # This is the correct term for E I think...
-        self.dL_dpsi2 = 0.5 * self.beta * self.likelihood.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
-
-        # Compute dL_dKmm
-        tmp = self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) -np.dot(self.q_u_expectation[0],self.psi1V.T)
-        tmp += tmp.T
-        tmp += self.likelihood.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
-        self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi)
-
-        #Compute the gradient of the log likelihood wrt noise variance
-        #TODO: suport heteroscedatic noise
-        dbeta =   0.5 * self.N*self.likelihood.D/self.beta
-        dbeta += - 0.5 * self.likelihood.D * self.trace_K
-        dbeta += - 0.5 * self.likelihood.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi))
-        dbeta += - 0.5 * self.trYYT
-        dbeta += np.sum(np.dot(self.Y.T,self.projected_mean))
-        self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
-
-    def log_likelihood(self):
-        """
-        Compute the (lower bound on the) log marginal likelihood
-        """
-        A = -0.5*self.N*self.likelihood.D*(np.log(2.*np.pi) - np.log(self.beta))
-        B = -0.5*self.beta*self.likelihood.D*self.trace_K
-        C = -0.5*self.likelihood.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
-        D = -0.5*self.beta*self.trYYT
-        E = np.sum(np.dot(self.V.T,self.projected_mean))
-        return A+B+C+D+E
-
-    def _raw_predict(self, Xnew, slices,full_cov=False):
-        """Internal helper function for making predictions, does not account for normalization"""
-        Kx = self.kern.K(Xnew,self.Z)
-        mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
-
-        tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
-        if full_cov:
-            Kxx = self.kern.K(Xnew)
-            var = Kxx - mdot(Kx,tmp,Kx.T)
-        else:
-            Kxx = self.kern.Kdiag(Xnew)
-            var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
-        return mu,var
-
-
-    def set_vb_param(self,vb_param):
-        """set the distribution q(u) from the canonical parameters"""
-        self.q_u_prec = -2.*vb_param[-self.M**2:].reshape(self.M, self.M)
-        self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
-        self.q_u_logdet = -tmp
-        self.q_u_mean = np.dot(self.q_u_cov,vb_param[:self.M*self.likelihood.D].reshape(self.M,self.likelihood.D))
-
-        self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.likelihood.D)
-
-        self.q_u_canonical = (np.dot(self.q_u_prec, self.q_u_mean),-0.5*self.q_u_prec)
-        #TODO: computations now?
-
-    def get_vb_param(self):
-        """
-        Return the canonical parameters of the distribution q(u)
-        """
-        return np.hstack([e.flatten() for e in self.q_u_canonical])
-
-    def vb_grad_natgrad(self):
-        """
-        Compute the gradients of the lower bound wrt the canonical and
-        Expectation parameters of u.
-
-        Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family)
-        """
-        dL_dmmT_S = -0.5*self.Lambda-self.q_u_canonical[1]
-        dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
-
-        #dL_dSim =
-        #dL_dmhSi =
-
-        return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten()))  # natgrad only, grad TODO
-
-
-    def plot(self, *args, **kwargs):
-        """
-        add the distribution q(u) to the plot from sparse_GP
-        """
-        sparse_GP.plot(self,*args,**kwargs)
-        if self.Q==1:
-            pb.errorbar(self.Z[:,0],self.q_u_expectation[0][:,0],yerr=2.*np.sqrt(np.diag(self.q_u_cov)),fmt=None,ecolor='b')
-

From 5fca43f980711becc89fc13efde81a753662cf55 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 29 Apr 2013 11:37:08 +0100
Subject: [PATCH 52/54] more stabilisation of sparse GP

---
 GPy/models/__init__.py  |  1 -
 GPy/models/sparse_GP.py | 15 ++++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index d63adaf1..4be8d360 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -9,7 +9,6 @@ from sparse_GP_regression import sparse_GP_regression
 from GPLVM import GPLVM
 from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
-from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from Bayesian_GPLVM import Bayesian_GPLVM
 from mrd import MRD
 from generalized_FITC import generalized_FITC
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 20caa1a8..a085090d 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -92,7 +92,7 @@ class sparse_GP(GP):
         #Compute A = L^-1 psi2 beta L^-T
         #self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
-        self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asarray(tmp.T,order='F'),lower=1)[0]
+        self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0]
 
         self.B = np.eye(self.M)/sf2 + self.A
 
@@ -101,12 +101,17 @@ class sparse_GP(GP):
         self.psi1V = np.dot(self.psi1, self.V)
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
         self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
-        #TODO: can we multiply in C by forwardsubstitution?
-        self.Cpsi1V = np.dot(self.C,self.psi1V)
-        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
-        #self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
+
+        #self.Cpsi1V = np.dot(self.C,self.psi1V)
+        #back substutue C into psi1V
+        tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
+        tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
+        self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
+
+        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #TODO: stabilize?
         self.E = tdot(self.Cpsi1V/sf)
 
+
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
         self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)

From 50a68e1a65f95386b5dccee5cd968fec03038356 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 29 Apr 2013 12:11:36 +0100
Subject: [PATCH 53/54] Yak shaving

---
 GPy/models/GPLVM.py     | 6 +++---
 GPy/models/sparse_GP.py | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index c0d9429a..157fe1c3 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+### Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
@@ -91,8 +91,8 @@ class GPLVM(GP):
         Xtest_full[:, :2] = Xtest
         mu, var, low, up = self.predict(Xtest_full)
         var = var[:, :1]
-        ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
-                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
+        ax.imshow(var.reshape(resolution, resolution).T,
+                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
 
         for i,ul in enumerate(np.unique(labels)):
             if type(ul) is np.string_:
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index a085090d..58f02cca 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -102,13 +102,14 @@ class sparse_GP(GP):
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
         self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
 
-        #self.Cpsi1V = np.dot(self.C,self.psi1V)
         #back substutue C into psi1V
         tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
         tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
         self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
+        #self.Cpsi1V = np.dot(self.C,self.psi1V)
+
+        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
 
-        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #TODO: stabilize?
         self.E = tdot(self.Cpsi1V/sf)
 
 

From aaaa1b5251d194dee235944fa6413165bc6f81a5 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Mon, 29 Apr 2013 16:21:38 +0100
Subject: [PATCH 54/54] model re compilation added

---
 GPy/core/model.py | 188 +++++++++++++++++++++++-----------------------
 1 file changed, 95 insertions(+), 93 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index f3542ce8..dac6d55e 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -2,17 +2,19 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-import numpy as np
-from scipy import optimize
-import sys, pdb
-import multiprocessing as mp
-from GPy.util.misc import opt_wrapper
-#import numdifftools as ndt
-from parameterised import parameterised, truncate_pad
-import priors
-from ..util.linalg import jitchol
-from ..inference import optimization
 from .. import likelihoods
+from ..inference import optimization
+from ..util.linalg import jitchol
+from GPy.util.misc import opt_wrapper
+from parameterised import parameterised, truncate_pad
+from scipy import optimize
+import multiprocessing as mp
+import numpy as np
+import priors
+import re
+import sys
+import pdb
+# import numdifftools as ndt
 
 class model(parameterised):
     def __init__(self):
@@ -24,14 +26,14 @@ class model(parameterised):
         self.preferred_optimizer = 'tnc'
     def _get_params(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
-    def _set_params(self,x):
+    def _set_params(self, x):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def _log_likelihood_gradients(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
 
-    def set_prior(self,which,what):
+    def set_prior(self, which, what):
         """
         Sets priors on the model parameters.
 
@@ -52,59 +54,59 @@ class model(parameterised):
 
         which = self.grep_param_names(which)
 
-        #check tied situation
-        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
+        # check tied situation
+        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
         if len(tie_partial_matches):
             raise ValueError, "cannot place prior across partial ties"
-        tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
-        if len(tie_matches)>1:
+        tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
+        if len(tie_matches) > 1:
             raise ValueError, "cannot place prior across multiple ties"
-        elif len(tie_matches)==1:
-            which = which[:1]# just place a prior object on the first parameter
+        elif len(tie_matches) == 1:
+            which = which[:1]  # just place a prior object on the first parameter
 
 
-        #check constraints are okay
+        # check constraints are okay
         if isinstance(what, (priors.gamma, priors.log_Gaussian)):
-            assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
-            assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
+            assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
+            assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
             unconst = np.setdiff1d(which, self.constrained_positive_indices)
             if len(unconst):
                 print "Warning: constraining parameters to be positive:"
-                print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
+                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
                 print '\n'
                 self.constrain_positive(unconst)
-        elif isinstance(what,priors.Gaussian):
-            assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
+        elif isinstance(what, priors.Gaussian):
+            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
         else:
             raise ValueError, "prior not recognised"
 
 
-        #store the prior in a local list
+        # store the prior in a local list
         for w in which:
             self.priors[w] = what
 
-    def get_gradient(self,name, return_names=False):
+    def get_gradient(self, name, return_names=False):
         """
         Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
             if return_names:
-                return self._log_likelihood_gradients()[matches],  np.asarray(self._get_param_names())[matches].tolist()
+                return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
             else:
                 return self._log_likelihood_gradients()[matches]
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name
 
     def log_prior(self):
         """evaluate the prior"""
-        return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
+        return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
 
     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         x = self._get_params()
         ret = np.zeros(x.size)
-        [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
+        [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
         return ret
 
     def _transform_gradients(self, g):
@@ -113,13 +115,13 @@ class model(parameterised):
         """
 
         x = self._get_params()
-        g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
-        g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
-        [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
-        [np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
+        g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
+        g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
+        [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
         if len(self.tied_indices) or len(self.constrained_fixed_indices):
-            to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
-            return np.delete(g,to_remove)
+            to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
+            return np.delete(g, to_remove)
         else:
             return g
 
@@ -129,15 +131,15 @@ class model(parameterised):
         Randomize the model.
         Make this draw from the prior if one exists, else draw from N(0,1)
         """
-        #first take care of all parameters (from N(0,1))
+        # first take care of all parameters (from N(0,1))
         x = self._get_params_transformed()
         x = np.random.randn(x.size)
         self._set_params_transformed(x)
-        #now draw from prior where possible
+        # now draw from prior where possible
         x = self._get_params()
-        [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
+        [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
         self._set_params(x)
-        self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self._set_params_transformed(self._get_params_transformed())  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
 
 
     def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@@ -171,10 +173,10 @@ class model(parameterised):
                 pool = mp.Pool(processes=num_processes)
                 for i in range(Nrestarts):
                     self.randomize()
-                    job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
+                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                     jobs.append(job)
 
-                pool.close() # signal that no more data coming in
+                pool.close()  # signal that no more data coming in
                 pool.join()  # wait for all the tasks to complete
             except KeyboardInterrupt:
                 print "Ctrl+c received, terminating and joining pool."
@@ -190,10 +192,10 @@ class model(parameterised):
                     self.optimization_runs.append(jobs[i].get())
 
                 if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
+                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
             except Exception as e:
                 if robust:
-                    print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
+                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
                 else:
                     raise e
 
@@ -203,22 +205,22 @@ class model(parameterised):
         else:
             self._set_params_transformed(initial_parameters)
 
-    def ensure_default_constraints(self,warn=False):
+    def ensure_default_constraints(self, warn=False):
         """
         Ensure that any variables which should clearly be positive have been constrained somehow.
         """
-        positive_strings = ['variance','lengthscale', 'precision']
+        positive_strings = ['variance', 'lengthscale', 'precision']
         param_names = self._get_param_names()
         currently_constrained = self.all_constrained_indices()
         to_make_positive = []
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(param_names[i])
+                    to_make_positive.append(re.escape(param_names[i]))
                     if warn:
-                        print "Warning! constraining %s postive"%name
+                        print "Warning! constraining %s positive" % s
         if len(to_make_positive):
-            self.constrain_positive('('+'|'.join(to_make_positive)+')')
+            self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
 
 
 
@@ -236,14 +238,14 @@ class model(parameterised):
         self._set_params_transformed(x)
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        return - LL_gradients - prior_gradients
+        return -LL_gradients - prior_gradients
 
     def objective_and_gradients(self, x):
         self._set_params_transformed(x)
-        obj_f =  -self.log_likelihood() - self.log_prior()
+        obj_f = -self.log_likelihood() - self.log_prior()
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        obj_grads = - LL_gradients - prior_gradients
+        obj_grads = -LL_gradients - prior_gradients
         return obj_f, obj_grads
 
     def optimize(self, optimizer=None, start=None, **kwargs):
@@ -269,7 +271,7 @@ class model(parameterised):
 
         self._set_params_transformed(opt.x_opt)
 
-    def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
+    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
         sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
         sgd.run()
@@ -277,8 +279,8 @@ class model(parameterised):
 
     def Laplace_covariance(self):
         """return the covariance matric of a Laplace approximatino at the current (stationary) point"""
-        #TODO add in the prior contributions for MAP estimation
-        #TODO fix the hessian for tied, constrained and fixed components
+        # TODO add in the prior contributions for MAP estimation
+        # TODO fix the hessian for tied, constrained and fixed components
         if hasattr(self, 'log_likelihood_hessian'):
             A = -self.log_likelihood_hessian()
 
@@ -292,8 +294,8 @@ class model(parameterised):
             A = -h(x)
             self._set_params(x)
         # check for almost zero components on the diagonal which screw up the cholesky
-        aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
-        A[aa,aa] = 0.
+        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
+        A[aa, aa] = 0.
         return A
 
     def Laplace_evidence(self):
@@ -304,11 +306,11 @@ class model(parameterised):
             hld = np.sum(np.log(np.diag(jitchol(A)[0])))
         except:
             return np.nan
-        return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
+        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
 
     def __str__(self):
         s = parameterised.__str__(self).split('\n')
-        #add priors to the string
+        # add priors to the string
         strs = [str(p) if p is not None else '' for p in self.priors]
         width = np.array(max([len(p) for p in strs] + [5])) + 4
 
@@ -319,16 +321,16 @@ class model(parameterised):
             obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
         obj_funct += '\n\n'
         s[0] = obj_funct + s[0]
-        s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
-        s[1] += '-'*(width + 1)
+        s[0] += "|{h:^{col}}".format(h='Prior', col=width)
+        s[1] += '-' * (width + 1)
 
-        for p in range(2, len(strs)+2):
-            s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
+        for p in range(2, len(strs) + 2):
+            s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
 
         return '\n'.join(s)
 
 
-    def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
         If the verbose flag is passed, invividual components are tested (and printed)
@@ -348,27 +350,27 @@ class model(parameterised):
         x = self._get_params_transformed().copy()
 
         if not verbose:
-            #just check the global ratio
-            dx = step*np.sign(np.random.uniform(-1,1,x.size))
+            # just check the global ratio
+            dx = step * np.sign(np.random.uniform(-1, 1, x.size))
 
-            #evaulate around the point x
-            f1, g1 = self.objective_and_gradients(x+dx)
-            f2, g2 = self.objective_and_gradients(x-dx)
+            # evaulate around the point x
+            f1, g1 = self.objective_and_gradients(x + dx)
+            f2, g2 = self.objective_and_gradients(x - dx)
             gradient = self.objective_function_gradients(x)
 
-            numerical_gradient = (f1-f2)/(2*dx)
-            global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
+            numerical_gradient = (f1 - f2) / (2 * dx)
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
 
-            if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
+            if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
                 return True
             else:
                 return False
         else:
-            #check the gradient of each parameter individually, and do some pretty printing
+            # check the gradient of each parameter individually, and do some pretty printing
             try:
                 names = self._get_param_names_transformed()
             except NotImplementedError:
-                names = ['Variable %i'%i for i in range(len(x))]
+                names = ['Variable %i' % i for i in range(len(x))]
 
             # Prepare for pretty-printing
             header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
@@ -377,9 +379,9 @@ class model(parameterised):
             cols = [max_names]
             cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
             cols = np.array(cols) + 5
-            header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
+            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
             header_string = map(lambda x: '|'.join(x), [header_string])
-            separator = '-'*len(header_string[0])
+            separator = '-' * len(header_string[0])
             print '\n'.join([header_string[0], separator])
 
             if target_param is None:
@@ -395,11 +397,11 @@ class model(parameterised):
                 f2, g2 = self.objective_and_gradients(xx)
                 gradient = self.objective_function_gradients(x)[i]
 
-                numerical_gradient = (f1-f2)/(2*step)
-                ratio = (f1-f2)/(2*step*gradient)
-                difference = np.abs((f1-f2)/2/step - gradient)
+                numerical_gradient = (f1 - f2) / (2 * step)
+                ratio = (f1 - f2) / (2 * step * gradient)
+                difference = np.abs((f1 - f2) / 2 / step - gradient)
 
-                if (np.abs(ratio-1)<tolerance):
+                if (np.abs(ratio - 1) < tolerance):
                     formatted_name = "\033[92m {0} \033[0m".format(names[i])
                 else:
                     formatted_name = "\033[91m {0} \033[0m".format(names[i])
@@ -407,7 +409,7 @@ class model(parameterised):
                 d = '%.6f' % float(difference)
                 g = '%.6f' % gradient
                 ng = '%.6f' % float(numerical_gradient)
-                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
+                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                 print grad_string
 
     def input_sensitivity(self):
@@ -418,21 +420,21 @@ class model(parameterised):
         TODO: proper sensitivity analysis
         """
 
-        if not hasattr(self,'kern'):
+        if not hasattr(self, 'kern'):
             raise ValueError, "this model has no kernel"
 
-        k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
-        if (not len(k)==1) or (not k[0].ARD):
+        k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
+        if (not len(k) == 1) or (not k[0].ARD):
             raise ValueError, "cannot determine sensitivity for this kernel"
         k = k[0]
 
-        if k.name=='rbf':
+        if k.name == 'rbf':
             return k.lengthscale
-        elif k.name=='linear':
-            return 1./k.variances
+        elif k.name == 'linear':
+            return 1. / k.variances
 
 
-    def pseudo_EM(self,epsilon=.1,**kwargs):
+    def pseudo_EM(self, epsilon=.1, **kwargs):
         """
         TODO: Should this not bein the GP class?
         EM - like algorithm  for Expectation Propagation and Laplace approximation
@@ -446,7 +448,7 @@ class model(parameterised):
         :type optimzer: string TODO: valid strings?
 
         """
-        assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.exp(1000)
@@ -466,9 +468,9 @@ class model(parameterised):
             ll_change = new_ll - last_ll
 
             if ll_change < 0:
-                self.likelihood = last_approximation #restore previous likelihood approximation
-                self._set_params(last_params) #restore model parameters
-                print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
+                self.likelihood = last_approximation  # restore previous likelihood approximation
+                self._set_params(last_params)  # restore model parameters
+                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                 stop = True
             else:
                 self.optimize(**kwargs)
@@ -477,5 +479,5 @@ class model(parameterised):
                     stop = True
             iteration += 1
             if stop:
-                print "%s iterations." %iteration
+                print "%s iterations." % iteration