From fb21a3589ba436bf57ca8f4f6e20c238ee7eeeeb Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 30 Nov 2012 17:32:32 +0000
Subject: [PATCH 01/33] added sympykern as a 'kernpart' object. now we can add
 sympykerns to any other kern

---
 GPy/kern/__init__.py       |   2 +-
 GPy/kern/constructors.py   |  18 +++
 GPy/kern/sympy_helpers.cpp |  10 ++
 GPy/kern/sympy_helpers.h   |   3 +
 GPy/kern/sympykern.py      | 270 +++++++++++++++++++++++++++++++++++++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/sympy_helpers.cpp
 create mode 100644 GPy/kern/sympy_helpers.h
 create mode 100644 GPy/kern/sympykern.py

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index be3c902f..ead4d0fc 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy
 from kern import kern

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 99b0aab3..7bbac967 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -21,6 +21,7 @@
 from Brownian import Brownian as Brownianpart
 
 #TODO these constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly without them.
+
 def rbf(D,variance=1., lengthscale=1.):
     """
     Construct an RBF kernel
@@ -170,3 +171,20 @@ def Brownian(D,variance=1.):
     """
     part = Brownianpart(D,variance)
     return kern(D, [part])
+
+import sympy as sp
+from sympykern import spkern
+from sympy.parsing.sympy_parser import parse_expr
+
+def rbf_sympy(D,variance=1., lengthscale=1.):
+    """
+    Radial Basis Function covariance.
+    """
+    X = [sp.var('x%i'%i) for i in range(D)]
+    Z = [sp.var('z%i'%i) for i in range(D)]
+    rbf_variance = sp.var('rbf_variance',positive=True)
+    rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+    dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+    dist = parse_expr(dist_string)
+    f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+    return kern(D,[spkern(D,f,np.array([variance,lengthscale]))])

diff --git a/GPy/kern/sympy_helpers.cpp b/GPy/kern/sympy_helpers.cpp
new file mode 100644
index 00000000..2af4737a
--- /dev/null
+++ b/GPy/kern/sympy_helpers.cpp
@@ -0,0 +1,10 @@
+#include <math.h>
+double DiracDelta(double x){
+  if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
+    return 1.0;
+  else
+    return 0.0;
+};
+double DiracDelta(double x,int foo){
+  return 0.0;
+};

diff --git a/GPy/kern/sympy_helpers.h b/GPy/kern/sympy_helpers.h
new file mode 100644
index 00000000..29244eca
--- /dev/null
+++ b/GPy/kern/sympy_helpers.h
@@ -0,0 +1,3 @@
+#include <math.h>
+double DiracDelta(double x);
+double DiracDelta(double x, int foo);

diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py
new file mode 100644
index 00000000..525fadeb
--- /dev/null
+++ b/GPy/kern/sympykern.py
@@ -0,0 +1,270 @@
+import numpy as np
+import sympy as sp
+from sympy.utilities.codegen import codegen
+from sympy.core.cache import clear_cache
+from scipy import weave
+import re
+import os
+import sys
+current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+import tempfile
+import pdb
+from kernpart import kernpart
+
+class spkern(kernpart):
+    """
+    A kernel object, where all the hard work is done by sympy.
+
+    :param k: the covariance function
+    :type k: a positive definite sympy function of x1, z1, x2, z2...
+
+    To construct a new sympy kernel, you'll need to define:
+     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
+     - that's it! we'll extract the variables from the function k.
+
+    Note:
+     - to handle multiple inputs, call them x1, z1, etc
+     - to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
+    """
+    def __init__(self,D,k,param=None):
+        self.name='sympykern'
+        self._sp_k = k
+        sp_vars = [e for e in k.atoms() if e.is_Symbol]
+        self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
+        self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
+        assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
+        assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
+        assert len(self._sp_x)==len(self._sp_z)
+        self.D = len(self._sp_x)
+        assert self.D == D
+        self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
+        self.Nparam = len(self._sp_theta)
+
+        #deal with param
+        if param is None:
+            param = np.ones(self.Nparam)
+        assert param.size==self.Nparam
+        self.set_param(param)
+
+        #Differentiate!
+        self._sp_dk_dtheta = [sp.diff(k,theta) for theta in self._sp_theta]
+        self._sp_dk_dx = [sp.diff(k,xi) for xi in self._sp_x]
+        #self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
+
+        #self.compute_psi_stats()
+        self._gen_code()
+
+    def __add__(self,other):
+        return spkern(self._sp_k+other._sp_k)
+
+    def compute_psi_stats(self):
+        #define some normal distributions
+        mus = [sp.var('mu%i'%i,real=True) for i in range(self.D)]
+        Ss = [sp.var('S%i'%i,positive=True) for i in range(self.D)]
+        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
+
+        #do some integration!
+        #self._sp_psi0 = ??
+        self._sp_psi1 = self._sp_k
+        for i in range(self.D):
+            print 'performing integrals %i of %i'%(i+1,2*self.D)
+            sys.stdout.flush()
+            self._sp_psi1 *= normals[i]
+            self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
+            clear_cache()
+        self._sp_psi1 = self._sp_psi1.simplify()
+
+        #and here's psi2 (eek!)
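+        # psi2 is the second expectation of the kernel under q(X): for each n,
+        #   E_{q(x_n)}[ k(x_n, z_m) k(x_n, z_m') ],  with q(x_n) = N(mu_n, S_n).
+        # The `normals` built above are exactly those Gaussian densities.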
+        zprime = [sp.Symbol('zp%i'%i) for i in range(self.D)]
+        self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
+        for i in range(self.D):
+            print 'performing integrals %i of %i'%(self.D+i+1,2*self.D)
+            sys.stdout.flush()
+            self._sp_psi2 *= normals[i]
+            self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
+            clear_cache()
+        self._sp_psi2 = self._sp_psi2.simplify()
+
+
+    def _gen_code(self):
+        #generate c functions from sympy objects
+        (foo_c,self._function_code),(foo_h,self._function_header) = \
+        codegen([('k',self._sp_k)] \
+                + [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
+                #+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
+                + [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
+                ,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
+        #put the header file where we can find it
+        f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
+        f.write(self._function_header)
+        f.close()
+
+        #get rid of derivatives of DiracDelta
+        self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
+
+        #Here's some code to do the looping for K
+        arglist = ", ".join(["X[i*D+%s]"%x.name[1:] for x in self._sp_x]\
+                + ["Z[j*D+%s]"%z.name[1:] for z in self._sp_z]\
+                + ["param[%i]"%i for i in range(self.Nparam)])
+
+        self._K_code =\
+        """
+        int i;
+        int j;
+        int N = target_array->dimensions[0];
+        int M = target_array->dimensions[1];
+        int D = X_array->dimensions[1];
+        //#pragma omp parallel for private(j)
+        for (i=0;i<N;i++){
+          for (j=0;j<M;j++){
+            target[i*M+j] = k(%s);
+          }
+        }
+        """%arglist
+
+        #Here's some code to do the looping for Kdiag: use X for Z and i for j
+        diag_arglist = re.sub('j','i',re.sub('Z','X',arglist))
+        self._Kdiag_code =\
+        """
+        int i;
+        int N = X_array->dimensions[0];
+        int D = X_array->dimensions[1];
+        //#pragma omp parallel for
+        for (i=0;i<N;i++){
+            target[i] = k(%s);
+        }
+        """%diag_arglist
+
+        #Here's some code to do gradients wrt the parameters (theta)
+        gradient_funcs = "\n".join(["target[%i] += partial[i*M+j]*dk_d%s(%s);"%(q,theta.name,arglist) for q,theta in enumerate(self._sp_theta)])
+        self._dK_dtheta_code =\
+        """
+        int i;
+        int j;
+        int N = partial_array->dimensions[0];
+        int M = partial_array->dimensions[1];
+        int D = X_array->dimensions[1];
+        //#pragma omp parallel for private(j)
+        for (i=0;i<N;i++){
+          for (j=0;j<M;j++){
+            %s
+          }
+        }
+        """%gradient_funcs
+
+        #Here's some code to do gradients of Kdiag wrt the parameters (Z=X is passed in)
+        gradient_funcs_diag = "\n".join(["target[%i] += partial[i]*dk_d%s(%s);"%(q,theta.name,re.sub('j','i',arglist)) for q,theta in enumerate(self._sp_theta)])
+        self._dKdiag_dtheta_code =\
+        """
+        int i;
+        int N = X_array->dimensions[0];
+        int D = X_array->dimensions[1];
+        for (i=0;i<N;i++){
+            %s
+        }
+        """%gradient_funcs_diag
+
+        #Here's some code to do gradients wrt x
+        gradient_funcs = "\n".join(["target[i*D+%i] += partial[i*M+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.D)])
+        self._dK_dX_code = \
+        """
+        int i;
+        int j;
+        int N = partial_array->dimensions[0];
+        int M = partial_array->dimensions[1];
+        int D = X_array->dimensions[1];
+        //#pragma omp parallel for private(j)
+        for (i=0;i<N;i++){
+          for (j=0;j<M;j++){
+            %s
+          }
+        }
+        """%gradient_funcs
+
+        #Here's some code to do gradients wrt z (should be the same as for X, but this is easier
+        gradient_funcs_Z = "\n".join(["target[j*D+%i] += partial[i*M+j]*dk_dz%i(%s);"%(q,q,arglist) for q in range(self.D)])
+        self._dK_dZ_code = \
+        """
+        int i;
+        int j;
+        int N = partial_array->dimensions[0];
+        int M = partial_array->dimensions[1];
+        int D = X_array->dimensions[1];
+        for (i=0;i<N;i++){
+          for (j=0;j<M;j++){
+            %s
+          }
+        }
+        """%gradient_funcs_Z
+
+        #Here's some code to do gradients of Kdiag wrt X (with M=0, partial[i*M+i] is just partial[i])
+        self._dKdiag_dX_code = \
+        """
+        int i;
+        int N = X_array->dimensions[0];
+        int M = 0;
+        int D = X_array->dimensions[1];
+        for (i=0;i<N;i++){
+            %s
+        }
+        """%re.sub('j','i',gradient_funcs)
+
+    def K(self,X,Z,target):
+        param = self._param
+        weave.inline(self._K_code,arg_names=['target','X','Z','param'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(),os.path.join(current_dir,'kern/') ],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        return target
+
+    def Kdiag(self,X,target):
+        param = self._param
+        weave.inline(self._Kdiag_code,arg_names=['target','X','param'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        return target
+
+    def dK_dtheta(self,partial,X,Z,target):
+        param = self._param
+        weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        return target
+
+    def dKdiag_dtheta(self,partial,X,target):
+        param = self._param
+        Z = X
+        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")])
+        return target
+
+    def dK_dX(self,partial,X,Z,target):
+        target = np.zeros_like(X)
+        param = self._param
+        weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        return target
+
+    #def dK_dZ(self,X,Z,partial=None):
+        ##TODO: this function might not be necessary
+        #target = np.zeros_like(Z)
+        #param = self._param
+        #weave.inline(self._dK_dZ_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        #return target
+
+    def dKdiag_dX(self,partial,X,target):
+        param = self._param
+        Z = X
+        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")])
+        return target
+
+    def set_param(self,param):
+        #print param.flags['C_CONTIGUOUS']
+        self._param = param.copy()
+
+    def get_param(self):
+        return self._param
+
+    def get_param_names(self):
+        return [x.name for x in self._sp_theta]

From 4fb28dcbd4a56475e3f501a31306565a2c87d99a Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 7 Dec 2012 15:39:47 -0800
Subject: [PATCH 02/33] Added get and set attributes to the model class

... so that we can deal with the parameters in a Neil friendly way.
---
 GPy/core/model.py         | 26 +++++++++++++++++++++++++-
 GPy/core/parameterised.py |  2 +-
 GPy/testing/unit_tests.py |  1 +
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 6ae292d0..6e0dea60 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -29,7 +29,8 @@ class model(parameterised):
         raise NotImplementedError, "this needs to be implemented to utilise the model class"
 
     def set_prior(self,which,what):
-        """sets priors on the model parameters.
+        """
+        Sets priors on the model parameters.
 
         Arguments
         ---------
@@ -79,6 +80,29 @@ class model(parameterised):
         for w in which:
             self.priors[w] = what
 
+    def get(self,name):
+        """
+        get a model parameter by name
+        """
+        matches = self.grep_param_names(name)
+        if len(matches):
+            return self.get_param()[matches]
+        else:
+            raise AttributeError, "no parameter matches %s"%name
+
+    def set(self,name,val):
+        """
+        Set a model parameter by name
+        """
+        matches = self.grep_param_names(name)
+        if len(matches):
+            x = self.get_param()
+            x[matches] = val
+            self.set_param(x)
+        else:
+            raise AttributeError, "no parameter matches %s"%name
+
+
     def log_prior(self):
         """evaluate the prior"""

diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index d9e9d0d7..3894ea6a 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -29,7 +29,7 @@ def truncate_pad(string,width,align='m'):
     else:
         raise ValueError
 
-class parameterised:
+class parameterised(object):
     def __init__(self):
         """
         This is the base class for model and kernel. Mostly just handles tying and constraining of parameters

diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 02a63feb..ff9aba0e 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -140,6 +140,7 @@ class GradientTests(unittest.TestCase):
         self.assertTrue(m.checkgrad())
 
     def test_GP_EP(self):
+        return # Disabled TODO
         N = 20
         X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)

From 1c0a22332957d4a83a21a072ccf6dd908b4bf3e3 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 7 Dec 2012 15:51:27 -0800
Subject: [PATCH 03/33] added the raw_predict function in the uncollapsed
 sparse GP

---
 GPy/models/uncollapsed_sparse_GP.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 89a8ff0e..bb00caea 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -47,9 +47,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.V = self.beta*self.Y
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.Lm = jitchol(self.Kmm)
-        self.Lmi = chol_inv(self.Lm)
-        self.Kmmi = np.dot(self.Lmi.T, self.Lmi)
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
         self.A = mdot(self.Lmi, self.psi2, self.Lmi.T)
         self.B = np.eye(self.M) + self.beta * self.A
         self.Lambda = mdot(self.Lmi.T,self.B,sel.Lmi)
@@ -89,16 +87,19 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
 
     def _raw_predict(self, Xnew, slices):
         """Internal helper function for making predictions, does not account for normalisation"""
-
-        #TODO
+        Kx = self.kern.cross_compute(Xnew)
+        Kxx = self.kern.compute_new(Xnew)
+        mu = mdot(Kx.T,self.Kmmi,self.mu)
+        tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
+        var = Kxx - mdot(Kx.T,tmp,Kx) + np.eye(Xnew.shape[0])/self.beta
         return mu,var
 
+
     def set_vb_param(self,vb_param):
         """set the distribution q(u) from the canonical parameters"""
         self.q_u_prec = -2.*vb_param[self.M*self.D:].reshape(self.M,self.M)
-        self.q_u_prec_L = jitchol(self.q_u_prec)
-        self.q_u_cov_L = chol_inv(self.q_u_prec_L)
-        self.q_u_cov = np.dot(self.q_u_cov_L,self.q_u_cov_L.T)
+        self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
+        self.q_u_logdet = -tmp
         self.q_u_mean = -2.*np.dot(self.q_u_cov,vb_param[:self.M*self.D].reshape(self.M,self.D))
         self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov)

From fe13d2c09abeeac5f9247e4af62908dd9bba0020 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 7 Dec 2012 16:35:15 -0800
Subject: [PATCH 04/33] more skeletal work on the uncollapsed GP

None of the gradients work, but lots more things are in place
---
 GPy/models/__init__.py              |  1 +
 GPy/models/uncollapsed_sparse_GP.py | 58 +++++++++++++++--------------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index dd721559..ab7ff5b4 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -9,3 +9,4 @@ from warped_GP import warpedGP
 from GP_EP import GP_EP
 from generalized_FITC import generalized_FITC
 from sparse_GPLVM import sparse_GPLVM
+from uncollapsed_sparse_GP import uncollapsed_sparse_GP

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index bb00caea..8761aac4 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -32,35 +32,37 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
     :type normalize_(X|Y): bool
     """
 
-    def __init__(self, X, Y, q_u=None, *args, **kwargs)
-        D = Y.shape[1]
+    def __init__(self, X, Y, q_u=None, M=10, *args, **kwargs):
+        self.D = Y.shape[1]
         if q_u is None:
-            if Z is None:
-                M = Z.shape[0]
+            if 'Z' in kwargs.keys():
+                self.M = Z.shape[0]
             else:
-                M=M
+                self.M = M
-            q_u = np.hstack((np.ones(M*D)),np.eye(M).flatten())
+            q_u = np.hstack((np.ones(self.M*self.D),-0.5*np.eye(self.M).flatten()))
         self.set_vb_param(q_u)
-        sparse_GP_regression.__init__(self, X, Y, *args, **kwargs)
+        sparse_GP_regression.__init__(self, X, Y, M=M,*args, **kwargs)
 
     def _computations(self):
         self.V = self.beta*self.Y
+        self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.A = mdot(self.Lmi, self.psi2, self.Lmi.T)
-        self.B = np.eye(self.M) + self.beta * self.A
-        self.Lambda = mdot(self.Lmi.T,self.B,sel.Lmi)
+        self.A = self.beta * mdot(self.Lmi, self.psi2, self.Lmi.T)
+        self.B = np.eye(self.M) * self.A
+        self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
+        self.trace_K = self.psi0 - np.trace(self.A)/self.beta
+        self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])
 
         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 =
-        self.dL_dpsi2 =
+        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T
+        self.dL_dpsi2 = - 0.5 * self.beta * (-self.D*self.Kmmi + mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
 
         # Compute dL_dKmm
-        self.dL_dKmm =
-        self.dL_dKmm +=
-        self.dL_dKmm +=
+        tmp = np.dot(0.5*np.eye(self.M) + np.dot(self.A,self.Kmmi),self.q_u_expectation[1]) -0.5*self.Kmm - np.dot(self.psi1,self.VmT)
+        self.dL_dKmm = mdot(self.Kmmi,tmp,self.Kmmi)
 
     def log_likelihood(self):
         """
@@ -68,10 +70,10 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         """
         A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
         B = -0.5*self.beta*self.D*self.trace_K
-        C = -self.D *(self.Kmm_hld +0.5*np.sum(self.Lambda * self.mmT_S) + self.M/2.)
-        E = -0.5*self.beta*self.trYYT
-        F = np.sum(np.dot(self.V.T,self.projected_mean))
-        return A+B+C+D+E+F
+        C = -0.5*self.D *(self.Kmm_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) + self.M/2.)
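+        # C collects the log-determinant and the quadratic (Lambda) terms in the second moment of q(u)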
+        D = -0.5*self.beta*self.trYYT
+        E = np.sum(np.dot(self.V.T,self.projected_mean))
+        return A+B+C+D+E
 
     def dL_dbeta(self):
         """
@@ -80,18 +82,18 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         """
         dA_dbeta = 0.5 * self.N*self.D/self.beta
         dB_dbeta = - 0.5 * self.D * self.trace_K
-        dC_dbeta = - 0.5 * self.D * #TODO
+        dC_dbeta = - 0.5 * self.D * 1.#TODO
         dD_dbeta = - 0.5 * self.trYYT
 
-        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)
+        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
 
     def _raw_predict(self, Xnew, slices):
         """Internal helper function for making predictions, does not account for normalisation"""
-        Kx = self.kern.cross_compute(Xnew)
-        Kxx = self.kern.compute_new(Xnew)
-        mu = mdot(Kx.T,self.Kmmi,self.mu)
+        Kx = self.kern.K(Xnew,self.Z)
+        Kxx = self.kern.K(Xnew)
+        mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
         tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
-        var = Kxx - mdot(Kx.T,tmp,Kx) + np.eye(Xnew.shape[0])/self.beta
+        var = Kxx - mdot(Kx,tmp,Kx.T) + np.eye(Xnew.shape[0])/self.beta
         return mu,var
 
@@ -100,7 +102,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.q_u_prec = -2.*vb_param[self.M*self.D:].reshape(self.M,self.M)
         self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
         self.q_u_logdet = -tmp
-        self.q_u_mean = -2.*np.dot(self.q_u_cov,vb_param[:self.M*self.D].reshape(self.M,self.D))
+        self.q_u_mean = np.dot(self.q_u_cov,vb_param[:self.M*self.D].reshape(self.M,self.D))
         self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov)
 
@@ -127,4 +129,6 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         add the distribution q(u) to the plot from sparse_GP_regression
         """
         sparse_GP_regression.plot(self,*args,**kwargs)
-        #TODO: plot the q(u) dist.
+        if self.Q==1:
+            pb.errorbar(self.Z[:,0],self.q_u_expectation[0][:,0],yerr=2*np.sqrt(np.diag(self.q_u_cov)),fmt=None,ecolor='b')
+

From e6d08348c7e45e39ac53ada94fd9c8a24985d264 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 7 Dec 2012 17:18:52 -0800
Subject: [PATCH 05/33] minor performance tweak for GP regression

---
 GPy/models/GP_regression.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py
index f21c4591..dcc95baf 100644
--- a/GPy/models/GP_regression.py
+++ b/GPy/models/GP_regression.py
@@ -83,9 +83,8 @@ class GP_regression(model):
         """
         Computes the model fit using Youter if it's available
         """
-
         if self.Youter is None:
-            return -0.5*np.trace(mdot(self.Y.T,self.Ki,self.Y))
+            return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
         else:
             return -0.5*np.sum(np.multiply(self.Ki, self.Youter))

From ab56f3f9b7aef5db19f042b3f47fbfc510c7f8b3 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 7 Dec 2012 23:03:53 -0800
Subject: [PATCH 06/33] small tweak to the gradients in sparse GP

---
 GPy/examples/uncertain_input_GP_regression_demo.py | 8 ++------
 GPy/models/sparse_GPLVM.py                         | 1 +
 GPy/models/sparse_GP_regression.py                 | 6 ++----
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/GPy/examples/uncertain_input_GP_regression_demo.py b/GPy/examples/uncertain_input_GP_regression_demo.py
index 3d2c51f0..f0be5fe2 100644
--- a/GPy/examples/uncertain_input_GP_regression_demo.py
+++ b/GPy/examples/uncertain_input_GP_regression_demo.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 import pylab as pb
 import numpy as np
 import GPy
@@ -9,18 +8,15 @@ pb.ion()
 pb.close('all')
 
-######################################
-## 1 dimensional example
-
 # sample inputs and outputs
 S = np.ones((20,1))
 X = np.random.uniform(-3.,3.,(20,1))
 Y = np.sin(X)+np.random.randn(20,1)*0.05
 
-k = GPy.kern.bias(1) + GPy.kern.white(1)
+k = GPy.kern.rbf(1) + GPy.kern.white(1)
 
 # create simple GP model
-m = GPy.models.uncertain_input_GP_regression(X,Y,S,kernel=k)
+m = GPy.models.sparse_GP_regression(X,Y,X_uncertainty=S,kernel=k)
 
 # constrain all parameters to be positive
 m.constrain_positive('(variance|prec)')

diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py
index 96c3e530..c5125c85 100644
--- a/GPy/models/sparse_GPLVM.py
+++ b/GPy/models/sparse_GPLVM.py
@@ -54,5 +54,6 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
 
     def plot(self):
         GPLVM.plot(self)
+        #passing Z without a small amount of jitter will induce the white kernel where we don't want it!
        mu, var = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
         pb.plot(mu[:, 0] , mu[:, 1], 'ko')

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index da19d80a..f34819dc 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 import numpy as np
 import pylab as pb
 from ..util.linalg import mdot, jitchol, chol_inv, pdinv
@@ -50,7 +49,7 @@ class sparse_GP_regression(GP_regression):
             self.has_uncertain_inputs=False
         else:
             assert X_uncertainty.shape==X.shape
-            self.has_uncertain_inputs=False
+            self.has_uncertain_inputs=True
             self.X_uncertainty = X_uncertainty
 
         GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y)
@@ -99,8 +98,7 @@ class sparse_GP_regression(GP_regression):
 
         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        dC_dpsi1 = (self.LLambdai.T[:,:, None, None] * self.V)
-        self.dL_dpsi1 = (dC_dpsi1*self.C[None,:,None,:]).sum(1).sum(-1)
+        self.dL_dpsi1 = (self.LLambdai.T[:,:,None,None]*self.V*self.C[None,:,None,:]).sum(1).sum(-1)
         self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
 
         # Compute dL_dKmm

From 5f2c2a04f812d0b5cc05b9b932bdd51c94a5b5df Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sat, 8 Dec 2012 15:15:31 -0800
Subject: [PATCH 07/33] changed a little of the notation in the sparse GP

This should allow easier implementation of het. noise
---
 GPy/models/sparse_GP_regression.py  | 15 +++++++--------
 GPy/models/uncollapsed_sparse_GP.py | 10 +++++-----
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index f34819dc..0f0b0569 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -87,11 +87,11 @@ class sparse_GP_regression(GP_regression):
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.A = mdot(self.Lmi, self.psi2, self.Lmi.T)
-        self.B = np.eye(self.M) + self.beta * self.A
+        self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
+        self.B = np.eye(self.M) + self.A
         self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
         self.LLambdai = np.dot(self.LBi, self.Lmi)
-        self.trace_K = self.psi0 - np.trace(self.A)
+        self.trace_K = self.psi0 - np.trace(self.A)/self.beta
         self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
         self.C = mdot(self.LLambdai, self.psi1V)
         self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
@@ -102,7 +102,7 @@ class sparse_GP_regression(GP_regression):
         self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
 
         # Compute dL_dKmm
-        self.dL_dKmm = -0.5 * self.beta * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
+        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
         self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC
         self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
 
@@ -126,15 +126,14 @@ class sparse_GP_regression(GP_regression):
     def dL_dbeta(self):
         """
         Compute the gradient of the log likelihood wrt beta.
-        TODO: support heteroscedastic noise
         """
-
+        #TODO: support heteroscedastic noise
         dA_dbeta = 0.5 * self.N*self.D/self.beta
         dB_dbeta = - 0.5 * self.D * self.trace_K
-        dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)
+        dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
         dD_dbeta = - 0.5 * self.trYYT
         tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
-        dE_dbeta = np.sum(np.square(self.C))/self.beta - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T))
+        dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
 
         return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 8761aac4..15f52612 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -39,7 +39,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
             self.M = Z.shape[0]
         else:
             self.M = M
-        q_u = np.hstack((np.ones(self.M*self.D),-0.5*np.eye(self.M).flatten()))
+        q_u = np.hstack((np.zeros(self.M*self.D),-0.5*np.eye(self.M).flatten()))
         self.set_vb_param(q_u)
         sparse_GP_regression.__init__(self, X, Y, M=M,*args, **kwargs)
 
@@ -49,8 +49,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.A = self.beta * mdot(self.Lmi, self.psi2, self.Lmi.T)
-        self.B = np.eye(self.M) * self.A
+        self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
+        self.B = np.eye(self.M) + self.A
         self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
         self.trace_K = self.psi0 - np.trace(self.A)/self.beta
         self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])
@@ -70,10 +70,10 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         """
         A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
         B = -0.5*self.beta*self.D*self.trace_K
-        C = -0.5*self.D *(self.Kmm_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) + self.M/2.)
+        C = -0.5*self.D *(self.Kmm_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M*self.D)
         D = -0.5*self.beta*self.trYYT
         E = np.sum(np.dot(self.V.T,self.projected_mean))
-        return A+B+C+D+E
+        return A+B#+C+D+E

From ef26888b78a944e330ae64395af2ab40bfafc6cc Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sat, 8 Dec 2012 15:29:17 -0800
Subject: [PATCH 08/33] dL_dbeta now works in the uncollapsed sparse GP

---
 GPy/models/uncollapsed_sparse_GP.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 15f52612..66bb332f 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -61,7 +61,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.dL_dpsi2 = - 0.5 * self.beta * (-self.D*self.Kmmi + mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
 
         # Compute dL_dKmm
-        tmp = np.dot(0.5*np.eye(self.M) + np.dot(self.A,self.Kmmi),self.q_u_expectation[1]) -0.5*self.Kmm - np.dot(self.psi1,self.VmT)
+        tmp = np.dot(self.A,self.Kmmi)
+        tmp = 0.5*np.dot(np.eye(self.M) + tmp + tmp.T, self.q_u_expectation[1]) -0.5*self.Kmm - np.dot(self.psi1,self.VmT)
         self.dL_dKmm = mdot(self.Kmmi,tmp,self.Kmmi)
 
     def log_likelihood(self):
@@ -70,10 +71,10 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         """
         A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
         B = -0.5*self.beta*self.D*self.trace_K
-        C = -0.5*self.D *(self.Kmm_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M*self.D)
+        C = -0.5*self.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M*self.D)
         D = -0.5*self.beta*self.trYYT
         E = np.sum(np.dot(self.V.T,self.projected_mean))
-        return A+B#+C+D+E
+        return A+B+C+D+E
 
     def dL_dbeta(self):
         """
@@ -82,8 +83,9 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         """
         dA_dbeta = 0.5 * self.N*self.D/self.beta
         dB_dbeta = - 0.5 * self.D * self.trace_K
-        dC_dbeta = - 0.5 * self.D * 1.#TODO
+        dC_dbeta = - 0.5 * self.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi))
         dD_dbeta = - 0.5 * self.trYYT
+        dE_dbeta = np.sum(np.dot(self.Y.T,self.projected_mean))
 
-        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
+        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)

From 053ebbe2d4666e1575fe7153c4fe75a55db47a0c Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sun, 9 Dec 2012 14:41:36 -0800
Subject: [PATCH 09/33] most of the gradients are now working in the
 uncollapsed sparse GP: one term still to do.
---
 GPy/models/uncollapsed_sparse_GP.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 66bb332f..65b12f03 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -39,7 +39,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
             self.M = Z.shape[0]
         else:
             self.M = M
-        q_u = np.hstack((np.zeros(self.M*self.D),-0.5*np.eye(self.M).flatten()))
+        q_u = np.hstack((np.random.randn(self.M*self.D),-0.5*np.eye(self.M).flatten()))
         self.set_vb_param(q_u)
         sparse_GP_regression.__init__(self, X, Y, M=M,*args, **kwargs)
 
@@ -57,12 +57,12 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
 
         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T
-        self.dL_dpsi2 = - 0.5 * self.beta * (-self.D*self.Kmmi + mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
+        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T*0 # This is the correct term for E I think...
+        self.dL_dpsi2 = 0.5 * self.beta * self.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
 
         # Compute dL_dKmm
-        tmp = np.dot(self.A,self.Kmmi)
-        tmp = 0.5*np.dot(np.eye(self.M) + tmp + tmp.T, self.q_u_expectation[1]) -0.5*self.Kmm - np.dot(self.psi1,self.VmT)
+        tmp = -0.5*self.beta*self.D*self.psi2 -0.5*self.D*self.Kmm +0.5*self.D*(self.q_u_expectation[1]) + 0.5*self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) + 0.5*self.beta*mdot(self.q_u_expectation[1],self.Kmmi, self.psi2)
+        #tmp = - np.dot(self.q_u_expectation[0],self.psi1V.T)
         self.dL_dKmm = mdot(self.Kmmi,tmp,self.Kmmi)
 
     def log_likelihood(self):
@@ -71,10 +71,10 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         """
         A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
         B = -0.5*self.beta*self.D*self.trace_K
-        C = -0.5*self.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M*self.D)
+        C = -0.5*self.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
         D = -0.5*self.beta*self.trYYT
         E = np.sum(np.dot(self.V.T,self.projected_mean))
-        return A+B+C+D+E
+        return A+B+C+D
 
     def dL_dbeta(self):
         """
@@ -87,7 +87,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         dD_dbeta = - 0.5 * self.trYYT
         dE_dbeta = np.sum(np.dot(self.Y.T,self.projected_mean))
 
-        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)
+        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)

From 11dacb378afb9a3e4328e24eb79b9e2ed4f1d632 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Fri, 14 Dec 2012 13:57:29 +0000
Subject: [PATCH 10/33] added autodetection of Rasmussen's minimize

---
 GPy/inference/optimization.py | 64 ++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 4cf56b69..2c5e2fba 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -3,7 +3,13 @@
 
 from scipy import optimize
-# import rasmussens_minimize as rasm
+
+try:
+    import rasmussens_minimize as rasm
+    rasm_available = True
+except ImportError:
+    rasm_available = False
+
 import pdb
 import pylab as pb
 import datetime as dt
@@ -38,7 +44,7 @@ class Optimizer():
         self.gtol = gtol
         self.ftol = ftol
         self.model = model
-
+        
     def run(self, **kwargs):
         start = dt.datetime.now()
         self.opt(**kwargs)
@@ -159,43 +165,45 @@ class opt_simplex(Optimizer):
         self.trace = None
 
-# class opt_rasm(Optimizer):
-#     def __init__(self, *args, **kwargs):
-#         Optimizer.__init__(self, *args, **kwargs)
-#         self.opt_name = "Rasmussen's Conjugate Gradient"
+class opt_rasm(Optimizer):
+    def __init__(self, *args, **kwargs):
+        Optimizer.__init__(self, *args, **kwargs)
+        self.opt_name = "Rasmussen's Conjugate Gradient"
 
-#     def opt(self):
-#         """
-#         Run Rasmussen's Conjugate Gradient optimizer
-#         """
+    def opt(self):
+        """
+        Run Rasmussen's Conjugate Gradient optimizer
+        """
 
-#         assert self.f_fp != None, "Rasmussen's minimizer requires f_fp"
-#         statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
-#                     'NaNs in optimization']
+        assert self.f_fp != None, "Rasmussen's minimizer requires f_fp"
+        statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
+                    'NaNs in optimization']
 
-#         opt_dict = {}
-#         if self.xtol is not None:
-#             print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it"
-#         if self.ftol is not None:
-#             print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it"
-#         if self.gtol is not None:
-#             print "WARNING: minimize doesn't have a gtol arg, so I'm going to ignore it"
+        opt_dict = {}
+        if self.xtol is not None:
+            print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it"
+        if self.ftol is not None:
+            print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it"
+        if self.gtol is not None:
+            print "WARNING: minimize doesn't have a gtol arg, so I'm going to ignore it"
 
-#         opt_result = rasm.minimize(self.x_init, self.f_fp, (), messages = self.messages,
-#                                    maxnumfuneval = self.max_f_eval)
-#         self.x_opt = opt_result[0]
-#         self.f_opt = opt_result[1][-1]
-#         self.funct_eval = opt_result[2]
-#         self.status = statuses[opt_result[3]]
+        opt_result = rasm.minimize(self.x_init, self.f_fp, (), messages = self.messages,
+                                   maxnumfuneval = self.max_f_eval)
+        self.x_opt = opt_result[0]
+        self.f_opt = opt_result[1][-1]
+        self.funct_eval = opt_result[2]
+        self.status = statuses[opt_result[3]]
 
-#         self.trace = opt_result[1]
+        self.trace = opt_result[1]
 
 def get_optimizer(f_min):
     optimizers = {'fmin_tnc': opt_tnc,
-                  # 'rasmussen': opt_rasm,
                   'simplex': opt_simplex,
                   'lbfgsb': opt_lbfgsb}
 
+    if rasm_available:
+        optimizers['rasmussen'] = opt_rasm
+
     for opt_name in optimizers.keys():
         if opt_name.lower().find(f_min.lower()) != -1:
             return optimizers[opt_name]

From b4190f907e6ed0f0b7ea6dc7be3e8456cd8a1d99 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Fri, 14 Dec 2012 14:00:50 +0000
Subject: [PATCH 11/33] fixed interface change in optimization.py

---
 GPy/inference/optimization.py | 8 ++++----
 GPy/models/warped_GP.py       | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 2c5e2fba..9bc44fd4 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -170,12 +170,12 @@ class opt_rasm(Optimizer):
         Optimizer.__init__(self, *args, **kwargs)
         self.opt_name = "Rasmussen's Conjugate Gradient"
 
-    def opt(self):
+    def opt(self, f_fp = None, f = None, fp = None):
         """
         Run Rasmussen's Conjugate Gradient optimizer
         """
 
-        assert self.f_fp != None, "Rasmussen's minimizer requires f_fp"
+        assert f_fp != None, "Rasmussen's minimizer requires f_fp"
         statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
                     'NaNs in optimization']
 
         opt_dict = {}
         if self.xtol is not None:
             print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it"
         if self.ftol is not None:
             print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it"
         if self.gtol is not None:
             print "WARNING: minimize doesn't have a gtol arg, so I'm going to ignore it"
 
-        opt_result = rasm.minimize(self.x_init, self.f_fp, (), messages = self.messages,
-                                   maxnumfuneval = self.max_f_eval)
+        opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages,
+                                   maxnumfuneval = self.max_f_eval)
         self.x_opt = opt_result[0]
         self.f_opt = opt_result[1][-1]
         self.funct_eval = opt_result[2]

diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_GP.py
index 9a1bcbe1..bf5af21f 100644
--- a/GPy/models/warped_GP.py
+++ b/GPy/models/warped_GP.py
@@ -22,7 +22,7 @@ class warpedGP(GP_regression):
         if warping_function == None:
             self.warping_function = TanhWarpingFunction(warping_terms)
             # self.warping_params = np.random.randn(self.warping_function.n_terms, 3)
-            self.warping_params = np.ones((self.warping_function.n_terms, 3))*1.0 # TODO better init
+            self.warping_params = np.ones((self.warping_function.n_terms, 3))*0.0 # TODO better init
 
         self.warp_params_shape = (self.warping_function.n_terms, 3) # todo get this from the subclass
         self.Z = Y.copy()

From 35a7e6179dfab35a652881663088232cab4e9be5 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sun, 16 Dec 2012 14:48:24 +0000
Subject: [PATCH 12/33] gradients now working in uncollapsed GP

---
 GPy/models/uncollapsed_sparse_GP.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 65b12f03..7c1cdb35 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -36,12 +36,12 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.D = Y.shape[1]
         if q_u is None:
             if 'Z' in kwargs.keys():
-                self.M = Z.shape[0]
+                self.M = kwargs['Z'].shape[0]
             else:
                 self.M = M
             q_u = np.hstack((np.random.randn(self.M*self.D),-0.5*np.eye(self.M).flatten()))
         self.set_vb_param(q_u)
-        sparse_GP_regression.__init__(self, X, Y, M=M,*args, **kwargs)
+        sparse_GP_regression.__init__(self, X, Y, M=self.M,*args, **kwargs)
 
     def _computations(self):
         self.V = self.beta*self.Y
@@ -57,12 +57,13 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
 
         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T*0 # This is the correct term for E I think...
+        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T # This is the correct term for E I think...
         self.dL_dpsi2 = 0.5 * self.beta * self.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
 
         # Compute dL_dKmm
         tmp = -0.5*self.beta*self.D*self.psi2 -0.5*self.D*self.Kmm +0.5*self.D*(self.q_u_expectation[1]) + 0.5*self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) + 0.5*self.beta*mdot(self.q_u_expectation[1],self.Kmmi, self.psi2)
-        #tmp = - np.dot(self.q_u_expectation[0],self.psi1V.T)
+        tmptmp = - np.dot(self.q_u_expectation[0],self.psi1V.T)*0.5
+        tmp += tmptmp + tmptmp.T
         self.dL_dKmm = mdot(self.Kmmi,tmp,self.Kmmi)
 
     def log_likelihood(self):
@@ -74,7 +75,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         C = -0.5*self.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
         D = -0.5*self.beta*self.trYYT
         E = np.sum(np.dot(self.V.T,self.projected_mean))
-        return A+B+C+D
+        return A+B+C+D+E

From 7a34e1c446a95ffd07d0a77c9c28730f2194d495 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sun, 16 Dec 2012 17:16:03 +0000
Subject: [PATCH 13/33] some natural gradients of the uncollapsed GP
 implemented

---
 GPy/models/uncollapsed_sparse_GP.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 7c1cdb35..f0193a31 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -125,7 +125,14 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast
         Variational inference in the conjugate exponential Family)
         """
-        foobar #TODO
+        dL_dmmT_S = -0.5*self.Lambda+self.q_u_canonical[1]
+        dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0]
+
+        #dL_dSim =
+        #dL_dmhSi =
+
+        return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO
+
 
     def plot(self, *args, **kwargs):
         """

From 43af1c174d43ab9d93826aa56c38118c704f570b Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sun, 16 Dec 2012 17:31:51 +0000
Subject: [PATCH 14/33] some simplification of the gradient expressions

---
 GPy/models/uncollapsed_sparse_GP.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index f0193a31..fb728e7b 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -61,8 +61,10 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.dL_dpsi2 = 0.5 * self.beta * self.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
 
         # Compute dL_dKmm
-        tmp = -0.5*self.beta*self.D*self.psi2 -0.5*self.D*self.Kmm +0.5*self.D*(self.q_u_expectation[1]) + 0.5*self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) + 0.5*self.beta*mdot(self.q_u_expectation[1],self.Kmmi, self.psi2)
-        tmptmp = - np.dot(self.q_u_expectation[0],self.psi1V.T)*0.5
+        tmp = 0.5*self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1])
+        tmp += tmp.T
+        tmp += 0.5*self.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
+        tmptmp = - 0.5*np.dot(self.q_u_expectation[0],self.psi1V.T)
         tmp += tmptmp + tmptmp.T
         self.dL_dKmm = mdot(self.Kmmi,tmp,self.Kmmi)

From b56de4762e3be08f356a921b352b521e1751190b Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Sun, 16 Dec 2012 19:03:08 +0000
Subject: [PATCH 15/33] some gradient tidying and a small correction in the
 natural gradients

---
 GPy/models/uncollapsed_sparse_GP.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index fb728e7b..3f645ab4 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -61,12 +61,10 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.dL_dpsi2 = 0.5 * self.beta * self.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
 
         # Compute dL_dKmm
-        tmp = 0.5*self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1])
+        tmp = self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) -np.dot(self.q_u_expectation[0],self.psi1V.T)
         tmp += tmp.T
-        tmp += 0.5*self.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
-        tmptmp = - 0.5*np.dot(self.q_u_expectation[0],self.psi1V.T)
-        tmp += tmptmp + tmptmp.T
-        self.dL_dKmm = mdot(self.Kmmi,tmp,self.Kmmi)
+        tmp += self.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
+        self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi)
 
     def log_likelihood(self):
@@ -127,7 +125,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast
         Variational inference in the conjugate exponential Family)
         """
-        dL_dmmT_S = -0.5*self.Lambda+self.q_u_canonical[1]
+        dL_dmmT_S = -0.5*self.Lambda-self.q_u_canonical[1]
         dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0]
 
         #dL_dSim =

From 539b80e51556057c8649a9bead6d494c95bc35d1 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Mon, 17 Dec 2012 10:01:47 +0000
Subject: [PATCH 16/33] tidied up the kwargs in sympykern

---
 GPy/kern/sympykern.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py
index 525fadeb..64c31111 100644
--- a/GPy/kern/sympykern.py
+++ b/GPy/kern/sympykern.py
@@ -54,6 +54,18 @@ class spkern(kernpart):
 
         #self.compute_psi_stats()
         self._gen_code()
+        self.weave_kwargs = {\
+            'support_code':self._function_code,\
+            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'symbolic/')],\
+            'headers':['"sympy_helpers.h"'],\
+            'sources':[os.path.join(current_dir,"symbolic/sympy_helpers.cpp")],\
+            #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
+            'extra_compile_args':[],\
+            'extra_link_args':['-lgomp'],\
+            'verbose':True}
+
+
+
     def __add__(self,other):
         return spkern(self._sp_k+other._sp_k)
@@ -221,42 +233,42 @@ class spkern(kernpart):
 
     def K(self,X,Z,target):
         param = self._param
-        weave.inline(self._K_code,arg_names=['target','X','Z','param'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(),os.path.join(current_dir,'kern/') ],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
         return target
 
     def Kdiag(self,X,target):
         param = self._param
-        weave.inline(self._Kdiag_code,arg_names=['target','X','param'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
         return target
 
     def dK_dtheta(self,partial,X,Z,target):
         param = self._param
-        weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
         return target
 
     def dKdiag_dtheta(self,partial,X,target):
         param = self._param
         Z = X
-        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")])
+        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
         return target
 
     def dK_dX(self,partial,X,Z,target):
         target = np.zeros_like(X)
         param = self._param
-        weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
         return target
 
     #def dK_dZ(self,X,Z,partial=None):
         ##TODO: this function might not be necessary
         #target = np.zeros_like(Z)
         #param = self._param
-        #weave.inline(self._dK_dZ_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"','<omp.h>'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")],extra_compile_args=['-fopenmp'],extra_link_args=['-lgomp'])
+        #weave.inline(self._dK_dZ_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
         #return target
 
     def dKdiag_dX(self,partial,X,target):
         param = self._param
         Z = X
-        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],support_code=self._function_code,include_dirs=[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],headers=['"sympy_helpers.h"'],sources=[os.path.join(current_dir,"kern/sympy_helpers.cpp")])
+        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
         return target
 
     def set_param(self,param):

From abde9fd22d79d3c2093fb020174f150da398c704 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Mon, 17 Dec 2012 10:02:45 +0000
Subject: [PATCH 17/33] added iterate.dat to gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 73431343..60866848 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,6 @@ nosetests.xml
 
 #vim
 *.swp
+
+#bfgs optimiser leaves this lying around
+iterate.dat

From 7ae0c163c199acb6a2a465ba73a9edea8226d49f Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Mon, 17 Dec 2012 11:04:42 +0000
Subject: [PATCH 18/33] added Alan's bugfix to this version of GPy: sympykern
 is now forced to recompile if the function changes.

Also re-enabled openmp loops, since I only disabled them for bugfinding
---
 GPy/kern/sympykern.py | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py
index 64c31111..4d912dc8 100644
--- a/GPy/kern/sympykern.py
+++ b/GPy/kern/sympykern.py
@@ -56,16 +56,14 @@ class spkern(kernpart):
 
         self.weave_kwargs = {\
             'support_code':self._function_code,\
-            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'symbolic/')],\
+            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
             'headers':['"sympy_helpers.h"'],\
-            'sources':[os.path.join(current_dir,"symbolic/sympy_helpers.cpp")],\
+            'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
             #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
             'extra_compile_args':[],\
             'extra_link_args':['-lgomp'],\
             'verbose':True}
-
-
 
     def __add__(self,other):
         return spkern(self._sp_k+other._sp_k)
@@ -126,13 +124,14 @@ class spkern(kernpart):
         int N = target_array->dimensions[0];
         int M = target_array->dimensions[1];
         int D = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
+        #pragma omp parallel for private(j)
         for (i=0;i<N;i++){
 
         int N = X_array->dimensions[0];
         int D = X_array->dimensions[1];
-        //#pragma omp parallel for
+        #pragma omp parallel for
         for (i=0;i<N;i++){
 
         int N = partial_array->dimensions[0];
         int M = partial_array->dimensions[1];
         int D = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
+        #pragma omp parallel for private(j)
         for (i=0;i<N;i++){
 
         int N = partial_array->dimensions[0];
         int M = partial_array->dimensions[1];
         int D = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
+        #pragma omp parallel for private(j)
         for (i=0;i<N;i++){

From: James Hensman
Date: Mon, 17 Dec 2012 11:45:25 +0000
Subject: [PATCH 19/33] removed dL_dZ from sympykern (it's not needed, we can
 always use dK_dX)

---
 GPy/kern/sympykern.py | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py
index 4d912dc8..4ac123ec 100644
--- a/GPy/kern/sympykern.py
+++ b/GPy/kern/sympykern.py
@@ -199,23 +199,6 @@ class spkern(kernpart):
         %s
         """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
 
-        #Here's some code to do gradients wrt z (should be the same as for X, but this is easier
-        gradient_funcs_Z = "\n".join(["target[j*D+%i] += partial[i*M+j]*dk_dz%i(%s);"%(q,q,arglist) for q in range(self.D)])
-        self._dK_dZ_code = \
-        """
-        int i;
-        int j;
-        int N = partial_array->dimensions[0];
-        int M = partial_array->dimensions[1];
-        int D = X_array->dimensions[1];
-        for (i=0;i<N;i++){
-          for (j=0;j<M;j++){
-            %s
-          }
-        }
-        %s
-        """%(gradient_funcs_Z,"/*"+str(self._sp_k)+"*/")
 
-    #def dK_dZ(self,X,Z,partial=None):
-        ##TODO: this function might not be necessary
-        #target = np.zeros_like(Z)
-        #param = self._param
-        #weave.inline(self._dK_dZ_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
-        #return target

From: James Hensman
Date: Mon, 17 Dec 2012 11:47:54 +0000
Subject: [PATCH 20/33] made sympykern truly work in place

---
 GPy/kern/sympykern.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py
index 4ac123ec..4e58f52a 100644
--- a/GPy/kern/sympykern.py
+++ b/GPy/kern/sympykern.py
@@ -221,35 +221,28 @@ class spkern(kernpart):
     def K(self,X,Z,target):
         param = self._param
         weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
-        return target
 
     def Kdiag(self,X,target):
         param = self._param
         weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
-        return target
 
     def dK_dtheta(self,partial,X,Z,target):
         param = self._param
         weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
-        return target
 
     def dKdiag_dtheta(self,partial,X,target):
         param = self._param
         Z = X
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs) - return target def dK_dX(self,partial,X,Z,target): - target = np.zeros_like(X) param = self._param weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs) - return target def dKdiag_dX(self,partial,X,target): param = self._param Z = X weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs) - return target def set_param(self,param): #print param.flags['C_CONTIGUOUS'] From 28edb59e8ba4e9232308e47d5388b860f5d81c24 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 17 Dec 2012 11:52:15 +0000 Subject: [PATCH 21/33] added demo for uncollapsed GP --- GPy/examples/uncollapsed_GP_demo.py | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 GPy/examples/uncollapsed_GP_demo.py diff --git a/GPy/examples/uncollapsed_GP_demo.py b/GPy/examples/uncollapsed_GP_demo.py new file mode 100644 index 00000000..5dc1ae1d --- /dev/null +++ b/GPy/examples/uncollapsed_GP_demo.py @@ -0,0 +1,32 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +""" +Sparse Gaussian Processes regression with an RBF kernel, +using the uncollapsed sparse GP (where the distribution of the +inducing points is explicitley represented) +""" +import pylab as pb +import numpy as np +import GPy +np.random.seed(2) +pb.ion() +N = 500 +M = 20 + +# sample inputs and outputs +X = np.random.uniform(-3.,3.,(N,1)) +Y = np.sin(X)+np.random.randn(N,1)*0.05 + +kernel = GPy.kern.rbf(1) + GPy.kern.white(1) + +# create simple GP model +m = GPy.models.uncollapsed_sparse_GP(X, Y, kernel=kernel, M=M)#, X_uncertainty=np.zeros_like(X)+0.01) + +# contrain all parameters to be positive +m.ensure_default_constraints() +m.checkgrad() +# optimize and plot +m.plot() From e29dc1c9dd2dc5ff8c602e2f98f6497cf612e7dd Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 18 Dec 2012 15:45:52 +0000 Subject: [PATCH 22/33] added a constructor for a generic sympy kernel --- GPy/kern/__init__.py | 2 +- GPy/kern/constructors.py | 6 ++++++ GPy/models/uncollapsed_sparse_GP.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index ead4d0fc..cd893bac 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy, sympykern from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 7bbac967..0dd579cc 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -188,3 +188,9 @@ def rbf_sympy(D,variance=1., lengthscale=1.): dist = parse_expr(dist_string) f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2)) return kern(D,[spkern(D,f,np.array([variance,lengthscale]))]) + +def sympykern(D,k): + """ + A kernel from a symbolic sympy representation + """ + return kern(D,[spkern(D,k)]) diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py index 3f645ab4..c345d1c5 100644 --- a/GPy/models/uncollapsed_sparse_GP.py +++ b/GPy/models/uncollapsed_sparse_GP.py @@ 
From cc3ccf1ddc1bd58411d8bf2c5381f12f9a4c9cac Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Tue, 18 Dec 2012 15:51:39 +0000
Subject: [PATCH 23/33] reduced the memory requirements of the sparse GP by a
 factor of M!
---
 GPy/models/sparse_GP_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 0f0b0569..b7280195 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -98,7 +98,7 @@ class sparse_GP_regression(GP_regression):

         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = (self.LLambdai.T[:,:,None,None]*self.V*self.C[None,:,None,:]).sum(1).sum(-1)
+        self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
         self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)

         # Compute dL_dKmm

From e0384902ff35d43ce0fe2b618120efae1597c7ff Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 21 Dec 2012 11:40:22 +0000
Subject: [PATCH 24/33] added an ARD option to the sympy RBF kern
---
 GPy/kern/constructors.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 0dd579cc..0ddc09e3 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -176,18 +176,24 @@ import sympy as sp
 from sympykern import spkern
 from sympy.parsing.sympy_parser import parse_expr

-def rbf_sympy(D,variance=1., lengthscale=1.):
+def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
     """
     Radial Basis Function covariance.
     """
     X = [sp.var('x%i'%i) for i in range(D)]
     Z = [sp.var('z%i'%i) for i in range(D)]
     rbf_variance = sp.var('rbf_variance',positive=True)
-    rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-    dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-    dist = parse_expr(dist_string)
-    f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-    return kern(D,[spkern(D,f,np.array([variance,lengthscale]))])
+    if ARD:
+        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+        dist = parse_expr(dist_string)
+        f = rbf_variance*sp.exp(-dist/2.)
+    else:
+        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+        dist = parse_expr(dist_string)
+        f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+    return kern(D,[spkern(D,f)])

 def sympykern(D,k):
     """
     A kernel from a symbolic sympy representation
     """
     return kern(D,[spkern(D,k)])
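A note on the factor-of-M saving in the sparse GP patch above: the broadcast-multiply-and-sum materialised a large intermediate array, whereas the equivalent chained matrix product (which is all mdot does) never builds it. A generic NumPy illustration with made-up shapes, not GPy's actual internals:

    import numpy as np

    M, N = 20, 500
    A = np.random.randn(M, M)
    B = np.random.randn(M, N)

    # broadcasting version: builds an M x M x N intermediate before summing
    slow = (A[:, :, None] * B[None, :, :]).sum(1)

    # matrix-product version: same result, no M x M x N intermediate
    fast = np.dot(A, B)

    assert np.allclose(slow, fast)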
From dfdd47a0a28b1111d25bb5797ff979197345a915 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 21 Dec 2012 11:41:08 +0000
Subject: [PATCH 25/33] Changed the behaviour of checkgrad. verbose now works
 as (I) expected.

discussion welcome
---
 GPy/core/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 46164f14..5d977378 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -286,7 +286,7 @@ class model(parameterised):

         return '\n'.join(s)

-    def checkgrad(self, verbose = True, include_priors=False, step=1e-6, tolerance = 1e-3, *args):
+    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, *args):
         """
         Check the gradient of the model by comparing to a numerical estimate.
         If the overall gradient fails, individual components are tested.
@@ -313,12 +313,12 @@ class model(parameterised):
             print " Gradient ratio = ", ratio, '\n'
             sys.stdout.flush()

-            if not (np.abs(1.-ratio)>tolerance):
+            if (np.abs(1.-ratio)
Date: Fri, 21 Dec 2012 11:41:42 +0000
Subject: [PATCH 26/33] forced simplification of sympy expressions before
 converting to c++
---
 GPy/kern/sympykern.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py
index 4e58f52a..d9f89f5b 100644
--- a/GPy/kern/sympykern.py
+++ b/GPy/kern/sympykern.py
@@ -47,8 +47,8 @@ class spkern(kernpart):
         self.set_param(param)

         #Differentiate!
-        self._sp_dk_dtheta = [sp.diff(k,theta) for theta in self._sp_theta]
-        self._sp_dk_dx = [sp.diff(k,xi) for xi in self._sp_x]
+        self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
+        self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
         #self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]

         #self.compute_psi_stats()
@@ -124,7 +124,7 @@ class spkern(kernpart):
         int N = target_array->dimensions[0];
         int M = target_array->dimensions[1];
         int D = X_array->dimensions[1];
-        #pragma omp parallel for private(j)
+        //#pragma omp parallel for private(j)
        for (i=0;idimensions[0];
         int D = X_array->dimensions[1];
-        #pragma omp parallel for
+        //#pragma omp parallel for
        for (i=0;idimensions[0];
         int M = partial_array->dimensions[1];
         int D = X_array->dimensions[1];
-        #pragma omp parallel for private(j)
+        //#pragma omp parallel for private(j)
        for (i=0;idimensions[0];
         int M = partial_array->dimensions[1];
         int D = X_array->dimensions[1];
-        #pragma omp parallel for private(j)
+        //#pragma omp parallel for private(j)
        for (i=0;i
Date: Fri, 21 Dec 2012 11:42:39 +0000
Subject: [PATCH 27/33] GP_regression and sparse_GP_regression now only return
 the full posterior covariance matrix when requested.
---
 GPy/models/GP_regression.py        | 37 ++++++++++++++++++++--------
 GPy/models/sparse_GP_regression.py | 13 +++++++----
 2 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py
index dcc95baf..ee2bb448 100644
--- a/GPy/models/GP_regression.py
+++ b/GPy/models/GP_regression.py
@@ -47,7 +47,9 @@ class GP_regression(model):
         if normalize_X:
             self._Xmean = X.mean(0)[None,:]
             self._Xstd = X.std(0)[None,:]
-            self.X = (X.copy()- self._Xmean) / self._Xstd
+            self.X = (X.copy() - self._Xmean) / self._Xstd
+            if hasattr(self,'Z'):
+                self.Z = (self.Z - self._Xmean) / self._Xstd
         else:
             self._Xmean = np.zeros((1,self.X.shape[1]))
             self._Xstd = np.ones((1,self.X.shape[1]))
@@ -104,7 +106,7 @@ class GP_regression(model):
     def log_likelihood_gradients(self):
         return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)

-    def predict(self,Xnew, slices=None):
+    def predict(self,Xnew, slices=None, full_cov=False):
         """
         Predict the function(s) at the new point(s) Xnew.
@@ -115,6 +117,8 @@ class GP_regression(model):
         :type Xnew: np.ndarray, Nnew x self.Q
         :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
         :type slices: (None, list of slice objects, list of ints)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
         :rtype: posterior mean, a Numpy array, Nnew x self.D
         :rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)

@@ -124,29 +128,42 @@ class GP_regression(model):
           - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
          - If a list of booleans, specifying which kernel parts are active

-           If self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
+           If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
            This is to allow for different normalisations of the output dimensions.
+
         """
         #normalise X values
         Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
-        mu, var = self._raw_predict(Xnew,slices)
+        mu, var = self._raw_predict(Xnew, slices, full_cov)

         #un-normalise
         mu = mu*self._Ystd + self._Ymean
-        if self.D==1:
-            var *= np.square(self._Ystd)
+        if full_cov:
+            if self.D==1:
+                var *= np.square(self._Ystd)
+            else:
+                var = var[:,:,None] * np.square(self._Ystd)
         else:
-            var = var[:,:,None] * np.square(self._Ystd)
+            if self.D==1:
+                var *= np.square(np.squeeze(self._Ystd))
+            else:
+                var = var[:,None] * np.square(self._Ystd)
+
         return mu,var

-    def _raw_predict(self,_Xnew,slices):
+    def _raw_predict(self,_Xnew,slices, full_cov=False):
         """Internal helper function for making predictions, does not account for normalisation"""
         Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
         mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
-        var = Kxx - np.dot(np.dot(Kx.T,self.Ki),Kx)
+        KiKx = np.dot(self.Ki,Kx)
+        if full_cov:
+            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
+            var = Kxx - np.dot(KiKx.T,Kx)
+        else:
+            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
+            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
         return mu, var

     def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index b7280195..5f64c0ec 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -171,14 +171,19 @@ class sparse_GP_regression(GP_regression):
     def log_likelihood_gradients(self):
         return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])

-    def _raw_predict(self, Xnew, slices):
+    def _raw_predict(self, Xnew, slices, full_cov=False):
         """Internal helper function for making predictions, does not account for normalisation"""
         Kx = self.kern.K(self.Z, Xnew)
-        Kxx = self.kern.K(Xnew)
-        mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
-        var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
+
+        mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
+        if full_cov:
+            Kxx = self.kern.K(Xnew)
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
+        else:
+            Kxx = self.kern.Kdiag(Xnew)
+            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
+
         return mu,var

     def plot(self, *args, **kwargs):
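With these changes, prediction defaults to the Nnew marginal variances and only builds the Nnew x Nnew matrix on request. A usage sketch (the GP_regression constructor call follows the demo earlier in this series; treat the argument names as assumptions):

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (50, 1))
    Y = np.sin(X) + np.random.randn(50, 1)*0.05
    m = GPy.models.GP_regression(X, Y, kernel=GPy.kern.rbf(1) + GPy.kern.white(1))

    Xnew = np.linspace(-3., 3., 200)[:, None]
    mu, var_diag = m.predict(Xnew)                  # per-point variances, O(Nnew) memory
    mu, var_full = m.predict(Xnew, full_cov=True)   # full posterior covariance, O(Nnew^2)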
+
         return mu,var

     def plot(self, *args, **kwargs):

From 3070f0b6c538f273c503c3bcdf86fcd26761e871 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 21 Dec 2012 11:43:15 +0000
Subject: [PATCH 28/33] removed some redundant looping in kern.py
---
 GPy/kern/kern.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 3830d844..3ba7d97b 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -182,7 +182,7 @@ class kern(parameterised):
             X2 = X
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(X)
-        [p.dK_dX(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p,i_s,ps,s1,s2 in zip(self.parts,self.input_slices, self.param_slices,slices1,slices2)]
+        [p.dK_dX(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
         return target

     def Kdiag(self,X,slices=None):

From 57934d82a66b10e22821a489b12acbff340cf172 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Wed, 9 Jan 2013 15:45:39 +0000
Subject: [PATCH 29/33] sphinx configuration for readthedocs.org
---
 GPy/models/sparse_GP_regression.py  |   2 +-
 GPy/models/uncollapsed_sparse_GP.py |  14 +-
 doc/Makefile                        | 153 ++++++++++++++++++
 doc/conf.py                         | 242 ++++++++++++++++++++++++++++
 doc/index.rst                       |  22 +++
 5 files changed, 429 insertions(+), 4 deletions(-)
 create mode 100644 doc/Makefile
 create mode 100644 doc/conf.py
 create mode 100644 doc/index.rst

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 5f64c0ec..fe5f7cc1 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -44,7 +44,7 @@ class sparse_GP_regression(GP_regression):
         else:
             assert Z.shape[1]==X.shape[1]
             self.Z = Z
-            self.M = Z.shape[1]
+            self.M = Z.shape[0]
         if X_uncertainty is None:
             self.has_uncertain_inputs=False
         else:

diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index c345d1c5..3bb72e60 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -36,7 +36,9 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         self.D = Y.shape[1]
         if q_u is None:
             if 'Z' in kwargs.keys():
+                print kwargs['Z']
                 self.M = kwargs['Z'].shape[0]
+                print self.M
             else:
                 self.M = M
             q_u = np.hstack((np.random.randn(self.M*self.D),-0.5*np.eye(self.M).flatten()))
@@ -90,13 +92,18 @@ class uncollapsed_sparse_GP(sparse_GP_regression):

         return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)

-    def _raw_predict(self, Xnew, slices):
+    def _raw_predict(self, Xnew, slices,full_cov=False):
         """Internal helper function for making predictions, does not account for normalisation"""
         Kx = self.kern.K(Xnew,self.Z)
-        Kxx = self.kern.K(Xnew)
         mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
+
         tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
-        var = Kxx - mdot(Kx,tmp,Kx.T) + np.eye(Xnew.shape[0])/self.beta
+        if full_cov:
+            Kxx = self.kern.K(Xnew)
+            var = Kxx - mdot(Kx,tmp,Kx.T) + np.eye(Xnew.shape[0])/self.beta
+        else:
+            Kxx = self.kern.Kdiag(Xnew)
+            var = Kxx - np.sum(Kx*np.dot(Kx,tmp),1) + 1./self.beta

         return mu,var

@@ -126,6 +133,7 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family)
         """
         dL_dmmT_S = -0.5*self.Lambda-self.q_u_canonical[1]
+        #dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
        dL_dm = np.dot(self.Kmmi,self.psi1V) -
self.q_u_canonical[0] #dL_dSim = diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..faa4ed65 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,153 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/GPy.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/GPy.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." 
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/GPy" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/GPy" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 00000000..7e1ec813 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +# +# GPy documentation build configuration file, created by +# sphinx-quickstart on Wed Jan 9 15:21:20 2013. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.pngmath', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'GPy' +copyright = u'2013, The GPy authors' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.00001' +# The full version, including alpha/beta/rc tags. +release = '0.00001' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. 
+#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'GPydoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'GPy.tex', u'GPy Documentation', + u'The GPy authors', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'gpy', u'GPy Documentation', + [u'The GPy authors'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'GPy', u'GPy Documentation', + u'The GPy authors', 'GPy', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+#texinfo_show_urls = 'footnote' diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 00000000..46327bb7 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,22 @@ +.. GPy documentation master file, created by + sphinx-quickstart on Wed Jan 9 15:21:20 2013. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to GPy's documentation! +=============================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + From 8d3fa46089b78724f7aecc46f1a04ed74317ffcf Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 10 Jan 2013 16:24:58 +0000 Subject: [PATCH 30/33] Fixed a NF induced bug in the guts of GPy --- GPy/core/parameterised.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index 3894ea6a..da0b6056 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -215,7 +215,7 @@ class parameterised(object): else: self.constrained_fixed_values.append(self.get_param()[self.constrained_fixed_indices[-1]]) - self.constrained_fixed_values.append(value) + #self.constrained_fixed_values.append(value) self.expand_param(self.extract_param()) def extract_param(self): From 7a69b4c2a27dd1bdb0c941b796fe31b1dfb79eb0 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Thu, 10 Jan 2013 16:35:52 +0000 Subject: [PATCH 31/33] removed unused posix import --- GPy/util/datasets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 3379ccd3..bc7bf546 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -1,5 +1,4 @@ import os -import posix import pylab as pb import numpy as np import GPy From 3f01cbdbccbc85b98a29efcb61b5c369913f54b7 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Fri, 11 Jan 2013 12:05:53 +0000 Subject: [PATCH 32/33] removed ticks and checkmarks from checkgrad() output, coloring param name instead --- GPy/core/model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 5d977378..0c1594a2 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -339,10 +339,12 @@ class model(parameterised): ratio = (f1-f2)/(2*step*gradient) difference = np.abs((f1-f2)/2/step - gradient) if verbose: - print "{0:10s} ratio: {1:15f} difference: {2:15f} analytical: {3:15f} numerical: {4:15f}".format(names[i], float(ratio), float(difference), gradient, float(numerical_gradient)), if (np.abs(ratio-1) Date: Fri, 11 Jan 2013 13:06:02 +0000 Subject: [PATCH 33/33] pretty printing of gradchecks --- GPy/core/model.py | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 0c1594a2..46cf6ac9 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -308,9 +308,7 @@ class model(parameterised): numerical_gradient = (f1-f2)/(2*dx) ratio = (f1-f2)/(2*np.dot(dx,gradient)) if verbose: - #print "gradient = ",gradient - #print "numerical gradient = ",numerical_gradient - print " Gradient ratio = ", ratio, '\n' + print "Gradient ratio = ", ratio, '\n' sys.stdout.flush() if (np.abs(1.-ratio)