From d9b03044ac382ca484a5fa862cb98732904cb99a Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 25 Feb 2013 12:20:20 +0000
Subject: [PATCH 001/105] moved randomize() to a more appropriate place

---
 GPy/core/model.py | 3 ++-
 GPy/util/misc.py  | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 6b7d32c6..b6cedbaf 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -185,7 +185,7 @@ class model(parameterised):
         :verbose: whether to show informations about the current restart
         :parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
         :num_processes: number of workers in the multiprocessing pool
-        
+
         ..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically
         set to the number of processors on the current machine.
 
@@ -198,6 +198,7 @@ class model(parameterised):
             jobs = []
             pool = mp.Pool(processes=num_processes)
             for i in range(Nrestarts):
+                self.randomize()
                 job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
                 jobs.append(job)
 
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index e3b91dce..e0f70703 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -9,7 +9,6 @@ def opt_wrapper(m, **kwargs):
     This function just wraps the optimization procedure of a GPy
     object so that optimize() pickleable (necessary for multiprocessing).
     """
-    m.randomize()
     m.optimize(**kwargs)
     return m.optimization_runs[-1]
 

From 4d79c3c97d03628326e0a4eb30b7170c14715683 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 26 Feb 2013 14:49:00 +0000
Subject: [PATCH 002/105] start of psi2 crossterms

---
 GPy/kern/kern.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 GPy/notes.txt    | 32 ++++++++++++++++--------
 2 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index de4ebc8a..99ad46ea 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -378,6 +378,26 @@ class kern(parameterised):
         slices1, slices2 = self._process_slices(slices1,slices2)
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
+        #compute the "cross" terms
+        for p1, p2 in itertools.combinations(self.parts,2):
+            #white doesn't combine with anything
+            if p1.name=='white' or p2.name=='white':
+                pass
+            #rbf X bias
+            elif p1.name=='bias' and p2.name=='rbf':
+                target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
+            elif p2.name=='bias' and p1.name=='rbf':
+                target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
+            #rbf X linear
+            elif p1.name=='linear' and p2.name=='rbf':
+                raise NotImplementedError #TODO
+            elif p2.name=='linear' and p1.name=='rbf':
+                raise NotImplementedError #TODO
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+
+
+
 
 
         # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
@@ -402,6 +422,31 @@ class kern(parameterised):
         target = np.zeros(self.Nparam)
         [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
 
+        #compute the "cross" terms
+        #TODO: better looping
+        for i1, i2 in itertools.combinations(range(len(self.parts)),2):
+            p1,p2 = self.parts[i1], self.parts[i2]
+            ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
+            ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
+
+            #white doesn't combine with anything
+            if p1.name=='white' or p2.name=='white':
+                pass
+            #rbf X bias
+            elif p1.name=='bias' and p2.name=='rbf':
+                p2.dpsi1_dtheta(partial.sum(1)*p1.variance,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(partial.sum(1)*p2._psi1,Z,mu,S,target[ps1])
+            elif p2.name=='bias' and p1.name=='rbf':
+                p1.dpsi1_dtheta(partial.sum(1)*p2.variance,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(partial.sum(1)*p1._psi1,Z,mu,S,target[ps2])
+            #rbf X linear
+            elif p1.name=='linear' and p2.name=='rbf':
+                raise NotImplementedError #TODO
+            elif p2.name=='linear' and p1.name=='rbf':
+                raise NotImplementedError #TODO
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+
         # # "crossterms"
         # # 1. get all the psi1 statistics
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
@@ -429,6 +474,26 @@ class kern(parameterised):
         target = np.zeros_like(Z)
         [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
+        #compute the "cross" terms
+        #TODO: slices (need to iterate around the input slices also...)
+        for p1, p2 in itertools.combinations(self.parts,2):
+            #white doesn't combine with anything
+            if p1.name=='white' or p2.name=='white':
+                pass
+            #rbf X bias
+            elif p1.name=='bias' and p2.name=='rbf':
+                target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S)
+            elif p2.name=='bias' and p1.name=='rbf':
+                target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S)
+            #rbf X linear
+            elif p1.name=='linear' and p2.name=='rbf':
+                raise NotImplementedError #TODO
+            elif p2.name=='linear' and p1.name=='rbf':
+                raise NotImplementedError #TODO
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+
+
         return target
 
     def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
diff --git a/GPy/notes.txt b/GPy/notes.txt
index c09102b9..9617855a 100644
--- a/GPy/notes.txt
+++ b/GPy/notes.txt
@@ -1,12 +1,11 @@
-Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
-
-The Matern kernels (at least the 52) still is working in the ARD manner which means it wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
-
 Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
+TODO
 
 Missing kernels: polynomial, rational quadratic.
+TODO
 
 Kernel implementations are far to obscure. Need to be easily readable for a first time user.
+Duplicate. 
 
 Need an implementation of scaled conjugate gradients for the optimizers.
 
@@ -15,21 +14,30 @@ Need an implementation of gradient descent for the optimizers (works well with G
 Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
 
 Change get_param and set_param to get_params and set_params
+FIXED
 
 Get constrain param by default inside model creation.
 
-Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
-
-
-Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
 
 Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
+Upstream: Waiting for the new scipy, where the optimisers have been unified. 
+
+Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
+Upstream, as above
 
 Change Youter to YYT (Youter doesn't mean anything for matrices).
+FIXED
 
 Bug when running classification.crescent_data()
 
 A dictionary for parameter storage? So we can go through names easily?
+Wontfix. Dictionaries bring up all kinds of problems since they're not ordered. 
+
+When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X) you have to do rbf.K(X, X)
+FIXED
+
+the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it. 
+FIXED
 
 A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
 
@@ -37,6 +45,10 @@ Diagonal noise covariance function
 
 Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
 
-When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X) you have to do rbf.K(X, X)
+Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
 
-the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it. 
+Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
+FIXED
+
+The Matern kernels (at least the 52) still is working in the ARD manner which means it wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
+FIXED

From 6e809b024faea533eeb9e45b16084e4c7cc8f193 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 26 Feb 2013 15:00:18 +0000
Subject: [PATCH 003/105] efficiency improvements in sparse_GP

the recasting of derivatives through psi2 into psi1 is now only done in
one place
---
 GPy/models/sparse_GP.py    | 27 ++++++++++++---------------
 GPy/models/sparse_GPLVM.py |  5 +----
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 9e83af3a..c4a6ac71 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -106,12 +106,20 @@ class sparse_GP(GP):
             self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
             self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
             self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+            if not self.has_uncertain_inputs:
+                raise NotImplementedError, "TODO: recaste derivatibes in psi2 back into psi1"
+
         else:
             self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
             self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
             self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
-            #repeat for each of the N psi_2 matrices
-            self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
+            if self.has_uncertain_inputs:
+                #repeat for each of the N psi_2 matrices
+                self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
+            else:
+                self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
+                self.dL_dpsi2 = None
+
 
         # Compute dL_dKmm
         self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
@@ -175,13 +183,7 @@ class sparse_GP(GP):
             dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
             dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
         else:
-            #re-cast computations in psi2 back to psi1:
-            #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
-            if not self.likelihood.is_heteroscedastic:
-                dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1)
-            else:
-                raise NotImplementedError, "TODO"
-            dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
+            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
 
         return dL_dtheta
@@ -195,12 +197,7 @@ class sparse_GP(GP):
             dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
             dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
         else:
-            #re-cast computations in psi2 back to psi1:
-            if not self.likelihood.is_heteroscedastic:
-                dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1)
-            else:
-                raise NotImplementedError, "TODO"
-            dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
+            dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
         return dL_dZ
 
     def _raw_predict(self, Xnew, slices, full_cov=False):
diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py
index dc97c79e..fe7c1c43 100644
--- a/GPy/models/sparse_GPLVM.py
+++ b/GPy/models/sparse_GPLVM.py
@@ -42,11 +42,8 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
         return sparse_GP_regression.log_likelihood(self)
 
     def dL_dX(self):
-        #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
-	dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1)
-
         dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
-        dL_dX += self.kern.dK_dX(dL_dpsi1.T,self.X,self.Z)
+        dL_dX += self.kern.dK_dX(self.dL_dpsi1,self.X,self.Z)
 
         return dL_dX
 

From b47161e8747054773dc082490a50c39b507a4a1a Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 27 Feb 2013 08:57:33 +0000
Subject: [PATCH 004/105] Some commentary on Neil's notes.txt

---
 GPy/notes.txt | 89 +++++++++++++++++++++++++++------------------------
 1 file changed, 48 insertions(+), 41 deletions(-)

diff --git a/GPy/notes.txt b/GPy/notes.txt
index 9617855a..c80cedad 100644
--- a/GPy/notes.txt
+++ b/GPy/notes.txt
@@ -1,54 +1,61 @@
-Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
-TODO
-
-Missing kernels: polynomial, rational quadratic.
-TODO
-
-Kernel implementations are far to obscure. Need to be easily readable for a first time user.
-Duplicate. 
-
-Need an implementation of scaled conjugate gradients for the optimizers.
-
-Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
-
-Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
-
-Change get_param and set_param to get_params and set_params
+the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it. 
 FIXED
 
-Get constrain param by default inside model creation.
-
-
-Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
-Upstream: Waiting for the new scipy, where the optimisers have been unified. 
-
-Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
-Upstream, as above
-
-Change Youter to YYT (Youter doesn't mean anything for matrices).
-FIXED
-
-Bug when running classification.crescent_data()
-
-A dictionary for parameter storage? So we can go through names easily?
-Wontfix. Dictionaries bring up all kinds of problems since they're not ordered. 
-
 When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X) you have to do rbf.K(X, X)
 FIXED
 
-the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it. 
+Change Youter to YYT (Youter doesn't mean anything for matrices).
 FIXED
 
-A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
-
-Diagonal noise covariance function
-
-Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
-
-Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
+Change get_param and set_param to get_params and set_params
+FIXED
 
 Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
 FIXED
 
 The Matern kernels (at least the 52) still is working in the ARD manner which means it wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
 FIXED
+
+Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
+TODO, a priority for this release
+
+Missing kernels: polynomial, rational quadratic.
+TODO, should be straightforward when the above is fixed.
+
+Need an implementation of scaled conjugate gradients for the optimizers.
+UPSTREAM: scipy are tidying up the optimize module. let's wait for their next release. 
+
+Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
+As above. 
+
+Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
+Any word from Carl yet?
+
+Get constrain param by default inside model creation.
+Well, we have ensure_default_constraints. There are some technical difficulties in doing it inside model creation, so perhaps this is something for a later release.
+
+Bug when running classification.crescent_data()
+TODO.
+
+Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
+Upstream: Waiting for the new scipy, where the optimisers have been unified. Obviously it'd be much better to be able to specify a unified set of args. 
+
+Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
+Upstream, as above
+
+A dictionary for parameter storage? So we can go through names easily?
+Wontfix. Dictionaries bring up all kinds of problems since they're not ordered. it's easy enough to do:
+for val, name in zip(m._get_params(), m._get_param_names()): foobar
+
+A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
+TODO, agree this would be helpful.
+
+Diagonal noise covariance function
+TODO this is now straightforward using the likelihood framework, or as a kern. NF also requires a similar kind of kern function (a fixed form kernel)
+
+Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
+
+Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
+Excellent proposal, but lots of work: suggest leaving for the next release?
+
+

From 4674ed45588070412f8cd2132e9ca948b967b00b Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 1 Mar 2013 11:13:55 +0000
Subject: [PATCH 005/105] added optional number of contour levels to the 2D
 plotting in GP.plot

---
 GPy/models/GP.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index c4c37e44..b9da5766 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -60,6 +60,12 @@ class GP(model):
 
         model.__init__(self)
 
+    def dL_dZ(self):
+        """
+        Placeholder: returns zeros shaped like self.Z. TODO: one day we might like to learn Z by gradient methods?
+        """
+        return np.zeros_like(self.Z)
+
     def _set_params(self,p):
         self.kern._set_params_transformed(p[:self.kern.Nparam])
         #self.likelihood._set_params(p[self.kern.Nparam:])               # test by Nicolas
@@ -236,7 +242,12 @@ class GP(model):
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20):
+        """
+        TODO: Docstrings!
+        :param levels: for 2D plotting, the number of contour levels to use
+
+        """
         # TODO include samples
         if which_functions=='all':
             which_functions = [True]*self.kern.Nparts
@@ -265,7 +276,7 @@ class GP(model):
             x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
             m, var, lower, upper = self.predict(Xnew, slices=which_functions)
             m = m.reshape(resolution,resolution).T
-            pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
+            pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet,levels=levels)
             Yf = self.likelihood.Y.flatten()
             pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
             pb.xlim(xmin[0],xmax[0])

From 62424e5947e809a0397e07c5b0176616ee3a1612 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 1 Mar 2013 11:40:18 +0000
Subject: [PATCH 006/105] fixed levels in GP.plot

---
 GPy/models/GP.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index b9da5766..91327aab 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -276,7 +276,7 @@ class GP(model):
             x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
             m, var, lower, upper = self.predict(Xnew, slices=which_functions)
             m = m.reshape(resolution,resolution).T
-            pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet,levels=levels)
+            pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
             Yf = self.likelihood.Y.flatten()
             pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
             pb.xlim(xmin[0],xmax[0])

From adfe587c86407acd423c0823279cb4f8c006a498 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Fri, 1 Mar 2013 16:52:37 +0000
Subject: [PATCH 007/105] added dKdiag_dtheta for the periodic kernels

---
 GPy/kern/periodic_Matern32.py    | 63 ++++++++++++++++++++++++++++
 GPy/kern/periodic_Matern52.py    | 71 ++++++++++++++++++++++++++++++++
 GPy/kern/periodic_exponential.py | 64 ++++++++++++++++++++++++++--
 3 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py
index eeadf1c8..821a503c 100644
--- a/GPy/kern/periodic_Matern32.py
+++ b/GPy/kern/periodic_Matern32.py
@@ -171,3 +171,66 @@ class periodic_Matern32(kernpart):
         target[1] += np.sum(dK_dlen*partial)
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*partial)
+
+    def dKdiag_dtheta(self,partial,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters (accumulated in place into target)"""
+        FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
+
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2))
+        Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
+        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r,omega,phi =  self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+
+        #dK_dvar
+        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
+
+        #dK_dlen
+        da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
+        db_dlen = [0.,2*self.lengthscale/3.]
+        dLa_dlen =  np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
+        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
+        dGint_dlen = dGint_dlen + dGint_dlen.T
+        dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
+        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
+
+        #dK_dper
+        dFX_dper  = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
+
+        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
+        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
+        r1,omega1,phi1 =  self._cos_factorization(dLa_dper,Lo,dLp_dper)
+
+        IPPprim1 =  self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2)  +  1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
+        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2)  +  1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
+        IPPprim2 =  self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2)  + self.upper*np.cos(phi-phi1.T))
+        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2)  + self.lower*np.cos(phi-phi1.T))
+        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
+
+        IPPint1 =  1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi)  +  1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
+        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi)  +  1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
+        IPPint2 =  1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi)  + 1./2*self.upper**2*np.cos(phi-phi1.T)
+        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi)  + 1./2*self.lower**2*np.cos(phi-phi1.T)
+        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
+
+        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
+        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r2,omega2,phi2 =  self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
+
+        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) +  self._int_computation(r2,omega2,phi2, r,omega,phi)
+        dGint_dper = dGint_dper + dGint_dper.T
+
+        dFlower_dper  = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+
+        dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
+
+        dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
+
+        target[0] += np.sum(np.diag(dK_dvar)*partial)
+        target[1] += np.sum(np.diag(dK_dlen)*partial)
+        target[2] += np.sum(np.diag(dK_dper)*partial)
diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py
index 2db3d223..92a4bde1 100644
--- a/GPy/kern/periodic_Matern52.py
+++ b/GPy/kern/periodic_Matern52.py
@@ -183,3 +183,74 @@ class periodic_Matern52(kernpart):
         target[1] += np.sum(dK_dlen*partial)
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*partial)
+
+def dKdiag_dtheta(self,partial,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters"""
+        FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
+
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
+        Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
+        Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
+        r,omega,phi =  self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+        F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
+
+        #dK_dvar
+        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)  # diagonal case: only X is passed in, so FX is used on both sides
+
+        #dK_dlen
+        da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
+        db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
+        dLa_dlen =  np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
+        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
+        dGint_dlen = dGint_dlen + dGint_dlen.T
+        dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
+        dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
+        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
+
+        #dK_dper
+        dFX_dper  = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
+
+        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
+        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
+        r1,omega1,phi1 =  self._cos_factorization(dLa_dper,Lo,dLp_dper)
+
+        IPPprim1 =  self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2)  +  1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
+        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2)  +  1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
+        IPPprim2 =  self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2)  + self.upper*np.cos(phi-phi1.T))
+        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2)  + self.lower*np.cos(phi-phi1.T))
+        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
+
+        IPPint1 =  1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi)  +  1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
+        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi)  +  1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
+        IPPint2 =  1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi)  + .5*self.upper**2*np.cos(phi-phi1.T)
+        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi)  + .5*self.lower**2*np.cos(phi-phi1.T)
+        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
+
+        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
+        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
+        r2,omega2,phi2 =  self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
+
+        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) +  self._int_computation(r2,omega2,phi2, r,omega,phi)
+        dGint_dper = dGint_dper + dGint_dper.T
+
+        dFlower_dper  = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+        dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
+
+        dlower_terms_dper  = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower,dFlower_dper.T))  # column-by-row outer products, square like the b[1..4] terms below
+        dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
+        dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
+        dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
+        dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
+
+        dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
+        dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
+
+        target[0] += np.sum(np.diag(dK_dvar)*partial)
+        target[1] += np.sum(np.diag(dK_dlen)*partial)
+        target[2] += np.sum(np.diag(dK_dper)*partial)
diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/periodic_exponential.py
index d99bada8..7f566f25 100644
--- a/GPy/kern/periodic_exponential.py
+++ b/GPy/kern/periodic_exponential.py
@@ -162,9 +162,67 @@ class periodic_exponential(kernpart):
 
         dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
 
-        # np.add(target[:,:,0],dK_dvar, target[:,:,0])
         target[0] += np.sum(dK_dvar*partial)
-        #np.add(target[:,:,1],dK_dlen, target[:,:,1])
         target[1] += np.sum(dK_dlen*partial)
-        #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*partial)
+
+    def dKdiag_dtheta(self,partial,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters"""
+        FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
+
+        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
+        Lo = np.column_stack((self.basis_omega,self.basis_omega))
+        Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
+        r,omega,phi =  self._cos_factorization(La,Lo,Lp)
+        Gint = self._int_computation( r,omega,phi, r,omega,phi)
+
+        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
+
+        #dK_dvar
+        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
+
+        #dK_dlen
+        da_dlen = [-1./self.lengthscale**2,0.]
+        dLa_dlen =  np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
+        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
+        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
+        dGint_dlen = dGint_dlen + dGint_dlen.T
+        dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
+        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
+
+        #dK_dper
+        dFX_dper  = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
+
+        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
+        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
+        r1,omega1,phi1 =  self._cos_factorization(dLa_dper,Lo,dLp_dper)
+
+        IPPprim1 =  self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2)  +  1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
+        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2)  +  1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
+        IPPprim2 =  self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2)  + self.upper*np.cos(phi-phi1.T))
+        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2)  + self.lower*np.cos(phi-phi1.T))
+        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
+
+        IPPint1 =  1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi)  +  1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
+        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi)  +  1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
+        IPPint2 =  1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi)  + 1./2*self.upper**2*np.cos(phi-phi1.T)
+        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi)  + 1./2*self.lower**2*np.cos(phi-phi1.T)
+        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
+
+        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
+        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
+        r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
+
+        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
+        dGint_dper = dGint_dper + dGint_dper.T
+
+        dFlower_dper  = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
+
+        dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
+
+        dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
+
+        target[0] += np.sum(np.diag(dK_dvar)*partial)
+        target[1] += np.sum(np.diag(dK_dlen)*partial)
+        target[2] += np.sum(np.diag(dK_dper)*partial)
+        

From 3ffa8d782cf767687eee4e97e07b41a810ed5793 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Fri, 1 Mar 2013 17:45:41 +0000
Subject: [PATCH 008/105] indentation for dKdiag_dtheta fixed

---
 GPy/kern/periodic_Matern32.py | 2 +-
 GPy/kern/periodic_Matern52.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py
index 821a503c..be1148c4 100644
--- a/GPy/kern/periodic_Matern32.py
+++ b/GPy/kern/periodic_Matern32.py
@@ -172,7 +172,7 @@ class periodic_Matern32(kernpart):
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*partial)
 
-def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,partial,X,target):
         """derivative of the diagonal covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
 
diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py
index 92a4bde1..8d1da8b1 100644
--- a/GPy/kern/periodic_Matern52.py
+++ b/GPy/kern/periodic_Matern52.py
@@ -184,7 +184,7 @@ class periodic_Matern52(kernpart):
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*partial)
 
-def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,partial,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
 

From c5f4bcd02b58c21199a7423127ce17750fd1df3b Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 1 Mar 2013 23:06:58 +0000
Subject: [PATCH 009/105] added dKdiag_dtheta for linear

---
 GPy/kern/linear.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index da4f79f4..df2fed46 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -92,6 +92,13 @@ class linear(kernpart):
         self._psi_computations(Z,mu,S)
         target += np.sum(self.variances*self.mu2_S,1)
 
+    def dKdiag_dtheta(self,partial, X, target):
+        tmp = partial[:,None]*X**2
+        if self.ARD:
+            target += tmp.sum(0)
+        else:
+            target += tmp.sum()
+
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         tmp = partial[:, None] * self.mu2_S

From 0e46e309e4a4e6a8dbf044e2147d4e0bbb9151df Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 4 Mar 2013 12:13:27 +0000
Subject: [PATCH 010/105] removed unnecessary computation of psi2

---
 GPy/models/sparse_GP.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index c4a6ac71..b75df22e 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -84,7 +84,6 @@ class sparse_GP(GP):
             else:
                 tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
             self.psi2_beta_scaled = np.dot(tmp,tmp.T)
-            self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability
 
         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
 

From 053e6519c82e635ea218e721b4ed44ece87f5d51 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 4 Mar 2013 12:43:05 +0000
Subject: [PATCH 011/105] reinstated compute_kernel_matrices

---
 GPy/models/sparse_GP.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index b75df22e..e6caf9fc 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -58,6 +58,19 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             self.X_uncertainty /= np.square(self._Xstd)
 
+
+    def _compute_kernel_matrices(self):
+        # kernel computations, using BGPLVM notation
+        self.Kmm = self.kern.K(self.Z)
+        if self.has_uncertain_inputs:
+            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
+            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
+            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
+        else:
+            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
+            self.psi1 = self.kern.K(self.Z,self.X)
+            self.psi2 = None
+
     def _computations(self):
         # TODO find routine to multiply triangular matrices
         #TODO: slices for psi statistics (easy enough)
@@ -65,25 +78,20 @@ class sparse_GP(GP):
         sf = self.scale_factor
         sf2 = sf**2
 
-        # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
-        if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
-            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
-            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            if self.likelihood.is_heteroscedastic:
+        #The rather complex computations of psi2_beta_scaled
+        if self.likelihood.is_heteroscedastic:
+            assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
+            if self.has_uncertain_inputs:
                 self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
-                #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
             else:
-                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
+                tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(1,self.N))/sf)
+                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
         else:
-            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
-            self.psi1 = self.kern.K(self.Z,self.X)
-            if self.likelihood.is_heteroscedastic:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf)
+            if self.has_uncertain_inputs:
+                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
             else:
                 tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
-            self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
 
         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
 
@@ -149,6 +157,7 @@ class sparse_GP(GP):
         self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
         self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
         self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
+        self._compute_kernel_matrices()
         self._computations()
 
     def _get_params(self):

From 77a89794d70182dfe17540acd53df4796215da3d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 4 Mar 2013 16:22:18 +0000
Subject: [PATCH 012/105] minor changes for het. noise and uncertain inputs

---
 GPy/models/sparse_GP.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index e6caf9fc..6932154d 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -84,7 +84,7 @@ class sparse_GP(GP):
             if self.has_uncertain_inputs:
                 self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
             else:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(1,self.N))/sf)
+                tmp = self.psi1.T*(np.sqrt(self.likelihood.precision.reshape(1,self.N))/sf)
                 self.psi2_beta_scaled = np.dot(tmp,tmp.T)
         else:
             if self.has_uncertain_inputs:

From f2ce47d96e5704ab4e54666ce75ff3201c8ff84e Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 5 Mar 2013 15:58:03 +0000
Subject: [PATCH 013/105] Added symmetrical covariance functions

---
 GPy/kern/__init__.py     |  2 +-
 GPy/kern/constructors.py | 10 +++++
 GPy/kern/symmetric.py    | 92 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/symmetric.py

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 008a2e1a..3e20644b 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal, symmetric
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index af82bb24..5d665d93 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -20,6 +20,7 @@ from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
 from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
 from product import product as productpart
 from product_orthogonal import product_orthogonal as product_orthogonalpart
+from symmetric import symmetric as symmetric_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -264,3 +265,12 @@ def product_orthogonal(k1,k2):
     """
     part = product_orthogonalpart(k1,k2)
     return kern(k1.D+k2.D, [part])
+
+def symmetric(k):
+    """
+    Construct a symmetrical kernel from an existing kernel
+    """
+    k_ = k.copy()
+    k_.parts = [symmetric_part(p) for p in k.parts]
+    return k_
+
diff --git a/GPy/kern/symmetric.py b/GPy/kern/symmetric.py
new file mode 100644
index 00000000..d493bfb1
--- /dev/null
+++ b/GPy/kern/symmetric.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2012 James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import kernpart
+import numpy as np
+
+class symmetric(kernpart):
+    """
+    Symmetrical kernels
+
+    :param k: the kernel to symmetrify
+    :type k: kernpart
+    :param transform: the transform to use in symmetrification (allows symmetry on specified axes)
+    :type transform: A numpy array (D x D) specifying the transform; defaults to minus the identity
+    :rtype: kernpart
+
+    """
+    def __init__(self,k,transform=None):
+        if transform is None:
+            transform = np.eye(k.D)*-1.
+        assert transform.shape == (k.D, k.D)
+        self.transform = transform
+        self.D = k.D
+        self.Nparam = k.Nparam
+        self.name = k.name + '_symm'
+        self.k = k
+        self._set_params(k._get_params())
+
+    def _get_params(self):
+        """return the value of the parameters."""
+        return self.k._get_params()
+
+    def _set_params(self,x):
+        """set the value of the parameters."""
+        self.k._set_params(x)
+
+    def _get_param_names(self):
+        """return parameter names."""
+        return self.k._get_param_names()
+
+    def K(self,X,X2,target):
+        """Compute the covariance matrix between X and X2."""
+        AX = np.dot(X,self.transform)
+        if X2 is None:
+            X2 = X
+            AX2 = AX
+        else:
+            AX2 = np.dot(X2, self.transform)
+        self.k.K(X,X2,target)
+        self.k.K(AX,X2,target)
+        self.k.K(X,AX2,target)
+        self.k.K(AX,AX2,target)
+
+    def dK_dtheta(self,partial,X,X2,target):
+        """derivative of the covariance matrix with respect to the parameters."""
+        AX = np.dot(X,self.transform)
+        if X2 is None:
+            X2 = X
+            AX2 = AX  # AX2 is read by the calls below, so it must be bound on this branch too
+        else:
+            AX2 = np.dot(X2, self.transform)
+        self.k.dK_dtheta(partial,X,X2,target)
+        self.k.dK_dtheta(partial,AX,X2,target)
+        self.k.dK_dtheta(partial,X,AX2,target)
+        self.k.dK_dtheta(partial,AX,AX2,target)
+
+
+    def dK_dX(self,partial,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        AX = np.dot(X,self.transform)
+        if X2 is None:
+            X2 = X
+            AX2 = AX  # AX2 is read by the calls below, so it must be bound on this branch too
+        else:
+            AX2 = np.dot(X2, self.transform)
+        self.k.dK_dX(partial, X, X2, target)
+        self.k.dK_dX(partial, AX, X2, target)
+        self.k.dK_dX(partial, X, AX2, target)
+        self.k.dK_dX(partial, AX ,AX2, target)
+
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        foo = np.zeros((X.shape[0],X.shape[0]))
+        self.K(X,X,foo)
+        target += np.diag(foo)
+
+    def dKdiag_dX(self,partial,X,target):
+        raise NotImplementedError
+
+    def dKdiag_dtheta(self,partial,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters (not implemented)."""
+        raise NotImplementedError

From 613aae641711ed2185b9118880d5ed6dc1cda446 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 6 Mar 2013 13:15:15 +0000
Subject: [PATCH 014/105] coregionalisation

---
 GPy/examples/regression.py | 69 +++++++++++++++++++++++++++++++
 GPy/kern/__init__.py       |  2 +-
 GPy/kern/constructors.py   |  6 +++
 GPy/kern/coregionalise.py  | 83 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 159 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/coregionalise.py

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 43fa0147..b8838078 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -75,6 +75,74 @@ def silhouette():
     print(m)
     return m
 
+def coregionalisation_toy2():
+    """
+    A simple demonstration of coregionalisation on two sinusoidal functions
+    """
+    X1 = np.random.rand(50,1)*8
+    X2 = np.random.rand(30,1)*5
+    index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
+    X = np.hstack((np.vstack((X1,X2)),index))
+    Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
+    Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
+    Y = np.vstack((Y1,Y2))
+
+    k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
+    k2 = GPy.kern.coregionalise(2,1)
+    k = k1.prod_orthogonal(k2)
+    m = GPy.models.GP_regression(X,Y,kernel=k)
+    m.constrain_fixed('rbf_var',1.)
+    m.constrain_positive('kappa')
+    m.ensure_default_constraints()
+    m.optimize()
+
+    pb.figure()
+    Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
+    Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1))))
+    mean, var,low,up = m.predict(Xtest1)
+    GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up)
+    mean, var,low,up = m.predict(Xtest2)
+    GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
+    pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
+    pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
+    return m
+
+
+
+
+def coregionalisation_toy():
+    """
+    A simple demonstration of coregionalisation on two sinusoidal functions
+    """
+    X1 = np.random.rand(50,1)*8
+    X2 = np.random.rand(30,1)*5
+    index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
+    X = np.hstack((np.vstack((X1,X2)),index))
+    Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
+    Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05
+    Y = np.vstack((Y1,Y2))
+
+    k1 = GPy.kern.rbf(1)
+    k2 = GPy.kern.coregionalise(2,1)
+    k = k1.prod_orthogonal(k2)
+    m = GPy.models.GP_regression(X,Y,kernel=k)
+    m.constrain_fixed('rbf_var',1.)
+    m.constrain_positive('kappa')
+    m.ensure_default_constraints()
+    m.optimize()
+
+    pb.figure()
+    Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
+    Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1))))
+    mean, var,low,up = m.predict(Xtest1)
+    GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up)
+    mean, var,low,up = m.predict(Xtest2)
+    GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
+    pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
+    pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
+    return m
+
+
 
 def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
     """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""
@@ -160,3 +228,4 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
             length_scale_lls.append(model.log_likelihood())
         lls.append(length_scale_lls)
     return np.array(lls)
+
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 3e20644b..625f6080 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal, symmetric
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal, symmetric, coregionalise
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 5d665d93..9b58c282 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -21,6 +21,7 @@ from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
 from product import product as productpart
 from product_orthogonal import product_orthogonal as product_orthogonalpart
 from symmetric import symmetric as symmetric_part
+from coregionalise import coregionalise as coregionalise_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -274,3 +275,8 @@ def symmetric(k):
     k_.parts = [symmetric_part(p) for p in k.parts]
     return k_
 
+def coregionalise(Nout,R=1, W=None, kappa=None):
+    p = coregionalise_part(Nout,R,W,kappa)
+    return kern(1,[p])
+
+
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
new file mode 100644
index 00000000..b8fdbf42
--- /dev/null
+++ b/GPy/kern/coregionalise.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2012, James Hensman and Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import kernpart
+import numpy as np
+from GPy.util.linalg import mdot, pdinv
+
+class coregionalise(kernpart):
+    """
+    Kernel for Intrinsic Coregionalization Models
+    """
+    def __init__(self,Nout,R=1, W=None, kappa=None):
+        self.D = 1
+        self.name = 'coregion'
+        self.Nout = Nout
+        self.R = R
+        if W is None:
+            self.W = np.ones((self.Nout,self.R))
+        else:
+            assert W.shape==(self.Nout,self.R)
+            self.W = W
+        if kappa is None:
+            kappa = np.ones(self.Nout)
+        else:
+            assert kappa.shape==(self.Nout,)
+        self.kappa = kappa
+        self.Nparam = self.Nout*(self.R + 1)
+        self._set_params(np.hstack([self.W.flatten(),self.kappa]))
+
+    def _get_params(self):
+        return np.hstack([self.W.flatten(),self.kappa])
+
+    def _set_params(self,x):
+        assert x.size == self.Nparam
+        self.kappa = x[-self.Nout:]
+        self.W = x[:-self.Nout].reshape(self.Nout,self.R)
+        self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
+
+    def _get_param_names(self):
+
+        return sum([['W%i_%i'%(i,j) for j in range(self.R)] for i in range(self.Nout)],[]) + ['kappa_%i'%i for i in range(self.Nout)]
+
+    def K(self,index,index2,target):
+        index = np.asarray(index,dtype=np.int)
+        if index2 is None:
+            index2 = index
+        else:
+            index2 = np.asarray(index2,dtype=np.int)
+        ii,jj = np.meshgrid(index,index2)
+        target += self.B[ii,jj].T
+
+    def Kdiag(self,index,target):
+        target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
+
+    def dK_dtheta(self,partial,index,index2,target):
+        index = np.asarray(index,dtype=np.int)
+        if index2 is None:
+            index2 = index
+        else:
+            index2 = np.asarray(index2,dtype=np.int)
+        ii,jj = np.meshgrid(index,index2)
+        PK = np.zeros((self.R,self.R))
+        dkappa = np.zeros(self.Nout)
+        partial_small = np.zeros_like(self.B)
+        for i in range(self.Nout):
+            for j in range(self.Nout):
+                partial_small[j,i] = np.sum(partial[(ii==i)*(jj==j)])
+        # partial_small[j,i] now holds the sum of the entries of partial selected by ii==i and jj==j
+        dkappa = np.diag(partial_small)
+
+        # gradient w.r.t. W of B = W W^T + diag(kappa); the factor 2 presumably assumes partial_small is symmetric -- TODO confirm
+        dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0)
+
+        target += np.hstack([dW.flatten(),dkappa])
+
+    def dKdiag_dtheta(self,partial,index,target):
+        raise NotImplementedError
+
+    def dK_dX(self,partial,X,X2,target):
+        pass
+
+
+

From 9501a76bdc563d435bd0af940275acae521ffcac Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 6 Mar 2013 13:33:24 +0000
Subject: [PATCH 015/105] added unit test for coregionalisation

---
 GPy/testing/kernel_tests.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 957bb44d..a69662fa 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -16,6 +16,22 @@ class KernelTests(unittest.TestCase):
         print m
         self.assertTrue(m.checkgrad())
 
+    def coregionalisation_test(self):
+        X1 = np.random.rand(50,1)*8
+        X2 = np.random.rand(30,1)*5
+        index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
+        X = np.hstack((np.vstack((X1,X2)),index))
+        Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
+        Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
+        Y = np.vstack((Y1,Y2))
+
+        k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
+        k2 = GPy.kern.coregionalise(2,1)
+        k = k1.prod_orthogonal(k2)
+        m = GPy.models.GP_regression(X,Y,kernel=k)
+        self.assertTrue(m.checkgrad())
+
+
 
 
 if __name__ == "__main__":

From fc34fa3eb9bfee3276446795f190f62b0bb75b1f Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 6 Mar 2013 13:36:38 +0000
Subject: [PATCH 016/105] yak shaving

---
 GPy/testing/kernel_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index a69662fa..3d738106 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -16,7 +16,7 @@ class KernelTests(unittest.TestCase):
         print m
         self.assertTrue(m.checkgrad())
 
-    def coregionalisation_test(self):
+    def test_coregionalisation(self):
         X1 = np.random.rand(50,1)*8
         X2 = np.random.rand(30,1)*5
         index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))

From b9425f7f08d4f10d8ecb58b43381cdbd2c2ce20f Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 6 Mar 2013 13:56:58 +0000
Subject: [PATCH 017/105] using setuptools instead of distutils

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 40c89ccb..d24171e2 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,8 @@
 # -*- coding: utf-8 -*-
 
 import os
-from numpy.distutils.core import Extension, setup
+from setuptools import setup
+#from numpy.distutils.core import Extension, setup
 #from sphinx.setup_command import BuildDoc
 
 # Version number

From 65f9c7bb768529e15bdce9ee41b92aad997f1346 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 6 Mar 2013 15:29:03 +0000
Subject: [PATCH 018/105] First attempt at making coregionalise work with the
 sparse model

Gradients are failing! Have implemented product_orthogonal.dKdiag_dtheta
---
 GPy/examples/regression.py     | 37 ++++++++++++++++++++++++++++++++++
 GPy/kern/coregionalise.py      | 37 ++++++++++++++++++++++++++--------
 GPy/kern/product_orthogonal.py | 24 ++++++++++++++--------
 3 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index b8838078..7d092c26 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -143,6 +143,43 @@ def coregionalisation_toy():
     return m
 
 
+def coregionalisation_sparse():
+    """
+    A simple demonstration of coregionalisation on two sinusoidal functions
+    """
+    X1 = np.random.rand(500,1)*8
+    X2 = np.random.rand(300,1)*5
+    index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
+    X = np.hstack((np.vstack((X1,X2)),index))
+    Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
+    Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05
+    Y = np.vstack((Y1,Y2))
+
+    Z = np.hstack((np.random.rand(25,1)*8,np.random.randint(0,2,25)[:,None]))
+
+    k1 = GPy.kern.rbf(1)
+    k2 = GPy.kern.coregionalise(2,2)
+    k = k1.prod_orthogonal(k2) + GPy.kern.white(2,0.001)
+
+    m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z)
+    m.constrain_fixed('rbf_var',1.)
+    m.constrain_positive('kappa')
+    m.constrain_fixed('iip')
+    m.ensure_default_constraints()
+    #m.optimize()
+
+    pb.figure()
+    Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
+    Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1))))
+    mean, var,low,up = m.predict(Xtest1)
+    GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up)
+    mean, var,low,up = m.predict(Xtest2)
+    GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
+    pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
+    pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
+    return m
+
+
 
 def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
     """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index b8fdbf42..29073b3a 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -46,8 +46,8 @@ class coregionalise(kernpart):
             index2 = index
         else:
             index2 = np.asarray(index2,dtype=np.int)
-        ii,jj = np.meshgrid(index,index2)
-        target += self.B[ii,jj].T
+        ii,jj = np.meshgrid(index2,index)
+        target += self.B[ii,jj]
 
     def Kdiag(self,index,target):
         target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
@@ -58,26 +58,47 @@ class coregionalise(kernpart):
             index2 = index
         else:
             index2 = np.asarray(index2,dtype=np.int)
-        ii,jj = np.meshgrid(index,index2)
+        ii,jj = np.meshgrid(index2,index)
         PK = np.zeros((self.R,self.R))
-        dkappa = np.zeros(self.Nout)
         partial_small = np.zeros_like(self.B)
         for i in range(self.Nout):
             for j in range(self.Nout):
-                partial_small[j,i] = np.sum(partial[(ii==i)*(jj==j)])
-        #print partial_small
+                partial_small[i,j] = np.sum(partial[(ii==i)*(jj==j)])
         dkappa = np.diag(partial_small)
 
-        ##target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
         dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0)
 
         target += np.hstack([dW.flatten(),dkappa])
 
     def dKdiag_dtheta(self,partial,index,target):
-        raise NotImplementedError
+        index = np.asarray(index,dtype=np.int).flatten()
+        partial_small = np.zeros(self.Nout)
+        for i in range(self.Nout):
+            partial_small[i] += np.sum(partial[index==i])
+        dW = 2.*self.W*partial_small[:,None]
+        dkappa = partial_small
+        target += np.hstack([dW.flatten(),dkappa])
 
     def dK_dX(self,partial,X,X2,target):
         pass
 
+    def dKdiag_dthetai_(self,partial,index,target):
+        index = np.asarray(index,dtype=np.int)
+        index2 = index
+        ii,jj = np.meshgrid(index2,index)
+        PK = np.zeros((self.R,self.R))
+        partial_small = np.zeros_like(self.B)
+        for i in range(self.Nout):
+            for j in range(self.Nout):
+                partial_small[j,i] = np.sum(partial[np.diag((ii==i)*(jj==j))])
+        #print partial_small
+        dkappa = np.diag(partial_small)
+
+        ##target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
+        partial_small = np.diag(np.diag(partial_small))
+        #dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0)
+        dW = 2.
+
+        target += np.hstack([dW.flatten(),dkappa])
 
 
diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/product_orthogonal.py
index a729c126..6b02b868 100644
--- a/GPy/kern/product_orthogonal.py
+++ b/GPy/kern/product_orthogonal.py
@@ -46,14 +46,6 @@ class product_orthogonal(kernpart):
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
         target += target1 * target2
 
-    def Kdiag(self,X,target):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        target1 = np.zeros((X.shape[0],))
-        target2 = np.zeros((X.shape[0],))
-        self.k1.Kdiag(X[:,0:self.k1.D],target1)
-        self.k2.Kdiag(X[:,self.k1.D:],target2)
-        target += target1 * target2
-
     def dK_dtheta(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
@@ -70,6 +62,22 @@ class product_orthogonal(kernpart):
         target[:self.k1.Nparam] += k1_target
         target[self.k1.Nparam:] += k2_target
 
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        target1 = np.zeros((X.shape[0],))
+        target2 = np.zeros((X.shape[0],))
+        self.k1.Kdiag(X[:,:self.k1.D],target1)
+        self.k2.Kdiag(X[:,self.k1.D:],target2)
+        target += target1 * target2
+
+    def dKdiag_dtheta(self,partial,X,target):
+        K1 = np.zeros(X.shape[0])
+        K2 = np.zeros(X.shape[0])
+        self.k1.Kdiag(X[:,:self.k1.D],K1)
+        self.k2.Kdiag(X[:,self.k1.D:],K2)
+        self.k1.dKdiag_dtheta(partial*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
+        self.k2.dKdiag_dtheta(partial*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
+
     def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X

From c6f2082839d23074b8ae4a70d508b4e932199b8b Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Wed, 6 Mar 2013 15:43:58 +0000
Subject: [PATCH 019/105] Sparse GP with EP is working now

---
 GPy/likelihoods/EP.py                   | 64 ++++++++++++-------------
 GPy/likelihoods/likelihood_functions.py |  4 +-
 GPy/models/sparse_GP.py                 | 36 ++++++++++----
 GPy/testing/unit_tests.py               | 20 +++++++-
 4 files changed, 78 insertions(+), 46 deletions(-)

diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py
index efd887ae..cddc46ee 100644
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@@ -17,7 +17,7 @@ class EP(likelihood):
         self.epsilon = epsilon
         self.eta, self.delta = power_ep
         self.data = data
-        self.N = self.data.size
+        self.N, self.D = self.data.shape
         self.is_heteroscedastic = True
         self.Nparams = 0
 
@@ -29,7 +29,7 @@ class EP(likelihood):
         #initial values for the GP variables
         self.Y = np.zeros((self.N,1))
         self.covariance_matrix = np.eye(self.N)
-        self.precision = np.ones(self.N)
+        self.precision = np.ones(self.N)[:,None]
         self.Z = 0
         self.YYT = None
 
@@ -54,18 +54,14 @@ class EP(likelihood):
 
         self.Y =  mu_tilde[:,None]
         self.YYT = np.dot(self.Y,self.Y.T)
-        self.precision = self.tau_tilde
-        self.covariance_matrix = np.diag(1./self.precision)
+        self.covariance_matrix = np.diag(1./self.tau_tilde)
+        self.precision = self.tau_tilde[:,None]
 
     def fit_full(self,K):
         """
         The expectation-propagation algorithm.
         For nomenclature see Rasmussen & Williams 2006.
         """
-        #Prior distribution parameters: p(f|X) = N(f|0,K)
-
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
         mu = np.zeros(self.N)
         Sigma = K.copy()
@@ -124,13 +120,14 @@ class EP(likelihood):
 
         return self._compute_GP_variables()
 
-    def fit_DTC(self, Knn_diag, Kmn, Kmm):
+    #def fit_DTC(self, Knn_diag, Kmn, Kmm):
+    def fit_DTC(self, Kmm, Kmn):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
         For nomenclature see ... 2013.
         """
 
-        #TODO: this doesn;t work with uncertain inputs!
+        #TODO: this doesn't work with uncertain inputs!
 
         """
         Prior approximation parameters:
@@ -158,12 +155,12 @@ class EP(likelihood):
         sigma_ = 1./tau_
         mu_ = v_/tau_
         """
-        tau_ = np.empty(self.N,dtype=float)
-        v_ = np.empty(self.N,dtype=float)
+        self.tau_ = np.empty(self.N,dtype=float)
+        self.v_ = np.empty(self.N,dtype=float)
 
         #Initial values - Marginal moments
         z = np.empty(self.N,dtype=float)
-        Z_hat = np.empty(self.N,dtype=float)
+        self.Z_hat = np.empty(self.N,dtype=float)
         phi = np.empty(self.N,dtype=float)
         mu_hat = np.empty(self.N,dtype=float)
         sigma2_hat = np.empty(self.N,dtype=float)
@@ -172,21 +169,21 @@ class EP(likelihood):
         epsilon_np1 = 1
         epsilon_np2 = 1
        	self.iterations = 0
-        np1 = [tau_tilde.copy()]
-        np2 = [v_tilde.copy()]
+        np1 = [self.tau_tilde.copy()]
+        np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
             update_order = np.random.permutation(self.N)
             for i in update_order:
                 #Cavity distribution parameters
-                tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
-                v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
+                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
+                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
-                Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
+                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                tau_tilde[i] = tau_tilde[i] + Delta_tau
-                v_tilde[i] = v_tilde[i] + Delta_v
+                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
+                self.v_tilde[i] = self.v_tilde[i] + Delta_v
                 #Posterior distribution parameters update
                 LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                 L = jitchol(LLT)
@@ -196,25 +193,26 @@ class EP(likelihood):
                 mu = mu + (Delta_v-Delta_tau*mu[i])*si
                 self.iterations += 1
             #Sigma recomputation with Cholesky decompositon
-            LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
+            LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
             L = jitchol(LLT)
             V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
             V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
             Sigma_diag = np.sum(V*V,-2)
-            Knmv_tilde = np.dot(Kmn,v_tilde)
+            Knmv_tilde = np.dot(Kmn,self.v_tilde)
             mu = np.dot(V2.T,Knmv_tilde)
-            epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N
-            epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N
-            np1.append(tau_tilde.copy())
-            np2.append(v_tilde.copy())
+            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
+            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
+            np1.append(self.tau_tilde.copy())
+            np2.append(self.v_tilde.copy())
 
         self._compute_GP_variables()
 
-    def fit_FITC(self, Knn_diag, Kmn):
+    def fit_FITC(self, Kmm, Kmn, Knn_diag):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
         For nomenclature see Naish-Guzman and Holden, 2008.
         """
+        M = Kmm.shape[0]
 
         """
         Prior approximation parameters:
@@ -235,7 +233,7 @@ class EP(likelihood):
         mu = w + P*gamma
         """
         self.w = np.zeros(self.N)
-        self.gamma = np.zeros(self.M)
+        self.gamma = np.zeros(M)
         mu = np.zeros(self.N)
         P = P0.copy()
         R = R0.copy()
@@ -271,7 +269,7 @@ class EP(likelihood):
                 self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@@ -281,10 +279,10 @@ class EP(likelihood):
                 dtd1 = Delta_tau*Diag[i] + 1.
                 dii = Diag[i]
                 Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
-                pi_ = P[i,:].reshape(1,self.M)
+                pi_ = P[i,:].reshape(1,M)
                 P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
                 Rp_i = np.dot(R,pi_.T)
-                RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
+                RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
                 R = jitchol(RTR).T
                 self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
                 self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
@@ -296,7 +294,7 @@ class EP(likelihood):
             Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
             P = (Diag / Diag0)[:,None] * P0
             RPT0 = np.dot(R0,P0.T)
-            L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
+            L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
             R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
             RPT = np.dot(R,P.T)
             Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 23881899..3e2a0361 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -37,8 +37,8 @@ class probit(likelihood_function):
         :param tau_i: precision of the cavity distribution (float)
         :param v_i: mean/variance of the cavity distribution (float)
         """
-        # TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1"
-        if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}.
+        if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
+        # TODO: some version of assert
         z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
         Z_hat = stats.norm.cdf(z)
         phi = stats.norm.pdf(z)
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 6932154d..12cc1769 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -82,9 +82,9 @@ class sparse_GP(GP):
         if self.likelihood.is_heteroscedastic:
             assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
             if self.has_uncertain_inputs:
-                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
+                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
             else:
-                tmp = self.psi1.T*(np.sqrt(self.likelihood.precision.reshape(1,self.N))/sf)
+                tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
                 self.psi2_beta_scaled = np.dot(tmp,tmp.T)
         else:
             if self.has_uncertain_inputs:
@@ -107,14 +107,18 @@ class sparse_GP(GP):
         self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
 
         # Compute dL_dpsi # FIXME: this is untested for the het. case
-        self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
+        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
         self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
         if self.likelihood.is_heteroscedastic:
-            self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
-            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
-            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
-            if not self.has_uncertain_inputs:
-                raise NotImplementedError, "TODO: recaste derivatibes in psi2 back into psi1"
+            if self.has_uncertain_inputs:
+                self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
+                self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
+                self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+            else:
+                self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
+                self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
+                self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
+                self.dL_dpsi2 = None
 
         else:
             self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
@@ -166,14 +170,28 @@ class sparse_GP(GP):
     def _get_param_names(self):
         return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
 
+    def update_likelihood_approximation(self):
+        """
+        Approximates a non-gaussian likelihood using Expectation Propagation
+
+        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
+        this function does nothing
+        """
+        if self.has_uncertain_inputs:
+            raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
+        else:
+            self.likelihood.fit_DTC(self.Kmm,self.psi1)
+            self._set_params(self._get_params()) # update the GP
+
     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
         sf2 = self.scale_factor**2
         if self.likelihood.is_heteroscedastic:
             A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
+            B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
         else:
             A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
-        B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
+            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
         C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
         D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
         return A+B+C+D
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 61fb15bb..90037dcb 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -157,13 +157,29 @@ class GradientTests(unittest.TestCase):
     def test_GP_EP_probit(self):
         N = 20
         X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
-        Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
+        Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
         kernel = GPy.kern.rbf(1)
         distribution = GPy.likelihoods.likelihood_functions.probit()
         likelihood = GPy.likelihoods.EP(Y, distribution)
         m = GPy.models.GP(X, likelihood, kernel)
         m.ensure_default_constraints()
-        self.assertTrue(m.EPEM)
+        m.update_likelihood_approximation()
+        self.assertTrue(m.checkgrad())
+        #self.assertTrue(m.EPEM)
+
+    def test_sparse_EP_DTC_probit(self):
+        N = 20
+        X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
+        Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
+        Z = np.linspace(0,15,4)[:,None]
+        kernel = GPy.kern.rbf(1)
+        distribution = GPy.likelihoods.likelihood_functions.probit()
+        likelihood = GPy.likelihoods.EP(Y, distribution)
+        m = GPy.models.sparse_GP(X, likelihood, kernel,Z)
+        m.ensure_default_constraints()
+        m.update_likelihood_approximation()
+        self.assertTrue(m.checkgrad())
+
 
     @unittest.skip("FITC will be broken for a while")
     def test_generalized_FITC(self):

From 81810d3a7b01d39baa7d363434d353682814abf4 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Wed, 6 Mar 2013 16:00:14 +0000
Subject: [PATCH 020/105] JH bugfix for slices

---
 GPy/models/GP.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 91327aab..08ac1bb1 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -72,7 +72,7 @@ class GP(model):
         self.likelihood._set_params(p[self.kern.Nparam_transformed():])    # test by Nicolas
 
 
-        self.K = self.kern.K(self.X,slices1=self.Xslices)
+        self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices)
         self.K += self.likelihood.covariance_matrix
 
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
@@ -129,7 +129,7 @@ class GP(model):
 
         For the likelihood parameters, pass in alpha = K^-1 y
         """
-        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
 
     def _raw_predict(self,_Xnew,slices=None, full_cov=False):
         """

From d00d10952c9e634c3ab498029c427f839b25a0be Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 6 Mar 2013 16:25:29 +0000
Subject: [PATCH 021/105] more messing around with coregionalize

---
 GPy/kern/coregionalise.py | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index 29073b3a..c24cb568 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -70,7 +70,7 @@ class coregionalise(kernpart):
 
         target += np.hstack([dW.flatten(),dkappa])
 
-    def dKdiag_dtheta(self,partial,index,target):
+    def dKdiag_dtheta_foo(self,partial,index,target):
         index = np.asarray(index,dtype=np.int).flatten()
         partial_small = np.zeros(self.Nout)
         for i in range(self.Nout):
@@ -82,23 +82,7 @@ class coregionalise(kernpart):
     def dK_dX(self,partial,X,X2,target):
         pass
 
-    def dKdiag_dthetai_(self,partial,index,target):
-        index = np.asarray(index,dtype=np.int)
-        index2 = index
-        ii,jj = np.meshgrid(index2,index)
-        PK = np.zeros((self.R,self.R))
-        partial_small = np.zeros_like(self.B)
-        for i in range(self.Nout):
-            for j in range(self.Nout):
-                partial_small[j,i] = np.sum(partial[np.diag((ii==i)*(jj==j))])
-        #print partial_small
-        dkappa = np.diag(partial_small)
-
-        ##target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
-        partial_small = np.diag(np.diag(partial_small))
-        #dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0)
-        dW = 2.
-
-        target += np.hstack([dW.flatten(),dkappa])
+    def dKdiag_dtheta(self,partial,index,target):
+        self.dK_dtheta(np.diag(partial),index,index,target)
 
 

From 9a97ad7348987ecaeecbc7eae49346bb17d53c86 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 7 Mar 2013 13:49:54 +0000
Subject: [PATCH 022/105] debugging the coregionalisation kern

---
 GPy/examples/regression.py |  9 ++++-----
 GPy/kern/coregionalise.py  | 22 ++++++++++++----------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 7d092c26..90fca886 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -123,7 +123,7 @@ def coregionalisation_toy():
     Y = np.vstack((Y1,Y2))
 
     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2,1)
+    k2 = GPy.kern.coregionalise(2,2)
     k = k1.prod_orthogonal(k2)
     m = GPy.models.GP_regression(X,Y,kernel=k)
     m.constrain_fixed('rbf_var',1.)
@@ -147,8 +147,8 @@ def coregionalisation_sparse():
     """
     A simple demonstration of coregionalisation on two sinusoidal functions
     """
-    X1 = np.random.rand(500,1)*8
-    X2 = np.random.rand(300,1)*5
+    X1 = np.random.rand(50,1)*8
+    X2 = np.random.rand(30,1)*5
     index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
     X = np.hstack((np.vstack((X1,X2)),index))
     Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
@@ -158,7 +158,7 @@ def coregionalisation_sparse():
     Z = np.hstack((np.random.rand(25,1)*8,np.random.randint(0,2,25)[:,None]))
 
     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2,2)
+    k2 = GPy.kern.coregionalise(2,1)
     k = k1.prod_orthogonal(k2) + GPy.kern.white(2,0.001)
 
     m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z)
@@ -180,7 +180,6 @@ def coregionalisation_sparse():
     return m
 
 
-
 def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
     """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""
 
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index c24cb568..f6b9426f 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -4,6 +4,7 @@
 from kernpart import kernpart
 import numpy as np
 from GPy.util.linalg import mdot, pdinv
+import pdb
 
 class coregionalise(kernpart):
     """
@@ -37,7 +38,6 @@ class coregionalise(kernpart):
         self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
 
     def _get_param_names(self):
-
         return sum([['W%i_%i'%(i,j) for j in range(self.R)] for i in range(self.Nout)],[]) + ['kappa_%i'%i for i in range(self.Nout)]
 
     def K(self,index,index2,target):
@@ -46,7 +46,8 @@ class coregionalise(kernpart):
             index2 = index
         else:
             index2 = np.asarray(index2,dtype=np.int)
-        ii,jj = np.meshgrid(index2,index)
+        ii,jj = np.meshgrid(index,index2)
+        ii,jj = ii.T, jj.T
         target += self.B[ii,jj]
 
     def Kdiag(self,index,target):
@@ -58,19 +59,22 @@ class coregionalise(kernpart):
             index2 = index
         else:
             index2 = np.asarray(index2,dtype=np.int)
-        ii,jj = np.meshgrid(index2,index)
-        PK = np.zeros((self.R,self.R))
+        ii,jj = np.meshgrid(index,index2)
+        ii,jj = ii.T, jj.T
+
         partial_small = np.zeros_like(self.B)
         for i in range(self.Nout):
-            for j in range(self.Nout):
-                partial_small[i,j] = np.sum(partial[(ii==i)*(jj==j)])
-        dkappa = np.diag(partial_small)
+            for j in range(i,self.Nout):
+                tmp = np.sum(partial[(ii==i)*(jj==j)])
+                partial_small[i,j] = tmp
+                partial_small[j,i] = tmp
 
+        dkappa = np.diag(partial_small)
         dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0)
 
         target += np.hstack([dW.flatten(),dkappa])
 
-    def dKdiag_dtheta_foo(self,partial,index,target):
+    def dKdiag_dtheta(self,partial,index,target):
         index = np.asarray(index,dtype=np.int).flatten()
         partial_small = np.zeros(self.Nout)
         for i in range(self.Nout):
@@ -82,7 +86,5 @@ class coregionalise(kernpart):
     def dK_dX(self,partial,X,X2,target):
         pass
 
-    def dKdiag_dtheta(self,partial,index,target):
-        self.dK_dtheta(np.diag(partial),index,index,target)
 
 

From af510d166a53b548ab12d7dfb60b6d87a1caafb5 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 7 Mar 2013 16:01:00 +0000
Subject: [PATCH 023/105] some changes to product_orthogonal

dKdiag_dX is now implemented; some of the code is a little tidier
---
 GPy/kern/product_orthogonal.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/product_orthogonal.py
index 6b02b868..e35c927c 100644
--- a/GPy/kern/product_orthogonal.py
+++ b/GPy/kern/product_orthogonal.py
@@ -36,13 +36,13 @@ class product_orthogonal(kernpart):
     def _get_param_names(self):
         """return parameter names."""
         return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
-    
+
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
         if X2 is None: X2 = X
         target1 = np.zeros((X.shape[0],X2.shape[0]))
         target2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],target1)
+        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
         target += target1 * target2
 
@@ -51,21 +51,16 @@ class product_orthogonal(kernpart):
         if X2 is None: X2 = X
         K1 = np.zeros((X.shape[0],X2.shape[0]))
         K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
+        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
 
-        k1_target = np.zeros(self.k1.Nparam)
-        k2_target = np.zeros(self.k2.Nparam)
-        self.k1.dK_dtheta(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], k1_target)
-        self.k2.dK_dtheta(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], k2_target)
-
-        target[:self.k1.Nparam] += k1_target
-        target[self.k1.Nparam:] += k2_target
+        self.k1.dK_dtheta(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+        self.k2.dK_dtheta(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
-        target1 = np.zeros((X.shape[0],))
-        target2 = np.zeros((X.shape[0],))
+        target1 = np.zeros(X.shape[0])
+        target2 = np.zeros(X.shape[0])
         self.k1.Kdiag(X[:,:self.k1.D],target1)
         self.k2.Kdiag(X[:,self.k1.D:],target2)
         target += target1 * target2
@@ -89,5 +84,12 @@ class product_orthogonal(kernpart):
         self.k1.dK_dX(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
         self.k2.dK_dX(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
 
-    def dKdiag_dX(self,X,target):
-        pass
+    def dKdiag_dX(self, partial, X, target):
+        K1 = np.zeros(X.shape[0])
+        K2 = np.zeros(X.shape[0])
+        self.k1.Kdiag(X[:,0:self.k1.D],K1)
+        self.k2.Kdiag(X[:,self.k1.D:],K2)
+
+        self.k1.dK_dX(partial*K2, X[:,:self.k1.D], target)
+        self.k2.dK_dX(partial*K1, X[:,self.k1.D:], target)
+

From 24d705417418d152bc98e102b77d3afa7e79e694 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Fri, 8 Mar 2013 11:46:17 +0000
Subject: [PATCH 024/105] some small changes.

---
 GPy/models/__init__.py         |   1 +
 GPy/models/generalized_FITC.py | 162 +++++++++++++++++++++++++++++++++
 GPy/models/sparse_GP.py        |   7 +-
 3 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 GPy/models/generalized_FITC.py

diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index c099d0d5..61591320 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -11,3 +11,4 @@ from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from BGPLVM import Bayesian_GPLVM
+from generalized_FITC import generalized_FITC
diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py
new file mode 100644
index 00000000..7e0c656e
--- /dev/null
+++ b/GPy/models/generalized_FITC.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+import pylab as pb
+from ..util.linalg import mdot, jitchol, chol_inv, pdinv
+from ..util.plot import gpplot
+from scipy import linalg
+from .. import kern
+from sparse_GP import sparse_GP
+
+"""
+import numpy as np
+import pylab as pb
+from scipy import stats, linalg
+from .. import kern
+from ..core import model
+from ..util.linalg import pdinv,mdot
+from ..util.plot import gpplot
+#from ..inference.Expectation_Propagation import FITC
+from ..likelihoods.EP import FITC
+from ..likelihoods import likelihood,probit
+"""
+
+class generalized_FITC(sparse_GP):
+    def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
+    #def __init__(self, X, likelihood, kernel=None, inducing=10, epsilon_ep=1e-3, powerep=[1.,1.]):
+        """
+        Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
+
+        :param X: input observations
+        :param likelihood: Output's likelihood (likelihood class)
+        :param kernel: a GPy kernel
+        :param Z:  Either an array specifying the inducing points location or a scalar defining their number.
+        """
+
+        if type(Z) == int:
+            self.M = Z
+            self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1)
+        elif type(Z) == np.ndarray:
+            self.Z = Z
+            self.M = self.Z.shape[0]
+
+        self._precision = likelihood.precision
+
+        sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False)
+        self.scale_factor = 100.
+
+    def update_likelihood_approximation(self):
+        """
+        Approximates a non-gaussian likelihood using Expectation Propagation
+
+        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
+        this function does nothing
+        """
+        if self.has_uncertain_inputs:
+            raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
+        else:
+            self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+            self._precision = self.likelihood.precision # Save the true precision
+            self.likelihood.precision = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
+            self._set_params(self._get_params()) # update the GP
+
+    def _set_params(self, p):
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
+        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
+        self._compute_kernel_matrices()
+        self._computations()
+        self._FITC_computations()
+
+    def _FITC_computations(self):
+        """
+        FITC approximation doesn't have the correction term in the log-likelihood bound,
+        but adds a diagonal term to the covariance matrix.
+        This function:
+            - computes the diagonal term
+            - eliminates the extra terms computed in the sparse_GP approximation
+            - computes the likelihood gradients wrt the true precision.
+        """
+        # Compute FITC's diagonal term of the covariance
+        sf = self.scale_factor
+        sf2 = sf**2
+        self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
+        self.Diag0 = self.psi0 - np.diag(self.Qnn)
+
+        self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
+        self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
+        self.RPT0 = np.dot(self.Lmi,self.psi1)
+        self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
+        self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
+        self.RPT = np.dot(self.R,self.P.T)
+        self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
+        self.w = self.Diag * self.likelihood.v_tilde
+        self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
+        self.mu = self.w + np.dot(self.P,self.gamma)
+        self.mu_tilde = (self.likelihood.v_tilde/self.likelihood.tau_tilde)[:,None]
+
+        # Remove extra term from dL_dpsi
+        self.dL_dpsi0 = np.zeros(self.N)
+        # Remove extra term from dL_dKmm
+        self.dL_dKmm = +0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
+        #the partial derivative vector for the likelihood with the true precision
+        if self.likelihood.Nparams ==0:
+            #save computation here
+            self.partial_for_likelihood = None
+        elif self.likelihood.is_heteroscedastic:
+            raise NotImplementedError, "heteroscedatic derivates not implemented"
+        else:
+            beta = self.likelihood._precision # NOTE the true precison is now '_precison' not 'precision'
+            dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
+            #dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
+            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
+            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
+            self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
+
+
+
+
+    def _raw_predict(self, Xnew, slices, full_cov=True):
+        """
+        Make a prediction for the vsGP model
+
+        Arguments
+        ---------
+        X : Input prediction data - Nx1 numpy array (floats)
+        """
+        Kx = self.kern.K(self.Z, Xnew)
+        #K_x = self.kernel.K(self.Z,X)
+        if full_cov:
+            Kxx = self.kern.K(Xnew)
+        else:
+            Kxx = self.kern.K(Xnew)#FIXME
+            #raise NotImplementedError
+            #Kxx = self.kern.Kdiag(Xnew)
+
+        # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
+
+        # Ci = I + (RPT0)Di(RPT0).T
+        # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
+        #   = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
+        #   = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
+        #   = I - V.T * V
+        U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
+        V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
+        C = np.eye(self.M) - np.dot(V.T,V)
+        mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
+        #self.C = C
+        #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
+        #self.mu_u = mu_u
+        #self.U = U
+        # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
+        mu_H = np.dot(mu_u,self.mu)
+        self.mu_H = mu_H
+        Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
+        # q(f_star|y) = N(f_star|mu_star,sigma2_star)
+        KR0T = np.dot(Kx.T,self.Lmi.T)
+        mu_star = np.dot(KR0T,mu_H)
+        sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
+        vdiag = np.diag(sigma2_star)
+        return mu_star[:,None],vdiag[:,None]
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 12cc1769..54eebd2f 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -72,7 +72,7 @@ class sparse_GP(GP):
             self.psi2 = None
 
     def _computations(self):
-        # TODO find routine to multiply triangular matrices
+        #TODO: find routine to multiply triangular matrices
         #TODO: slices for psi statistics (easy enough)
 
         sf = self.scale_factor
@@ -106,7 +106,7 @@ class sparse_GP(GP):
         self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
         self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
 
-        # Compute dL_dpsi # FIXME: this is untested for the het. case
+        # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
         self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
         if self.likelihood.is_heteroscedastic:
@@ -180,7 +180,8 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            self.likelihood.fit_DTC(self.Kmm,self.psi1)
+            #self.likelihood.fit_DTC(self.Kmm,self.psi1)
+            self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
             self._set_params(self._get_params()) # update the GP
 
     def log_likelihood(self):

From f881e65761acfc3ee3ef8eae6db13bf00db14faa Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 8 Mar 2013 18:21:29 +0000
Subject: [PATCH 025/105] coregionalisation seems to be a go-go

---
 GPy/examples/regression.py     | 22 +++++++++++++++-------
 GPy/kern/coregionalise.py      |  6 +++---
 GPy/kern/product_orthogonal.py |  4 ++--
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 90fca886..d3442504 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -94,7 +94,8 @@ def coregionalisation_toy2():
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.ensure_default_constraints()
-    m.optimize()
+    m.optimize('sim',max_f_eval=5000,messages=1)
+    #m.optimize()
 
     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
@@ -129,7 +130,7 @@ def coregionalisation_toy():
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.ensure_default_constraints()
-    m.optimize()
+    #m.optimize()
 
     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
@@ -147,26 +148,29 @@ def coregionalisation_sparse():
     """
     A simple demonstration of coregionalisation on two sinusoidal functions
     """
-    X1 = np.random.rand(50,1)*8
-    X2 = np.random.rand(30,1)*5
+    X1 = np.random.rand(500,1)*8
+    X2 = np.random.rand(300,1)*5
     index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
     X = np.hstack((np.vstack((X1,X2)),index))
     Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
     Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05
     Y = np.vstack((Y1,Y2))
 
-    Z = np.hstack((np.random.rand(25,1)*8,np.random.randint(0,2,25)[:,None]))
+    M = 40
+    Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None]))
+    #Z = X.copy()
 
     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2,1)
+    k2 = GPy.kern.coregionalise(2,2)
     k = k1.prod_orthogonal(k2) + GPy.kern.white(2,0.001)
 
     m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z)
+    m.scale_factor = 10000.
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.constrain_fixed('iip')
     m.ensure_default_constraints()
-    #m.optimize()
+    m.optimize_restarts(5,robust=True,messages=1)
 
     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
@@ -177,6 +181,10 @@ def coregionalisation_sparse():
     GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
     pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
     pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
+    y = pb.ylim()[0]
+    pb.plot(Z[:,0][Z[:,1]==0],np.zeros(np.sum(Z[:,1]==0))+y,'r|',mew=2)
+    pb.plot(Z[:,0][Z[:,1]==1],np.zeros(np.sum(Z[:,1]==1))+y,'g|',mew=2)
+    print Z
     return m
 
 
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index f6b9426f..2a9177d5 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -64,13 +64,13 @@ class coregionalise(kernpart):
 
         partial_small = np.zeros_like(self.B)
         for i in range(self.Nout):
-            for j in range(i,self.Nout):
+            for j in range(self.Nout):
                 tmp = np.sum(partial[(ii==i)*(jj==j)])
                 partial_small[i,j] = tmp
-                partial_small[j,i] = tmp
 
         dkappa = np.diag(partial_small)
-        dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0)
+        partial_small += partial_small.T
+        dW = (self.W[:,None,:]*partial_small[:,:,None]).sum(0)
 
         target += np.hstack([dW.flatten(),dkappa])
 
diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/product_orthogonal.py
index e35c927c..a231cf8b 100644
--- a/GPy/kern/product_orthogonal.py
+++ b/GPy/kern/product_orthogonal.py
@@ -40,8 +40,8 @@ class product_orthogonal(kernpart):
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
         if X2 is None: X2 = X
-        target1 = np.zeros((X.shape[0],X2.shape[0]))
-        target2 = np.zeros((X.shape[0],X2.shape[0]))
+        target1 = np.zeros_like(target)
+        target2 = np.zeros_like(target)
         self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
         target += target1 * target2

From ec748e2d6b9a20480ac51ed175527686444ecf56 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 10:33:29 +0000
Subject: [PATCH 026/105] all the product_orthogonal have been changed to
 prod_orthogonal for consistency

---
 GPy/kern/__init__.py                                 |  2 +-
 GPy/kern/constructors.py                             | 12 ++++++------
 GPy/kern/kern.py                                     |  8 ++++----
 GPy/kern/{product.py => prod.py}                     |  2 +-
 .../{product_orthogonal.py => prod_orthogonal.py}    |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)
 rename GPy/kern/{product.py => prod.py} (99%)
 rename GPy/kern/{product_orthogonal.py => prod_orthogonal.py} (99%)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 625f6080..132fad41 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal, symmetric, coregionalise
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 9b58c282..b848821b 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -18,8 +18,8 @@ from Brownian import Brownian as Brownianpart
 from periodic_exponential import periodic_exponential as periodic_exponentialpart
 from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
 from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
-from product import product as productpart
-from product_orthogonal import product_orthogonal as product_orthogonalpart
+from prod import prod as prodpart
+from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
 from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
@@ -245,7 +245,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
     part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
     return kern(D, [part])
 
-def product(k1,k2):
+def prod(k1,k2):
     """
      Construct a product kernel over D from two kernels over D
 
@@ -253,10 +253,10 @@ def product(k1,k2):
     :type k1, k2: kernpart
     :rtype: kernel object
     """
-    part = productpart(k1,k2)
+    part = prodpart(k1,k2)
     return kern(k1.D, [part])
 
-def product_orthogonal(k1,k2):
+def prod_orthogonal(k1,k2):
     """
      Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
 
@@ -264,7 +264,7 @@ def product_orthogonal(k1,k2):
     :type k1, k2: kernpart
     :rtype: kernel object
     """
-    part = product_orthogonalpart(k1,k2)
+    part = prod_orthogonalpart(k1,k2)
     return kern(k1.D+k2.D, [part])
 
 def symmetric(k):
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 99ad46ea..639ab5e9 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -7,8 +7,8 @@ import pylab as pb
 from ..core.parameterised import parameterised
 from kernpart import kernpart
 import itertools
-from product_orthogonal import product_orthogonal
-from product import product
+from prod_orthogonal import prod_orthogonal
+from prod import prod
 
 class kern(parameterised):
     def __init__(self,D,parts=[], input_slices=None):
@@ -161,7 +161,7 @@ class kern(parameterised):
         K1 = self.copy()
         K2 = other.copy()
 
-        newkernparts = [product(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
+        newkernparts = [prod(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
 
         slices = []
         for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
@@ -183,7 +183,7 @@ class kern(parameterised):
         K1 = self.copy()
         K2 = other.copy()
 
-        newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
+        newkernparts = [prod_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
 
         slices = []
         for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
diff --git a/GPy/kern/product.py b/GPy/kern/prod.py
similarity index 99%
rename from GPy/kern/product.py
rename to GPy/kern/prod.py
index 92522418..218a33df 100644
--- a/GPy/kern/product.py
+++ b/GPy/kern/prod.py
@@ -6,7 +6,7 @@ import numpy as np
 import hashlib
 #from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
 
-class product(kernpart):
+class prod(kernpart):
     """
     Computes the product of 2 kernels that are defined on the same space
 
diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/prod_orthogonal.py
similarity index 99%
rename from GPy/kern/product_orthogonal.py
rename to GPy/kern/prod_orthogonal.py
index a231cf8b..12b6629f 100644
--- a/GPy/kern/product_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -6,7 +6,7 @@ import numpy as np
 import hashlib
 #from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
 
-class product_orthogonal(kernpart):
+class prod_orthogonal(kernpart):
     """
     Computes the product of 2 kernels
 

From 393662b05d00b4468094807ba20243e44f17530e Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 10:43:17 +0000
Subject: [PATCH 027/105] some tidying of the psi statistic cross terms

---
 GPy/examples/oil_flow_demo.py |  2 +-
 GPy/kern/kern.py              | 78 +++++++++++++----------------------
 2 files changed, 29 insertions(+), 51 deletions(-)

diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py
index 71fb1bd3..1e9f4f5a 100644
--- a/GPy/examples/oil_flow_demo.py
+++ b/GPy/examples/oil_flow_demo.py
@@ -41,7 +41,7 @@ m.constrain_positive('(rbf|bias|S|linear|white|noise)')
 # m.unconstrain('white')
 # m.constrain_bounded('white', 1e-6, 10.0)
 # plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
-m.optimize(messages = True)
+#m.optimize(messages = True)
 # m.optimize('tnc', messages = True)
 # plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
 # # pb.figure()
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 99ad46ea..dd121a00 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -371,16 +371,17 @@ class kern(parameterised):
 
     def psi2(self,Z,mu,S,slices1=None,slices2=None):
         """
-        :Z: np.ndarray of inducing inputs (M x Q)
-        : mu, S: np.ndarrays of means and variacnes (each N x Q)
-        :returns psi2: np.ndarray (N,M,M,Q) """
+        :param Z: np.ndarray of inducing inputs (M x Q)
+        :param mu, S: np.ndarrays of means and variances (each N x Q)
+        :returns psi2: np.ndarray (N,M,M)
+        """
         target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
         slices1, slices2 = self._process_slices(slices1,slices2)
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
         #compute the "cross" terms
         for p1, p2 in itertools.combinations(self.parts,2):
-            #white doesn;t compine with anything
+            #white doesn;t combine with anything
             if p1.name=='white' or p2.name=='white':
                 pass
             #rbf X bias
@@ -396,28 +397,9 @@ class kern(parameterised):
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
-
-
-
-
-        # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
-        # not really expensive, since it's just a matrix of zeroes.
-        # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
-        # [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
-
-        crossterms = 0.0
-        # for 3 kernels this returns something like
-        # [(0,1), (0,2), (1,2)]
-        # in theory, we should also account for (1,0), (2,0) and so on, but
-        # the transpose deals exactly with that
-        # for a,b in itertools.combinations(psi1_matrices, 2):
-        #     tmp = np.multiply(a,b)
-        #     crossterms += tmp[:,None,:] + tmp[:, :,None]
-
-        return target + crossterms
+        return target
 
     def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
-        """Returns shape (N,M,M,Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
         [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
@@ -429,7 +411,7 @@ class kern(parameterised):
             ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
             ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
 
-            #white doesn;t compine with anything
+            #white doesn;t combine with anything
             if p1.name=='white' or p2.name=='white':
                 pass
             #rbf X bias
@@ -447,26 +429,6 @@ class kern(parameterised):
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
-        # # "crossterms"
-        # # 1. get all the psi1 statistics
-        # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
-        # [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
-
-        # partial1 = np.ones_like(partial1)
-        # # 2. get all the dpsi1/dtheta gradients
-        # psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
-        # [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
-
-
-        # # 3. multiply them somehow
-        # for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
-
-        #     tmp = (psi1_gradients[a][None, None] * psi1_matrices[b][:,:, None])
-        #     # target += (tmp[None] + tmp[:,None]).sum(0).sum(0).sum(0)
-        #     # gne = (psi1_gradients[a].sum()*psi1_matrices[b].sum())
-        #     # target += gne
-        #     #target += (gne[None] + gne[:, None]).sum(0)
-        #     target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0)
         return self._transform_gradients(target)
 
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
@@ -475,16 +437,15 @@ class kern(parameterised):
         [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
         #compute the "cross" terms
-        #TODO: slices (need to iterate around the input slices also...)
         for p1, p2 in itertools.combinations(self.parts,2):
-            #white doesn;t compine with anything
+            #white doesn;t combine with anything
             if p1.name=='white' or p2.name=='white':
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S)
+                target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S,target)
             elif p2.name=='bias' and p1.name=='rbf':
-                target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S)
+                target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S,target)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -502,7 +463,24 @@ class kern(parameterised):
         target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
         [p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
-        #TODO: there are some extra terms to compute here!
+        #compute the "cross" terms
+        for p1, p2 in itertools.combinations(self.parts,2):
+            #white doesn;t combine with anything
+            if p1.name=='white' or p2.name=='white':
+                pass
+            #rbf X bias
+            elif p1.name=='bias' and p2.name=='rbf':
+                target += p2.dpsi1_dmuS(partial.sum(1)*p1.variance,Z,mu,S,target_mu,target_S)
+            elif p2.name=='bias' and p1.name=='rbf':
+                target += p1.dpsi1_dmuS(partial.sum(2)*p2.variance,Z,mu,S,target_mu,target_S)
+            #rbf X linear
+            elif p1.name=='linear' and p2.name=='rbf':
+                raise NotImplementedError #TODO
+            elif p2.name=='linear' and p1.name=='rbf':
+                raise NotImplementedError #TODO
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+
         return target_mu, target_S
 
     def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None,*args,**kwargs):

From 4d355d823ffe33023e8eb05df5d65f27d1742a6c Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 10:45:24 +0000
Subject: [PATCH 028/105] removed log_likelihood_gradients_transformed, now
 everything is done in the objective functions

---
 GPy/core/model.py | 63 ++++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index b6cedbaf..703e615d 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -121,9 +121,6 @@ class model(parameterised):
         else:
             raise AttributeError, "no parameter matches %s"%name
 
-
-
-
     def log_prior(self):
         """evaluate the prior"""
         return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
@@ -135,12 +132,11 @@ class model(parameterised):
         [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
         return ret
 
-    def _log_likelihood_gradients_transformed(self):
+    def _transform_gradients(self, g):
         """
-        Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model.
-        Adjust the gradient for constraints and ties, return.
+        Takes a list of gradients and return an array of transformed gradients (positive/negative/tied/and so on)
         """
-        g = self._log_likelihood_gradients() + self._log_prior_gradients()
+
         x = self._get_params()
         g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
         g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
@@ -152,6 +148,7 @@ class model(parameterised):
         else:
             return g
 
+
     def randomize(self):
         """
         Randomize the model.
@@ -241,6 +238,27 @@ class model(parameterised):
                         print "Warning! constraining %s postive"%name
 
 
+    def objective_function(self, x):
+        """
+        The objective function passed to the optimizer. It combines the likelihood and the priors.
+        """
+        self._set_params_transformed(x)
+        return -self.log_likelihood() - self.log_prior()
+
+    def objective_function_gradients(self, x):
+        """
+        Gets the gradients from the likelihood and the priors.
+        """
+        self._set_params_transformed(x)
+        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
+        prior_gradients = self._transform_gradients(self._log_prior_gradients())
+        return -LL_gradients - prior_gradients
+
+    def objective_and_gradients(self, x):
+        obj_f = self.objective_function(x)
+        obj_grads = self.objective_function_gradients(x)
+        return obj_f, obj_grads
+
     def optimize(self, optimizer=None, start=None, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
@@ -254,22 +272,12 @@ class model(parameterised):
         if optimizer is None:
             optimizer = self.preferred_optimizer
 
-        def f(x):
-            self._set_params_transformed(x)
-            return -self.log_likelihood()-self.log_prior()
-        def fp(x):
-            self._set_params_transformed(x)
-            return -self._log_likelihood_gradients_transformed()
-        def f_fp(x):
-            self._set_params_transformed(x)
-            return -self.log_likelihood()-self.log_prior(),-self._log_likelihood_gradients_transformed()
-
         if start == None:
             start = self._get_params_transformed()
 
         optimizer = optimization.get_optimizer(optimizer)
         opt = optimizer(start, model = self, **kwargs)
-        opt.run(f_fp=f_fp, f=f, fp=fp)
+        opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
         self.optimization_runs.append(opt)
 
         self._set_params_transformed(opt.x_opt)
@@ -357,12 +365,9 @@ class model(parameterised):
             dx = step*np.sign(np.random.uniform(-1,1,x.size))
 
             #evaulate around the point x
-            self._set_params_transformed(x+dx)
-            f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-            self._set_params_transformed(x-dx)
-            f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-            self._set_params_transformed(x)
-            gradient = self._log_likelihood_gradients_transformed()
+            f1, g1 = self.objective_and_gradients(x+dx)
+            f2, g2 = self.objective_and_gradients(x-dx)
+            gradient = self.objective_function_gradients(x)
 
             numerical_gradient = (f1-f2)/(2*dx)
             global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
@@ -398,14 +403,10 @@ class model(parameterised):
             for i in param_list:
                 xx = x.copy()
                 xx[i] += step
-                self._set_params_transformed(xx)
-                f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
+                f1, g1 = self.objective_and_gradients(xx)
                 xx[i] -= 2.*step
-                self._set_params_transformed(xx)
-                f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
-                self._set_params_transformed(x)
-                gradient = self._log_likelihood_gradients_transformed()[i]
-
+                f2, g2 = self.objective_and_gradients(xx)
+                gradient = self.objective_function_gradients(x)[i]
 
                 numerical_gradient = (f1-f2)/(2*step)
                 ratio = (f1-f2)/(2*step*gradient)

From b39de379fd5d403056ebc8f04225386bfa72a565 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:04:11 +0000
Subject: [PATCH 029/105] added tutorial in examples

---
 GPy/examples/tuto_GP_regression.py   |  56 +++++++++++
 GPy/examples/tuto_kernel_overview.py | 139 +++++++++++++++++++++++++++
 doc/tuto_GP_regression.rst           |   2 +-
 doc/tuto_kernel_overview.rst         |   1 +
 4 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 GPy/examples/tuto_GP_regression.py
 create mode 100644 GPy/examples/tuto_kernel_overview.py

diff --git a/GPy/examples/tuto_GP_regression.py b/GPy/examples/tuto_GP_regression.py
new file mode 100644
index 00000000..b3953de0
--- /dev/null
+++ b/GPy/examples/tuto_GP_regression.py
@@ -0,0 +1,56 @@
+# The detailed explanations of the commands used in this file can be found in the tutorial section
+
+import pylab as pb
+pb.ion()
+import numpy as np
+import GPy
+
+X = np.random.uniform(-3.,3.,(20,1))
+Y = np.sin(X) + np.random.randn(20,1)*0.05
+
+kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+
+m = GPy.models.GP_regression(X,Y,kernel)
+
+print m
+m.plot()
+
+m.constrain_positive('')
+
+m.unconstrain('')                            # Required to remove the previous constrains
+m.constrain_positive('rbf_variance')
+m.constrain_bounded('lengthscale',1.,10. )
+m.constrain_fixed('noise',0.0025)
+
+m.optimize()
+
+m.optimize_restarts(Nrestarts = 10)
+
+###########################
+#  2-dimensional example  #
+###########################
+
+import pylab as pb
+pb.ion()
+import numpy as np
+import GPy
+
+# sample inputs and outputs
+X = np.random.uniform(-3.,3.,(50,2))
+Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
+
+# define kernel
+ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
+
+# create simple GP model
+m = GPy.models.GP_regression(X,Y,ker)
+
+# contrain all parameters to be positive
+m.constrain_positive('')
+
+# optimize and plot
+pb.figure()
+m.optimize('tnc', max_f_eval = 1000)
+
+m.plot()
+print(m)
diff --git a/GPy/examples/tuto_kernel_overview.py b/GPy/examples/tuto_kernel_overview.py
new file mode 100644
index 00000000..ebd19d76
--- /dev/null
+++ b/GPy/examples/tuto_kernel_overview.py
@@ -0,0 +1,139 @@
+# The detailed explanations of the commands used in this file can be found in the tutorial section
+
+import pylab as pb
+import numpy as np
+import GPy
+pb.ion()
+
+ker1 = GPy.kern.rbf(1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.)
+ker3 = GPy.kern.rbf(1, .5, .5)
+
+print ker2
+ker1.plot()
+ker2.plot()
+ker3.plot()
+
+k1 = GPy.kern.rbf(1,1.,2.)
+k2 = GPy.kern.Matern32(1, 0.5, 0.2)
+
+# Product of kernels
+k_prod = k1.prod(k2)
+k_prodorth = k1.prod_orthogonal(k2)
+
+# Sum of kernels
+k_add = k1.add(k2)
+k_addorth = k1.add_orthogonal(k2)    
+
+pb.figure(figsize=(8,8))
+pb.subplot(2,2,1)
+k_prod.plot()
+pb.title('prod')
+pb.subplot(2,2,2)
+k_prodorth.plot()
+pb.title('prod_orthogonal')
+pb.subplot(2,2,3)
+k_add.plot()
+pb.title('add')
+pb.subplot(2,2,4)
+k_addorth.plot()
+pb.title('add_orthogonal')
+pb.subplots_adjust(wspace=0.3, hspace=0.3)
+
+k1 = GPy.kern.rbf(1,1.,2)
+k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
+
+k = k1 * k2  # equivalent to k = k1.prod(k2)
+print k
+
+# Simulate sample paths
+X = np.linspace(-5,5,501)[:,None]
+Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
+
+# plot
+pb.figure(figsize=(10,4))
+pb.subplot(1,2,1)
+k.plot()
+pb.subplot(1,2,2)
+pb.plot(X,Y.T)
+pb.ylabel("Sample path")
+pb.subplots_adjust(wspace=0.3)
+
+k = (k1+k2)*(k1+k2)
+print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
+
+k1 = GPy.kern.rbf(1)
+k2 = GPy.kern.Matern32(1)
+k3 = GPy.kern.white(1)
+
+k = k1 + k2 + k3
+print k
+
+k.constrain_positive('var')
+k.constrain_fixed(np.array([1]),1.75)
+k.tie_param('len')
+k.unconstrain('white')
+k.constrain_bounded('white',lower=1e-5,upper=.5)
+print k
+
+k_cst = GPy.kern.bias(1,variance=1.)
+k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
+Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
+print Kanova
+
+# sample inputs and outputs
+X = np.random.uniform(-3.,3.,(40,2))
+Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
+
+# Create GP regression model
+m = GPy.models.GP_regression(X,Y,Kanova)
+pb.figure(figsize=(5,5))
+m.plot()
+
+pb.figure(figsize=(20,3))
+pb.subplots_adjust(wspace=0.5)
+pb.subplot(1,5,1)
+m.plot()
+pb.subplot(1,5,2)
+pb.ylabel("=   ",rotation='horizontal',fontsize='30')
+pb.subplot(1,5,3)
+m.plot(which_functions=[False,True,False,False])
+pb.ylabel("cst          +",rotation='horizontal',fontsize='30')
+pb.subplot(1,5,4)
+m.plot(which_functions=[False,False,True,False])
+pb.ylabel("+   ",rotation='horizontal',fontsize='30')
+pb.subplot(1,5,5)
+pb.ylabel("+   ",rotation='horizontal',fontsize='30')
+m.plot(which_functions=[False,False,False,True])
+
+import pylab as pb
+import numpy as np
+import GPy
+pb.ion()
+
+ker1 = GPy.kern.rbf(D=1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
+ker3 = GPy.kern.rbf(1, .5, .25)
+
+ker1.plot()
+ker2.plot()
+ker3.plot()
+#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
+
+kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1),  GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
+kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
+
+pb.figure(figsize=(16,12))
+pb.subplots_adjust(wspace=.5, hspace=.5)
+for i, kern in enumerate(kernels):
+   pb.subplot(3,4,i+1)
+   kern.plot(x=7.5,plot_limits=[0.00001,15.])
+   pb.title(kernel_names[i]+ '\n')
+
+# actual plot for the noise
+i = 11
+X = np.linspace(0.,15.,201)
+WN = 0*X
+WN[100] = 1.
+pb.subplot(3,4,i+1)
+pb.plot(X,WN,'b')
diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst
index 92b25bc0..9de79a8c 100644
--- a/doc/tuto_GP_regression.rst
+++ b/doc/tuto_GP_regression.rst
@@ -2,7 +2,7 @@
 Gaussian process regression tutorial
 *************************************
 
-We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model.
+We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_GP_regression.py.
 
 We first import the libraries we will need: ::
 
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index a8f5b53d..6ab439b6 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -2,6 +2,7 @@
 ****************************
 tutorial : A kernel overview
 ****************************
+The aim of this tutorial is to give a better understanding of the kernel objects in GPy and to list the ones that are already implemented. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_kernel_overview.py.
 
 First we import the libraries we will need ::
 

From 1d98d0a718159d37cddd58c106eb802251ffc8f4 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 11:06:08 +0000
Subject: [PATCH 030/105] Fixed a transpose bug in sparse_GPLVM

---
 GPy/models/sparse_GPLVM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py
index fe7c1c43..542fbe0e 100644
--- a/GPy/models/sparse_GPLVM.py
+++ b/GPy/models/sparse_GPLVM.py
@@ -43,7 +43,7 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
 
     def dL_dX(self):
         dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
-        dL_dX += self.kern.dK_dX(self.dL_dpsi1,self.X,self.Z)
+        dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
 
         return dL_dX
 

From cd7539a4292aa2abe3c3faff6933bc1ac8c8e500 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 11:08:53 +0000
Subject: [PATCH 031/105] added simple gplvm_tests

---
 GPy/testing/gplvm_tests.py | 47 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 GPy/testing/gplvm_tests.py

diff --git a/GPy/testing/gplvm_tests.py b/GPy/testing/gplvm_tests.py
new file mode 100644
index 00000000..51828768
--- /dev/null
+++ b/GPy/testing/gplvm_tests.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2012, Nicolo Fusi
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+class GPLVMTests(unittest.TestCase):
+    def test_bias_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.GPLVM(Y, Q, kernel = k)
+        m.ensure_default_constraints()
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+    def test_linear_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.GPLVM(Y, Q, kernel = k)
+        m.ensure_default_constraints()
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+    def test_rbf_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.GPLVM(Y, Q, kernel = k)
+        m.ensure_default_constraints()
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()

From 7d4e568d7b10e36207982ae5a78c35777519c3c9 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 11:11:42 +0000
Subject: [PATCH 032/105] added sparse_gplvm_tests -- they fail

---
 GPy/testing/sparse_gplvm_tests.py | 47 +++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 GPy/testing/sparse_gplvm_tests.py

diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py
new file mode 100644
index 00000000..72bb5bf8
--- /dev/null
+++ b/GPy/testing/sparse_gplvm_tests.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2012, Nicolo Fusi, James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+class sparse_GPLVMTests(unittest.TestCase):
+    def test_bias_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
+        m.ensure_default_constraints()
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+    def test_linear_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
+        m.ensure_default_constraints()
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+    def test_rbf_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
+        m.ensure_default_constraints()
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()

From c561867b3cbd9833ca81725d02af2e79e3474382 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 11:19:20 +0000
Subject: [PATCH 033/105] skipping a test known to fail (linear sparse)

---
 GPy/testing/sparse_gplvm_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py
index 72bb5bf8..35fa4fcf 100644
--- a/GPy/testing/sparse_gplvm_tests.py
+++ b/GPy/testing/sparse_gplvm_tests.py
@@ -18,6 +18,7 @@ class sparse_GPLVMTests(unittest.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
+    @unittest.skip('linear kernels do not have dKdiag_dX')
     def test_linear_kern(self):
         N, M, Q, D = 10, 3, 2, 4
         X = np.random.rand(N, Q)

From 3950347e3f61d8f645e637bb3dffc2a953593628 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:32:56 +0000
Subject: [PATCH 034/105] Draft of documentation for implemented kernels

---
 doc/kernel_implementation.rst | 9 +++++++++
 doc/tuto_kernel_overview.rst  | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 doc/kernel_implementation.rst

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
new file mode 100644
index 00000000..e98c33e2
--- /dev/null
+++ b/doc/kernel_implementation.rst
@@ -0,0 +1,9 @@
+
+***************************
+List of implemented kernels
+***************************
+
+======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== ======
+ NAME     get/set    K    Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
+======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
+rbf     \checkmark
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index 6ab439b6..c420943b 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -39,7 +39,7 @@ return::
 Implemented kernels
 ===================
 
-Many kernels are already implemented in GPy. Here is a summary of most of them:
+Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ . The following figure gives a summary of most of them:
 
 .. figure::  Figures/tuto_kern_overview_allkern.png
     :align:  center

From c20788c893b54580a64255721a55c4542ca43d40 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:36:53 +0000
Subject: [PATCH 035/105] Draft of documentation for implemented kernels

---
 doc/kernel_implementation.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index e98c33e2..327c001e 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -3,7 +3,8 @@
 List of implemented kernels
 ***************************
 
-======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== ======
+======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
  NAME     get/set    K    Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
 ======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
-rbf     \checkmark
+rbf     \\checkmark   y
+======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======

From 0ade786385a97f6c1fda221080430c7676609a0c Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:39:48 +0000
Subject: [PATCH 036/105] Plot function moved to GP model

---
 GPy/models/sparse_GP.py | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 54eebd2f..ff00faea 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -180,8 +180,8 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            #self.likelihood.fit_DTC(self.Kmm,self.psi1)
-            self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+            self.likelihood.fit_DTC(self.Kmm,self.psi1)
+            #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
             self._set_params(self._get_params()) # update the GP
 
     def log_likelihood(self):
@@ -240,14 +240,3 @@ class sparse_GP(GP):
             var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
 
         return mu,var[:,None]
-
-    def plot(self, *args, **kwargs):
-        """
-        Plot the fitted model: just call the GP plot function and then add inducing inputs
-        """
-        GP.plot(self,*args,**kwargs)
-        if self.Q==1:
-            if self.has_uncertain_inputs:
-                pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
-        if self.Q==2:
-            pb.plot(self.Z[:,0],self.Z[:,1],'wo')

From 5dbc5bdb6e283d5705c4c376aa662119bff1e808 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:40:17 +0000
Subject: [PATCH 037/105] Plotting functions for sparse_GP added

---
 GPy/models/GP.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 08ac1bb1..11cd174d 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -269,6 +269,8 @@ class GP(model):
             if hasattr(self,'Z'):
                 Zu = self.Z*self._Xstd + self._Xmean
                 pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
+                if self.has_uncertain_inputs:
+                    pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
 
         elif self.X.shape[1]==2: #FIXME
             resolution = resolution or 50
@@ -281,5 +283,8 @@ class GP(model):
             pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
             pb.xlim(xmin[0],xmax[0])
             pb.ylim(xmin[1],xmax[1])
+            if hasattr(self,'Z'):
+                pb.plot(self.Z[:,0],self.Z[:,1],'wo')
+
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

From 1ddc05925178857bd3e3354c1d2b99a65abdde33 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:41:22 +0000
Subject: [PATCH 038/105] Test for EP_DTC added

---
 GPy/testing/unit_tests.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 90037dcb..5ec1d766 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -180,7 +180,6 @@ class GradientTests(unittest.TestCase):
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
-
     @unittest.skip("FITC will be broken for a while")
     def test_generalized_FITC(self):
         N = 20

From addb5da4e439f76b13a47a53a30ba35b6fe51bee Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:41:46 +0000
Subject: [PATCH 039/105] Irrelevant changes

---
 GPy/examples/classification.py |  22 +++----
 GPy/examples/sparse_ep_fix.py  | 113 +++++++++++++++++++++------------
 2 files changed, 82 insertions(+), 53 deletions(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 592299d8..031cc915 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -11,7 +11,7 @@ import GPy
 
 default_seed=10000
 
-def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
+def crescent_data(seed=default_seed): #FIXME
     """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
     :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@@ -31,11 +31,8 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
     likelihood = GPy.likelihoods.EP(data['Y'],distribution)
 
 
-    if model_type=='Full':
-        m = GPy.models.GP(data['X'],likelihood,kernel)
-    else:
-        # create sparse GP EP model
-        m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
+    m = GPy.models.GP(data['X'],likelihood,kernel)
+    m.ensure_default_constraints()
 
     m.update_likelihood_approximation()
     print(m)
@@ -94,16 +91,13 @@ def toy_linear_1d_classification(seed=default_seed):
 
     # Model definition
     m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
+    m.ensure_default_constraints()
 
     # Optimize
-    """
-    EPEM runs a loop that consists of two steps:
-    1) EP likelihood approximation:
-        m.update_likelihood_approximation()
-    2) Parameters optimization:
-        m.optimize()
-    """
-    m.EPEM()
+    m.update_likelihood_approximation()
+    # Parameters optimization:
+    m.optimize()
+    #m.EPEM() #FIXME
 
     # Plot
     pb.subplot(211)
diff --git a/GPy/examples/sparse_ep_fix.py b/GPy/examples/sparse_ep_fix.py
index defcb4eb..acbd506c 100644
--- a/GPy/examples/sparse_ep_fix.py
+++ b/GPy/examples/sparse_ep_fix.py
@@ -10,51 +10,86 @@ import pylab as pb
 import numpy as np
 import GPy
 np.random.seed(2)
-pb.ion()
 N = 500
 M = 5
 
-pb.close('all')
-######################################
-## 1 dimensional example
+default_seed=10000
 
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(N,1))
-#Y = np.sin(X)+np.random.randn(N,1)*0.05
-F = np.sin(X)+np.random.randn(N,1)*0.05
-Y = np.ones([F.shape[0],1])
-Y[F<0] = -1
-likelihood = GPy.inference.likelihoods.probit(Y)
+def crescent_data(inducing=10, seed=default_seed):
+    """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
-# construct kernel
-rbf =  GPy.kern.rbf(1)
-noise = GPy.kern.white(1)
-kernel = rbf + noise
+    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
+    :param seed : seed value for data generation.
+    :type seed: int
+    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
+    :type inducing: int
+    """
 
-# create simple GP model
-#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
+    data = GPy.util.datasets.crescent_data(seed=seed)
 
-# contrain all parameters to be positive
-#m.constrain_fixed('prec',100.)
-m = GPy.models.sparse_GP(X, Y, kernel, M=M)
-m.ensure_default_constraints()
-#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
-#    m.approximate_likelihood()
-print m.checkgrad()
-m.optimize('tnc', messages = 1)
-m.plot(samples=3)
-print m
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
 
-n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
-n.ensure_default_constraints()
-if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian):
-    n.approximate_likelihood()
-print n.checkgrad()
-pb.figure()
-n.plot()
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
+
+    sample = np.random.randint(0,data['X'].shape[0],inducing)
+    Z = data['X'][sample,:]
+    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
+
+    # create sparse GP EP model
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
+    m.ensure_default_constraints()
+
+    m.update_likelihood_approximation()
+    print(m)
+
+    # optimize
+    m.optimize()
+    print(m)
+
+    # plot
+    m.plot()
+    return m
+
+
+def toy_linear_1d_classification(seed=default_seed):
+    """
+    Simple 1D classification example
+    :param seed : seed value for data generation (default is 4).
+    :type seed: int
+    """
+
+    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
+    Y = data['Y'][:, 0:1]
+    Y[Y == -1] = 0
+
+    # Kernel object
+    kernel = GPy.kern.rbf(1)
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(Y,distribution)
+
+    Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
+
+    # Model definition
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
+
+    m.ensure_default_constraints()
+    # Optimize
+    m.update_likelihood_approximation()
+    # Parameters optimization:
+    m.optimize()
+    #m.EPEM() #FIXME
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()
+    pb.subplot(212)
+    m.plot()
+    print(m)
+
+    return m
 
-"""
-m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
-m.ensure_default_constraints()
-print m.checkgrad()
-"""

From 9126c1086601bf128a8bf4842d45ce9829b2ce50 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:42:30 +0000
Subject: [PATCH 040/105] Removed generalized_FITC.py

---
 GPy/models/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 61591320..c099d0d5 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -11,4 +11,3 @@ from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from BGPLVM import Bayesian_GPLVM
-from generalized_FITC import generalized_FITC

From 3f88381dc9ed2cdce3e46bb31a81c1892edc552c Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:43:04 +0000
Subject: [PATCH 041/105] generalized_FITC removed

---
 GPy/models/generalized_FITC.py | 162 ---------------------------------
 1 file changed, 162 deletions(-)
 delete mode 100644 GPy/models/generalized_FITC.py

diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py
deleted file mode 100644
index 7e0c656e..00000000
--- a/GPy/models/generalized_FITC.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
-from ..util.plot import gpplot
-from scipy import linalg
-from .. import kern
-from sparse_GP import sparse_GP
-
-"""
-import numpy as np
-import pylab as pb
-from scipy import stats, linalg
-from .. import kern
-from ..core import model
-from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot
-#from ..inference.Expectation_Propagation import FITC
-from ..likelihoods.EP import FITC
-from ..likelihoods import likelihood,probit
-"""
-
-class generalized_FITC(sparse_GP):
-    def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
-    #def __init__(self, X, likelihood, kernel=None, inducing=10, epsilon_ep=1e-3, powerep=[1.,1.]):
-        """
-        Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
-
-        :param X: input observations
-        :param likelihood: Output's likelihood (likelihood class)
-        :param kernel: a GPy kernel
-        :param Z:  Either an array specifying the inducing points location or a scalar defining their number.
-        """
-
-        if type(Z) == int:
-            self.M = Z
-            self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1)
-        elif type(Z) == np.ndarray:
-            self.Z = Z
-            self.M = self.Z.shape[0]
-
-        self._precision = likelihood.precision
-
-        sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False)
-        self.scale_factor = 100.
-
-    def update_likelihood_approximation(self):
-        """
-        Approximates a non-gaussian likelihood using Expectation Propagation
-
-        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
-        this function does nothing
-        """
-        if self.has_uncertain_inputs:
-            raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
-        else:
-            self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
-            self._precision = self.likelihood.precision # Save the true precision
-            self.likelihood.precision = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
-            self._set_params(self._get_params()) # update the GP
-
-    def _set_params(self, p):
-        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
-        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
-        self._compute_kernel_matrices()
-        self._computations()
-        self._FITC_computations()
-
-    def _FITC_computations(self):
-        """
-        FITC approximation doesn't have the correction term in the log-likelihood bound,
-        but adds a diagonal term to the covariance matrix.
-        This function:
-            - computes the diagonal term
-            - eliminates the extra terms computed in the sparse_GP approximation
-            - computes the likelihood gradients wrt the true precision.
-        """
-        # Compute FITC's diagonal term of the covariance
-        sf = self.scale_factor
-        sf2 = sf**2
-        self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
-        self.Diag0 = self.psi0 - np.diag(self.Qnn)
-
-        self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
-        self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
-        self.RPT0 = np.dot(self.Lmi,self.psi1)
-        self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
-        self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
-        self.RPT = np.dot(self.R,self.P.T)
-        self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
-        self.w = self.Diag * self.likelihood.v_tilde
-        self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
-        self.mu = self.w + np.dot(self.P,self.gamma)
-        self.mu_tilde = (self.likelihood.v_tilde/self.likelihood.tau_tilde)[:,None]
-
-        # Remove extra term from dL_dpsi
-        self.dL_dpsi0 = np.zeros(self.N)
-        # Remove extra term from dL_dKmm
-        self.dL_dKmm = +0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
-        #the partial derivative vector for the likelihood with the true precision
-        if self.likelihood.Nparams ==0:
-            #save computation here
-            self.partial_for_likelihood = None
-        elif self.likelihood.is_heteroscedastic:
-            raise NotImplementedError, "heteroscedatic derivates not implemented"
-        else:
-            beta = self.likelihood._precision # NOTE the true precison is now '_precison' not 'precision'
-            dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
-            #dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
-            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
-            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
-            self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
-
-
-
-
-    def _raw_predict(self, Xnew, slices, full_cov=True):
-        """
-        Make a prediction for the vsGP model
-
-        Arguments
-        ---------
-        X : Input prediction data - Nx1 numpy array (floats)
-        """
-        Kx = self.kern.K(self.Z, Xnew)
-        #K_x = self.kernel.K(self.Z,X)
-        if full_cov:
-            Kxx = self.kern.K(Xnew)
-        else:
-            Kxx = self.kern.K(Xnew)#FIXME
-            #raise NotImplementedError
-            #Kxx = self.kern.Kdiag(Xnew)
-
-        # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
-
-        # Ci = I + (RPT0)Di(RPT0).T
-        # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
-        #   = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
-        #   = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
-        #   = I - V.T * V
-        U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
-        V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
-        C = np.eye(self.M) - np.dot(V.T,V)
-        mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
-        #self.C = C
-        #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
-        #self.mu_u = mu_u
-        #self.U = U
-        # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
-        mu_H = np.dot(mu_u,self.mu)
-        self.mu_H = mu_H
-        Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
-        # q(f_star|y) = N(f_star|mu_star,sigma2_star)
-        KR0T = np.dot(Kx.T,self.Lmi.T)
-        mu_star = np.dot(KR0T,mu_H)
-        sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
-        vdiag = np.diag(sigma2_star)
-        return mu_star[:,None],vdiag[:,None]

From e9c84484c02bf16f8255fb47c42148502866bf0f Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 11:45:58 +0000
Subject: [PATCH 042/105] Draft of documentation for implemented kernels

---
 doc/kernel_implementation.rst | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 327c001e..57b37c8e 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -3,8 +3,15 @@
 List of implemented kernels
 ***************************
 
+The :math:`\checkmark` symbol represents the functions that have been implemented for each kernel.
+
+..  |tick|
+
+..  |tick| image:: tick.png
+
+
 ======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
  NAME     get/set    K    Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
 ======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
-rbf     \\checkmark   y
+rbf     \\checkmark   y  
 ======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======

From 79ad72c46aedfea11bd60b82bfc667b8776c872e Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 11:51:21 +0000
Subject: [PATCH 043/105] added the outline of a tutorial on 'interacting with
 models'

---
 doc/tuto_interacting_with_models.rst | 60 ++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 doc/tuto_interacting_with_models.rst

diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
new file mode 100644
index 00000000..370ffd95
--- /dev/null
+++ b/doc/tuto_interacting_with_models.rst
@@ -0,0 +1,60 @@
+*************************************
+Interacting with models
+*************************************
+
+The GPy model class has a set of features which are designed to make it simple to explore the parameter space of the model. By default, the scipy optimisers are used to fit GPy models (via model.optimize()), for which we provide mechanisms for 'free' optimisation: GPy can ensure that naturally positive parameters (such as variances) remain positive. But these mechanisms are much more powerful than simple reparameterisation, as we shall see. 
+
+All of the examples included in GPy return an instance of a model class. We'll use GPy.examples.?? as an example::
+
+    import pylab as pb
+    pb.ion()
+    import GPy
+    m = GPy.examples.??
+
+Examining the model using print
+===============================
+To see the current state of the model parameters, and the model's (marginal) likelihood just print the model::
+    print m
+
+?? output
+
+Getting the model's likelihood and gradients
+===========================================
+foobar
+
+Setting and fetching parameters by name
+=======================================
+foobar
+
+Constraining and optimising the model
+=====================================
+A simple task in GPy is to ensure that the model's variances remain positive during optimisation. The model class has a function called constrain_positive(), which accepts a regex string as above. To constrain the model's variance to be positive::
+    m.constrain_positive('variance')
+    print m
+
+Now we see that the variance of the model is constrained to be positive. GPy handles the effective change of gradients: see how m.objective_gradients has changed appropriately.
+
+
+For convenience, we also provide a catch-all function which ensures that anything which appears to require positivity is constrained appropriately::
+    m.ensure_default_constraints()
+
+
+Fixing parameters
+=================
+
+
+Tying Parameters
+================
+
+Bounding parameters
+===================
+
+
+Further Reading
+===============
+All of the mechanisms for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherit. To learn how to construct your own model, you might want to read ??link?? creating_new_models.
+
+By default, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the settings of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
+
+
+

From 12d6f5056bf5f5e590fcf1c293fabc3c87f24ebf Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 12:15:59 +0000
Subject: [PATCH 044/105] removed keyname partial

---
 GPy/kern/Matern32.py             | 18 ++++-----
 GPy/kern/Matern52.py             | 18 ++++-----
 GPy/kern/bias.py                 | 38 +++++++++---------
 GPy/kern/coregionalise.py        | 26 ++++++------
 GPy/kern/exponential.py          | 18 ++++-----
 GPy/kern/kern.py                 | 68 ++++++++++++++++----------------
 GPy/kern/kernpart.py             | 18 ++++-----
 GPy/kern/linear.py               | 50 +++++++++++------------
 GPy/kern/periodic_Matern32.py    | 16 ++++----
 GPy/kern/periodic_Matern52.py    | 18 ++++-----
 GPy/kern/periodic_exponential.py | 18 ++++-----
 GPy/kern/product.py              | 24 +++++------
 GPy/kern/product_orthogonal.py   | 24 +++++------
 GPy/kern/rbf.py                  | 60 ++++++++++++++--------------
 GPy/kern/symmetric.py            | 24 +++++------
 GPy/kern/white.py                | 30 +++++++-------
 GPy/models/GP.py                 |  2 +-
 17 files changed, 235 insertions(+), 235 deletions(-)

diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py
index c175009d..9503361d 100644
--- a/GPy/kern/Matern32.py
+++ b/GPy/kern/Matern32.py
@@ -76,7 +76,7 @@ class Matern32(kernpart):
         """Compute the diagonal of the covariance matrix associated to X."""
         np.add(target,self.variance,target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@@ -84,29 +84,29 @@ class Matern32(kernpart):
         invdist = 1./np.where(dist!=0.,dist,np.inf)
         dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
         #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
-        target[0] += np.sum(dvar*partial)
+        target[0] += np.sum(dvar*dL_dK)
         if self.ARD == True:
             dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
             #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
-            target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+            target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
         else:
             dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
             #dl = self.variance*dvar*dist2M.sum(-1)*invdist
-            target[1] += np.sum(dl*partial)
+            target[1] += np.sum(dl*dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
-        target[0] += np.sum(partial)
+        target[0] += np.sum(dL_dKdiag)
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
-        target += np.sum(dK_dX*partial.T[:,:,None],0)
+        target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
 
     def Gram_matrix(self,F,F1,F2,lower,upper):
diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py
index 26caad1c..377526d5 100644
--- a/GPy/kern/Matern52.py
+++ b/GPy/kern/Matern52.py
@@ -74,7 +74,7 @@ class Matern52(kernpart):
         """Compute the diagonal of the covariance matrix associated to X."""
         np.add(target,self.variance,target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@@ -82,29 +82,29 @@ class Matern52(kernpart):
         dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
         dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
         dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
-        target[0] += np.sum(dvar*partial)
+        target[0] += np.sum(dvar*dL_dK)
         if self.ARD:
             dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
             #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
-            target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+            target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
         else:
             dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
             #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
-            target[1] += np.sum(dl*partial)
+            target[1] += np.sum(dl*dL_dKdiag)
 
-    def dKdiag_dtheta(self,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
-        target[0] += np.sum(partial)
+        target[0] += np.sum(dL_dKdiag)
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = -  np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
-        target += np.sum(dK_dX*partial.T[:,:,None],0)
+        target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
 
     def Gram_matrix(self,F,F1,F2,F3,lower,upper):
diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py
index 91594e4c..07679abd 100644
--- a/GPy/kern/bias.py
+++ b/GPy/kern/bias.py
@@ -35,16 +35,17 @@ class bias(kernpart):
     def Kdiag(self,X,target):
         target += self.variance
 
-    def dK_dtheta(self,partial,X,X2,target):
-        target += partial.sum()
+    def dK_dtheta(self,dL_dKdiag,X,X2,target):
+        target += dL_dKdiag.sum()
 
-    def dKdiag_dtheta(self,partial,X,target):
-        target += partial.sum()
 
-    def dK_dX(self, partial,X, X2, target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        target += dL_dKdiag.sum()
+
+    def dK_dX(self, dL_dK,X, X2, target):
         pass
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
 
     #---------------------------------------#
@@ -60,30 +61,29 @@ class bias(kernpart):
     def psi2(self, Z, mu, S, target):
         target += self.variance**2
 
-    def dpsi0_dtheta(self, partial, Z, mu, S, target):
-        target += partial.sum()
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
+        target += dL_dpsi0.sum()
 
-    def dpsi1_dtheta(self, partial, Z, mu, S, target):
-        target += partial.sum()
+    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
+        target += dL_dpsi1.sum()
 
-    def dpsi2_dtheta(self, partial, Z, mu, S, target):
-        target += 2.*self.variance*partial.sum()
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
+        target += 2.*self.variance*dL_dpsi2.sum()
 
-    
-    def dpsi0_dZ(self, partial, Z, mu, S, target):
+    def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
         pass
 
-    def dpsi0_dmuS(self, partial, Z, mu, S, target_mu, target_S):
+    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
         pass
 
-    def dpsi1_dZ(self, partial, Z, mu, S, target):
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
         pass
 
-    def dpsi1_dmuS(self, partial, Z, mu, S, target_mu, target_S):
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
         pass
 
-    def dpsi2_dZ(self, partial, Z, mu, S, target):
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
         pass
 
-    def dpsi2_dmuS(self, partial, Z, mu, S, target_mu, target_S):
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
         pass
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index 2a9177d5..a76bb31e 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -53,7 +53,7 @@ class coregionalise(kernpart):
     def Kdiag(self,index,target):
         target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
 
-    def dK_dtheta(self,partial,index,index2,target):
+    def dK_dtheta(self,dL_dK,index,index2,target):
         index = np.asarray(index,dtype=np.int)
         if index2 is None:
             index2 = index
@@ -62,28 +62,28 @@ class coregionalise(kernpart):
         ii,jj = np.meshgrid(index,index2)
         ii,jj = ii.T, jj.T
 
-        partial_small = np.zeros_like(self.B)
+        dL_dK_small = np.zeros_like(self.B)
         for i in range(self.Nout):
             for j in range(self.Nout):
-                tmp = np.sum(partial[(ii==i)*(jj==j)])
-                partial_small[i,j] = tmp
+                tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
+                dL_dK_small[i,j] = tmp
 
-        dkappa = np.diag(partial_small)
-        partial_small += partial_small.T
-        dW = (self.W[:,None,:]*partial_small[:,:,None]).sum(0)
+        dkappa = np.diag(dL_dK_small)
+        dL_dK_small += dL_dK_small.T
+        dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
 
         target += np.hstack([dW.flatten(),dkappa])
 
-    def dKdiag_dtheta(self,partial,index,target):
+    def dKdiag_dtheta(self,dL_dKdiag,index,target):
         index = np.asarray(index,dtype=np.int).flatten()
-        partial_small = np.zeros(self.Nout)
+        dL_dKdiag_small = np.zeros(self.Nout)
         for i in range(self.Nout):
-            partial_small[i] += np.sum(partial[index==i])
-        dW = 2.*self.W*partial_small[:,None]
-        dkappa = partial_small
+            dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
+        dW = 2.*self.W*dL_dKdiag_small[:,None]
+        dkappa = dL_dKdiag_small
         target += np.hstack([dW.flatten(),dkappa])
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         pass
 
 
diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py
index 366ddf3b..9e50712b 100644
--- a/GPy/kern/exponential.py
+++ b/GPy/kern/exponential.py
@@ -74,35 +74,35 @@ class exponential(kernpart):
         """Compute the diagonal of the covariance matrix associated to X."""
         np.add(target,self.variance,target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
         invdist = 1./np.where(dist!=0.,dist,np.inf)
         dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
         dvar = np.exp(-dist)
-        target[0] += np.sum(dvar*partial)
+        target[0] += np.sum(dvar*dL_dK)
         if self.ARD == True:
             dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
-            target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+            target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
         else:
             dl = self.variance*dvar*dist2M.sum(-1)*invdist
-            target[1] += np.sum(dl*partial)
+            target[1] += np.sum(dl*dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         #NB: derivative of diagonal elements wrt lengthscale is 0
-        target[0] += np.sum(partial)
+        target[0] += np.sum(dL_dKdiag)
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
-        target += np.sum(dK_dX*partial.T[:,:,None],0)
+        target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
 
     def Gram_matrix(self,F,F1,lower,upper):
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 99ad46ea..c1f5eca9 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -271,10 +271,10 @@ class kern(parameterised):
         [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target
 
-    def dK_dtheta(self,partial,X,X2=None,slices1=None,slices2=None):
+    def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None):
         """
-        :param partial: An array of partial derivaties, dL_dK
-        :type partial: Np.ndarray (N x M)
+        :param dL_dK: An array of dL_dK derivaties, dL_dK
+        :type dL_dK: Np.ndarray (N x M)
         :param X: Observed data inputs
         :type X: np.ndarray (N x D)
         :param X2: Observed dara inputs (optional, defaults to X)
@@ -288,16 +288,16 @@ class kern(parameterised):
         if X2 is None:
             X2 = X
         target = np.zeros(self.Nparam)
-        [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
+        [p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
 
         return self._transform_gradients(target)
 
-    def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None):
+    def dK_dX(self,dL_dK,X,X2=None,slices1=None,slices2=None):
         if X2 is None:
             X2 = X
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(X)
-        [p.dK_dX(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        [p.dK_dX(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
         return target
 
     def Kdiag(self,X,slices=None):
@@ -307,20 +307,20 @@ class kern(parameterised):
         [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
         return target
 
-    def dKdiag_dtheta(self,partial,X,slices=None):
+    def dKdiag_dtheta(self,dL_dKdiag,X,slices=None):
         assert X.shape[1]==self.D
-        assert len(partial.shape)==1
-        assert partial.size==X.shape[0]
+        assert len(dL_dKdiag.shape)==1
+        assert dL_dKdiag.size==X.shape[0]
         slices = self._process_slices(slices,False)
         target = np.zeros(self.Nparam)
-        [p.dKdiag_dtheta(partial[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
+        [p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
         return self._transform_gradients(target)
 
-    def dKdiag_dX(self, partial, X, slices=None):
+    def dKdiag_dX(self, dL_dKdiag, X, slices=None):
         assert X.shape[1]==self.D
         slices = self._process_slices(slices,False)
         target = np.zeros_like(X)
-        [p.dKdiag_dX(partial[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
+        [p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
         return target
 
     def psi0(self,Z,mu,S,slices=None):
@@ -329,16 +329,16 @@ class kern(parameterised):
         [p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)]
         return target
 
-    def dpsi0_dtheta(self,partial,Z,mu,S,slices=None):
+    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None):
         slices = self._process_slices(slices,False)
         target = np.zeros(self.Nparam)
-        [p.dpsi0_dtheta(partial[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
+        [p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
         return self._transform_gradients(target)
 
-    def dpsi0_dmuS(self,partial,Z,mu,S,slices=None):
+    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None):
         slices = self._process_slices(slices,False)
         target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
-        [p.dpsi0_dmuS(partial,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
+        [p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
         return target_mu,target_S
 
     def psi1(self,Z,mu,S,slices1=None,slices2=None):
@@ -348,25 +348,25 @@ class kern(parameterised):
         [p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)]
         return target
 
-    def dpsi1_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
         """N,M,(Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros((self.Nparam))
-        [p.dpsi1_dtheta(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
+        [p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
         return self._transform_gradients(target)
 
-    def dpsi1_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
         """N,M,Q"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(Z)
-        [p.dpsi1_dZ(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target
 
-    def dpsi1_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
         """return shapes are N,M,Q"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
-        [p.dpsi1_dmuS(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target_mu, target_S
 
     def psi2(self,Z,mu,S,slices1=None,slices2=None):
@@ -416,11 +416,11 @@ class kern(parameterised):
 
         return target + crossterms
 
-    def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dtheta(self,dL_dpsi2,partial1,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
-        [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
+        [p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
 
         #compute the "cross" terms
         #TODO: better looping
@@ -434,11 +434,11 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dtheta(partial.sum(1)*p1.variance,Z,mu,S,target[ps2])
-                p1.dpsi1_dtheta(partial.sum(1)*p2._psi1,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1,Z,mu,S,target[ps1])
             elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dtheta(partial.sum(1)*p2.variance,Z,mu,S,target[ps1])
-                p2.dpsi1_dtheta(partial.sum(1)*p1._psi1,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1,Z,mu,S,target[ps2])
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -469,10 +469,10 @@ class kern(parameterised):
         #     target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0)
         return self._transform_gradients(target)
 
-    def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(Z)
-        [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dZ(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
         #compute the "cross" terms
         #TODO: slices (need to iterate around the input slices also...)
@@ -482,9 +482,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S)
+                target += p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S)
             elif p2.name=='bias' and p1.name=='rbf':
-                target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S)
+                target += p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -496,11 +496,11 @@ class kern(parameterised):
 
         return target
 
-    def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
         """return shapes are N,M,M,Q"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
-        [p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
 
         #TODO: there are some extra terms to compute here!
         return target_mu, target_S
diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py
index 3a5486de..30a1cc3d 100644
--- a/GPy/kern/kernpart.py
+++ b/GPy/kern/kernpart.py
@@ -26,31 +26,31 @@ class kernpart(object):
         raise NotImplementedError
     def Kdiag(self,X,target):
         raise NotImplementedError
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         raise NotImplementedError
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         raise NotImplementedError
     def psi0(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi0_dtheta(self,partial,Z,mu,S,target):
+    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
         raise NotImplementedError
     def psi1(self,Z,mu,S,target):
         raise NotImplementedError
     def dpsi1_dtheta(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi1_dZ(self,partial,Z,mu,S,target):
+    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
         raise NotImplementedError
     def psi2(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi2_dZ(self,partial,Z,mu,S,target):
+    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi2_dtheta(self,partial,Z,mu,S,target):
+    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
         raise NotImplementedError
     def dK_dX(self,X,X2,target):
         raise NotImplementedError
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index df2fed46..7d817f62 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -73,16 +73,16 @@ class linear(kernpart):
     def Kdiag(self,X,target):
         np.add(target,np.sum(self.variances*np.square(X),-1),target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         if self.ARD:
             product = X[:,None,:]*X2[None,:,:]
-            target += (partial[:,:,None]*product).sum(0).sum(0)
+            target += (dL_dK[:,:,None]*product).sum(0).sum(0)
         else:
             self._K_computations(X, X2)
-            target += np.sum(self._dot_product*partial)
+            target += np.sum(self._dot_product*dL_dK)
 
-    def dK_dX(self,partial,X,X2,target):
-        target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
+    def dK_dX(self,dL_dK,X,X2,target):
+        target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
 
     #---------------------------------------#
     #             PSI statistics            #
@@ -92,40 +92,40 @@ class linear(kernpart):
         self._psi_computations(Z,mu,S)
         target += np.sum(self.variances*self.mu2_S,1)
 
-    def dKdiag_dtheta(self,partial, X, target):
-        tmp = partial[:,None]*X**2
+    def dKdiag_dtheta(self,dL_dKdiag, X, target):
+        tmp = dL_dKdiag[:,None]*X**2
         if self.ARD:
             target += tmp.sum(0)
         else:
             target += tmp.sum()
 
-    def dpsi0_dtheta(self,partial,Z,mu,S,target):
+    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        tmp = partial[:, None] * self.mu2_S
+        tmp = dL_dpsi0[:, None] * self.mu2_S
         if self.ARD:
             target += tmp.sum(0)
         else:
             target += tmp.sum()
 
-    def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S):
-        target_mu += partial[:, None] * (2.0*mu*self.variances)
-        target_S += partial[:, None] * self.variances
+    def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S):
+        target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances)
+        target_S += dL_dpsi0[:, None] * self.variances
 
     def psi1(self,Z,mu,S,target):
         """the variance, it does nothing"""
         self.K(mu,Z,target)
 
-    def dpsi1_dtheta(self,partial,Z,mu,S,target):
+    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
         """the variance, it does nothing"""
-        self.dK_dtheta(partial,mu,Z,target)
+        self.dK_dtheta(dL_dpsi1,mu,Z,target)
 
-    def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
         """Do nothing for S, it does not affect psi1"""
         self._psi_computations(Z,mu,S)
-        target_mu += (partial.T[:,:, None]*(Z*self.variances)).sum(1)
+        target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1)
 
-    def dpsi1_dZ(self,partial,Z,mu,S,target):
-        self.dK_dX(partial.T,Z,mu,target)
+    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
+        self.dK_dX(dL_dpsi1.T,Z,mu,target)
 
     def psi2(self,Z,mu,S,target):
         """
@@ -135,25 +135,25 @@ class linear(kernpart):
         psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
         target += psi2.sum(-1)
 
-    def dpsi2_dtheta(self,partial,Z,mu,S,target):
+    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        tmp = (partial[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
+        tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
         if self.ARD:
             target += tmp.sum(0).sum(0).sum(0)
         else:
             target += tmp.sum()
 
-    def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self.ZZ*np.square(self.variances) # M,M,Q
-        target_mu += (partial[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
-        target_S += (partial[:,:,:,None]*tmp).sum(1).sum(1)
+        target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
+        target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
 
-    def dpsi2_dZ(self,partial,Z,mu,S,target):
+    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         mu2_S = np.sum(self.mu2_S,0)# Q,
-        target += (partial[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
+        target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
 
     #---------------------------------------#
     #            Precomputations            #
diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py
index be1148c4..898dff7b 100644
--- a/GPy/kern/periodic_Matern32.py
+++ b/GPy/kern/periodic_Matern32.py
@@ -101,7 +101,7 @@ class periodic_Matern32(kernpart):
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
         if X2 is None: X2 = X
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@@ -166,13 +166,13 @@ class periodic_Matern32(kernpart):
         dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
 
         # np.add(target[:,:,0],dK_dvar, target[:,:,0])
-        target[0] += np.sum(dK_dvar*partial)
+        target[0] += np.sum(dK_dvar*dL_dK)
         #np.add(target[:,:,1],dK_dlen, target[:,:,1])
-        target[1] += np.sum(dK_dlen*partial)
+        target[1] += np.sum(dK_dlen*dL_dK)
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
-        target[2] += np.sum(dK_dper*partial)
+        target[2] += np.sum(dK_dper*dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
 
@@ -231,6 +231,6 @@ class periodic_Matern32(kernpart):
 
         dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
 
-        target[0] += np.sum(np.diag(dK_dvar)*partial)
-        target[1] += np.sum(np.diag(dK_dlen)*partial)
-        target[2] += np.sum(np.diag(dK_dper)*partial)
+        target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
+        target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
+        target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py
index 8d1da8b1..c533961f 100644
--- a/GPy/kern/periodic_Matern52.py
+++ b/GPy/kern/periodic_Matern52.py
@@ -46,7 +46,7 @@ class periodic_Matern52(kernpart):
         r =  np.sqrt(r1**2 + r2**2)
         psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
         return r,omega[:,0:1], psi
-    
+
     def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
         Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
         Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) +  np.cos(phi1-phi2.T)*(self.upper-self.lower)
@@ -105,7 +105,7 @@ class periodic_Matern52(kernpart):
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
         if X2 is None: X2 = X
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@@ -178,13 +178,13 @@ class periodic_Matern52(kernpart):
         dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
 
         # np.add(target[:,:,0],dK_dvar, target[:,:,0])
-        target[0] += np.sum(dK_dvar*partial)
+        target[0] += np.sum(dK_dvar*dL_dK)
         #np.add(target[:,:,1],dK_dlen, target[:,:,1])
-        target[1] += np.sum(dK_dlen*partial)
+        target[1] += np.sum(dK_dlen*dL_dK)
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
-        target[2] += np.sum(dK_dper*partial)
+        target[2] += np.sum(dK_dper*dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
 
@@ -251,6 +251,6 @@ class periodic_Matern52(kernpart):
         dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
         dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
 
-        target[0] += np.sum(np.diag(dK_dvar)*partial)
-        target[1] += np.sum(np.diag(dK_dlen)*partial)
-        target[2] += np.sum(np.diag(dK_dper)*partial)
+        target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
+        target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
+        target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/periodic_exponential.py
index 7f566f25..b966bbef 100644
--- a/GPy/kern/periodic_exponential.py
+++ b/GPy/kern/periodic_exponential.py
@@ -101,7 +101,7 @@ class periodic_exponential(kernpart):
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
         if X2 is None: X2 = X
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@@ -162,11 +162,11 @@ class periodic_exponential(kernpart):
 
         dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
 
-        target[0] += np.sum(dK_dvar*partial)
-        target[1] += np.sum(dK_dlen*partial)
-        target[2] += np.sum(dK_dper*partial)
+        target[0] += np.sum(dK_dvar*dL_dK)
+        target[1] += np.sum(dK_dlen*dL_dK)
+        target[2] += np.sum(dK_dper*dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
 
@@ -222,7 +222,7 @@ class periodic_exponential(kernpart):
 
         dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
 
-        target[0] += np.sum(np.diag(dK_dvar)*partial)
-        target[1] += np.sum(np.diag(dK_dlen)*partial)
-        target[2] += np.sum(np.diag(dK_dper)*partial)
-        
+        target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
+        target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
+        target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
+
diff --git a/GPy/kern/product.py b/GPy/kern/product.py
index 92522418..3bad51c1 100644
--- a/GPy/kern/product.py
+++ b/GPy/kern/product.py
@@ -55,7 +55,7 @@ class product(kernpart):
         self.k2.Kdiag(X,target2)
         target += target1 * target2
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
         K1 = np.zeros((X.shape[0],X2.shape[0]))
@@ -65,13 +65,13 @@ class product(kernpart):
 
         k1_target = np.zeros(self.k1.Nparam)
         k2_target = np.zeros(self.k2.Nparam)
-        self.k1.dK_dtheta(partial*K2, X, X2, k1_target)
-        self.k2.dK_dtheta(partial*K1, X, X2, k2_target)
+        self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
+        self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
 
         target[:self.k1.Nparam] += k1_target
         target[self.k1.Nparam:] += k2_target
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         K1 = np.zeros((X.shape[0],X2.shape[0]))
@@ -79,19 +79,19 @@ class product(kernpart):
         self.k1.K(X,X2,K1)
         self.k2.K(X,X2,K2)
 
-        self.k1.dK_dX(partial*K2, X, X2, target)
-        self.k2.dK_dX(partial*K1, X, X2, target)
+        self.k1.dK_dX(dL_dK*K2, X, X2, target)
+        self.k2.dK_dX(dL_dK*K1, X, X2, target)
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         target1 = np.zeros((X.shape[0],))
         target2 = np.zeros((X.shape[0],))
         self.k1.Kdiag(X,target1)
         self.k2.Kdiag(X,target2)
 
-        self.k1.dKdiag_dX(partial*target2, X, target)
-        self.k2.dKdiag_dX(partial*target1, X, target)
+        self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
+        self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
         target1 = np.zeros((X.shape[0],))
         target2 = np.zeros((X.shape[0],))
@@ -100,8 +100,8 @@ class product(kernpart):
 
         k1_target = np.zeros(self.k1.Nparam)
         k2_target = np.zeros(self.k2.Nparam)
-        self.k1.dKdiag_dtheta(partial*target2, X, k1_target)
-        self.k2.dKdiag_dtheta(partial*target1, X, k2_target)
+        self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
+        self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
 
         target[:self.k1.Nparam] += k1_target
         target[self.k1.Nparam:] += k2_target
diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/product_orthogonal.py
index a231cf8b..b0112199 100644
--- a/GPy/kern/product_orthogonal.py
+++ b/GPy/kern/product_orthogonal.py
@@ -46,7 +46,7 @@ class product_orthogonal(kernpart):
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
         target += target1 * target2
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
         K1 = np.zeros((X.shape[0],X2.shape[0]))
@@ -54,8 +54,8 @@ class product_orthogonal(kernpart):
         self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
 
-        self.k1.dK_dtheta(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
-        self.k2.dK_dtheta(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
+        self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+        self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -65,15 +65,15 @@ class product_orthogonal(kernpart):
         self.k2.Kdiag(X[:,self.k1.D:],target2)
         target += target1 * target2
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         K1 = np.zeros(X.shape[0])
         K2 = np.zeros(X.shape[0])
         self.k1.Kdiag(X[:,:self.k1.D],K1)
         self.k2.Kdiag(X[:,self.k1.D:],K2)
-        self.k1.dKdiag_dtheta(partial*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
-        self.k2.dKdiag_dtheta(partial*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
+        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
+        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         K1 = np.zeros((X.shape[0],X2.shape[0]))
@@ -81,15 +81,15 @@ class product_orthogonal(kernpart):
         self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
         self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
 
-        self.k1.dK_dX(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
-        self.k2.dK_dX(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
+        self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
 
-    def dKdiag_dX(self, partial, X, target):
+    def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
         K2 = np.zeros(X.shape[0])
         self.k1.Kdiag(X[:,0:self.k1.D],K1)
         self.k2.Kdiag(X[:,self.k1.D:],K2)
 
-        self.k1.dK_dX(partial*K2, X[:,:self.k1.D], target)
-        self.k2.dK_dX(partial*K1, X[:,self.k1.D:], target)
+        self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)
 
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 16eda459..3c3d59e6 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -82,27 +82,27 @@ class rbf(kernpart):
     def Kdiag(self,X,target):
         np.add(target,self.variance,target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         self._K_computations(X,X2)
-        target[0] += np.sum(self._K_dvar*partial)
+        target[0] += np.sum(self._K_dvar*dL_dK)
         if self.ARD == True:
             dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale
-            target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+            target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
         else:
-            target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial)
-        #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
+            target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*dL_dK)
+        #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         #NB: derivative of diagonal elements wrt lengthscale is 0
-        target[0] += np.sum(partial)
+        target[0] += np.sum(dL_dKdiag)
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         self._K_computations(X,X2)
         _K_dist = X[:,None,:]-X2[None,:,:]
         dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
-        target += np.sum(dK_dX*partial.T[:,:,None],0)
+        target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
 
 
@@ -113,69 +113,69 @@ class rbf(kernpart):
     def psi0(self,Z,mu,S,target):
         target += self.variance
 
-    def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += np.sum(partial)
+    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
+        target[0] += np.sum(dL_dpsi0)
 
-    def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
         pass
 
     def psi1(self,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         target += self._psi1
 
-    def dpsi1_dtheta(self,partial,Z,mu,S,target):
+    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
         d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
-        target[0] += np.sum(partial*self._psi1/self.variance)
-        dpsi1_dlength = d_length*partial[:,:,None]
+        target[0] += np.sum(dL_dpsi1*self._psi1/self.variance)
+        dpsi1_dlength = d_length*dL_dpsi1[:,:,None]
         if not self.ARD:
             target[1] += dpsi1_dlength.sum()
         else:
             target[1:] += dpsi1_dlength.sum(0).sum(0)
 
-    def dpsi1_dZ(self,partial,Z,mu,S,target):
+    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         denominator = (self.lengthscale2*(self._psi1_denom))
         dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
-        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
+        target += np.sum(dL_dpsi1.T[:,:,None] * dpsi1_dZ, 0)
 
-    def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
         self._psi_computations(Z,mu,S)
         tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
-        target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
-        target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
+        target_mu += np.sum(dL_dpsi1.T[:, :, None]*tmp*self._psi1_dist,1)
+        target_S += np.sum(dL_dpsi1.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
 
     def psi2(self,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         target += self._psi2
 
-    def dpsi2_dtheta(self,partial,Z,mu,S,target):
+    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         """Shape N,M,M,Ntheta"""
         self._psi_computations(Z,mu,S)
         d_var = 2.*self._psi2/self.variance
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
 
-        target[0] += np.sum(partial*d_var)
-        dpsi2_dlength = d_length*partial[:,:,:,None]
+        target[0] += np.sum(dL_dpsi2*d_var)
+        dpsi2_dlength = d_length*dL_dpsi2[:,:,:,None]
         if not self.ARD:
             target[1] += dpsi2_dlength.sum()
         else:
             target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
-            
-    def dpsi2_dZ(self,partial,Z,mu,S,target):
+
+    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
         term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
         dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
-        target += (partial[:,:,:,None]*dZ).sum(0).sum(0)
+        target += (dL_dpsi2[:,:,:,None]*dZ).sum(0).sum(0)
 
-    def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
-        target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
+        target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
 
 
     #---------------------------------------#
diff --git a/GPy/kern/symmetric.py b/GPy/kern/symmetric.py
index d493bfb1..c3b046c7 100644
--- a/GPy/kern/symmetric.py
+++ b/GPy/kern/symmetric.py
@@ -51,7 +51,7 @@ class symmetric(kernpart):
         self.k.K(X,AX2,target)
         self.k.K(AX,AX2,target)
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         AX = np.dot(X,self.transform)
         if X2 is None:
@@ -59,13 +59,13 @@ class symmetric(kernpart):
             ZX2 = AX
         else:
             AX2 = np.dot(X2, self.transform)
-        self.k.dK_dtheta(partial,X,X2,target)
-        self.k.dK_dtheta(partial,AX,X2,target)
-        self.k.dK_dtheta(partial,X,AX2,target)
-        self.k.dK_dtheta(partial,AX,AX2,target)
+        self.k.dK_dtheta(dL_dK,X,X2,target)
+        self.k.dK_dtheta(dL_dK,AX,X2,target)
+        self.k.dK_dtheta(dL_dK,X,AX2,target)
+        self.k.dK_dtheta(dL_dK,AX,AX2,target)
 
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         AX = np.dot(X,self.transform)
         if X2 is None:
@@ -73,10 +73,10 @@ class symmetric(kernpart):
             ZX2 = AX
         else:
             AX2 = np.dot(X2, self.transform)
-        self.k.dK_dX(partial, X, X2, target)
-        self.k.dK_dX(partial, AX, X2, target)
-        self.k.dK_dX(partial, X, AX2, target)
-        self.k.dK_dX(partial, AX ,AX2, target)
+        self.k.dK_dX(dL_dK, X, X2, target)
+        self.k.dK_dX(dL_dK, AX, X2, target)
+        self.k.dK_dX(dL_dK, X, AX2, target)
+        self.k.dK_dX(dL_dK, AX ,AX2, target)
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -84,9 +84,9 @@ class symmetric(kernpart):
         self.K(X,X,foo)
         target += np.diag(foo)
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         raise NotImplementedError
 
-    def dKdiag_dtheta(self,partial,X,target):
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
         raise NotImplementedError
diff --git a/GPy/kern/white.py b/GPy/kern/white.py
index b3b00c48..f5d6894a 100644
--- a/GPy/kern/white.py
+++ b/GPy/kern/white.py
@@ -37,50 +37,50 @@ class white(kernpart):
     def Kdiag(self,X,target):
         target += self.variance
 
-    def dK_dtheta(self,partial,X,X2,target):
+    def dK_dtheta(self,dL_dK,X,X2,target):
         if X.shape==X2.shape:
             if np.all(X==X2):
-                target += np.trace(partial)
+                target += np.trace(dL_dK)
 
-    def dKdiag_dtheta(self,partial,X,target):
-        target += np.sum(partial)
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        target += np.sum(dL_dKdiag)
 
-    def dK_dX(self,partial,X,X2,target):
+    def dK_dX(self,dL_dK,X,X2,target):
         pass
 
-    def dKdiag_dX(self,partial,X,target):
+    def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
 
     def psi0(self,Z,mu,S,target):
         target += self.variance
 
-    def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target += partial.sum()
+    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
+        target += dL_dpsi0.sum()
 
-    def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
         pass
 
     def psi1(self,Z,mu,S,target):
         pass
 
-    def dpsi1_dtheta(self,partial,Z,mu,S,target):
+    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
         pass
 
-    def dpsi1_dZ(self,partial,Z,mu,S,target):
+    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
         pass
 
-    def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
         pass
 
     def psi2(self,Z,mu,S,target):
         pass
 
-    def dpsi2_dZ(self,partial,Z,mu,S,target):
+    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
         pass
 
-    def dpsi2_dtheta(self,partial,Z,mu,S,target):
+    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         pass
 
-    def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
         pass
 
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 08ac1bb1..1d985c33 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -129,7 +129,7 @@ class GP(model):
 
         For the likelihood parameters, pass in alpha = K^-1 y
         """
-        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
 
     def _raw_predict(self,_Xnew,slices=None, full_cov=False):
         """

From f562d6cd46779f2bb1adebb50eed7ef4b0de0c69 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 12:21:12 +0000
Subject: [PATCH 045/105] deprecated flapack, namespace changed to
 lapack.flapack

---
 GPy/kern/Matern52.py  |  2 +-
 GPy/likelihoods/EP.py | 10 +++++-----
 GPy/util/linalg.py    |  6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py
index 377526d5..9338db15 100644
--- a/GPy/kern/Matern52.py
+++ b/GPy/kern/Matern52.py
@@ -90,7 +90,7 @@ class Matern52(kernpart):
         else:
             dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
             #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
-            target[1] += np.sum(dl*dL_dKdiag)
+            target[1] += np.sum(dl*dL_dK)
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py
index efd887ae..30b21d9b 100644
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@@ -114,7 +114,7 @@ class EP(likelihood):
             Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
             B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
             L = jitchol(B)
-            V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
+            V,info = linalg.lapack.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
             Sigma = K - np.dot(V.T,V)
             mu = np.dot(Sigma,self.v_tilde)
             epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
@@ -190,7 +190,7 @@ class EP(likelihood):
                 #Posterior distribution parameters update
                 LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                 L = jitchol(LLT)
-                V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
+                V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
                 Sigma_diag = np.sum(V*V,-2)
                 si = np.sum(V.T*V[:,i],-1)
                 mu = mu + (Delta_v-Delta_tau*mu[i])*si
@@ -198,8 +198,8 @@ class EP(likelihood):
             #Sigma recomputation with Cholesky decompositon
             LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
             L = jitchol(LLT)
-            V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
-            V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
+            V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
+            V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
             Sigma_diag = np.sum(V*V,-2)
             Knmv_tilde = np.dot(Kmn,v_tilde)
             mu = np.dot(V2.T,Knmv_tilde)
@@ -297,7 +297,7 @@ class EP(likelihood):
             P = (Diag / Diag0)[:,None] * P0
             RPT0 = np.dot(R0,P0.T)
             L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
-            R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
+            R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
             RPT = np.dot(R,P.T)
             Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
             self.w = Diag * self.v_tilde
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 7414eb29..26105789 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -11,7 +11,7 @@ import re
 import pdb
 import cPickle
 import types
-import scipy.lib.lapack.flapack
+#import scipy.lib.lapack.flapack
 import scipy as sp
 
 def mdot(*args):
@@ -101,7 +101,7 @@ def chol_inv(L):
 
     """
 
-    return linalg.flapack.dtrtri(L, lower = True)[0]
+    return linalg.lapack.flapack.dtrtri(L, lower = True)[0]
 
 
 def multiple_pdinv(A):
@@ -118,7 +118,7 @@ def multiple_pdinv(A):
     N = A.shape[-1]
     chols = [jitchol(A[:,:,i]) for i in range(N)]
     halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
-    invs = [linalg.flapack.dpotri(L[0],True)[0] for L in chols]
+    invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols]
     invs = [np.triu(I)+np.triu(I,1).T for I in invs]
     return np.dstack(invs),np.array(halflogdets)
 

From e32afa11e5b437ef5db6bfd015f4f87936723bd0 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 12:33:03 +0000
Subject: [PATCH 046/105] added GPy.tests(), removed some useless tests

---
 GPy/__init__.py             |  5 +++++
 GPy/testing/bgplvm_tests.py |  5 ++++-
 GPy/testing/kernel_tests.py |  1 -
 GPy/testing/unit_tests.py   | 11 -----------
 setup.py                    |  6 ++----
 5 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index c0772c27..6c43e471 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -9,3 +9,8 @@ import util
 import examples
 from core import priors
 import likelihoods
+import testing
+from numpy.testing import Tester
+
+def tests():
+    Tester(testing).test(verbose=10)
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index c49bdfda..e3bd2b36 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -12,6 +12,7 @@ class BGPLVMTests(unittest.TestCase):
         k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
         K = k.K(X)
         Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
         k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
         m.constrain_positive('(rbf|bias|noise|white|S)')
@@ -24,6 +25,7 @@ class BGPLVMTests(unittest.TestCase):
         k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
         K = k.K(X)
         Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
         k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
         m.constrain_positive('(linear|bias|noise|white|S)')
@@ -36,13 +38,14 @@ class BGPLVMTests(unittest.TestCase):
         k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
         K = k.K(X)
         Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
         k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
         m.constrain_positive('(rbf|bias|noise|white|S)')
         m.randomize()
         self.assertTrue(m.checkgrad())
 
-        
+
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 3d738106..bb809ea6 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -13,7 +13,6 @@ class KernelTests(unittest.TestCase):
         X = np.random.rand(5,5)
         Y = np.ones((5,1))
         m = GPy.models.GP_regression(X,Y,K)
-        print m
         self.assertTrue(m.checkgrad())
 
     def test_coregionalisation(self):
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 61fb15bb..55963805 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -177,17 +177,6 @@ class GradientTests(unittest.TestCase):
         m.approximate_likelihood()
         self.assertTrue(m.checkgrad())
 
-    def test_warped_GP(self):
-        xmin, xmax = 1, 2.5*np.pi
-        b, C, SNR = 1, 0, 0.1
-        X = np.linspace(xmin, xmax, 500)
-        y  = b*X + C + 1*np.sin(X)
-        y += 0.05*np.random.randn(len(X))
-        X, y = X[:, None], y[:, None]
-        m = GPy.models.warpedGP(X, y, warping_terms = 3)
-        m.constrain_positive('(tanh_a|tanh_b|rbf|white|bias)')
-        self.assertTrue(m.checkgrad())
-
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
diff --git a/setup.py b/setup.py
index d24171e2..b701b74d 100644
--- a/setup.py
+++ b/setup.py
@@ -3,8 +3,6 @@
 
 import os
 from setuptools import setup
-#from numpy.distutils.core import Extension, setup
-#from sphinx.setup_command import BuildDoc
 
 # Version number
 version = '0.1.3'
@@ -14,12 +12,12 @@ def read(fname):
 
 setup(name = 'GPy',
       version = version,
-      author = 'James Hensman, Nicolo Fusi, Ricardo Andrade, Nicolas Durrande, Alan Saul, Neil D. Lawrence',
+      author = read('AUTHORS.txt'),
       author_email = "james.hensman@gmail.com",
       description = ("The Gaussian Process Toolbox"),
       license = "BSD 3-clause",
       keywords = "machine-learning gaussian-processes kernels",
-      url = "http://ml.sheffield.ac.uk/GPy/",
+      url = "http://sheffieldml.github.com/GPy/",
       packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
       package_dir={'GPy': 'GPy'},
       package_data = {'GPy': ['GPy/examples']},

From a86676016247b222664c177bfc4e9dc834500c00 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 12:39:44 +0000
Subject: [PATCH 047/105] Removed unused partial1

---
 GPy/kern/kern.py        | 2 +-
 GPy/models/sparse_GP.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index f1a5bd45..87e67f33 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -399,7 +399,7 @@ class kern(parameterised):
 
         return target
 
-    def dpsi2_dtheta(self,dL_dpsi2,partial1,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index ff00faea..e2019d99 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -208,7 +208,7 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
             dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_uncertainty)
         else:
             dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)

From f98e52ffe8cd5678b178eb69fa771b23af55125f Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 12:40:14 +0000
Subject: [PATCH 048/105] now running nosetest doesn't run unittests twice

---
 GPy/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index 6c43e471..fa69dac3 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -11,6 +11,8 @@ from core import priors
 import likelihoods
 import testing
 from numpy.testing import Tester
+from nose.tools import nottest
 
+@nottest
 def tests():
     Tester(testing).test(verbose=10)

From e511bb69cf25cfa3c6c8aea946a85fcdf3f437d2 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 12:40:29 +0000
Subject: [PATCH 049/105] added in documentation the current status of kernel
 implementation

---
 doc/Figures/tick.png          | Bin 0 -> 175 bytes
 doc/GPy.examples.rst          |  16 +++++++++++++
 doc/GPy.kern.rst              |  28 ++++++++++++++++++-----
 doc/kernel_implementation.rst |  41 +++++++++++++++++++++++++---------
 doc/tuto_kernel_overview.rst  |   2 +-
 5 files changed, 70 insertions(+), 17 deletions(-)
 create mode 100644 doc/Figures/tick.png

diff --git a/doc/Figures/tick.png b/doc/Figures/tick.png
new file mode 100644
index 0000000000000000000000000000000000000000..1175c8021717199329a79061bbf1e02c49f677fc
GIT binary patch
literal 175
zcmeAS@N?(olHy`uVBq!ia0vp@K+MO&3?$hCyB`86tpJ}8*Z=?j1DW&Y%?k(!NJ>g_
zaBwIoDcQDd8xIeUl9Cb&3rkm5*O@bCo;`ae#qL%PRO;aA;uyklJvo7aQH|B$Xadv1
z2w{a1iIyb8h6|zsT?;v#4E-4<hDuBDH@s-hHdg3zovnDO_mM!<Mn<RNc!r4#Q{QsB
Tgn!))avFoDtDnm{r-UW|rZhEX

literal 0
HcmV?d00001

diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index 59ffd43d..ec283d21 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -73,6 +73,22 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`tuto_GP_regression` Module
+--------------------------------
+
+.. automodule:: GPy.examples.tuto_GP_regression
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`tuto_kernel_overview` Module
+----------------------------------
+
+.. automodule:: GPy.examples.tuto_kernel_overview
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`uncertain_input_GP_regression_demo` Module
 ------------------------------------------------
 
diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst
index a3a611b7..3ebeda40 100644
--- a/doc/GPy.kern.rst
+++ b/doc/GPy.kern.rst
@@ -49,6 +49,14 @@ kern Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`coregionalise` Module
+---------------------------
+
+.. automodule:: GPy.kern.coregionalise
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`exponential` Module
 -------------------------
 
@@ -113,18 +121,18 @@ kern Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`product` Module
----------------------
+:mod:`prod` Module
+------------------
 
-.. automodule:: GPy.kern.product
+.. automodule:: GPy.kern.prod
     :members:
     :undoc-members:
     :show-inheritance:
 
-:mod:`product_orthogonal` Module
---------------------------------
+:mod:`prod_orthogonal` Module
+-----------------------------
 
-.. automodule:: GPy.kern.product_orthogonal
+.. automodule:: GPy.kern.prod_orthogonal
     :members:
     :undoc-members:
     :show-inheritance:
@@ -145,6 +153,14 @@ kern Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`symmetric` Module
+-----------------------
+
+.. automodule:: GPy.kern.symmetric
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`sympykern` Module
 -----------------------
 
diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 57b37c8e..888d1ee5 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -3,15 +3,36 @@
 List of implemented kernels
 ***************************
 
-The :math:`\checkmark` symbol represents the functions that have been implemented for each kernel.
+The following table lists the kernels implemented in GPy and shows which functions are implemented for each kernel.
 
-..  |tick|
+====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+NAME                  get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
+====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+bias                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Brownian              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                                                
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+exponential           |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+finite_dimensional    |tick|       |tick|  |tick|  |tick|      |tick| 
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+linear                |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Matern32              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|        
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Matern52              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_exponential  |tick|       |tick|  |tick|  |tick|      |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_Matern32     |tick|       |tick|  |tick|  |tick|      |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_Matern52     |tick|       |tick|  |tick|  |tick|      |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+rbf                   |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+spline                |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
+--------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+white                 |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 
-..  |tick| image:: tick.png
-
-
-======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
- NAME     get/set    K    Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
-======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
-rbf     \\checkmark   y  
-======  ===========  ===  ======= =========== =============== ======= =========== ====== ====== =======
+..  |tick| image:: Figures/tick.png
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index c420943b..e410696a 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -39,7 +39,7 @@ return::
 Implemented kernels
 ===================
 
-Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ . The following figure gives a summary of most of them:
+Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ and the following figure gives a summary of most of them:
 
 .. figure::  Figures/tuto_kern_overview_allkern.png
     :align:  center

From da219a66e811767303f858d59d37fbde96c7eb76 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 12:40:53 +0000
Subject: [PATCH 050/105] added init

---
 GPy/testing/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 GPy/testing/__init__.py

diff --git a/GPy/testing/__init__.py b/GPy/testing/__init__.py
new file mode 100644
index 00000000..e69de29b

From 25d73b13e99ecf6471063573d3ab4f2d02b6f587 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 12:59:39 +0000
Subject: [PATCH 051/105]  update in the documentation on kernel implementation

---
 doc/kernel_implementation.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 888d1ee5..99ee006b 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -35,4 +35,13 @@ spline                |tick|       |tick|  |tick|  |tick|      |tick|
 white                 |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 ====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 
+Depending on the use case, not all functions are required:
+
+    * ``get/set, K, Kdiag``: compulsory
+    * ``dK_dtheta``: necessary to optimize the model
+    * ``dKdiag_dtheta``: sparse models, BGPLVM, GPs with uncertain inputs
+    * ``dK_dX``: sparse models, GPLVM, BGPLVM, GPs with uncertain inputs
+    * ``dKdiag_dX``: sparse models, BGPLVM, GPs with uncertain inputs
+    * ``psi0, psi1, psi2``: BGPLVM, GPs with uncertain inputs
+
 ..  |tick| image:: Figures/tick.png

From 16a23758c6f64b568c2ce985ef98454932ed3350 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 13:13:18 +0000
Subject: [PATCH 052/105] example files for tutorials are now in Neil's format

---
 GPy/examples/__init__.py             |   1 +
 GPy/examples/tuto_GP_regression.py   |  56 --------
 GPy/examples/tuto_kernel_overview.py | 139 ------------------
 GPy/examples/tutorials.py            | 201 +++++++++++++++++++++++++++
 doc/tuto_GP_regression.rst           |   2 +-
 doc/tuto_kernel_overview.rst         |   2 +-
 6 files changed, 204 insertions(+), 197 deletions(-)
 delete mode 100644 GPy/examples/tuto_GP_regression.py
 delete mode 100644 GPy/examples/tuto_kernel_overview.py
 create mode 100644 GPy/examples/tutorials.py

diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py
index 2f3cf0f4..ce4618ac 100644
--- a/GPy/examples/__init__.py
+++ b/GPy/examples/__init__.py
@@ -6,3 +6,4 @@
 import classification
 import regression
 import unsupervised
+import tutorials
diff --git a/GPy/examples/tuto_GP_regression.py b/GPy/examples/tuto_GP_regression.py
deleted file mode 100644
index b3953de0..00000000
--- a/GPy/examples/tuto_GP_regression.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# The detailed explanations of the commands used in this file can be found in the tutorial section
-
-import pylab as pb
-pb.ion()
-import numpy as np
-import GPy
-
-X = np.random.uniform(-3.,3.,(20,1))
-Y = np.sin(X) + np.random.randn(20,1)*0.05
-
-kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
-
-m = GPy.models.GP_regression(X,Y,kernel)
-
-print m
-m.plot()
-
-m.constrain_positive('')
-
-m.unconstrain('')                            # Required to remove the previous constrains
-m.constrain_positive('rbf_variance')
-m.constrain_bounded('lengthscale',1.,10. )
-m.constrain_fixed('noise',0.0025)
-
-m.optimize()
-
-m.optimize_restarts(Nrestarts = 10)
-
-###########################
-#  2-dimensional example  #
-###########################
-
-import pylab as pb
-pb.ion()
-import numpy as np
-import GPy
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(50,2))
-Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
-
-# define kernel
-ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
-
-# create simple GP model
-m = GPy.models.GP_regression(X,Y,ker)
-
-# contrain all parameters to be positive
-m.constrain_positive('')
-
-# optimize and plot
-pb.figure()
-m.optimize('tnc', max_f_eval = 1000)
-
-m.plot()
-print(m)
diff --git a/GPy/examples/tuto_kernel_overview.py b/GPy/examples/tuto_kernel_overview.py
deleted file mode 100644
index ebd19d76..00000000
--- a/GPy/examples/tuto_kernel_overview.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# The detailed explanations of the commands used in this file can be found in the tutorial section
-
-import pylab as pb
-import numpy as np
-import GPy
-pb.ion()
-
-ker1 = GPy.kern.rbf(1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
-ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.)
-ker3 = GPy.kern.rbf(1, .5, .5)
-
-print ker2
-ker1.plot()
-ker2.plot()
-ker3.plot()
-
-k1 = GPy.kern.rbf(1,1.,2.)
-k2 = GPy.kern.Matern32(1, 0.5, 0.2)
-
-# Product of kernels
-k_prod = k1.prod(k2)
-k_prodorth = k1.prod_orthogonal(k2)
-
-# Sum of kernels
-k_add = k1.add(k2)
-k_addorth = k1.add_orthogonal(k2)    
-
-pb.figure(figsize=(8,8))
-pb.subplot(2,2,1)
-k_prod.plot()
-pb.title('prod')
-pb.subplot(2,2,2)
-k_prodorth.plot()
-pb.title('prod_orthogonal')
-pb.subplot(2,2,3)
-k_add.plot()
-pb.title('add')
-pb.subplot(2,2,4)
-k_addorth.plot()
-pb.title('add_orthogonal')
-pb.subplots_adjust(wspace=0.3, hspace=0.3)
-
-k1 = GPy.kern.rbf(1,1.,2)
-k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
-
-k = k1 * k2  # equivalent to k = k1.prod(k2)
-print k
-
-# Simulate sample paths
-X = np.linspace(-5,5,501)[:,None]
-Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
-
-# plot
-pb.figure(figsize=(10,4))
-pb.subplot(1,2,1)
-k.plot()
-pb.subplot(1,2,2)
-pb.plot(X,Y.T)
-pb.ylabel("Sample path")
-pb.subplots_adjust(wspace=0.3)
-
-k = (k1+k2)*(k1+k2)
-print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
-
-k1 = GPy.kern.rbf(1)
-k2 = GPy.kern.Matern32(1)
-k3 = GPy.kern.white(1)
-
-k = k1 + k2 + k3
-print k
-
-k.constrain_positive('var')
-k.constrain_fixed(np.array([1]),1.75)
-k.tie_param('len')
-k.unconstrain('white')
-k.constrain_bounded('white',lower=1e-5,upper=.5)
-print k
-
-k_cst = GPy.kern.bias(1,variance=1.)
-k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
-Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
-print Kanova
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(40,2))
-Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
-
-# Create GP regression model
-m = GPy.models.GP_regression(X,Y,Kanova)
-pb.figure(figsize=(5,5))
-m.plot()
-
-pb.figure(figsize=(20,3))
-pb.subplots_adjust(wspace=0.5)
-pb.subplot(1,5,1)
-m.plot()
-pb.subplot(1,5,2)
-pb.ylabel("=   ",rotation='horizontal',fontsize='30')
-pb.subplot(1,5,3)
-m.plot(which_functions=[False,True,False,False])
-pb.ylabel("cst          +",rotation='horizontal',fontsize='30')
-pb.subplot(1,5,4)
-m.plot(which_functions=[False,False,True,False])
-pb.ylabel("+   ",rotation='horizontal',fontsize='30')
-pb.subplot(1,5,5)
-pb.ylabel("+   ",rotation='horizontal',fontsize='30')
-m.plot(which_functions=[False,False,False,True])
-
-import pylab as pb
-import numpy as np
-import GPy
-pb.ion()
-
-ker1 = GPy.kern.rbf(D=1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
-ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
-ker3 = GPy.kern.rbf(1, .5, .25)
-
-ker1.plot()
-ker2.plot()
-ker3.plot()
-#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
-
-kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1),  GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
-kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
-
-pb.figure(figsize=(16,12))
-pb.subplots_adjust(wspace=.5, hspace=.5)
-for i, kern in enumerate(kernels):
-   pb.subplot(3,4,i+1)
-   kern.plot(x=7.5,plot_limits=[0.00001,15.])
-   pb.title(kernel_names[i]+ '\n')
-
-# actual plot for the noise
-i = 11
-X = np.linspace(0.,15.,201)
-WN = 0*X
-WN[100] = 1.
-pb.subplot(3,4,i+1)
-pb.plot(X,WN,'b')
diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py
new file mode 100644
index 00000000..be550e01
--- /dev/null
+++ b/GPy/examples/tutorials.py
@@ -0,0 +1,201 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+"""
+Code of Tutorials
+"""
+
+def tuto_GP_regression():
+    """The detailed explanations of the commands used in this file can be found in the tutorial section"""
+
+    import pylab as pb
+    pb.ion()
+    import numpy as np
+    import GPy
+
+    X = np.random.uniform(-3.,3.,(20,1))
+    Y = np.sin(X) + np.random.randn(20,1)*0.05
+
+    kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+
+    m = GPy.models.GP_regression(X,Y,kernel)
+
+    print m
+    m.plot()
+
+    m.constrain_positive('')
+
+    m.unconstrain('')                            # Required to remove the previous constraints
+    m.constrain_positive('rbf_variance')
+    m.constrain_bounded('lengthscale',1.,10. )
+    m.constrain_fixed('noise',0.0025)
+
+    m.optimize()
+
+    m.optimize_restarts(Nrestarts = 10)
+
+    ###########################
+    #  2-dimensional example  #
+    ###########################
+
+    import pylab as pb
+    pb.ion()
+    import numpy as np
+    import GPy
+
+    # sample inputs and outputs
+    X = np.random.uniform(-3.,3.,(50,2))
+    Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
+
+    # define kernel
+    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
+
+    # create simple GP model
+    m = GPy.models.GP_regression(X,Y,ker)
+
+    # constrain all parameters to be positive
+    m.constrain_positive('')
+
+    # optimize and plot
+    pb.figure()
+    m.optimize('tnc', max_f_eval = 1000)
+
+    m.plot()
+    print(m)
+
+
+def tuto_kernel_overview():
+    """The detailed explanations of the commands used in this file can be found in the tutorial section"""
+    import pylab as pb
+    import numpy as np
+    import GPy
+    pb.ion()
+
+    ker1 = GPy.kern.rbf(1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+    ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.)
+    ker3 = GPy.kern.rbf(1, .5, .5)
+
+    print ker2
+    ker1.plot()
+    ker2.plot()
+    ker3.plot()
+
+    k1 = GPy.kern.rbf(1,1.,2.)
+    k2 = GPy.kern.Matern32(1, 0.5, 0.2)
+
+    # Product of kernels
+    k_prod = k1.prod(k2)
+    k_prodorth = k1.prod_orthogonal(k2)
+
+    # Sum of kernels
+    k_add = k1.add(k2)
+    k_addorth = k1.add_orthogonal(k2)    
+
+    pb.figure(figsize=(8,8))
+    pb.subplot(2,2,1)
+    k_prod.plot()
+    pb.title('prod')
+    pb.subplot(2,2,2)
+    k_prodorth.plot()
+    pb.title('prod_orthogonal')
+    pb.subplot(2,2,3)
+    k_add.plot()
+    pb.title('add')
+    pb.subplot(2,2,4)
+    k_addorth.plot()
+    pb.title('add_orthogonal')
+    pb.subplots_adjust(wspace=0.3, hspace=0.3)
+
+    k1 = GPy.kern.rbf(1,1.,2)
+    k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
+
+    k = k1 * k2  # equivalent to k = k1.prod(k2)
+    print k
+
+    # Simulate sample paths
+    X = np.linspace(-5,5,501)[:,None]
+    Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
+
+    # plot
+    pb.figure(figsize=(10,4))
+    pb.subplot(1,2,1)
+    k.plot()
+    pb.subplot(1,2,2)
+    pb.plot(X,Y.T)
+    pb.ylabel("Sample path")
+    pb.subplots_adjust(wspace=0.3)
+
+    k = (k1+k2)*(k1+k2)
+    print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
+
+    k1 = GPy.kern.rbf(1)
+    k2 = GPy.kern.Matern32(1)
+    k3 = GPy.kern.white(1)
+
+    k = k1 + k2 + k3
+    print k
+
+    k.constrain_positive('var')
+    k.constrain_fixed(np.array([1]),1.75)
+    k.tie_param('len')
+    k.unconstrain('white')
+    k.constrain_bounded('white',lower=1e-5,upper=.5)
+    print k
+
+    k_cst = GPy.kern.bias(1,variance=1.)
+    k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
+    Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
+    print Kanova
+
+    # sample inputs and outputs
+    X = np.random.uniform(-3.,3.,(40,2))
+    Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
+
+    # Create GP regression model
+    m = GPy.models.GP_regression(X,Y,Kanova)
+    pb.figure(figsize=(5,5))
+    m.plot()
+
+    pb.figure(figsize=(20,3))
+    pb.subplots_adjust(wspace=0.5)
+    pb.subplot(1,5,1)
+    m.plot()
+    pb.subplot(1,5,2)
+    pb.ylabel("=   ",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,3)
+    m.plot(which_functions=[False,True,False,False])
+    pb.ylabel("cst          +",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,4)
+    m.plot(which_functions=[False,False,True,False])
+    pb.ylabel("+   ",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,5)
+    pb.ylabel("+   ",rotation='horizontal',fontsize='30')
+    m.plot(which_functions=[False,False,False,True])
+
+    ker1 = GPy.kern.rbf(D=1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+    ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
+    ker3 = GPy.kern.rbf(1, .5, .25)
+
+    ker1.plot()
+    ker2.plot()
+    ker3.plot()
+    #pb.savefig("Figures/tuto_kern_overview_basicdef.png")
+
+    kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1),  GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
+    kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
+
+    pb.figure(figsize=(16,12))
+    pb.subplots_adjust(wspace=.5, hspace=.5)
+    for i, kern in enumerate(kernels):
+       pb.subplot(3,4,i+1)
+       kern.plot(x=7.5,plot_limits=[0.00001,15.])
+       pb.title(kernel_names[i]+ '\n')
+
+    # actual plot for the noise
+    i = 11
+    X = np.linspace(0.,15.,201)
+    WN = 0*X
+    WN[100] = 1.
+    pb.subplot(3,4,i+1)
+    pb.plot(X,WN,'b')
diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst
index 9de79a8c..24e10528 100644
--- a/doc/tuto_GP_regression.rst
+++ b/doc/tuto_GP_regression.rst
@@ -2,7 +2,7 @@
 Gaussian process regression tutorial
 *************************************
 
-We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_GP_regression.py.
+We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. The code shown in this tutorial can be found in GPy/examples/tutorials.py, and can be executed by running ``GPy.examples.tutorials.tuto_GP_regression()``.
 
 We first import the libraries we will need: ::
 
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index e410696a..dfb7fb3f 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -2,7 +2,7 @@
 ****************************
 tutorial : A kernel overview
 ****************************
-The aim of this tutorial is to give a better understanding of the kernel objects in GPy and to list the ones that are already implemented. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_kernel_overview.py.
+The aim of this tutorial is to give a better understanding of the kernel objects in GPy and to list the ones that are already implemented. The code shown in this tutorial can be found in GPy/examples/tutorials.py, and can be executed by running ``GPy.examples.tutorials.tuto_kernel_overview()``.
 
 First we import the libraries we will need ::
 

From 6a330db25336dad8c2668fff0cb7fc8430be41e0 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 13:26:39 +0000
Subject: [PATCH 053/105] americanized spellings

---
 GPy/likelihoods/Gaussian.py         |   4 +-
 GPy/models/GP.py                    |  15 ++--
 GPy/models/sparse_GP.py             |   4 +-
 GPy/models/uncollapsed_sparse_GP.py |   2 +-
 GPy/util/Tango.py                   | 132 ++++++++++++++--------------
 GPy/util/datasets.py                |   2 +-
 GPy/util/plot.py                    |   2 +-
 7 files changed, 80 insertions(+), 81 deletions(-)

diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py
index 4a32f066..a5084cc0 100644
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@@ -8,7 +8,7 @@ class Gaussian(likelihood):
         self.Z = 0. # a correction factor which accounts for the approximation made
         N, self.D = data.shape
 
-        #normalisation
+        #normalization
         if normalize:
             self._mean = data.mean(0)[None,:]
             self._std = data.std(0)[None,:]
@@ -45,7 +45,7 @@ class Gaussian(likelihood):
 
     def predictive_values(self,mu,var):
         """
-        Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval
+        Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
         """
         mean = mu*self._std + self._mean
         true_var = (var + self._variance)*self._std**2
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 5879a2bf..796ab7d6 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -30,7 +30,6 @@ class GP(model):
     .. Note:: Multiple independent outputs are allowed using columns of Y
 
     """
-    #FIXME normalize vs normalise
     def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
 
         # parse arguments
@@ -41,7 +40,7 @@ class GP(model):
         assert isinstance(kernel, kern.kern)
         self.kern = kernel
 
-        #here's some simple normalisation for the inputs
+        #here's some simple normalization for the inputs
         if normalize_X:
             self._Xmean = X.mean(0)[None,:]
             self._Xstd = X.std(0)[None,:]
@@ -134,7 +133,7 @@ class GP(model):
     def _raw_predict(self,_Xnew,slices=None, full_cov=False):
         """
         Internal helper function for making predictions, does not account
-        for normalisation or likelihood
+        for normalization or likelihood
         """
         Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
         mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
@@ -172,10 +171,10 @@ class GP(model):
              - If a list of booleans, specifying which kernel parts are active
 
            If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
-           This is to allow for different normalisations of the output dimensions.
+           This is to allow for different normalizations of the output dimensions.
 
         """
-        #normalise X values
+        #normalize X values
         Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
         mu, var = self._raw_predict(Xnew, slices, full_cov)
 
@@ -187,7 +186,7 @@ class GP(model):
 
     def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
         """
-        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+        Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
 
         :param samples: the number of a posteriori samples to plot
         :param which_data: which if the training data to plot (default all)
@@ -203,7 +202,7 @@ class GP(model):
           - In higher dimensions, we've no implemented this yet !TODO!
 
         Can plot only part of the data and part of the posterior functions using which_data and which_functions
-        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
+        Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
         """
         if which_functions=='all':
             which_functions = [True]*self.kern.Nparts
@@ -221,7 +220,7 @@ class GP(model):
                 Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
                 gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
                 for i in range(samples):
-                    pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
+                    pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25)
             pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
             pb.xlim(xmin,xmax)
             ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index ff00faea..acf4f6c0 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -54,7 +54,7 @@ class sparse_GP(GP):
 
         GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
 
-        #normalise X uncertainty also
+        #normalize X uncertainty also
         if self.has_uncertain_inputs:
             self.X_uncertainty /= np.square(self._Xstd)
 
@@ -228,7 +228,7 @@ class sparse_GP(GP):
         return dL_dZ
 
     def _raw_predict(self, Xnew, slices, full_cov=False):
-        """Internal helper function for making predictions, does not account for normalisation"""
+        """Internal helper function for making predictions, does not account for normalization"""
 
         Kx = self.kern.K(self.Z, Xnew)
         mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py
index 43624e72..d2638784 100644
--- a/GPy/models/uncollapsed_sparse_GP.py
+++ b/GPy/models/uncollapsed_sparse_GP.py
@@ -93,7 +93,7 @@ class uncollapsed_sparse_GP(sparse_GP):
         return A+B+C+D+E
 
     def _raw_predict(self, Xnew, slices,full_cov=False):
-        """Internal helper function for making predictions, does not account for normalisation"""
+        """Internal helper function for making predictions, does not account for normalization"""
         Kx = self.kern.K(Xnew,self.Z)
         mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
 
diff --git a/GPy/util/Tango.py b/GPy/util/Tango.py
index 8035ffe6..eeb2e075 100644
--- a/GPy/util/Tango.py
+++ b/GPy/util/Tango.py
@@ -25,7 +25,7 @@ def fewerXticks(ax=None,divideby=2):
     ax.set_xticks(ax.get_xticks()[::divideby])
 
 
-coloursHex = {\
+colorsHex = {\
 "Aluminium6":"#2e3436",\
 "Aluminium5":"#555753",\
 "Aluminium4":"#888a85",\
@@ -54,9 +54,9 @@ coloursHex = {\
 "mediumButter":"#edd400",\
 "darkButter":"#c4a000"}
 
-darkList = [coloursHex['darkBlue'],coloursHex['darkRed'],coloursHex['darkGreen'], coloursHex['darkOrange'], coloursHex['darkButter'], coloursHex['darkPurple'], coloursHex['darkChocolate'], coloursHex['Aluminium6']]
-mediumList = [coloursHex['mediumBlue'], coloursHex['mediumRed'],coloursHex['mediumGreen'], coloursHex['mediumOrange'], coloursHex['mediumButter'], coloursHex['mediumPurple'], coloursHex['mediumChocolate'], coloursHex['Aluminium5']]
-lightList = [coloursHex['lightBlue'], coloursHex['lightRed'],coloursHex['lightGreen'], coloursHex['lightOrange'], coloursHex['lightButter'], coloursHex['lightPurple'], coloursHex['lightChocolate'], coloursHex['Aluminium4']]
+darkList = [colorsHex['darkBlue'],colorsHex['darkRed'],colorsHex['darkGreen'], colorsHex['darkOrange'], colorsHex['darkButter'], colorsHex['darkPurple'], colorsHex['darkChocolate'], colorsHex['Aluminium6']]
+mediumList = [colorsHex['mediumBlue'], colorsHex['mediumRed'],colorsHex['mediumGreen'], colorsHex['mediumOrange'], colorsHex['mediumButter'], colorsHex['mediumPurple'], colorsHex['mediumChocolate'], colorsHex['Aluminium5']]
+lightList = [colorsHex['lightBlue'], colorsHex['lightRed'],colorsHex['lightGreen'], colorsHex['lightOrange'], colorsHex['lightButter'], colorsHex['lightPurple'], colorsHex['lightChocolate'], colorsHex['Aluminium4']]
 
 def currentDark():
     return darkList[-1]
@@ -76,85 +76,85 @@ def nextLight():
     return lightList[-1]
 
 def reset():
-    while not darkList[0]==coloursHex['darkBlue']:
+    while not darkList[0]==colorsHex['darkBlue']:
         darkList.append(darkList.pop(0))
-    while not mediumList[0]==coloursHex['mediumBlue']:
+    while not mediumList[0]==colorsHex['mediumBlue']:
         mediumList.append(mediumList.pop(0))
-    while not lightList[0]==coloursHex['lightBlue']:
+    while not lightList[0]==colorsHex['lightBlue']:
         lightList.append(lightList.pop(0))
 
 def setLightFigures():
-    mpl.rcParams['axes.edgecolor']=coloursHex['Aluminium6']
-    mpl.rcParams['axes.facecolor']=coloursHex['Aluminium2']
-    mpl.rcParams['axes.labelcolor']=coloursHex['Aluminium6']
-    mpl.rcParams['figure.edgecolor']=coloursHex['Aluminium6']
-    mpl.rcParams['figure.facecolor']=coloursHex['Aluminium2']
-    mpl.rcParams['grid.color']=coloursHex['Aluminium6']
-    mpl.rcParams['savefig.edgecolor']=coloursHex['Aluminium2']
-    mpl.rcParams['savefig.facecolor']=coloursHex['Aluminium2']
-    mpl.rcParams['text.color']=coloursHex['Aluminium6']
-    mpl.rcParams['xtick.color']=coloursHex['Aluminium6']
-    mpl.rcParams['ytick.color']=coloursHex['Aluminium6']
+    mpl.rcParams['axes.edgecolor']=colorsHex['Aluminium6']
+    mpl.rcParams['axes.facecolor']=colorsHex['Aluminium2']
+    mpl.rcParams['axes.labelcolor']=colorsHex['Aluminium6']
+    mpl.rcParams['figure.edgecolor']=colorsHex['Aluminium6']
+    mpl.rcParams['figure.facecolor']=colorsHex['Aluminium2']
+    mpl.rcParams['grid.color']=colorsHex['Aluminium6']
+    mpl.rcParams['savefig.edgecolor']=colorsHex['Aluminium2']
+    mpl.rcParams['savefig.facecolor']=colorsHex['Aluminium2']
+    mpl.rcParams['text.color']=colorsHex['Aluminium6']
+    mpl.rcParams['xtick.color']=colorsHex['Aluminium6']
+    mpl.rcParams['ytick.color']=colorsHex['Aluminium6']
 
 def setDarkFigures():
-    mpl.rcParams['axes.edgecolor']=coloursHex['Aluminium2']
-    mpl.rcParams['axes.facecolor']=coloursHex['Aluminium6']
-    mpl.rcParams['axes.labelcolor']=coloursHex['Aluminium2']
-    mpl.rcParams['figure.edgecolor']=coloursHex['Aluminium2']
-    mpl.rcParams['figure.facecolor']=coloursHex['Aluminium6']
-    mpl.rcParams['grid.color']=coloursHex['Aluminium2']
-    mpl.rcParams['savefig.edgecolor']=coloursHex['Aluminium6']
-    mpl.rcParams['savefig.facecolor']=coloursHex['Aluminium6']
-    mpl.rcParams['text.color']=coloursHex['Aluminium2']
-    mpl.rcParams['xtick.color']=coloursHex['Aluminium2']
-    mpl.rcParams['ytick.color']=coloursHex['Aluminium2']
+    mpl.rcParams['axes.edgecolor']=colorsHex['Aluminium2']
+    mpl.rcParams['axes.facecolor']=colorsHex['Aluminium6']
+    mpl.rcParams['axes.labelcolor']=colorsHex['Aluminium2']
+    mpl.rcParams['figure.edgecolor']=colorsHex['Aluminium2']
+    mpl.rcParams['figure.facecolor']=colorsHex['Aluminium6']
+    mpl.rcParams['grid.color']=colorsHex['Aluminium2']
+    mpl.rcParams['savefig.edgecolor']=colorsHex['Aluminium6']
+    mpl.rcParams['savefig.facecolor']=colorsHex['Aluminium6']
+    mpl.rcParams['text.color']=colorsHex['Aluminium2']
+    mpl.rcParams['xtick.color']=colorsHex['Aluminium2']
+    mpl.rcParams['ytick.color']=colorsHex['Aluminium2']
 
 def hex2rgb(hexcolor):
     hexcolor = [hexcolor[1+2*i:1+2*(i+1)] for i in range(3)]
     r,g,b = [int(n,16) for n in hexcolor]
     return (r,g,b)
 
-coloursRGB = dict([(k,hex2rgb(i)) for k,i in coloursHex.items()])
+colorsRGB = dict([(k,hex2rgb(i)) for k,i in colorsHex.items()])
 
-cdict_RB = {'red' :((0.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][0]/256.),
-                     (.5,coloursRGB['mediumPurple'][0]/256.,coloursRGB['mediumPurple'][0]/256.),
-                     (1.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.)),
-            'green':((0.,coloursRGB['mediumRed'][1]/256.,coloursRGB['mediumRed'][1]/256.),
-                     (.5,coloursRGB['mediumPurple'][1]/256.,coloursRGB['mediumPurple'][1]/256.),
-                     (1.,coloursRGB['mediumBlue'][1]/256.,coloursRGB['mediumBlue'][1]/256.)),
-            'blue':((0.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.),
-                      (.5,coloursRGB['mediumPurple'][2]/256.,coloursRGB['mediumPurple'][2]/256.),
-                      (1.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.))}
+cdict_RB = {'red' :((0.,colorsRGB['mediumRed'][0]/256.,colorsRGB['mediumRed'][0]/256.),
+                     (.5,colorsRGB['mediumPurple'][0]/256.,colorsRGB['mediumPurple'][0]/256.),
+                     (1.,colorsRGB['mediumBlue'][0]/256.,colorsRGB['mediumBlue'][0]/256.)),
+            'green':((0.,colorsRGB['mediumRed'][1]/256.,colorsRGB['mediumRed'][1]/256.),
+                     (.5,colorsRGB['mediumPurple'][1]/256.,colorsRGB['mediumPurple'][1]/256.),
+                     (1.,colorsRGB['mediumBlue'][1]/256.,colorsRGB['mediumBlue'][1]/256.)),
+            'blue':((0.,colorsRGB['mediumRed'][2]/256.,colorsRGB['mediumRed'][2]/256.),
+                      (.5,colorsRGB['mediumPurple'][2]/256.,colorsRGB['mediumPurple'][2]/256.),
+                      (1.,colorsRGB['mediumBlue'][2]/256.,colorsRGB['mediumBlue'][2]/256.))}
 
-cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.),
-                     (.5,coloursRGB['mediumGreen'][0]/256.,coloursRGB['mediumGreen'][0]/256.),
-                     (1.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][0]/256.)),
-            'green':((0.,coloursRGB['mediumBlue'][1]/256.,coloursRGB['mediumBlue'][1]/256.),
-                     (.5,coloursRGB['mediumGreen'][1]/256.,coloursRGB['mediumGreen'][1]/256.),
-                     (1.,coloursRGB['mediumRed'][1]/256.,coloursRGB['mediumRed'][1]/256.)),
-            'blue':((0.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.),
-                      (.5,coloursRGB['mediumGreen'][2]/256.,coloursRGB['mediumGreen'][2]/256.),
-                      (1.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.))}
+cdict_BGR = {'red' :((0.,colorsRGB['mediumBlue'][0]/256.,colorsRGB['mediumBlue'][0]/256.),
+                     (.5,colorsRGB['mediumGreen'][0]/256.,colorsRGB['mediumGreen'][0]/256.),
+                     (1.,colorsRGB['mediumRed'][0]/256.,colorsRGB['mediumRed'][0]/256.)),
+            'green':((0.,colorsRGB['mediumBlue'][1]/256.,colorsRGB['mediumBlue'][1]/256.),
+                     (.5,colorsRGB['mediumGreen'][1]/256.,colorsRGB['mediumGreen'][1]/256.),
+                     (1.,colorsRGB['mediumRed'][1]/256.,colorsRGB['mediumRed'][1]/256.)),
+            'blue':((0.,colorsRGB['mediumBlue'][2]/256.,colorsRGB['mediumBlue'][2]/256.),
+                      (.5,colorsRGB['mediumGreen'][2]/256.,colorsRGB['mediumGreen'][2]/256.),
+                      (1.,colorsRGB['mediumRed'][2]/256.,colorsRGB['mediumRed'][2]/256.))}
 
 
-cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminium1'][0]/256.),
-                     (1./5,coloursRGB['Aluminium2'][0]/256.,coloursRGB['Aluminium2'][0]/256.),
-                     (2./5,coloursRGB['Aluminium3'][0]/256.,coloursRGB['Aluminium3'][0]/256.),
-                     (3./5,coloursRGB['Aluminium4'][0]/256.,coloursRGB['Aluminium4'][0]/256.),
-                     (4./5,coloursRGB['Aluminium5'][0]/256.,coloursRGB['Aluminium5'][0]/256.),
-                     (5./5,coloursRGB['Aluminium6'][0]/256.,coloursRGB['Aluminium6'][0]/256.)),
-           'green' :((0./5,coloursRGB['Aluminium1'][1]/256.,coloursRGB['Aluminium1'][1]/256.),
-                     (1./5,coloursRGB['Aluminium2'][1]/256.,coloursRGB['Aluminium2'][1]/256.),
-                     (2./5,coloursRGB['Aluminium3'][1]/256.,coloursRGB['Aluminium3'][1]/256.),
-                     (3./5,coloursRGB['Aluminium4'][1]/256.,coloursRGB['Aluminium4'][1]/256.),
-                     (4./5,coloursRGB['Aluminium5'][1]/256.,coloursRGB['Aluminium5'][1]/256.),
-                     (5./5,coloursRGB['Aluminium6'][1]/256.,coloursRGB['Aluminium6'][1]/256.)),
-            'blue' :((0./5,coloursRGB['Aluminium1'][2]/256.,coloursRGB['Aluminium1'][2]/256.),
-                     (1./5,coloursRGB['Aluminium2'][2]/256.,coloursRGB['Aluminium2'][2]/256.),
-                     (2./5,coloursRGB['Aluminium3'][2]/256.,coloursRGB['Aluminium3'][2]/256.),
-                     (3./5,coloursRGB['Aluminium4'][2]/256.,coloursRGB['Aluminium4'][2]/256.),
-                     (4./5,coloursRGB['Aluminium5'][2]/256.,coloursRGB['Aluminium5'][2]/256.),
-                     (5./5,coloursRGB['Aluminium6'][2]/256.,coloursRGB['Aluminium6'][2]/256.))}
+cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1'][0]/256.),
+                     (1./5,colorsRGB['Aluminium2'][0]/256.,colorsRGB['Aluminium2'][0]/256.),
+                     (2./5,colorsRGB['Aluminium3'][0]/256.,colorsRGB['Aluminium3'][0]/256.),
+                     (3./5,colorsRGB['Aluminium4'][0]/256.,colorsRGB['Aluminium4'][0]/256.),
+                     (4./5,colorsRGB['Aluminium5'][0]/256.,colorsRGB['Aluminium5'][0]/256.),
+                     (5./5,colorsRGB['Aluminium6'][0]/256.,colorsRGB['Aluminium6'][0]/256.)),
+           'green' :((0./5,colorsRGB['Aluminium1'][1]/256.,colorsRGB['Aluminium1'][1]/256.),
+                     (1./5,colorsRGB['Aluminium2'][1]/256.,colorsRGB['Aluminium2'][1]/256.),
+                     (2./5,colorsRGB['Aluminium3'][1]/256.,colorsRGB['Aluminium3'][1]/256.),
+                     (3./5,colorsRGB['Aluminium4'][1]/256.,colorsRGB['Aluminium4'][1]/256.),
+                     (4./5,colorsRGB['Aluminium5'][1]/256.,colorsRGB['Aluminium5'][1]/256.),
+                     (5./5,colorsRGB['Aluminium6'][1]/256.,colorsRGB['Aluminium6'][1]/256.)),
+            'blue' :((0./5,colorsRGB['Aluminium1'][2]/256.,colorsRGB['Aluminium1'][2]/256.),
+                     (1./5,colorsRGB['Aluminium2'][2]/256.,colorsRGB['Aluminium2'][2]/256.),
+                     (2./5,colorsRGB['Aluminium3'][2]/256.,colorsRGB['Aluminium3'][2]/256.),
+                     (3./5,colorsRGB['Aluminium4'][2]/256.,colorsRGB['Aluminium4'][2]/256.),
+                     (4./5,colorsRGB['Aluminium5'][2]/256.,colorsRGB['Aluminium5'][2]/256.),
+                     (5./5,colorsRGB['Aluminium6'][2]/256.,colorsRGB['Aluminium6'][2]/256.))}
 # cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256)
 # cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
 # cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 5506fbef..a8ec2539 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -46,7 +46,7 @@ def oil_100(seed=default_seed):
     return {'X': X, 'Y': Y, 'info': "Subsample of the oil data extracting 100 values randomly without replacement."}
 
 def pumadyn(seed=default_seed):
-    # Data is variance 1, no need to normalise.
+    # Data is variance 1, no need to normalize.
     data = np.loadtxt(os.path.join(data_path, 'pumadyn-32nm/Dataset.data.gz'))
     indices = np.random.permutation(data.shape[0])
     indicesTrain = indices[0:7168]
diff --git a/GPy/util/plot.py b/GPy/util/plot.py
index 8e71764d..295047b1 100644
--- a/GPy/util/plot.py
+++ b/GPy/util/plot.py
@@ -6,7 +6,7 @@ import Tango
 import pylab as pb
 import numpy as np
 
-def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
+def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],axes=None,**kwargs):
     if axes is None:
         axes = pb.gca()
     mu = mu.flatten()

From 417dac0080a8f8699d3d6e15662d94c84a783d59 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 13:54:47 +0000
Subject: [PATCH 054/105] Adding testing file for examples

---
 GPy/testing/examples_tests.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 GPy/testing/examples_tests.py

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
new file mode 100644
index 00000000..dd85ea34
--- /dev/null
+++ b/GPy/testing/examples_tests.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+class ExamplesTests(unittest.TestCase):
+    def test_check_model_returned(self):
+        pass
+
+    def test_model_checkgrads(self):
+        pass
+
+    def test_all_examples(self):
+        #Load models
+
+        #Loop through models
+        for model in models:
+
+            self.assertTrue(m.checkgrad())
+
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()

From 84119a19b3e82d84d201399404758fb9c8396839 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 13:55:43 +0000
Subject: [PATCH 055/105] Skipping tests

---
 GPy/testing/examples_tests.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index dd85ea34..25cfad04 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -13,12 +13,12 @@ class ExamplesTests(unittest.TestCase):
         pass
 
     def test_all_examples(self):
+        pass
         #Load models
 
         #Loop through models
-        for model in models:
-
-            self.assertTrue(m.checkgrad())
+        #for model in models:
+            #self.assertTrue(m.checkgrad())
 
 
 if __name__ == "__main__":

From 05ca5cfe6d3788f065da2e8f420fe5ba301a021a Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 14:03:23 +0000
Subject: [PATCH 056/105] working on psi cross terms

---
 GPy/kern/kern.py            |  8 ++++----
 GPy/testing/bgplvm_tests.py | 26 ++++++++++++++++++++++++++
 setup.py                    |  2 --
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 87e67f33..8cadf662 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -444,9 +444,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                target += p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target)
+                p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target)
             elif p2.name=='bias' and p1.name=='rbf':
-                target += p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S,target)
+                p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S,target)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -471,9 +471,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                target += p2.dpsi1_dmuS(partial.sum(1)*p1.variance,Z,mu,S,target_mu,target_S)
+                p2.dpsi1_dmuS(partial.sum(1)*p1.variance,Z,mu,S,target_mu,target_S)
             elif p2.name=='bias' and p1.name=='rbf':
-                target += p1.dpsi1_dmuS(partial.sum(2)*p2.variance,Z,mu,S,target_mu,target_S)
+                p1.dpsi1_dmuS(partial.sum(2)*p2.variance,Z,mu,S,target_mu,target_S)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index e3bd2b36..80e6fecd 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -45,6 +45,32 @@ class BGPLVMTests(unittest.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
+    def test_rbf_bias_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) +  GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
+        k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
+        m.constrain_positive('(rbf|bias|noise|white|S)')
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+    def test_linear_bias_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.linear(Q) +  GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
+        m.constrain_positive('(linear|bias|noise|white|S)')
+        m.randomize()
+        self.assertTrue(m.checkgrad())        
+
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
diff --git a/setup.py b/setup.py
index b701b74d..ca193fbc 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,5 @@ setup(name = 'GPy',
       #setup_requires=['sphinx'],
       #cmdclass = {'build_sphinx': BuildDoc},
       classifiers=[
-      "Development Status :: 1 - Alpha",
-      "Topic :: Machine Learning",
       "License :: OSI Approved :: BSD License"],
       )

From f23e3bbf7cf2d21cb56db0829906d9503a2117b7 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 14:04:07 +0000
Subject: [PATCH 057/105] a small demo for model tutorial

---
 GPy/examples/tutorials.py |  9 ++++++++-
 doc/GPy.examples.rst      | 14 +++-----------
 doc/GPy.rst               |  9 +++++++++
 doc/index.rst             |  3 ++-
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py
index be550e01..9d892b8e 100644
--- a/GPy/examples/tutorials.py
+++ b/GPy/examples/tutorials.py
@@ -90,7 +90,7 @@ def tuto_kernel_overview():
 
     # Sum of kernels
     k_add = k1.add(k2)
-    k_addorth = k1.add_orthogonal(k2)    
+    k_addorth = k1.add_orthogonal(k2)
 
     pb.figure(figsize=(8,8))
     pb.subplot(2,2,1)
@@ -199,3 +199,10 @@ def tuto_kernel_overview():
     WN[100] = 1.
     pb.subplot(3,4,i+1)
     pb.plot(X,WN,'b')
+
+def model_interaction():
+    X = np.random.randn(20,1)
+    Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
+    k = GPy.kern.rbf(1) + GPy.kern.bias(1)
+    return GPy.models.GP_regression(X,Y,kernel=k)
+
diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index ec283d21..d369de41 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -73,18 +73,10 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`tuto_GP_regression` Module
---------------------------------
+:mod:`tutorials` Module
+-----------------------
 
-.. automodule:: GPy.examples.tuto_GP_regression
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`tuto_kernel_overview` Module
-----------------------------------
-
-.. automodule:: GPy.examples.tuto_kernel_overview
+.. automodule:: GPy.examples.tutorials
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/doc/GPy.rst b/doc/GPy.rst
index 3fd4bcfd..242a22bc 100644
--- a/doc/GPy.rst
+++ b/doc/GPy.rst
@@ -9,6 +9,14 @@ GPy Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`test_coreg` Module
+------------------------
+
+.. automodule:: GPy.test_coreg
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Subpackages
 -----------
 
@@ -20,5 +28,6 @@ Subpackages
     GPy.kern
     GPy.likelihoods
     GPy.models
+    GPy.testing
     GPy.util
 
diff --git a/doc/index.rst b/doc/index.rst
index b62ff6a7..5066278f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -8,9 +8,10 @@ Welcome to GPy's documentation!
 For a quick start, you can have a look at one of the tutorials:
 
 * `Basic Gaussian process regression <tuto_GP_regression.html>`_  
+* `Interacting with models <tuto_interacting_with_models.html>`_
 * `A kernel overview <tuto_kernel_overview.html>`_ 
 * Advanced GP regression (Forthcoming)
-* Writting kernels (Forthcoming)
+* Writing kernels (Forthcoming)
 
 You may also be interested by some examples in the GPy/examples folder.
 

From a54e9bb82666d322551bbca7e222470af18b9a01 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 14:05:56 +0000
Subject: [PATCH 058/105] examples directory organized.

---
 GPy/examples/BGPLVM_demo.py                   | 37 --------
 GPy/examples/__init__.py                      |  5 +-
 GPy/examples/classification.py                | 77 +++++++++++++++
 GPy/examples/dimensionality_reduction.py      | 56 +++++++++++
 GPy/examples/{poisson.py => non_gaussian.py}  |  2 +-
 GPy/examples/oil_flow_demo.py                 | 57 -----------
 GPy/examples/regression.py                    | 80 ++++++++++++++--
 GPy/examples/sparse_GPLVM_demo.py             | 30 ------
 GPy/examples/sparse_GP_regression_demo.py     | 64 -------------
 GPy/examples/sparse_ep_fix.py                 | 95 -------------------
 .../uncertain_input_GP_regression_demo.py     | 27 ------
 GPy/examples/uncollapsed_GP_demo.py           | 32 -------
 GPy/examples/unsupervised.py                  | 25 -----
 13 files changed, 208 insertions(+), 379 deletions(-)
 delete mode 100644 GPy/examples/BGPLVM_demo.py
 create mode 100644 GPy/examples/dimensionality_reduction.py
 rename GPy/examples/{poisson.py => non_gaussian.py} (96%)
 delete mode 100644 GPy/examples/oil_flow_demo.py
 delete mode 100644 GPy/examples/sparse_GPLVM_demo.py
 delete mode 100644 GPy/examples/sparse_GP_regression_demo.py
 delete mode 100644 GPy/examples/sparse_ep_fix.py
 delete mode 100644 GPy/examples/uncertain_input_GP_regression_demo.py
 delete mode 100644 GPy/examples/uncollapsed_GP_demo.py
 delete mode 100644 GPy/examples/unsupervised.py

diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py
deleted file mode 100644
index e92856ab..00000000
--- a/GPy/examples/BGPLVM_demo.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-import pylab as pb
-import GPy
-np.random.seed(123344)
-
-N = 10
-M = 3
-Q = 2
-D = 4
-#generate GPLVM-like data
-X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
-K = k.K(X)
-Y = np.random.multivariate_normal(np.zeros(N),K,D).T
-
-k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
-# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
-# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-# k = GPy.kern.rbf(Q, ARD = False)  + GPy.kern.white(Q, 0.00001)
-
-m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
-m.constrain_positive('(rbf|bias|noise|white|S)')
-# m.constrain_fixed('S', 1)
-
-# pb.figure()
-# m.plot()
-# pb.title('PCA initialisation')
-# pb.figure()
-# m.optimize(messages = 1)
-# m.plot()
-# pb.title('After optimisation')
-m.ensure_default_constraints()
-m.randomize()
-m.checkgrad(verbose = 1)
diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py
index 2f3cf0f4..93994175 100644
--- a/GPy/examples/__init__.py
+++ b/GPy/examples/__init__.py
@@ -1,8 +1,7 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run:
-# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it!
 import classification
 import regression
-import unsupervised
+import dimensionality_reduction
+import non_gaussian
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 031cc915..c29b8084 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -107,3 +107,80 @@ def toy_linear_1d_classification(seed=default_seed):
     print(m)
 
     return m
+
+def sparse_toy_linear_1d_classification(seed=default_seed):
+    """
+    Simple 1D classification example
+    :param seed : seed value for data generation (defaults to default_seed).
+    :type seed: int
+    """
+
+    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
+    Y = data['Y'][:, 0:1]
+    Y[Y == -1] = 0
+
+    # Kernel object
+    kernel = GPy.kern.rbf(1)
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(Y,distribution)
+
+    Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
+
+    # Model definition
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
+
+    m.ensure_default_constraints()
+    # Optimize
+    m.update_likelihood_approximation()
+    # Parameters optimization:
+    m.optimize()
+    #m.EPEM() #FIXME
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()
+    pb.subplot(212)
+    m.plot()
+    print(m)
+
+    return m
+
+def sparse_crescent_data(inducing=10, seed=default_seed):
+    """Run a sparse Gaussian process classification on the crescent data. The demonstration builds a sparse GP model whose inducing inputs are sampled from the data, and uses EP to approximate the likelihood.
+
+    :param inducing : number of inducing inputs, sampled from the rows of the data.
+    :type inducing: int
+    :param seed : seed value for data generation.
+    :type seed: int
+    :returns: the optimized sparse GP model.
+    """
+
+    data = GPy.util.datasets.crescent_data(seed=seed)
+
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
+
+    sample = np.random.randint(0,data['X'].shape[0],inducing)
+    Z = data['X'][sample,:]
+    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
+
+    # create sparse GP EP model
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
+    m.ensure_default_constraints()
+    m.set('len',10.)
+
+    m.update_likelihood_approximation()
+
+    # optimize
+    m.optimize()
+    print(m)
+
+    # plot
+    m.plot()
+    return m
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
new file mode 100644
index 00000000..513d30d1
--- /dev/null
+++ b/GPy/examples/dimensionality_reduction.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import GPy
+
+default_seed = np.random.seed(123344)
+
+def BGPLVM(seed = default_seed):
+    N = 10
+    M = 3
+    Q = 2
+    D = 4
+    #generate GPLVM-like data
+    X = np.random.rand(N, Q)
+    k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+    K = k.K(X)
+    Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+
+    k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
+    # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
+    # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+    # k = GPy.kern.rbf(Q, ARD = False)  + GPy.kern.white(Q, 0.00001)
+
+    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
+    m.constrain_positive('(rbf|bias|noise|white|S)')
+    # m.constrain_fixed('S', 1)
+
+    # pb.figure()
+    # m.plot()
+    # pb.title('PCA initialisation')
+    # pb.figure()
+    # m.optimize(messages = 1)
+    # m.plot()
+    # pb.title('After optimisation')
+    m.ensure_default_constraints()
+    m.randomize()
+    m.checkgrad(verbose = 1)
+
+    return m
+
+def GPLVM_oil_100():
+    data = GPy.util.datasets.oil_100()
+
+    # create simple GP model
+    m = GPy.models.GPLVM(data['X'], 2)
+
+
+    # optimize
+    m.ensure_default_constraints()
+    m.optimize()
+
+    # plot
+    print(m)
+    return m
diff --git a/GPy/examples/poisson.py b/GPy/examples/non_gaussian.py
similarity index 96%
rename from GPy/examples/poisson.py
rename to GPy/examples/non_gaussian.py
index ce68e921..e893ec2c 100644
--- a/GPy/examples/poisson.py
+++ b/GPy/examples/non_gaussian.py
@@ -11,7 +11,7 @@ import GPy
 
 default_seed=10000
 
-def  toy_1d(seed=default_seed):
+def  toy_poisson_1d(seed=default_seed):
     """
     Simple 1D classification example
     :param seed : seed value for data generation (default is 4).
diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py
deleted file mode 100644
index 1e9f4f5a..00000000
--- a/GPy/examples/oil_flow_demo.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import cPickle as pickle
-import numpy as np
-import pylab as pb
-import GPy
-import pylab as plt
-np.random.seed(3)
-
-def plot_oil(X, theta, labels, label):
-    plt.figure()
-    X = X[:,np.argsort(theta)[:2]]
-    flow_type = (X[labels[:,0]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
-    flow_type = (X[labels[:,1]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
-    flow_type = (X[labels[:,2]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
-    plt.title(label)
-
-data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
-
-Y = data['DataTrn']
-N, D = Y.shape
-selected = np.random.permutation(N)[:350]
-labels = data['DataTrnLbls'][selected]
-Y = Y[selected]
-N, D = Y.shape
-Y -= Y.mean(axis=0)
-# Y /= Y.std(axis=0)
-
-Q = 5
-k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
-m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
-m.constrain_positive('(rbf|bias|S|linear|white|noise)')
-
-# m.unconstrain('noise')
-# m.constrain_fixed('noise_precision', 50.0)
-# m.unconstrain('white')
-# m.constrain_bounded('white', 1e-6, 10.0)
-# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
-#m.optimize(messages = True)
-# m.optimize('tnc', messages = True)
-# plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
-# # pb.figure()
-# m.plot()
-# pb.title('PCA initialisation')
-# pb.figure()
-# m.optimize(messages = 1)
-# m.plot()
-# pb.title('After optimisation')
-# m = GPy.models.GPLVM(Y, Q)
-# m.constrain_positive('(white|rbf|bias|noise)')
-# m.optimize()
-# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index d3442504..7ad23d24 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -108,9 +108,6 @@ def coregionalisation_toy2():
     pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
     return m
 
-
-
-
 def coregionalisation_toy():
     """
     A simple demonstration of coregionalisation on two sinusoidal functions
@@ -211,7 +208,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
 
     xlim = ax.get_xlim()
     ylim = ax.get_ylim()
-    
+
     # Now run a few optimizations
     models = []
     optim_point_x = np.empty(2)
@@ -219,18 +216,18 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
     np.random.seed(seed=seed)
     for i in range(0, model_restarts):
         kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
-        
+
         m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
         optim_point_x[0] = m.get('rbf_lengthscale')
         optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
-        
+
         # optimize
         m.ensure_default_constraints()
         m.optimize(xtol=1e-6,ftol=1e-6)
 
         optim_point_x[1] = m.get('rbf_lengthscale')
         optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
-        
+
         pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
         models.append(m)
 
@@ -264,7 +261,7 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
             total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
             noise_var *= total_var
             signal_var *= total_var
-            
+
             kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
 
             model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
@@ -273,3 +270,70 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
         lls.append(length_scale_lls)
     return np.array(lls)
 
+def sparse_GP_regression_1D(N = 400, M = 5):
+    """Run a 1D example of a sparse GP regression."""
+    # sample inputs and outputs
+    X = np.random.uniform(-3.,3.,(N,1))
+    Y = np.sin(X)+np.random.randn(N,1)*0.05
+    # construct kernel
+    rbf =  GPy.kern.rbf(1)
+    noise = GPy.kern.white(1)
+    kernel = rbf + noise
+    # create simple GP model
+    m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+
+    m.constrain_positive('(variance|lengthscale|precision)')
+
+    m.checkgrad(verbose=1)
+    m.optimize('tnc', messages = 1)
+    m.plot()
+    return m
+
+def sparse_GP_regression_2D(N = 400, M = 50):
+    """Run a 2D example of a sparse GP regression."""
+    X = np.random.uniform(-3.,3.,(N,2))
+    Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
+
+    # construct kernel
+    rbf =  GPy.kern.rbf(2)
+    noise = GPy.kern.white(2)
+    kernel = rbf + noise
+
+    # create simple GP model
+    m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M)
+
+    # constrain all parameters to be positive (but not inducing inputs)
+    m.constrain_positive('(variance|lengthscale|precision)')
+    m.set('len',2.)
+
+    m.checkgrad()
+
+    # optimize and plot
+    pb.figure()
+    m.optimize('tnc', messages = 1)
+    m.plot()
+    print(m)
+    return m
+
+def uncertain_inputs_sparse_regression():
+    """Run a 1D example of a sparse GP regression with uncertain inputs."""
+    # sample inputs and outputs
+    S = np.ones((20,1))
+    X = np.random.uniform(-3.,3.,(20,1))
+    Y = np.sin(X)+np.random.randn(20,1)*0.05
+    likelihood = GPy.likelihoods.Gaussian(Y)
+    Z = np.random.uniform(-3.,3.,(7,1))
+
+    k = GPy.kern.rbf(1) + GPy.kern.white(1)
+
+    # create simple GP model
+    m = GPy.models.sparse_GP(X, likelihood, kernel=k, Z=Z, X_uncertainty=S)
+
+    # constrain all parameters to be positive
+    m.constrain_positive('(variance|prec)')
+
+    # optimize and plot
+    m.optimize('tnc', max_f_eval = 1000, messages=1)
+    m.plot()
+    print(m)
+    return m
diff --git a/GPy/examples/sparse_GPLVM_demo.py b/GPy/examples/sparse_GPLVM_demo.py
deleted file mode 100644
index 5df72b8d..00000000
--- a/GPy/examples/sparse_GPLVM_demo.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-import pylab as pb
-import GPy
-np.random.seed(1)
-print "sparse GPLVM with RBF kernel"
-
-N = 100
-M = 8
-Q = 1
-D = 2
-#generate GPLVM-like data
-X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
-K = k.K(X)
-Y = np.random.multivariate_normal(np.zeros(N),K,D).T
-
-m = GPy.models.sparse_GPLVM(Y, Q, M=M)
-m.constrain_positive('(rbf|bias|noise|white)')
-
-pb.figure()
-m.plot()
-pb.title('PCA initialisation')
-pb.figure()
-m.optimize(messages = 1)
-m.plot()
-pb.title('After optimisation')
diff --git a/GPy/examples/sparse_GP_regression_demo.py b/GPy/examples/sparse_GP_regression_demo.py
deleted file mode 100644
index 808d943f..00000000
--- a/GPy/examples/sparse_GP_regression_demo.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-"""
-Sparse Gaussian Processes regression with an RBF kernel
-"""
-import pylab as pb
-import numpy as np
-import GPy
-np.random.seed(2)
-pb.ion()
-N = 400
-M = 5
-
-######################################
-## 1 dimensional example
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(N,1))
-Y = np.sin(X)+np.random.randn(N,1)*0.05
-
-# construct kernel
-rbf =  GPy.kern.rbf(1)
-noise = GPy.kern.white(1)
-kernel = rbf + noise
-
-# create simple GP model
-m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
-
-m.constrain_positive('(variance|lengthscale|precision)')
-
-m.checkgrad(verbose=1)
-m.optimize('tnc', messages = 1)
-m.plot()
-
-######################################
-## 2 dimensional example
-
-# # sample inputs and outputs
-# X = np.random.uniform(-3.,3.,(N,2))
-# Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
-
-# # construct kernel
-# rbf =  GPy.kern.rbf(2)
-# noise = GPy.kern.white(2)
-# kernel = rbf + noise
-
-# # create simple GP model
-# m2 = GPy.models.sparse_GP_regression(X,Y,kernel, M = 50)
-# create simple GP model
-
-# # contrain all parameters to be positive (but not inducing inputs)
-# m2.constrain_positive('(variance|lengthscale|precision)')
-
-# #check gradient FIXME unit test please
-# m2.checkgrad()
-
-# # optimize and plot
-# pb.figure()
-# m2.optimize('tnc', messages = 1)
-# m2.plot()
-# print(m2)
diff --git a/GPy/examples/sparse_ep_fix.py b/GPy/examples/sparse_ep_fix.py
deleted file mode 100644
index acbd506c..00000000
--- a/GPy/examples/sparse_ep_fix.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-"""
-Sparse Gaussian Processes regression with an RBF kernel
-"""
-import pylab as pb
-import numpy as np
-import GPy
-np.random.seed(2)
-N = 500
-M = 5
-
-default_seed=10000
-
-def crescent_data(inducing=10, seed=default_seed):
-    """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
-
-    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
-    :param seed : seed value for data generation.
-    :type seed: int
-    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
-    """
-
-    data = GPy.util.datasets.crescent_data(seed=seed)
-
-    # Kernel object
-    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
-
-    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
-
-    sample = np.random.randint(0,data['X'].shape[0],inducing)
-    Z = data['X'][sample,:]
-    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
-
-    # create sparse GP EP model
-    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
-    m.ensure_default_constraints()
-
-    m.update_likelihood_approximation()
-    print(m)
-
-    # optimize
-    m.optimize()
-    print(m)
-
-    # plot
-    m.plot()
-    return m
-
-
-def toy_linear_1d_classification(seed=default_seed):
-    """
-    Simple 1D classification example
-    :param seed : seed value for data generation (default is 4).
-    :type seed: int
-    """
-
-    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
-    Y = data['Y'][:, 0:1]
-    Y[Y == -1] = 0
-
-    # Kernel object
-    kernel = GPy.kern.rbf(1)
-
-    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(Y,distribution)
-
-    Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
-
-    # Model definition
-    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
-
-    m.ensure_default_constraints()
-    # Optimize
-    m.update_likelihood_approximation()
-    # Parameters optimization:
-    m.optimize()
-    #m.EPEM() #FIXME
-
-    # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
-    print(m)
-
-    return m
-
diff --git a/GPy/examples/uncertain_input_GP_regression_demo.py b/GPy/examples/uncertain_input_GP_regression_demo.py
deleted file mode 100644
index f0be5fe2..00000000
--- a/GPy/examples/uncertain_input_GP_regression_demo.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import pylab as pb
-import numpy as np
-import GPy
-pb.ion()
-pb.close('all')
-
-
-# sample inputs and outputs
-S = np.ones((20,1))
-X = np.random.uniform(-3.,3.,(20,1))
-Y = np.sin(X)+np.random.randn(20,1)*0.05
-
-k = GPy.kern.rbf(1) + GPy.kern.white(1)
-
-# create simple GP model
-m = GPy.models.sparse_GP_regression(X,Y,X_uncertainty=S,kernel=k)
-
-# contrain all parameters to be positive
-m.constrain_positive('(variance|prec)')
-
-# optimize and plot
-m.optimize('tnc', max_f_eval = 1000, messages=1)
-m.plot()
-print(m)
diff --git a/GPy/examples/uncollapsed_GP_demo.py b/GPy/examples/uncollapsed_GP_demo.py
deleted file mode 100644
index 5dc1ae1d..00000000
--- a/GPy/examples/uncollapsed_GP_demo.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-"""
-Sparse Gaussian Processes regression with an RBF kernel, 
-using the uncollapsed sparse GP (where the distribution of the 
-inducing points is explicitley represented)
-"""
-import pylab as pb
-import numpy as np
-import GPy
-np.random.seed(2)
-pb.ion()
-N = 500
-M = 20
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(N,1))
-Y = np.sin(X)+np.random.randn(N,1)*0.05
-
-kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
-
-# create simple GP model
-m = GPy.models.uncollapsed_sparse_GP(X, Y, kernel=kernel, M=M)#, X_uncertainty=np.zeros_like(X)+0.01)
-
-# contrain all parameters to be positive
-m.ensure_default_constraints()
-m.checkgrad()
-# optimize and plot
-m.plot()
diff --git a/GPy/examples/unsupervised.py b/GPy/examples/unsupervised.py
deleted file mode 100644
index 08d81e05..00000000
--- a/GPy/examples/unsupervised.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-Usupervised learning with Gaussian Processes.
-"""
-import pylab as pb
-import numpy as np
-import GPy
-
-
-######################################
-## Oil data subsampled to 100 points.
-def oil_100():
-    data = GPy.util.datasets.oil_100()
-
-    # create simple GP model
-    m = GPy.models.GPLVM(data['X'], 2)
-
-
-    # optimize
-    m.ensure_default_constraints()
-    m.optimize()
-
-    # plot
-    print(m)
-    return m
-

From ea6da11aecdc62c40579fe5a2dae9aec4fce6458 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 14:26:26 +0000
Subject: [PATCH 059/105] Examples working

---
 GPy/examples/classification.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index c29b8084..77bd0b79 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -129,7 +129,8 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
 
     # Model definition
-    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=True)
+    m.set('len',.5)
 
     m.ensure_default_constraints()
     # Optimize

From 9b11424f1fa8baa4275395492c523a8ce827ed8f Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 14:28:20 +0000
Subject: [PATCH 060/105] implemented psi2 'cross terms' for rbfXbias

---
 GPy/kern/kern.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 8cadf662..b2970674 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -389,6 +389,11 @@ class kern(parameterised):
                 target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
             elif p2.name=='bias' and p1.name=='rbf':
                 target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
+            #linear X bias
+            elif p1.name=='bias' and p2.name=='linear':
+                raise NotImplementedError
+            elif p2.name=='bias' and p1.name=='linear':
+                raise NotImplementedError
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -396,7 +401,6 @@ class kern(parameterised):
                 raise NotImplementedError #TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
-
         return target
 
     def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
@@ -417,11 +421,11 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target[ps2])
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2.,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1*2.,Z,mu,S,target[ps1])
             elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance,Z,mu,S,target[ps1])
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -444,9 +448,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target)
+                p2.dpsi1_dX(dL_dpsi2.sum(1).T*p1.variance,Z,mu,S,target)
             elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S,target)
+                p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance,Z,mu,S,target)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -471,9 +475,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dmuS(partial.sum(1)*p1.variance,Z,mu,S,target_mu,target_S)
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
             elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dmuS(partial.sum(2)*p2.variance,Z,mu,S,target_mu,target_S)
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO

From d512e9a160968656278f9678e43292ef01965a06 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 14:40:02 +0000
Subject: [PATCH 061/105] temporarily removed a test (linear X bias)

---
 GPy/kern/linear.py          | 15 ++++++++-------
 GPy/testing/bgplvm_tests.py |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 7d817f62..ef6b72bb 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -81,6 +81,13 @@ class linear(kernpart):
             self._K_computations(X, X2)
             target += np.sum(self._dot_product*dL_dK)
 
+    def dKdiag_dtheta(self,dL_dKdiag, X, target):
+        tmp = dL_dKdiag[:,None]*X**2
+        if self.ARD:
+            target += tmp.sum(0)
+        else:
+            target += tmp.sum()
+
     def dK_dX(self,dL_dK,X,X2,target):
         target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
 
@@ -92,13 +99,6 @@ class linear(kernpart):
         self._psi_computations(Z,mu,S)
         target += np.sum(self.variances*self.mu2_S,1)
 
-    def dKdiag_dtheta(self,dL_dKdiag, X, target):
-        tmp = dL_dKdiag[:,None]*X**2
-        if self.ARD:
-            target += tmp.sum(0)
-        else:
-            target += tmp.sum()
-
     def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         tmp = dL_dpsi0[:, None] * self.mu2_S
@@ -134,6 +134,7 @@ class linear(kernpart):
         self._psi_computations(Z,mu,S)
         psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
         target += psi2.sum(-1)
+        #TODO: this could be faster using np.tensordot
 
     def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index 80e6fecd..dda92b90 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -58,6 +58,7 @@ class BGPLVMTests(unittest.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
+    @unittest.skip('psi2 cross terms are NotImplemented for this combination')
     def test_linear_bias_kern(self):
         N, M, Q, D = 10, 3, 2, 4
         X = np.random.rand(N, Q)

From 4081a1526ae56976a19122b16d5a92c8606b36ab Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 14:47:48 +0000
Subject: [PATCH 062/105] attempted to make sparse models more stable through
 ordered multiplication

---
 GPy/models/sparse_GP.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index f1439f76..f70938c9 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -103,8 +103,12 @@ class sparse_GP(GP):
 
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
+        tmp = np.dot(self.Lmi.T, self.LBi.T)
+        self.C = np.dot(tmp,tmp.T)
+        #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
+        #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
+        tmp = np.dot(self.C,self.psi1V/sf)
+        self.E = np.dot(tmp,tmp.T)
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()

From e14576f5c9395ce2874109a2af0c5dcb8ff95a8c Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 14:53:09 +0000
Subject: [PATCH 063/105] Examples are working

---
 GPy/examples/regression.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 7ad23d24..6c22b68e 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -41,10 +41,6 @@ def rogers_girolami_olympics():
     print(m)
     return m
 
-def della_gatta_TRP63_gene_expression(number=942):
-    """Run a standard Gaussian process regression on the della Gatta et al TRP63 Gene Expression data set for a given gene number."""
-
-
 def toy_rbf_1d_50():
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d_50()
@@ -127,7 +123,7 @@ def coregionalisation_toy():
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.ensure_default_constraints()
-    #m.optimize()
+    m.optimize()
 
     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
@@ -155,7 +151,6 @@ def coregionalisation_sparse():
 
     M = 40
     Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None]))
-    #Z = X.copy()
 
     k1 = GPy.kern.rbf(1)
     k2 = GPy.kern.coregionalise(2,2)
@@ -181,7 +176,6 @@ def coregionalisation_sparse():
     y = pb.ylim()[0]
     pb.plot(Z[:,0][Z[:,1]==0],np.zeros(np.sum(Z[:,1]==0))+y,'r|',mew=2)
     pb.plot(Z[:,0][Z[:,1]==1],np.zeros(np.sum(Z[:,1]==1))+y,'g|',mew=2)
-    print Z
     return m
 
 

From dfef8dc708f1374540aab5b296b127dc38159bed Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 14:54:11 +0000
Subject: [PATCH 064/105] setup.py requires nose now

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ca193fbc..b14c907e 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ setup(name = 'GPy',
       long_description=read('README.md'),
       #ext_modules =  [Extension(name = 'GPy.kern.lfmUpsilonf2py',
       #          sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
-      install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
+      install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
       extras_require = {
         'docs':['Sphinx', 'ipython'],
       },

From d71aabceb50bd0a50f9105c569fd499adcfd2069 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 14:58:42 +0000
Subject: [PATCH 065/105] Changed example tests

---
 GPy/testing/examples_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 25cfad04..0e41a541 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -13,7 +13,7 @@ class ExamplesTests(unittest.TestCase):
         pass
 
     def test_all_examples(self):
-        pass
+        examples_module = __import__("GPy").examples
         #Load models
 
         #Loop through models

From 5b86fce1b3f206edfed2be5717f93fa1e925a023 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 16:46:47 +0000
Subject: [PATCH 066/105] changes tie_param to tie_params

---
 GPy/core/parameterised.py      |  2 +-
 GPy/examples/classification.py |  2 +-
 GPy/examples/tutorials.py      |  2 +-
 GPy/kern/kern.py               | 25 ++++++++++++++++++++++---
 GPy/testing/bgplvm_tests.py    |  2 +-
 GPy/testing/kernel_tests.py    |  2 +-
 6 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index 007f1b25..b5d880a3 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -56,7 +56,7 @@ class parameterised(object):
         return copy.deepcopy(self)
 
 
-    def tie_param(self, which):
+    def tie_params(self, which):
         matches = self.grep_param_names(which)
         assert matches.size > 0, "need at least something to tie together"
         if len(self.tied_indices):
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 77bd0b79..1d101d76 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -62,7 +62,7 @@ def oil():
 
     # Contrain all parameters to be positive
     m.constrain_positive('')
-    m.tie_param('lengthscale')
+    m.tie_params('lengthscale')
     m.update_likelihood_approximation()
 
     # Optimize
diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py
index 9d892b8e..2bc9ba60 100644
--- a/GPy/examples/tutorials.py
+++ b/GPy/examples/tutorials.py
@@ -138,7 +138,7 @@ def tuto_kernel_overview():
 
     k.constrain_positive('var')
     k.constrain_fixed(np.array([1]),1.75)
-    k.tie_param('len')
+    k.tie_params('len')
     k.unconstrain('white')
     k.constrain_bounded('white',lower=1e-5,upper=.5)
     print k
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index b2970674..be45fa70 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -237,7 +237,7 @@ class kern(parameterised):
         for i in range(K1.Nparam + K2.Nparam):
             index = np.where(index_param==i)[0]
             if index.size > 1:
-                self.tie_param(index)
+                self.tie_params(index)
         for i in prev_constr_pos:
             self.constrain_positive(np.where(index_param==i)[0])
         for i in prev_constr_neg:
@@ -391,9 +391,13 @@ class kern(parameterised):
                 target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
             #linear X bias
             elif p1.name=='bias' and p2.name=='linear':
-                raise NotImplementedError
+                tmp = np.zeros((mu.shape[0],Z.shape[0]))
+                p2.psi1(Z,mu,S,tmp)
+                target += p1.variance*(tmp[:,:,None] + tmp[:,None,:])
             elif p2.name=='bias' and p1.name=='linear':
-                raise NotImplementedError
+                tmp = np.zeros((mu.shape[0],Z.shape[0]))
+                p1.psi1(Z,mu,S,tmp)
+                target += p2.variance*(tmp[:,:,None] + tmp[:,None,:])
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -426,6 +430,11 @@ class kern(parameterised):
             elif p2.name=='bias' and p1.name=='rbf':
                 p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
+            #linear X bias
+            elif p1.name=='bias' and p2.name=='linear':
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2., Z, mu, S, target[ps1])
+            elif p2.name=='bias' and p1.name=='linear':
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2., Z, mu, S, target[ps1])
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -451,6 +460,11 @@ class kern(parameterised):
                 p2.dpsi1_dX(dL_dpsi2.sum(1).T*p1.variance,Z,mu,S,target)
             elif p2.name=='bias' and p1.name=='rbf':
                 p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance,Z,mu,S,target)
+            #linear X bias
+            elif p1.name=='bias' and p2.name=='linear':
+                p2.dpsi1_dZ(dL_dpsi2.sum(1).T*p1.variance, Z, mu, S, target)
+            elif p2.name=='bias' and p1.name=='linear':
+                p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance, Z, mu, S, target)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -478,6 +492,11 @@ class kern(parameterised):
                 p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
             elif p2.name=='bias' and p1.name=='rbf':
                 p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
+            #linear X bias
+            elif p1.name=='bias' and p2.name=='linear':
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2., Z, mu, S, target_mu, target_S)
+            elif p2.name=='bias' and p1.name=='linear':
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2., Z, mu, S, target_mu, target_S)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index dda92b90..b182c1a8 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -58,7 +58,7 @@ class BGPLVMTests(unittest.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
-    @unittest.skip('psi2 cross terms are NotImplemented for this combination')
+    #@unittest.skip('psi2 cross terms are NotImplemented for this combination')
     def test_linear_bias_kern(self):
         N, M, Q, D = 10, 3, 2, 4
         X = np.random.rand(N, Q)
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index bb809ea6..f1762db8 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -8,7 +8,7 @@ import GPy
 class KernelTests(unittest.TestCase):
     def test_kerneltie(self):
         K = GPy.kern.rbf(5, ARD=True)
-        K.tie_param('[01]')
+        K.tie_params('[01]')
         K.constrain_fixed('2')
         X = np.random.rand(5,5)
         Y = np.ones((5,1))

From 0971a3faa4d6d88b75fdcf69f7114c0a820e0c99 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 16:50:44 +0000
Subject: [PATCH 067/105] changed the filename from BGPLVM to Bayesian_GPLVM to
 tidy the namespace a little

---
 GPy/models/{BGPLVM.py => Bayesian_GPLVM.py} | 0
 GPy/models/__init__.py                      | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename GPy/models/{BGPLVM.py => Bayesian_GPLVM.py} (100%)

diff --git a/GPy/models/BGPLVM.py b/GPy/models/Bayesian_GPLVM.py
similarity index 100%
rename from GPy/models/BGPLVM.py
rename to GPy/models/Bayesian_GPLVM.py
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index c099d0d5..22aa803c 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -10,4 +10,4 @@ from GPLVM import GPLVM
 from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
-from BGPLVM import Bayesian_GPLVM
+from Bayesian_GPLVM import Bayesian_GPLVM

From c39af496a633b44b2e42356a444dba81c6630b65 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 17:05:18 +0000
Subject: [PATCH 068/105] Added test generator (not quite finished yet)

---
 GPy/examples/regression.py    |  4 +--
 GPy/examples/tutorials.py     | 18 +++-------
 GPy/testing/examples_tests.py | 62 ++++++++++++++++++++++++++++-------
 3 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 6c22b68e..f5d0d3b1 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -194,7 +194,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
     # Remove the mean (no bias kernel to ensure signal/noise is in RBF/white)
     data['Y'] = data['Y'] - np.mean(data['Y'])
 
-    lls = GPy.examples.regression.contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
+    lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
     pb.contour(length_scales, log_SNRs, np.exp(lls), 20)
     ax = pb.gca()
     pb.xlabel('length scale')
@@ -229,7 +229,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
     ax.set_ylim(ylim)
     return (models, lls)
 
-def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf):
+def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf):
     """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
 
     :data_set: A data set from the utils.datasets director.
diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py
index 9d892b8e..a199aba9 100644
--- a/GPy/examples/tutorials.py
+++ b/GPy/examples/tutorials.py
@@ -6,14 +6,14 @@
 Code of Tutorials
 """
 
+import pylab as pb
+pb.ion()
+import numpy as np
+import GPy
+
 def tuto_GP_regression():
     """The detailed explanations of the commands used in this file can be found in the tutorial section"""
 
-    import pylab as pb
-    pb.ion()
-    import numpy as np
-    import GPy
-
     X = np.random.uniform(-3.,3.,(20,1))
     Y = np.sin(X) + np.random.randn(20,1)*0.05
 
@@ -39,11 +39,6 @@ def tuto_GP_regression():
     #  2-dimensional example  #
     ###########################
 
-    import pylab as pb
-    pb.ion()
-    import numpy as np
-    import GPy
-
     # sample inputs and outputs
     X = np.random.uniform(-3.,3.,(50,2))
     Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
@@ -67,9 +62,6 @@ def tuto_GP_regression():
 
 def tuto_kernel_overview():
     """The detailed explanations of the commands used in this file can be found in the tutorial section"""
-    import pylab as pb
-    import numpy as np
-    import GPy
     pb.ion()
 
     ker1 = GPy.kern.rbf(1)  # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 0e41a541..967d7a6a 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -4,23 +4,63 @@
 import unittest
 import numpy as np
 import GPy
+import inspect
+import pkgutil
+import os
+
 
 class ExamplesTests(unittest.TestCase):
-    def test_check_model_returned(self):
-        pass
+    def _checkgrad(self, model):
+        self.assertTrue(model.checkgrad())
 
-    def test_model_checkgrads(self):
-        pass
+    def _model_instance(self, model):
+        self.assertTrue(isinstance(model, GPy.models))
 
-    def test_all_examples(self):
-        examples_module = __import__("GPy").examples
-        #Load models
+"""
+def model_instance_generator(model):
+    def check_model_returned(self):
+        self._model_instance(model)
+    return check_model_returned
 
-        #Loop through models
-        #for model in models:
-            #self.assertTrue(m.checkgrad())
+def checkgrads_generator(model):
+    def model_checkgrads(self):
+        self._checkgrad(model)
+    return model_checkgrads
+"""
+def model_checkgrads(model):
+    assert model.checkgrad() is True
 
+def model_instance(model):
+    assert model.checkgrad() is True
+
+def test_models():
+    examples_path = os.path.dirname(GPy.examples.__file__)
+    #Load modules
+    for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]):
+        #Load examples
+        module_examples = loader.find_module(module_name).load_module(module_name)
+        functions = [ func for func in [inspect.getmembers(module_examples, predicate=inspect.isfunction)[0]] if func[0].startswith('_') is False ]
+        for example in functions:
+            print "Testing example: ", example[0]
+            #Generate model
+            model = example[1]()
+            print model
+
+            #Create tests for instance check
+            """
+            test = model_instance_generator(model)
+            test.__name__ = 'test_instance_%s' % example[0]
+            setattr(ExamplesTests, test.__name__, test)
+
+            #Create tests for checkgrads check
+            test = checkgrads_generator(model)
+            test.__name__ = 'test_checkgrads_%s' % example[0]
+            setattr(ExamplesTests, test.__name__, test)
+            """
+            model_checkgrads.description = 'test_checkgrads_%s' % example[0]
+            yield model_checkgrads, model
+            model_instance.description = 'test_checkgrads_%s' % example[0]
+            yield model_instance, model
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
-    unittest.main()

From a012cd64a59187d108dd376d91c22e75231b0857 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 17:25:08 +0000
Subject: [PATCH 069/105] Small changes

---
 GPy/kern/rbf.py      |  1 +
 doc/GPy.examples.rst | 14 +++-----------
 doc/GPy.rst          |  1 +
 3 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 3c3d59e6..133895ff 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -55,6 +55,7 @@ class rbf(kernpart):
         self._X, self._X2, self._params = np.empty(shape=(3,1))
 
     def _get_params(self):
+        foo
         return np.hstack((self.variance,self.lengthscale))
 
     def _set_params(self,x):
diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index ec283d21..d369de41 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -73,18 +73,10 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`tuto_GP_regression` Module
---------------------------------
+:mod:`tutorials` Module
+-----------------------
 
-.. automodule:: GPy.examples.tuto_GP_regression
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`tuto_kernel_overview` Module
-----------------------------------
-
-.. automodule:: GPy.examples.tuto_kernel_overview
+.. automodule:: GPy.examples.tutorials
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/doc/GPy.rst b/doc/GPy.rst
index 3fd4bcfd..e56e48e1 100644
--- a/doc/GPy.rst
+++ b/doc/GPy.rst
@@ -20,5 +20,6 @@ Subpackages
     GPy.kern
     GPy.likelihoods
     GPy.models
+    GPy.testing
     GPy.util
 

From 5c768231eb94c538ecbe87dc70bc124ffb9fbf1e Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 17:35:01 +0000
Subject: [PATCH 070/105] Add example test generation

---
 GPy/testing/examples_tests.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 967d7a6a..9636b286 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -27,19 +27,27 @@ def checkgrads_generator(model):
         self._checkgrad(model)
     return model_checkgrads
 """
+
 def model_checkgrads(model):
     assert model.checkgrad() is True
 
+
 def model_instance(model):
     assert model.checkgrad() is True
 
+
 def test_models():
     examples_path = os.path.dirname(GPy.examples.__file__)
     #Load modules
     for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]):
         #Load examples
         module_examples = loader.find_module(module_name).load_module(module_name)
-        functions = [ func for func in [inspect.getmembers(module_examples, predicate=inspect.isfunction)[0]] if func[0].startswith('_') is False ]
+        print "MODULE", module_examples
+        print "Before"
+        print inspect.getmembers(module_examples, predicate=inspect.isfunction)
+        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ]
+        print "After"
+        print functions
         for example in functions:
             print "Testing example: ", example[0]
             #Generate model
@@ -59,7 +67,7 @@ def test_models():
             """
             model_checkgrads.description = 'test_checkgrads_%s' % example[0]
             yield model_checkgrads, model
-            model_instance.description = 'test_checkgrads_%s' % example[0]
+            model_instance.description = 'test_instance_%s' % example[0]
             yield model_instance, model
 
 if __name__ == "__main__":

From cff1ad5db8d8c0736dab36124e31e63710c76e93 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 17:48:38 +0000
Subject: [PATCH 071/105] New rational quadratic kernel

---
 GPy/kern/__init__.py           |  2 +-
 GPy/kern/constructors.py       | 16 +++++++
 GPy/kern/rational_quadratic.py | 79 ++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/rational_quadratic.py

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 132fad41..6852384c 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index b848821b..983674b0 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -22,6 +22,7 @@ from prod import prod as prodpart
 from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
 from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
+from rational_quadratic import rational_quadratic as rational_quadraticpart
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -280,3 +281,18 @@ def coregionalise(Nout,R=1, W=None, kappa=None):
     return kern(1,[p])
 
 
+def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
+    """
+    Construct a rational quadratic kernel.
+
+    :param D: the number of input dimensions
+    :type D: int (D=1 is the only value currently supported)
+    :param variance: the variance :math:`\sigma^2`
+    :type variance: float
+    :param lengthscale: the lengthscale :math:`\ell`
+    :type lengthscale: float
+    :rtype: kern object
+
+    """
+    part = rational_quadraticpart(D,variance, lengthscale, power)
+    return kern(D, [part])
diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py
new file mode 100644
index 00000000..b71d1354
--- /dev/null
+++ b/GPy/kern/rational_quadratic.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import kernpart
+import numpy as np
+
+class rational_quadratic(kernpart):
+    """
+    rational quadratic kernel
+
+    .. math::
+
+       k(r) = \sigma^2 \left(1 + \\frac{r^2}{2 \ell^2}\\right)^{- \\alpha} \ \ \ \ \  \\text{ where  } r^2 = (x-y)^2
+
+    :param D: the number of input dimensions
+    :type D: int (D=1 is the only value currently supported)
+    :param variance: the variance :math:`\sigma^2`
+    :type variance: float
+    :param lengthscale: the lengthscale :math:`\ell`
+    :type lengthscale: float
+    :rtype: kernpart object
+
+    """
+    def __init__(self,D,variance=1.,lengthscale=1.,power=1.):
+        assert D == 1, "For this kernel we assume D=1"
+        self.D = D
+        self.Nparam = 3
+        self.name = 'rat_quad'
+        self.variance = variance
+        self.lengthscale = lengthscale
+        self.power = power
+
+    def _get_params(self):
+        return np.hstack((self.variance,self.lengthscale,self.power))
+
+    def _set_params(self,x):
+        self.variance = x[0]
+        self.lengthscale = x[1]
+        self.power = x[2]
+
+    def _get_param_names(self):
+        return ['variance','lengthscale','power']
+
+    def K(self,X,X2,target):
+        if X2 is None: X2 = X
+        dist2 = np.square((X-X2.T)/self.lengthscale)
+        target += self.variance*(1 + dist2/2.)**(-self.power)
+
+    def Kdiag(self,X,target):
+        target += self.variance
+
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        if X2 is None: X2 = X
+        dist2 = np.square((X-X2.T)/self.lengthscale)
+
+        dvar = (1 + dist2/2.)**(-self.power)
+        dl = self.power * self.variance * dist2 * self.lengthscale**(-3) * (1 + dist2/2./self.power)**(-self.power-1)
+        dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)
+
+        target[0] += np.sum(dvar*dL_dK)
+        target[1] += np.sum(dl*dL_dK)
+        target[2] += np.sum(dp*dL_dK)
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        target[0] += np.sum(dL_dKdiag)
+        # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        if X2 is None: X2 = X
+        dist2 = np.square((X-X2.T)/self.lengthscale)
+
+        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.power)**(-self.power-1)
+        target += np.sum(dL_dK*dX)
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        pass
+    

From e6acaf651e5798250c1f28265714e0b9edb850da Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 18:05:25 +0000
Subject: [PATCH 072/105] Should now test all (although upon error it stops
 trying to generate any more)

---
 GPy/testing/examples_tests.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 9636b286..5ae741ee 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -29,11 +29,11 @@ def checkgrads_generator(model):
 """
 
 def model_checkgrads(model):
-    assert model.checkgrad() is True
+    assert model.checkgrad()
 
 
 def model_instance(model):
-    assert model.checkgrad() is True
+    assert isinstance(model, GPy.core.model)
 
 
 def test_models():
@@ -45,7 +45,7 @@ def test_models():
         print "MODULE", module_examples
         print "Before"
         print inspect.getmembers(module_examples, predicate=inspect.isfunction)
-        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ]
+        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ][::-1]
         print "After"
         print functions
         for example in functions:
@@ -72,3 +72,4 @@ def test_models():
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
+    unittest.main()

From 62615f2ca891229bc9bb883bdfacecb00f17418b Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 18:09:47 +0000
Subject: [PATCH 073/105] Trying to shuffle

---
 GPy/testing/examples_tests.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 5ae741ee..07ef4177 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -7,6 +7,7 @@ import GPy
 import inspect
 import pkgutil
 import os
+import random
 
 
 class ExamplesTests(unittest.TestCase):
@@ -45,7 +46,7 @@ def test_models():
         print "MODULE", module_examples
         print "Before"
         print inspect.getmembers(module_examples, predicate=inspect.isfunction)
-        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ][::-1]
+        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ]
         print "After"
         print functions
         for example in functions:

From 2a1b5f94c8a0f682ee23d42b9953b1ca8a783b2a Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 18:14:23 +0000
Subject: [PATCH 074/105] Got rid of foo

---
 GPy/kern/rbf.py               | 1 -
 GPy/testing/examples_tests.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 133895ff..3c3d59e6 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -55,7 +55,6 @@ class rbf(kernpart):
         self._X, self._X2, self._params = np.empty(shape=(3,1))
 
     def _get_params(self):
-        foo
         return np.hstack((self.variance,self.lengthscale))
 
     def _set_params(self,x):
diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 07ef4177..141d3999 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -46,7 +46,7 @@ def test_models():
         print "MODULE", module_examples
         print "Before"
         print inspect.getmembers(module_examples, predicate=inspect.isfunction)
-        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ]
+        functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ][::-1]
         print "After"
         print functions
         for example in functions:

From 0b1bc1961dd4e385ad292b2dccce898f802f674a Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 18:18:23 +0000
Subject: [PATCH 075/105] Example fixed

---
 GPy/examples/classification.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 77bd0b79..00842e3b 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -120,7 +120,7 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     Y[Y == -1] = 0
 
     # Kernel object
-    kernel = GPy.kern.rbf(1)
+    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
 
     # Likelihood object
     distribution = GPy.likelihoods.likelihood_functions.probit()
@@ -129,8 +129,8 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
 
     # Model definition
-    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=True)
-    m.set('len',.5)
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=False)
+    m.set('len',2.)
 
     m.ensure_default_constraints()
     # Optimize

From 0c326ce497bf39790b04012bc705519683e8fddf Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 18:18:37 +0000
Subject: [PATCH 076/105] added plot_latent to GPLVM

---
 GPy/examples/dimensionality_reduction.py |  1 -
 GPy/models/GPLVM.py                      | 63 +++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 513d30d1..59f35c72 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -46,7 +46,6 @@ def GPLVM_oil_100():
     # create simple GP model
     m = GPy.models.GPLVM(data['X'], 2)
 
-
     # optimize
     m.ensure_default_constraints()
     m.optimize()
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 0274647d..d0dc766f 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -10,6 +10,7 @@ from ..core import model
 from ..util.linalg import pdinv, PCA
 from GP import GP
 from ..likelihoods import Gaussian
+from .. import util
 
 class GPLVM(GP):
     """
@@ -59,5 +60,63 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
 
-    def plot_latent(self):
-        raise NotImplementedError
+    def plot_latent(self,labels=None, which_indices=None, resolution=50):
+        """
+        :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
+        :param resolution: the resolution of the grid on which to evaluate the predictive variance
+        """
+
+        if labels is None:
+            labels = np.ones(self.N)
+        if which_indices is None:
+            if self.Q==1:
+                input_1 = 0
+                input_2 = None
+            if self.Q==2:
+                input_1, input_2 = 0,1
+            else:
+                #try to find a linear of RBF kern in the kernel
+                k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
+                if (not len(k)==1) or (not k[0].ARD):
+                    raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
+                k = k[0]
+                if k.name=='rbf':
+                    input_1, input_2 = np.argsort(k.lengthscales)[:2]
+                elif k.name=='linear':
+                    input_1, input_2 = np.argsort(k.variances)[::-1][:2]
+
+        #first, plot the output variance as a function of the latent space
+        Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
+        mu, var, low, up = self.predict(Xtest)
+        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')
+
+
+        for i,ul in enumerate(np.unique(labels)):
+            if type(ul) is np.string_:
+                this_label = ul
+            elif type(ul) is np.int64:
+                this_label = 'class %i'%ul
+            else:
+                this_label = 'class %i'%i
+
+            index = np.nonzero(labels==ul)[0]
+            if self.Q==1:
+                x = self.X[index,input_1]
+                y = np.zeros(index.size)
+            else:
+                x = self.X[index,input_1]
+                y = self.X[index,input_2]
+            pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
+
+        pb.xlabel('latent dimension %i'%input_1)
+        pb.ylabel('latent dimension %i'%input_2)
+
+        if not np.all(labels==1.):
+            pb.legend(loc=0,numpoints=1)
+
+        pb.xlim(xmin[0],xmax[0])
+        pb.ylim(xmin[1],xmax[1])
+
+
+
+        

From 0e187b69212e2efbf06b4ee07fddb2b9e9e05de3 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 18:21:29 +0000
Subject: [PATCH 077/105] update to the rational quadratic kernel and new
 tutorial on writing kernels

---
 GPy/kern/rational_quadratic.py    |   5 +-
 doc/GPy.examples.rst              |  68 +----------
 doc/GPy.kern.rst                  |   8 ++
 doc/GPy.models.rst                |   6 +-
 doc/GPy.rst                       |   8 --
 doc/GPy.testing.rst               |  59 ++++++++++
 doc/index.rst                     |   3 +-
 doc/tuto_creating_new_kernels.rst | 183 ++++++++++++++++++++++++++++++
 8 files changed, 263 insertions(+), 77 deletions(-)
 create mode 100644 doc/GPy.testing.rst
 create mode 100644 doc/tuto_creating_new_kernels.rst

diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py
index b71d1354..15200fd3 100644
--- a/GPy/kern/rational_quadratic.py
+++ b/GPy/kern/rational_quadratic.py
@@ -11,7 +11,7 @@ class rational_quadratic(kernpart):
 
     .. math::
 
-       k(r) = \sigma^2 \left(1 + \frac{r^2}{2 \ell^2})^{- \alpha} \ \ \ \ \  \\text{ where  } r^2 = (x-y)^2
+       k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2 \ell^2} \\bigg)^{- \\alpha} \ \ \ \ \  \\text{ where  } r^2 = (x-y)^2
 
     :param D: the number of input dimensions
     :type D: int (D=1 is the only value currently supported)
@@ -19,6 +19,8 @@ class rational_quadratic(kernpart):
     :type variance: float
     :param lengthscale: the lengthscale :math:`\ell`
     :type lengthscale: float
+    :param power: the power :math:`\\alpha`
+    :type power: float
     :rtype: kernpart object
 
     """
@@ -76,4 +78,3 @@ class rational_quadratic(kernpart):
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
-    
diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index d369de41..f17cf826 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -9,14 +9,6 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`BGPLVM_demo` Module
--------------------------
-
-.. automodule:: GPy.examples.BGPLVM_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 :mod:`classification` Module
 ----------------------------
 
@@ -25,18 +17,18 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`oil_flow_demo` Module
----------------------------
+:mod:`dimensionality_reduction` Module
+--------------------------------------
 
-.. automodule:: GPy.examples.oil_flow_demo
+.. automodule:: GPy.examples.dimensionality_reduction
     :members:
     :undoc-members:
     :show-inheritance:
 
-:mod:`poisson` Module
----------------------
+:mod:`non_gaussian` Module
+--------------------------
 
-.. automodule:: GPy.examples.poisson
+.. automodule:: GPy.examples.non_gaussian
     :members:
     :undoc-members:
     :show-inheritance:
@@ -49,30 +41,6 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`sparse_GPLVM_demo` Module
--------------------------------
-
-.. automodule:: GPy.examples.sparse_GPLVM_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`sparse_GP_regression_demo` Module
----------------------------------------
-
-.. automodule:: GPy.examples.sparse_GP_regression_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`sparse_ep_fix` Module
----------------------------
-
-.. automodule:: GPy.examples.sparse_ep_fix
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 :mod:`tutorials` Module
 -----------------------
 
@@ -81,27 +49,3 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`uncertain_input_GP_regression_demo` Module
-------------------------------------------------
-
-.. automodule:: GPy.examples.uncertain_input_GP_regression_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`uncollapsed_GP_demo` Module
----------------------------------
-
-.. automodule:: GPy.examples.uncollapsed_GP_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`unsupervised` Module
---------------------------
-
-.. automodule:: GPy.examples.unsupervised
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst
index 3ebeda40..aef712dc 100644
--- a/doc/GPy.kern.rst
+++ b/doc/GPy.kern.rst
@@ -137,6 +137,14 @@ kern Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`rational_quadratic` Module
+--------------------------------
+
+.. automodule:: GPy.kern.rational_quadratic
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`rbf` Module
 -----------------
 
diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst
index 8837ac4e..85bd727a 100644
--- a/doc/GPy.models.rst
+++ b/doc/GPy.models.rst
@@ -9,10 +9,10 @@ models Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`BGPLVM` Module
---------------------
+:mod:`Bayesian_GPLVM` Module
+----------------------------
 
-.. automodule:: GPy.models.BGPLVM
+.. automodule:: GPy.models.Bayesian_GPLVM
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/doc/GPy.rst b/doc/GPy.rst
index 242a22bc..e56e48e1 100644
--- a/doc/GPy.rst
+++ b/doc/GPy.rst
@@ -9,14 +9,6 @@ GPy Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`test_coreg` Module
-------------------------
-
-.. automodule:: GPy.test_coreg
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 Subpackages
 -----------
 
diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst
new file mode 100644
index 00000000..5b32558b
--- /dev/null
+++ b/doc/GPy.testing.rst
@@ -0,0 +1,59 @@
+testing Package
+===============
+
+:mod:`bgplvm_tests` Module
+--------------------------
+
+.. automodule:: GPy.testing.bgplvm_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`examples_tests` Module
+----------------------------
+
+.. automodule:: GPy.testing.examples_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`gplvm_tests` Module
+-------------------------
+
+.. automodule:: GPy.testing.gplvm_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`kernel_tests` Module
+--------------------------
+
+.. automodule:: GPy.testing.kernel_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`prior_tests` Module
+-------------------------
+
+.. automodule:: GPy.testing.prior_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`sparse_gplvm_tests` Module
+--------------------------------
+
+.. automodule:: GPy.testing.sparse_gplvm_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`unit_tests` Module
+------------------------
+
+.. automodule:: GPy.testing.unit_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
diff --git a/doc/index.rst b/doc/index.rst
index 5066278f..a7b68c16 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -10,8 +10,7 @@ For a quick start, you can have a look at one of the tutorials:
 * `Basic Gaussian process regression <tuto_GP_regression.html>`_  
 * `Interacting with models <tuto_interacting_with_models.html>`_
 * `A kernel overview <tuto_kernel_overview.html>`_ 
-* Advanced GP regression (Forthcoming)
-* Writing kernels (Forthcoming)
+* `Writing new kernels <tuto_creating_new_kernels.html>`_
 
 You may also be interested by some examples in the GPy/examples folder.
 
diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst
new file mode 100644
index 00000000..672bc1e7
--- /dev/null
+++ b/doc/tuto_creating_new_kernels.rst
@@ -0,0 +1,183 @@
+********************
+Creating new kernels
+********************
+
+We will see in this tutorial how to create new kernels in GPy. We will also give details on how to implement each function of the kernel and illustrate with a running example: the rational quadratic kernel. 
+
+Structure of a kernel in GPy
+============================
+
+In GPy a kernel object is made of a list of kernpart objects, which correspond to symmetric positive definite functions. More precisely, the kernel should be understood as the sum of the kernparts. In order to implement a new covariance, the following steps must be followed:
+
+    1. implement the new covariance as a kernpart object
+    2. update the constructors so that the kernpart can be used as a kern object
+    3. update the __init__.py file
+
+These three steps are detailed below.
+
+Implementing a kernpart object
+==============================
+
+We advise the reader to start with copy-pasting an existing kernel and to modify the new file. We will now give a description of the various functions that can be found in a kernpart object.
+
+**Header**
+
+The header is similar to all kernels::
+
+    from kernpart import kernpart
+    import numpy as np
+
+    class rational_quadratic(kernpart):
+
+**__init__(self,D, param1, param2, ...)**
+
+The implementation of this function is mandatory.
+
+For all kernparts the first parameter ``D`` corresponds to the dimension of the input space, and the following parameters stand for the parameterization of the kernel.
+
+The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer).::
+
+    def __init__(self,D,variance=1.,lengthscale=1.,power=1.):
+        assert D == 1, "For this kernel we assume D=1"
+        self.D = D
+        self.Nparam = 3
+        self.name = 'rat_quad'
+        self.variance = variance
+        self.lengthscale = lengthscale
+        self.power = power
+
+**_get_params(self)**
+
+The implementation of this function in mandatory.
+
+This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters.::
+
+    def _get_params(self):
+        return np.hstack((self.variance,self.lengthscale,self.power))
+
+**_set_params(self,x)**
+
+The implementation of this function in mandatory.
+
+The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...).::
+
+    def _set_params(self,x):
+        self.variance = x[0]
+        self.lengthscale = x[1]
+        self.power = x[2]
+
+**_get_param_names(self)**
+
+The implementation of this function in mandatory.
+
+It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names.::
+
+    def _get_param_names(self):
+        return ['variance','lengthscale','power']
+
+**K(self,X,X2,target)**
+
+The implementation of this function in mandatory.
+
+This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use.::
+
+    def K(self,X,X2,target):
+        if X2 is None: X2 = X
+        dist2 = np.square((X-X2.T)/self.lengthscale)
+        target += self.variance*(1 + dist2/2.)**(-self.power)
+
+**Kdiag(self,X,target)**
+
+The implementation of this function in mandatory.
+
+This function is similar to ``K`` but it computes only the values of the kernel on the diagonal. Thus, ``target`` is a 1-dimensional np.array of length :math:`n_1`.::
+
+    def Kdiag(self,X,target):
+        target += self.variance    
+
+
+**dK_dtheta(self,dL_dK,X,X2,target)**
+
+This function is required for the optimization of the parameters.
+
+Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second.::
+
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        if X2 is None: X2 = X
+        dist2 = np.square((X-X2.T)/self.lengthscale)
+
+        dvar = (1 + dist2/2.)**(-self.power)
+        dl = self.power * self.variance * dist2 * self.lengthscale**(-3) * (1 + dist2/2./self.power)**(-self.power-1)
+        dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)
+
+        target[0] += np.sum(dvar*dL_dK)
+        target[1] += np.sum(dl*dL_dK)
+        target[2] += np.sum(dp*dL_dK)
+
+
+**dKdiag_dtheta(self,dL_dKdiag,X,target)**
+
+This function is required for BGPLVM, sparse models and uncertain inputs.
+
+As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element.::
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        target[0] += np.sum(dL_dKdiag)
+        # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
+
+**dK_dX(self,dL_dK,X,X2,target)**
+
+This function is required for GPLVM, BGPLVM, sparse models and uncertain inputs.
+
+Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array.::
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        if X2 is None: X2 = X
+        dist2 = np.square((X-X2.T)/self.lengthscale)
+
+        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.power)**(-self.power-1)
+        target += np.sum(dL_dK*dX)
+
+**dKdiag_dX(self,dL_dKdiag,X,target)**
+
+This function is required for BGPLVM, sparse models and uncertain inputs. As for ``dKdiag_dtheta``, :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dX}` is added to each element of target.::
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        pass
+
+**Psi statistics**
+
+The psi statistics and their derivatives are required for BGPLVM and GPs with uncertain inputs.
+
+The expressions of the psi statistics are:
+
+TODO
+
+For the rational quadratic we have:
+
+TODO
+
+Update the constructor 
+======================
+
+Once the required functions have been implemented as a kernpart object, the file GPy/kern/constructors.py has to be updated so that a kernel can be built from the kernpart object.
+
+The following line should be added in the preamble of the file::
+
+    from rational_quadratic import rational_quadratic as rational_quadratic_part
+
+as well as the following block::
+
+    def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
+        part = rational_quadraticpart(D,variance, lengthscale, power)
+        return kern(D, [part])
+
+
+Update initialization
+=====================
+
+The last step is to update the list of kernels imported from constructors.py in GPy/kern/__init__.py.
+
+
+

From b06b4ea8f1c8793540bec7d68340310489afdadd Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 11 Mar 2013 18:25:11 +0000
Subject: [PATCH 078/105] Fixed checkgrad test to randomize before checking

---
 GPy/testing/examples_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index 141d3999..feba2b50 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -30,6 +30,7 @@ def checkgrads_generator(model):
 """
 
 def model_checkgrads(model):
+    model.randomize()
     assert model.checkgrad()
 
 

From b336d914739f8965aeec322691d0b1638313e400 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 18:43:59 +0000
Subject: [PATCH 079/105] fixed bug in RBF, added inducing inputs to BGPLVM
 plots

---
 GPy/kern/rbf.py              | 1 -
 GPy/models/Bayesian_GPLVM.py | 4 ++++
 GPy/models/GPLVM.py          | 4 +---
 GPy/models/sparse_GPLVM.py   | 4 ++++
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 133895ff..3c3d59e6 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -55,7 +55,6 @@ class rbf(kernpart):
         self._X, self._X2, self._params = np.empty(shape=(3,1))
 
     def _get_params(self):
-        foo
         return np.hstack((self.variance,self.lengthscale))
 
     def _set_params(self,x):
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 0eb957a9..430c2718 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -83,3 +83,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 
     def _log_likelihood_gradients(self):
         return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
+
+    def plot_latent(self, *args, **kwargs):
+        input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
+        pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index d0dc766f..b44801fc 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -117,6 +117,4 @@ class GPLVM(GP):
         pb.xlim(xmin[0],xmax[0])
         pb.ylim(xmin[1],xmax[1])
 
-
-
-        
+        return input_1, input_2
diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py
index 542fbe0e..591c49b2 100644
--- a/GPy/models/sparse_GPLVM.py
+++ b/GPy/models/sparse_GPLVM.py
@@ -55,3 +55,7 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
         #passing Z without a small amout of jitter will induce the white kernel where we don;t want it!
         mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
         pb.plot(mu[:, 0] , mu[:, 1], 'ko')
+
+    def plot_latent(self, *args, **kwargs):
+        input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
+        pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')

From 3dd62c8251740440c037aead3b4bd2f855f9805b Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 18:45:04 +0000
Subject: [PATCH 080/105] Few bugs fixed in the documentation

---
 GPy/kern/rbf.py                   |  3 +--
 doc/tuto_creating_new_kernels.rst | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 133895ff..ae587202 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -12,7 +12,7 @@ class rbf(kernpart):
 
     .. math::
 
-       k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \  \\text{ where  } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}}
+       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \  \\text{ where  } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
 
     where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
 
@@ -55,7 +55,6 @@ class rbf(kernpart):
         self._X, self._X2, self._params = np.empty(shape=(3,1))
 
     def _get_params(self):
-        foo
         return np.hstack((self.variance,self.lengthscale))
 
     def _set_params(self,x):
diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst
index 672bc1e7..8ebf8b8f 100644
--- a/doc/tuto_creating_new_kernels.rst
+++ b/doc/tuto_creating_new_kernels.rst
@@ -22,7 +22,7 @@ We advise the reader to start with copy-pasting an existing kernel and to modify
 
 **Header**
 
-The header is similar to all kernels::
+The header is similar to all kernels: ::
 
     from kernpart import kernpart
     import numpy as np
@@ -35,7 +35,7 @@ The implementation of this function in mandatory.
 
 For all kernparts the first parameter ``D`` corresponds to the dimension of the input space, and the following parameters stand for the parameterization of the kernel.
 
-The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer).::
+The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer). ::
 
     def __init__(self,D,variance=1.,lengthscale=1.,power=1.):
         assert D == 1, "For this kernel we assume D=1"
@@ -50,7 +50,7 @@ The following attributes are compulsory: ``self.D`` (the dimension, integer), ``
 
 The implementation of this function in mandatory.
 
-This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters.::
+This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters. ::
 
     def _get_params(self):
         return np.hstack((self.variance,self.lengthscale,self.power))
@@ -59,7 +59,7 @@ This function returns a one dimensional array of length ``self.Nparam`` containi
 
 The implementation of this function in mandatory.
 
-The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...).::
+The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...). ::
 
     def _set_params(self,x):
         self.variance = x[0]
@@ -70,7 +70,7 @@ The input is a one dimensional array of length ``self.Nparam`` containing the va
 
 The implementation of this function in mandatory.
 
-It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names.::
+It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names. ::
 
     def _get_param_names(self):
         return ['variance','lengthscale','power']
@@ -79,7 +79,7 @@ It returns a list of strings of length ``self.Nparam`` corresponding to the para
 
 The implementation of this function in mandatory.
 
-This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use.::
+This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use. ::
 
     def K(self,X,X2,target):
         if X2 is None: X2 = X
@@ -90,7 +90,7 @@ This function is used to compute the covariance matrix associated with the input
 
 The implementation of this function in mandatory.
 
-This function is similar to ``K`` but it computes only the values of the kernel on the diagonal. Thus, ``target`` is a 1-dimensional np.array of length :math:`n_1`.::
+This function is similar to ``K`` but it computes only the values of the kernel on the diagonal. Thus, ``target`` is a 1-dimensional np.array of length :math:`n_1`. ::
 
     def Kdiag(self,X,target):
         target += self.variance    
@@ -100,7 +100,7 @@ This function is similar to ``K`` but it computes only the values of the kernel
 
 This function is required for the optimization of the parameters.
 
-Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second.::
+Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second. ::
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         if X2 is None: X2 = X
@@ -119,7 +119,7 @@ Computes the derivative of the likelihood. As previously, the values are added t
 
 This function is required for BGPLVM, sparse models and uncertain inputs.
 
-As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element.::
+As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element. ::
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target[0] += np.sum(dL_dKdiag)
@@ -129,7 +129,7 @@ As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag}
 
 This function is required for GPLVM, BGPLVM, sparse models and uncertain inputs.
 
-Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array.::
+Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array. ::
 
     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
@@ -141,7 +141,7 @@ Computes the derivative of the likelihood with respect to the inputs ``X`` (a :m
 
 **dKdiag_dX(self,dL_dKdiag,X,target)**
 
-This function is required for BGPLVM, sparse models and uncertain inputs. As for ``dKdiag_dtheta``, :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dX}` is added to each element of target.::
+This function is required for BGPLVM, sparse models and uncertain inputs. As for ``dKdiag_dtheta``, :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dX}` is added to each element of target. ::
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
@@ -167,7 +167,7 @@ The following line should be added in the preamble of the file::
 
     from rational_quadratic import rational_quadratic as rational_quadratic_part
 
-as well as the following block::
+as well as the following block ::
 
     def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
         part = rational_quadraticpart(D,variance, lengthscale, power)

From cb082898d335aea8be110e615a9666065669da4e Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 18:56:37 +0000
Subject: [PATCH 081/105] added trace_sum for efficiency

---
 GPy/models/sparse_GP.py | 6 +++---
 GPy/util/linalg.py      | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index f70938c9..4846bf8a 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
+from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
@@ -107,7 +107,7 @@ class sparse_GP(GP):
         self.C = np.dot(tmp,tmp.T)
         #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
         #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
-        tmp = np.dot(self.C,self.psi1V/sf)
+        tmp = np.dot(self.C/sf,self.psi1V)
         self.E = np.dot(tmp,tmp.T)
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
@@ -156,7 +156,7 @@ class sparse_GP(GP):
             beta = self.likelihood.precision
             dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
             dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
-            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
+            dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta
             dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
             self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
 
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 26105789..cf023284 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -14,6 +14,13 @@ import types
 #import scipy.lib.lapack.flapack
 import scipy as sp
 
+def trace_dot(a,b):
+    """
+    efficiently compute the trace of the matrix product of a and b
+    """
+    assert a.shape==b.T.shape
+    return np.dot(a.flatten(),b.T.flatten())
+
 def mdot(*args):
    """Multiply all the arguments using matrix product rules.
    The output is equivalent to multiplying the arguments one by one

From 6febbed36fe2ed6043c2d37cda36304e36cc83e6 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 19:13:19 +0000
Subject: [PATCH 082/105] tie_param changed to tie_params in tutorials

---
 doc/tuto_kernel_overview.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index dfb7fb3f..da19803b 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -133,7 +133,7 @@ Various constrains can be applied to the parameters of a kernel
     * ``constrain_fixed`` to fix the value of a parameter (the value will not be modified during optimisation)
     * ``constrain_positive`` to make sure the parameter is greater than 0.
     * ``constrain_bounded`` to impose the parameter to be in a given range.
-    * ``tie_param`` to impose the value of two (or more) parameters to be equal.
+    * ``tie_params`` to impose the value of two (or more) parameters to be equal.
 
 When calling one of these functions, the parameters to constrain can either by specified by a regular expression that matches its name or by a number that corresponds to the rank of the parameter. Here is an example ::
 
@@ -146,7 +146,7 @@ When calling one of these functions, the parameters to constrain can either by s
 
     k.constrain_positive('var')
     k.constrain_fixed(np.array([1]),1.75)
-    k.tie_param('len')
+    k.tie_params('len')
     k.unconstrain('white')
     k.constrain_bounded('white',lower=1e-5,upper=.5)
     print k

From 6c69fec8ea4266503d1789ea19dd86ff078dd852 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Mon, 11 Mar 2013 19:18:36 +0000
Subject: [PATCH 083/105] Tutorial finished

---
 GPy/kern/rbf.py                      |   1 -
 doc/GPy.examples.rst                 |  68 +--------
 doc/GPy.kern.rst                     |   8 ++
 doc/GPy.models.rst                   |   6 +-
 doc/GPy.rst                          |   8 --
 doc/tuto_interacting_with_models.rst | 197 ++++++++++++++++++++++++---
 6 files changed, 194 insertions(+), 94 deletions(-)

diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 133895ff..3c3d59e6 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -55,7 +55,6 @@ class rbf(kernpart):
         self._X, self._X2, self._params = np.empty(shape=(3,1))
 
     def _get_params(self):
-        foo
         return np.hstack((self.variance,self.lengthscale))
 
     def _set_params(self,x):
diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index d369de41..f17cf826 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -9,14 +9,6 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`BGPLVM_demo` Module
--------------------------
-
-.. automodule:: GPy.examples.BGPLVM_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 :mod:`classification` Module
 ----------------------------
 
@@ -25,18 +17,18 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`oil_flow_demo` Module
----------------------------
+:mod:`dimensionality_reduction` Module
+--------------------------------------
 
-.. automodule:: GPy.examples.oil_flow_demo
+.. automodule:: GPy.examples.dimensionality_reduction
     :members:
     :undoc-members:
     :show-inheritance:
 
-:mod:`poisson` Module
----------------------
+:mod:`non_gaussian` Module
+--------------------------
 
-.. automodule:: GPy.examples.poisson
+.. automodule:: GPy.examples.non_gaussian
     :members:
     :undoc-members:
     :show-inheritance:
@@ -49,30 +41,6 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`sparse_GPLVM_demo` Module
--------------------------------
-
-.. automodule:: GPy.examples.sparse_GPLVM_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`sparse_GP_regression_demo` Module
----------------------------------------
-
-.. automodule:: GPy.examples.sparse_GP_regression_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`sparse_ep_fix` Module
----------------------------
-
-.. automodule:: GPy.examples.sparse_ep_fix
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 :mod:`tutorials` Module
 -----------------------
 
@@ -81,27 +49,3 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`uncertain_input_GP_regression_demo` Module
-------------------------------------------------
-
-.. automodule:: GPy.examples.uncertain_input_GP_regression_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`uncollapsed_GP_demo` Module
----------------------------------
-
-.. automodule:: GPy.examples.uncollapsed_GP_demo
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`unsupervised` Module
---------------------------
-
-.. automodule:: GPy.examples.unsupervised
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst
index 3ebeda40..aef712dc 100644
--- a/doc/GPy.kern.rst
+++ b/doc/GPy.kern.rst
@@ -137,6 +137,14 @@ kern Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`rational_quadratic` Module
+--------------------------------
+
+.. automodule:: GPy.kern.rational_quadratic
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`rbf` Module
 -----------------
 
diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst
index 8837ac4e..85bd727a 100644
--- a/doc/GPy.models.rst
+++ b/doc/GPy.models.rst
@@ -9,10 +9,10 @@ models Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`BGPLVM` Module
---------------------
+:mod:`Bayesian_GPLVM` Module
+----------------------------
 
-.. automodule:: GPy.models.BGPLVM
+.. automodule:: GPy.models.Bayesian_GPLVM
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/doc/GPy.rst b/doc/GPy.rst
index 242a22bc..e56e48e1 100644
--- a/doc/GPy.rst
+++ b/doc/GPy.rst
@@ -9,14 +9,6 @@ GPy Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`test_coreg` Module
-------------------------
-
-.. automodule:: GPy.test_coreg
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 Subpackages
 -----------
 
diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 370ffd95..3031a5e1 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -2,52 +2,212 @@
 Interacting with models
 *************************************
 
-The GPy model class has a set of features which are designed to make it simple to explore the parameter space of the model. By default, the scipy optimisers are used to fit GPy models (via model.optimize()), for which we provide mechanisms for 'free' optimisation: GPy can ensure that naturally positive parameters (such as variances) remain positive. But these mechanisms are much more powerful than simple reparameterisation, as we shall see. 
+The GPy model class has a set of features which are 
+designed to make it simple to explore the parameter 
+space of the model. By default, the scipy optimisers 
+are used to fit GPy models (via model.optimize()), 
+for which we provide mechanisms for 'free' optimisation: 
+GPy can ensure that naturally positive parameters 
+(such as variances) remain positive. But these mechanisms 
+are much more powerful than simple reparameterisation, 
+as we shall see. 
 
-All of the examples included in GPy return an instance of a model class. We'll use GPy.examples.?? as an example::
+Throughout this tutorial we'll use a sparse GP regression model 
+as an example. This example can be found in ``GPy.examples.regression``.  
+All of the examples included in GPy return an instance 
+of a model class, and therefore they can be called in 
+the following way: ::
 
     import pylab as pb
     pb.ion()
     import GPy
-    m = GPy.examples.??
+    m = GPy.examples.regression.sparse_GP_regression_1D()
 
 Examining the model using print
 ===============================
-To see the current state of the model parameters, and the model's (marginal) likelihood just print the model::
+To see the current state of the model parameters, 
+and the model's (marginal) likelihood just print the model ::
+
     print m
 
-?? output
+The first thing displayed on the screen is the log-likelihood 
+value of the model with its current parameters. Below the 
+log-likelihood, a table with all the model's parameters 
+is shown. For each parameter, the table contains the name 
+of the parameter, its current value and, where they are 
+defined, the associated constraints, ties and prior distributions. ::
 
-Getting the model's likelihood and gradients
-===========================================
-foobar
+	Log-likelihood: 6.309e+02
+
+	     Name        |   Value   |  Constraints  |  Ties  |  Prior  
+	------------------------------------------------------------------
+	    iip_0_0      |  -1.4671  |               |        |         
+	    iip_1_0      |  2.6378   |               |        |         
+	    iip_2_0      |  -0.0396  |               |        |         
+	    iip_3_0      |  -2.6372  |               |        |         
+	    iip_4_0      |  1.4704   |               |        |         
+	 rbf_variance    |  1.5672   |     (+ve)     |        |         
+	rbf_lengthscale  |  2.5625   |     (+ve)     |        |         
+	white_variance   |  0.0000   |     (+ve)     |        |         
+	noise_variance   |  0.0022   |     (+ve)     |        |         
+
+In this case the kernel parameters (``rbf_variance``, 
+``rbf_lengthscale`` and ``white_variance``) as well as 
+the noise parameter (``noise_variance``) are constrained 
+to be positive, while the inducing inputs have no 
+constraints associated. Also, there are no ties or priors defined.
 
 Setting and fetching parameters by name
 =======================================
-foobar
+Another way to interact with the model's parameters is through
+the functions ``_get_param_names()``, ``_get_params()`` and 
+``_set_params()``.
+
+``_get_param_names()`` returns a list of the parameter names ::
+
+	['iip_0_0',
+	 'iip_1_0',
+	 'iip_2_0',
+	 'iip_3_0',
+	 'iip_4_0',
+	 'rbf_variance',
+	 'rbf_lengthscale',
+	 'white_variance',
+	 'noise_variance']
+
+``_get_params()`` returns an array of the parameter values ::
+
+	array([ -1.46705227e+00,   2.63782176e+00,  -3.96422982e-02,
+		-2.63715255e+00,   1.47038653e+00,   1.56724596e+00,
+		 2.56248679e+00,   2.20963633e-10,   2.18379922e-03])
+
+``_set_params()`` takes an array as input and substitutes 
+the current values of the parameters for those of the array. For example,
+we can define a new array of values and change the parameters as follows: ::
+
+	new_params = np.array([1.,2.,3.,4.,1.,1.,1.,1.,1.])
+	m._set_params(new_params)
+
+If we call the function ``_get_params()`` again, we will obtain the new
+parameters we have just set.
+
+Parameters can also be set by name using the function ``set()``. For example,
+let's change the lengthscale to .5: ::
+
+	m.set('rbf_lengthscale',.5)
+
+The ``set()`` function accepts a regular expression as its first
+argument, and therefore all parameters matching that regular 
+expression are set to the given value. In this case, rather 
+than passing a single value as the second argument, we can also 
+pass an array of values. For example, let's change the inducing 
+inputs: ::
+
+	m.set('iip',np.arange(-4,0))
+
+Getting the model's likelihood and gradients
+===========================================
+Apart from printing the model, the marginal 
+log-likelihood can be obtained by using the function
+``log_likelihood()``. Also, the log-likelihood gradients
+with respect to each parameter can be obtained with the function
+``_log_likelihood_gradients()``. ::
+
+    m.log_likelihood()
+    -791.15371409346153
+
+    m._log_likelihood_gradients()
+    array([  7.08278455e-03,   1.37118783e+01,   2.66948031e+00,
+             3.50184014e+00,   7.08278455e-03,  -1.43501702e+02,
+	     6.10662266e+01,  -2.18472649e+02,   2.14663691e+02])
+
+Removing the model's constraints
+================================
+When we initially called the example, it was optimized and hence the
+log-likelihood gradients were close to zero. However, since
+we have been changing the parameters, the gradients are far from zero now.
+Next we are going to show how to optimize the model while setting different 
+constraints on the parameters. 
+
+Once a constraint has been set on a parameter, it is not possible to
+define a new constraint for it unless we explicitly remove the previous
+one. The command to remove the constraints is ``unconstrain()``, and
+just like the ``set()`` command, it also accepts regular expressions.
+In this case we will remove all the constraints: ::
+
+	m.unconstrain('')
 
 Constraining and optimising the model
 =====================================
-A simple task in GPy is to ensure that the models' variances remain positive during optimisation. the models class has a function called constrain_positive(), which accepts a regex string as above. To constrain the models' variance to be positive::
-    m.constrain_positive('variance')
-    print m
+Some parameters, such as variances, are required
+to be positive. This constraint is easily set 
+with the function ``constrain_positive()``. Regular expressions
+are also accepted. ::
 
-Now we see that the variance of the model is constrained to be postive. GPy handles the effective change of gradients: see how m.objective_gradients has changed approriately
+    m.constrain_positive('var')
 
+For convenience, GPy also provides a catch-all function 
+which ensures that anything which appears to require 
+positivity is constrained appropriately::
 
-For convenience, we also provide a catch all function which ensures that anything which appears to require positivity is constrianed appropriately::
     m.ensure_default_constraints()
 
-
 Fixing parameters
 =================
+Parameter values can be fixed using ``constrain_fixed()``. 
+For example we can define the first inducing input to be 
+fixed at zero: ::
 
+    m.constrain_fixed('iip_0',0)
+	
+Bounding parameters
+===================
+Defining bounding constraints is an easy task in GPy too;
+it only requires using the function ``constrain_bounded()``.
+For example, let's bound inducing inputs 2 and 3 to have
+values between -4 and -1: ::
+
+    m.constrain_bounded('iip_(1|2)',-4,-1)
 
 Tying Parameters
 ================
+The values of two or more parameters can be tied together,
+so that they share the same value during optimization.
+The function to do so is ``tie_params()``. For the example
+we are using, it doesn't make sense to tie parameters together,
+however for the sake of the example we will tie the white noise
+variance and the noise variance together. See `A kernel overview <tuto_kernel_overview.html>`_
+for a proper use of the tying capabilities. ::
 
-Bounding parameters
-===================
+    m.tie_params('e_var')
+
+Optimizing the model
+====================
+Once we have finished defining the constraints, 
+we can now optimize the model with the function
+``optimize()``. ::
+
+    m.optimize()
+
+We can print the model again and check the new results.
+The table now shows that ``iip_0_0`` is fixed, ``iip_1_0`` 
+and ``iip_2_0`` are bounded and the kernel parameters are constrained to
+be positive. In addition the table now indicates that
+``white_variance`` and ``noise_variance`` are tied together. ::
+
+	Log-likelihood: 9.967e+01
+
+  	     Name        |   Value   |  Constraints  |  Ties  |  Prior  
+	------------------------------------------------------------------
+	    iip_0_0      |  0.0000   |     Fixed     |        |         
+	    iip_1_0      |  -2.8834  |   (-4, -1)    |        |         
+	    iip_2_0      |  -1.9152  |   (-4, -1)    |        |         
+	    iip_3_0      |  1.5034   |               |        |         
+	    iip_4_0      |  -1.0162  |               |        |         
+	 rbf_variance    |  0.0158   |     (+ve)     |        |         
+	rbf_lengthscale  |  0.9760   |     (+ve)     |        |         
+	white_variance   |  0.0049   |     (+ve)     |  (0)   |         
+	noise_variance   |  0.0049   |     (+ve)     |  (0)   |         
 
 
 Further Reading
@@ -55,6 +215,3 @@ Further Reading
 All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read ??link?? creating_new_models. 
 
 By deafult, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
-
-
-

From 4525ddd75a66b99ea1514ff3266e75ca985da135 Mon Sep 17 00:00:00 2001
From: andreas <andreas@Hulk.(none)>
Date: Mon, 11 Mar 2013 19:19:36 +0000
Subject: [PATCH 084/105] fixed plots for BGPLVM

---
 GPy/models/Bayesian_GPLVM.py | 4 ++--
 GPy/models/GPLVM.py          | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index 430c2718..a18ec9bb 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -85,5 +85,5 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
 
     def plot_latent(self, *args, **kwargs):
-        input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
-        pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')
+        input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs)
+        pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index b44801fc..5be54049 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -81,13 +81,16 @@ class GPLVM(GP):
                     raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
                 k = k[0]
                 if k.name=='rbf':
-                    input_1, input_2 = np.argsort(k.lengthscales)[:2]
+                    input_1, input_2 = np.argsort(k.lengthscale)[:2]
                 elif k.name=='linear':
                     input_1, input_2 = np.argsort(k.variances)[::-1][:2]
 
         #first, plot the output variance as a function of the latent space
         Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
-        mu, var, low, up = self.predict(Xtest)
+	Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
+	Xtest_full[:, :2] = Xtest        
+	mu, var, low, up = self.predict(Xtest_full)
+	var = var[:, :2]
         pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')
 
 

From dc19f44a7f773546d947040c841520f5b9a682f5 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Mon, 11 Mar 2013 19:37:56 +0000
Subject: [PATCH 085/105] deactivated test_models()

---
 GPy/examples/classification.py | 1 -
 GPy/testing/examples_tests.py  | 8 ++++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index c6b7f0ac..5df019e4 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -10,7 +10,6 @@ import numpy as np
 import GPy
 
 default_seed=10000
-
 def crescent_data(seed=default_seed): #FIXME
     """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py
index feba2b50..a06f1090 100644
--- a/GPy/testing/examples_tests.py
+++ b/GPy/testing/examples_tests.py
@@ -8,7 +8,7 @@ import inspect
 import pkgutil
 import os
 import random
-
+from nose.tools import nottest
 
 class ExamplesTests(unittest.TestCase):
     def _checkgrad(self, model):
@@ -37,7 +37,7 @@ def model_checkgrads(model):
 def model_instance(model):
     assert isinstance(model, GPy.core.model)
 
-
+@nottest
 def test_models():
     examples_path = os.path.dirname(GPy.examples.__file__)
     #Load modules
@@ -51,6 +51,10 @@ def test_models():
         print "After"
         print functions
         for example in functions:
+            if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']:
+                print "SKIPPING"
+                continue
+
             print "Testing example: ", example[0]
             #Generate model
             model = example[1]()

From d3c87feffadbbc281f16cb470b0c5cae6731e923 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 19:43:46 +0000
Subject: [PATCH 086/105] some messing with the linear algebra in sparse_GP.
 This should be more efficient... let's hope nothing breaks

---
 GPy/models/sparse_GP.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 4846bf8a..f5279f86 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -102,17 +102,16 @@ class sparse_GP(GP):
         self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
 
         self.psi1V = np.dot(self.psi1, self.V)
-        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
         tmp = np.dot(self.Lmi.T, self.LBi.T)
         self.C = np.dot(tmp,tmp.T)
-        #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
-        tmp = np.dot(self.C/sf,self.psi1V)
-        self.E = np.dot(tmp,tmp.T)
+        self.Cpsi1V = np.dot(self.C,self.psi1V)
+        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
+        self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
-        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        #self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
                 self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
@@ -139,7 +138,7 @@ class sparse_GP(GP):
         # Compute dL_dKmm
         self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
         self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
+        self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:
@@ -157,7 +156,7 @@ class sparse_GP(GP):
             dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
             dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
             dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta
-            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
+            dbeta += np.trace(self.Cpsi1VVpsi1)/beta - 0.5 * trace_dot(np.dot(self.C,self.psi2_beta_scaled) , self.Cpsi1VVpsi1 )/beta
             self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
 
 
@@ -198,7 +197,7 @@ class sparse_GP(GP):
             A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
             B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
         C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
-        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
+        D = 0.5*np.trace(self.Cpsi1VVpsi1)
         return A+B+C+D
 
     def _log_likelihood_gradients(self):

From 7e4b460cdbd4e37597efc9aa6403ed084b09e020 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 09:18:15 +0000
Subject: [PATCH 087/105] more messing with the linear algebra in sparse_GP

---
 GPy/models/sparse_GP.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index f5279f86..be451c12 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -137,8 +137,9 @@ class sparse_GP(GP):
 
         # Compute dL_dKmm
         self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
-        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
+        #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From 77df63952ff15f9a88ad43637264e5da947d86d7 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 09:36:11 +0000
Subject: [PATCH 088/105] updated list of implemented kernels in the
 documentation

---
 doc/kernel_implementation.rst | 60 ++++++++++++++++++-----------------
 doc/tuto_kernel_overview.rst  |  2 +-
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 99ee006b..521087ba 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -5,35 +5,37 @@ List of implemented kernels
 
 The following table shows the implemented kernels in GPy and gives the details of the implemented function for each kernel.
 
-====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
-NAME                  get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
-====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
-bias                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Brownian              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                                                
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-exponential           |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-finite_dimensional    |tick|       |tick|  |tick|  |tick|      |tick| 
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-linear                |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Matern32              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|        
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Matern52              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_exponential  |tick|       |tick|  |tick|  |tick|      |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern32     |tick|       |tick|  |tick|  |tick|      |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern52     |tick|       |tick|  |tick|  |tick|      |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-rbf                   |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-spline                |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-white                 |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
-====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+NAME                  Dimension   ARD   get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
+==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+bias                 n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Brownian             1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                 
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+exponential          n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+finite_dimensional   n           no     |tick|       |tick|  |tick|  |tick|      |tick| 
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+linear               n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Matern32             n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|        
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Matern52             n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_exponential 1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_Matern32    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_Matern52    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+rational quadratic   1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                           
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+rbf                  n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+spline               n           no     |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+white                n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 
 Depending on the use, all functions may not be required
 
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index da19803b..27f895ba 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -39,7 +39,7 @@ return::
 Implemented kernels
 ===================
 
-Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ and the following figure gives a summary of most of them:
+Many kernels are already implemented in GPy. The following figure gives a summary of most of them (a comprehensive list can be found `here <kernel_implementation.html>`_):
 
 .. figure::  Figures/tuto_kern_overview_allkern.png
     :align:  center

From f14302e8d0aa92bd61e8b2ad62db52b34e79c14e Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 09:41:27 +0000
Subject: [PATCH 089/105] updated list of implemented kernels in the
 documentation

---
 doc/kernel_implementation.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 521087ba..c566d21d 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -8,13 +8,13 @@ The following table shows the implemented kernels in GPy and gives the details o
 ==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 NAME                  Dimension   ARD   get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
 ==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
-bias                 n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+bias                 n                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Brownian             1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                 
+Brownian             1                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                 
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 exponential          n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-finite_dimensional   n           no     |tick|       |tick|  |tick|  |tick|      |tick| 
+finite_dimensional   n                  |tick|       |tick|  |tick|  |tick|      |tick| 
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 linear               n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
@@ -22,19 +22,19 @@ Matern32             n           yes    |tick|       |tick|  |tick|  |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 Matern52             n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_exponential 1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+periodic_exponential 1                  |tick|       |tick|  |tick|  |tick|      |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern32    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+periodic_Matern32    1                  |tick|       |tick|  |tick|  |tick|      |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern52    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+periodic_Matern52    1                  |tick|       |tick|  |tick|  |tick|      |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-rational quadratic   1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                           
+rational quadratic   1                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                           
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 rbf                  n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-spline               n           no     |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
+spline               1                  |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-white                n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+white                n                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 ==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 
 Depending on the use, all functions may not be required

From bddeb998bf691e51514ba845cbecfa10484ff0fe Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 10:04:02 +0000
Subject: [PATCH 090/105] typo in comments

---
 GPy/models/sparse_GP.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index be451c12..a348c9f4 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -80,7 +80,7 @@ class sparse_GP(GP):
 
         #The rather complex computations of psi2_beta_scaled
         if self.likelihood.is_heteroscedastic:
-            assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
+            assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
             if self.has_uncertain_inputs:
                 self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
             else:

From c3dae7b25255bd2168063226b0d97a2e6ce649f3 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 10:43:46 +0000
Subject: [PATCH 091/105] Fixed bug in dK_dX for the quadratic kernel

---
 GPy/kern/rational_quadratic.py    | 4 ++--
 doc/tuto_creating_new_kernels.rst | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py
index 15200fd3..561ea065 100644
--- a/GPy/kern/rational_quadratic.py
+++ b/GPy/kern/rational_quadratic.py
@@ -73,8 +73,8 @@ class rational_quadratic(kernpart):
         if X2 is None: X2 = X
         dist2 = np.square((X-X2.T)/self.lengthscale)
 
-        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.power)**(-self.power-1)
-        target += np.sum(dL_dK*dX)
+        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        target += np.sum(dL_dK*dX,1)[:,np.newaxis]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst
index 8ebf8b8f..24003ba2 100644
--- a/doc/tuto_creating_new_kernels.rst
+++ b/doc/tuto_creating_new_kernels.rst
@@ -136,8 +136,8 @@ Computes the derivative of the likelihood with respect to the inputs ``X`` (a :m
         if X2 is None: X2 = X
         dist2 = np.square((X-X2.T)/self.lengthscale)
 
-        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.power)**(-self.power-1)
-        target += np.sum(dL_dK*dX)
+        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        target += np.sum(dL_dK*dX,1)[:,np.newaxis]
 
 **dKdiag_dX(self,dL_dKdiag,X,target)**
 

From 09dd452b54c30f9ba113fe7709c886a6a0d66128 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 11:18:25 +0000
Subject: [PATCH 092/105] added SCG code

---
 GPy/inference/SCG.py | 142 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 GPy/inference/SCG.py

diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
new file mode 100644
index 00000000..cbaee3e7
--- /dev/null
+++ b/GPy/inference/SCG.py
@@ -0,0 +1,142 @@
+#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
+
+#Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to Python by N. Lawrence and given a pythonic interface by James Hensman
+
+#      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+#      HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+#      EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+#      NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+#      MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+#      PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+#      REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+#      DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#      EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#      (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+#      OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#      DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+#      HOWEVER CAUSED AND ON ANY THEORY OF
+#      LIABILITY, WHETHER IN CONTRACT, STRICT
+#      LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#      OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#      OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#      POSSIBILITY OF SUCH DAMAGE.
+
+
+import numpy as np
+
+def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-6):
+    """
+    Optimisation through Scaled Conjugate Gradients (SCG)
+
+    f: the objective function
+    gradf : the gradient function (should return a 1D np.ndarray)
+    x : the initial condition
+
+    Returns
+    x the optimal value for x
+    flog : a list of all the objective values
+    pointlog : a list of the x values tried
+    scalelog : a list of the scales used in optimisation (beta)
+
+    """
+
+    sigma0 = 1.0e-4
+    fold = f(x, *optargs)	# Initial function value.
+    fnow = fold
+    gradnew = gradf(x, *optargs)	# Initial gradient.
+    gradold = gradnew.copy()
+    d = -gradnew				# Initial search direction.
+    success = True				# Force calculation of directional derivs.
+    nsuccess = 0				# nsuccess counts number of successes.
+    beta = 1.0				# Initial scale parameter.
+    betamin = 1.0e-15 			# Lower bound on scale.
+    betamax = 1.0e100			# Upper bound on scale.
+
+    flog = [fold]
+    pointlog = [x.copy()]
+    scalelog = [beta]
+
+    iteration = 0
+
+    # Main optimization loop.
+    while iteration < maxiters:
+
+        # Calculate first and second directional derivatives.
+        if success:
+            mu = np.dot(d, gradnew)
+            if mu >= 0:
+                d = -gradnew
+                mu = np.dot(d, gradnew)
+            kappa = np.dot(d, d)
+            #if kappa < eps():
+                #return x, flog, pointlog, scalelog
+            sigma = sigma0/np.sqrt(kappa)
+            xplus = x + sigma*d
+            gplus = gradf(xplus, *optargs)
+            theta = np.dot(d, (gplus - gradnew))/sigma
+
+        # Increase effective curvature and evaluate step size alpha.
+        delta = theta + beta*kappa
+        if delta <= 0:
+            delta = beta*kappa
+            beta = beta - theta/kappa
+
+        alpha = - mu/delta
+
+        # Calculate the comparison ratio.
+        xnew = x + alpha*d
+        fnew = f(xnew, *optargs)
+        Delta = 2.*(fnew - fold)/(alpha*mu)
+        if Delta  >= 0.:
+            success = True
+            nsuccess += 1
+            x = xnew
+            fnow = fnew
+        else:
+            success = False
+            fnow = fold
+
+        # Store relevant variables
+        flog.append(fnow)		# Current function value
+        pointlog.append(x)           # Current position
+        scalelog.append(beta)    # current scale parameter
+
+        iteration += 1
+        if display:
+            print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta
+
+        if success:
+            # Test for termination
+            if np.max(np.abs(alpha*d)) < xtol or np.max(np.abs(fnew-fold)) < ftol:
+                return x, flog, pointlog, scalelog
+
+            else:
+                # Update variables for new position
+                fold = fnew
+                gradold = gradnew
+                gradnew = gradf(x, *optargs)
+                # If the gradient is zero then we are done.
+                if np.dot(gradnew,gradnew) == 0:
+                    return x, flog, pointlog, scalelog
+
+        # Adjust beta according to comparison ratio.
+        if Delta < 0.25:
+            beta = min(4.0*beta, betamax)
+        if Delta > 0.75:
+            beta = max(0.5*beta, betamin)
+
+        # Update search direction using Polak-Ribiere formula, or re-start
+        # in direction of negative gradient after nparams steps.
+        if nsuccess == x.size:
+            d = -gradnew
+            nsuccess = 0
+        elif success:
+            gamma = np.dot(gradold - gradnew,gradnew)/(mu)
+            d = gamma*d - gradnew
+
+    # If we get here, then we haven't terminated in the given number of
+    # iterations.
+    if display:
+        print "maxiter exceeded"
+
+    return x, flog, pointlog, scalelog

From 7cb20798941b0d39d94637120ced0b3ad958e5b3 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 11:44:28 +0000
Subject: [PATCH 093/105] non working integratino of SCG into GPy

---
 GPy/inference/optimization.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 94a5bcef..2e29fe6a 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -196,6 +196,16 @@ class opt_rasm(Optimizer):
 
         self.trace = opt_result[1]
 
+class opt_scg(Optimizer):
+    def __init__(self, *args, **kwargs):
+        Optimizer.__init__(self, *args, **kwargs)
+        self.opt_name = "Scaled Conjugate Gradients"
+
+    def opt(self, f_fp = None, f = None, fp = None):
+        assert not f is None
+        assert not fp is None
+        opt_result = SCG (f,fp,self.x_init, display=self.messages,
+
 def get_optimizer(f_min):
     # import rasmussens_minimize as rasm
     from SGD import opt_SGD

From 0503d0e3f5dd6a05540822083f3dfe04b2db0200 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 12:20:46 +0000
Subject: [PATCH 094/105] fixed trace_dot to be a litle faster...

---
 GPy/inference/optimization.py | 2 +-
 GPy/util/linalg.py            | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 2e29fe6a..0a72aa08 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -204,7 +204,7 @@ class opt_scg(Optimizer):
     def opt(self, f_fp = None, f = None, fp = None):
         assert not f is None
         assert not fp is None
-        opt_result = SCG (f,fp,self.x_init, display=self.messages,
+        opt_result = SCG (f,fp,self.x_init, display=self.messages)
 
 def get_optimizer(f_min):
     # import rasmussens_minimize as rasm
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index cf023284..f21502a5 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -18,8 +18,7 @@ def trace_dot(a,b):
     """
     efficiently compute the trace of the matrix product of a and b
     """
-    assert a.shape==b.T.shape
-    return np.dot(a.flatten(),b.T.flatten())
+    return np.sum(a*b)
 
 def mdot(*args):
    """Multiply all the arguments using matrix product rules.

From c4d190e0fdd0521bb154788bf4b83b5305de9676 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 14:01:14 +0000
Subject: [PATCH 095/105] made SCG work nicely with the optimization framework

---
 GPy/inference/SCG.py          | 27 ++++++++++++-----------
 GPy/inference/optimization.py | 40 ++++++++++++++++++++---------------
 2 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
index cbaee3e7..f5e7ab22 100644
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@@ -24,7 +24,7 @@
 
 import numpy as np
 
-def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-6):
+def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
     """
     Optimisation through Scaled Conjugate Gradients (SCG)
 
@@ -35,13 +35,12 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
     Returns
     x the optimal value for x
     flog : a list of all the objective values
-    pointlog : a list of the x values tried
-    scalelog : a list of the scales used in optimisation (beta)
 
     """
 
     sigma0 = 1.0e-4
     fold = f(x, *optargs)	# Initial function value.
+    function_eval = 1
     fnow = fold
     gradnew = gradf(x, *optargs)	# Initial gradient.
     gradold = gradnew.copy()
@@ -51,10 +50,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
     beta = 1.0				# Initial scale parameter.
     betamin = 1.0e-15 			# Lower bound on scale.
     betamax = 1.0e100			# Upper bound on scale.
+    status = "Not converged"
 
     flog = [fold]
-    pointlog = [x.copy()]
-    scalelog = [beta]
 
     iteration = 0
 
@@ -68,8 +66,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
                 d = -gradnew
                 mu = np.dot(d, gradnew)
             kappa = np.dot(d, d)
-            #if kappa < eps():
-                #return x, flog, pointlog, scalelog
             sigma = sigma0/np.sqrt(kappa)
             xplus = x + sigma*d
             gplus = gradf(xplus, *optargs)
@@ -86,6 +82,12 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
         # Calculate the comparison ratio.
         xnew = x + alpha*d
         fnew = f(xnew, *optargs)
+        function_eval += 1
+
+        if function_eval >= max_f_eval:
+            status = "Maximum number of function evaluations exceeded"
+            return x, flog, function_eval, status
+
         Delta = 2.*(fnew - fold)/(alpha*mu)
         if Delta  >= 0.:
             success = True
@@ -98,8 +100,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
 
         # Store relevant variables
         flog.append(fnow)		# Current function value
-        pointlog.append(x)           # Current position
-        scalelog.append(beta)    # current scale parameter
 
         iteration += 1
         if display:
@@ -108,7 +108,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
         if success:
             # Test for termination
             if np.max(np.abs(alpha*d)) < xtol or np.max(np.abs(fnew-fold)) < ftol:
-                return x, flog, pointlog, scalelog
+                return x, flog, function_eval, status
 
             else:
                 # Update variables for new position
@@ -117,7 +117,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
                 gradnew = gradf(x, *optargs)
                 # If the gradient is zero then we are done.
                 if np.dot(gradnew,gradnew) == 0:
-                    return x, flog, pointlog, scalelog
+                    return x, flog, function_eval, status
 
         # Adjust beta according to comparison ratio.
         if Delta < 0.25:
@@ -136,7 +136,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-
 
     # If we get here, then we haven't terminated in the given number of
     # iterations.
-    if display:
-        print "maxiter exceeded"
+    status = "maxiter exceeded"
 
-    return x, flog, pointlog, scalelog
+    return x, flog, function_eval, status
diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 0a72aa08..75cd94ba 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -1,18 +1,18 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
+import pdb
+import pylab as pb
+import datetime as dt
 from scipy import optimize
+import numpy as np
 
 try:
     import rasmussens_minimize as rasm
     rasm_available = True
 except ImportError:
     rasm_available = False
-
-import pdb
-import pylab as pb
-import datetime as dt
+from SCG import SCG
 
 class Optimizer():
     """
@@ -29,7 +29,7 @@ class Optimizer():
     :rtype: optimizer object.
 
     """
-    def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, ftol=None, gtol=None, xtol=None):
+    def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, max_iters = 1e3, ftol=None, gtol=None, xtol=None):
         self.opt_name = None
         self.x_init = x_init
         self.messages = messages
@@ -38,6 +38,7 @@ class Optimizer():
         self.funct_eval = None
         self.status = None
         self.max_f_eval = int(max_f_eval)
+        self.max_iters = int(max_iters)
         self.trace = None
         self.time = "Not available"
         self.xtol = xtol
@@ -63,12 +64,13 @@ class Optimizer():
             pb.xlabel('Iteration')
             pb.ylabel('f(x)')
 
-    def diagnostics(self):
-        print "Optimizer: \t\t\t\t %s" % self.opt_name
-        print "f(x_opt): \t\t\t\t %.3f" % self.f_opt
-        print "Number of function evaluations: \t %d" % self.funct_eval
-        print "Optimization status: \t\t\t %s" % self.status
-        print "Time elapsed: \t\t\t\t %s" % self.time
+    def __str__(self):
+        diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
+        diagnostics += "f(x_opt): \t\t\t\t %.3f\n" % self.f_opt
+        diagnostics += "Number of function evaluations: \t %d\n" % self.funct_eval
+        diagnostics += "Optimization status: \t\t\t %s\n" % self.status
+        diagnostics += "Time elapsed: \t\t\t\t %s\n" % self.time
+        return diagnostics
 
 class opt_tnc(Optimizer):
     def __init__(self, *args, **kwargs):
@@ -161,7 +163,6 @@ class opt_simplex(Optimizer):
         self.f_opt = opt_result[1]
         self.funct_eval = opt_result[3]
         self.status = statuses[opt_result[4]]
-
         self.trace = None
 
 
@@ -196,7 +197,7 @@ class opt_rasm(Optimizer):
 
         self.trace = opt_result[1]
 
-class opt_scg(Optimizer):
+class opt_SCG(Optimizer):
     def __init__(self, *args, **kwargs):
         Optimizer.__init__(self, *args, **kwargs)
         self.opt_name = "Scaled Conjugate Gradients"
@@ -204,15 +205,20 @@ class opt_scg(Optimizer):
     def opt(self, f_fp = None, f = None, fp = None):
         assert not f is None
         assert not fp is None
-        opt_result = SCG (f,fp,self.x_init, display=self.messages)
+        opt_result = SCG(f,fp,self.x_init, display=self.messages, maxiters=self.max_iters, max_f_eval=self.max_f_eval, xtol=self.xtol, ftol=self.ftol)
+        self.x_opt = opt_result[0]
+        self.trace = opt_result[1]
+        self.f_opt = self.trace[-1]
+        self.funct_eval = opt_result[2]
+        self.status = opt_result[3]
 
 def get_optimizer(f_min):
-    # import rasmussens_minimize as rasm
     from SGD import opt_SGD
-    
+
     optimizers = {'fmin_tnc': opt_tnc,
           'simplex': opt_simplex,
           'lbfgsb': opt_lbfgsb,
+          'scg': opt_SCG,
           'sgd': opt_SGD}
 
     if rasm_available:

From cbcb3cb33757e835664aa6e746b236b5ce645ab4 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 14:01:32 +0000
Subject: [PATCH 096/105] better GPLVM oil flow demo

---
 GPy/examples/dimensionality_reduction.py | 6 ++++--
 GPy/models/GPLVM.py                      | 4 +++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 59f35c72..d7610acb 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -44,12 +44,14 @@ def GPLVM_oil_100():
     data = GPy.util.datasets.oil_100()
 
     # create simple GP model
-    m = GPy.models.GPLVM(data['X'], 2)
+    kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
+    m = GPy.models.GPLVM(data['X'], 6, kernel = kernel)
 
     # optimize
     m.ensure_default_constraints()
-    m.optimize()
+    m.optimize(messages=1)
 
     # plot
     print(m)
+    m.plot_latent(labels=data['Y'].argmax(axis=1))
     return m
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 5be54049..32594594 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -66,6 +66,8 @@ class GPLVM(GP):
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
         """
 
+        util.plot.Tango.reset()
+        
         if labels is None:
             labels = np.ones(self.N)
         if which_indices is None:
@@ -88,7 +90,7 @@ class GPLVM(GP):
         #first, plot the output variance as a function of the latent space
         Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
 	Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
-	Xtest_full[:, :2] = Xtest        
+	Xtest_full[:, :2] = Xtest
 	mu, var, low, up = self.predict(Xtest_full)
 	var = var[:, :2]
         pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')

From f1af69e7b72e8970ed75f99f1879a48b941e20f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Fusi?= <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 14:26:07 +0000
Subject: [PATCH 097/105] added CI status

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0b5d00ce..09bc78f5 100644
--- a/README.md
+++ b/README.md
@@ -4,4 +4,7 @@ GPy
 A Gaussian processes framework in python
 
 * [Online documentation](https://gpy.readthedocs.org/en/latest/)
-* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)
\ No newline at end of file
+* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)
+
+
+Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)

From 21c3988a2152833e5fad4b966aa28a8611d82c48 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 15:36:29 +0000
Subject: [PATCH 098/105] ensure_default_constraints() now also works with the
 BGPLVM

---
 GPy/models/Bayesian_GPLVM.py |  2 +-
 GPy/testing/bgplvm_tests.py  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index a18ec9bb..8f9759c3 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -41,7 +41,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 
     def _get_param_names(self):
         X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
-        S_names = sum([['S_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
+        S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
         return (X_names + S_names + sparse_GP._get_param_names(self))
 
     def _get_params(self):
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index b182c1a8..b11b4532 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -15,7 +15,7 @@ class BGPLVMTests(unittest.TestCase):
         Y -= Y.mean(axis=0)
         k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
-        m.constrain_positive('(rbf|bias|noise|white|S)')
+        m.ensure_default_constraints()
         m.randomize()
         self.assertTrue(m.checkgrad())
 
@@ -28,7 +28,7 @@ class BGPLVMTests(unittest.TestCase):
         Y -= Y.mean(axis=0)
         k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
-        m.constrain_positive('(linear|bias|noise|white|S)')
+        m.ensure_default_constraints()
         m.randomize()
         self.assertTrue(m.checkgrad())
 
@@ -41,7 +41,7 @@ class BGPLVMTests(unittest.TestCase):
         Y -= Y.mean(axis=0)
         k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
-        m.constrain_positive('(rbf|bias|noise|white|S)')
+        m.ensure_default_constraints()
         m.randomize()
         self.assertTrue(m.checkgrad())
 
@@ -54,7 +54,7 @@ class BGPLVMTests(unittest.TestCase):
         Y -= Y.mean(axis=0)
         k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
-        m.constrain_positive('(rbf|bias|noise|white|S)')
+        m.ensure_default_constraints()
         m.randomize()
         self.assertTrue(m.checkgrad())
 
@@ -68,9 +68,9 @@ class BGPLVMTests(unittest.TestCase):
         Y -= Y.mean(axis=0)
         k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
         m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
-        m.constrain_positive('(linear|bias|noise|white|S)')
+        m.ensure_default_constraints()
         m.randomize()
-        self.assertTrue(m.checkgrad())        
+        self.assertTrue(m.checkgrad())
 
 
 if __name__ == "__main__":

From 1dc6ee574b40fc14339970ec3b4e7b5a8e8698bf Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 16:38:18 +0000
Subject: [PATCH 099/105] added decorator to silence errors

---
 GPy/util/__init__.py   |  1 +
 GPy/util/decorators.py | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 GPy/util/decorators.py

diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py
index 3c28cde3..c91557d0 100644
--- a/GPy/util/__init__.py
+++ b/GPy/util/__init__.py
@@ -10,3 +10,4 @@ import Tango
 import misc
 import warping_functions
 import datasets
+import decorators
diff --git a/GPy/util/decorators.py b/GPy/util/decorators.py
new file mode 100644
index 00000000..eb5fdf33
--- /dev/null
+++ b/GPy/util/decorators.py
@@ -0,0 +1,10 @@
+import numpy as np
+from functools import wraps
+
+def silence_errors(f):
+    status = np.seterr(all='ignore')
+    @wraps(f)
+    def wrapper(*args, **kwds):
+        return f(*args, **kwds)
+    np.seterr(**status)
+    return wrapper

From 5781fbd582180a1977ae1a22b2f73b473f4b8084 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 16:42:52 +0000
Subject: [PATCH 100/105] now it actually works

---
 GPy/util/decorators.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/GPy/util/decorators.py b/GPy/util/decorators.py
index eb5fdf33..2cdf78f1 100644
--- a/GPy/util/decorators.py
+++ b/GPy/util/decorators.py
@@ -2,9 +2,10 @@ import numpy as np
 from functools import wraps
 
 def silence_errors(f):
-    status = np.seterr(all='ignore')
     @wraps(f)
     def wrapper(*args, **kwds):
-        return f(*args, **kwds)
-    np.seterr(**status)
+        status = np.seterr(all='ignore')
+        result = f(*args, **kwds)
+        np.seterr(**status)
+        return result
     return wrapper

From a342dc2f2329e63d6beb4f9dce2f68f2f0fd9b2d Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 16:43:51 +0000
Subject: [PATCH 101/105] errors handled in Mat32

---
 GPy/kern/periodic_Matern32.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py
index 898dff7b..662c1506 100644
--- a/GPy/kern/periodic_Matern32.py
+++ b/GPy/kern/periodic_Matern32.py
@@ -1,6 +1,7 @@
 from kernpart import kernpart
 import numpy as np
 from GPy.util.linalg import mdot, pdinv
+from GPy.util.decorators import silence_errors
 
 class periodic_Matern32(kernpart):
     """
@@ -39,12 +40,16 @@ class periodic_Matern32(kernpart):
         def f(x):
             return alpha*np.cos(omega*x+phase)
         return f
+
+    @silence_errors
     def _cos_factorization(self,alpha,omega,phase):
         r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
         r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
         r =  np.sqrt(r1**2 + r2**2)
         psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
         return r,omega[:,0:1], psi
+
+    @silence_errors
     def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
         Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
         Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) +  np.cos(phi1-phi2.T)*(self.upper-self.lower)
@@ -55,6 +60,7 @@ class periodic_Matern32(kernpart):
     def _get_params(self):
         """return the value of the parameters."""
         return np.hstack((self.variance,self.lengthscale,self.period))
+    
     def _set_params(self,x):
         """set the value of the parameters."""
         assert x.size==3
@@ -101,6 +107,7 @@ class periodic_Matern32(kernpart):
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
 
+    @silence_errors
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
         if X2 is None: X2 = X
@@ -172,6 +179,7 @@ class periodic_Matern32(kernpart):
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*dL_dK)
 
+    @silence_errors
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

From 78f7414642a59596d358e536dbb484b6e0f09305 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Tue, 12 Mar 2013 16:46:35 +0000
Subject: [PATCH 102/105] decorator documentation

---
 GPy/util/decorators.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/GPy/util/decorators.py b/GPy/util/decorators.py
index 2cdf78f1..c8aa08a2 100644
--- a/GPy/util/decorators.py
+++ b/GPy/util/decorators.py
@@ -2,6 +2,11 @@ import numpy as np
 from functools import wraps
 
 def silence_errors(f):
+    """
+    This wraps a function and it silences numpy errors that
+    happen during the execution. After the function has exited, it restores
+    the previous state of the warnings.
+    """
     @wraps(f)
     def wrapper(*args, **kwds):
         status = np.seterr(all='ignore')

From 7a9b6ad1131fd2ae5bf62104f6f2b209b82ee121 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 16:50:12 +0000
Subject: [PATCH 103/105] The warnings are now handled properly in the
 periodic kernels

---
 GPy/kern/periodic_Matern32.py    |  4 ++++
 GPy/kern/periodic_Matern52.py    |  9 +++++++++
 GPy/kern/periodic_exponential.py | 10 +++++++++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py
index 662c1506..95684a02 100644
--- a/GPy/kern/periodic_Matern32.py
+++ b/GPy/kern/periodic_Matern32.py
@@ -1,3 +1,7 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
 from kernpart import kernpart
 import numpy as np
 from GPy.util.linalg import mdot, pdinv
diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py
index c533961f..07cb11ea 100644
--- a/GPy/kern/periodic_Matern52.py
+++ b/GPy/kern/periodic_Matern52.py
@@ -1,6 +1,11 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
 from kernpart import kernpart
 import numpy as np
 from GPy.util.linalg import mdot, pdinv
+from GPy.util.decorators import silence_errors
 
 class periodic_Matern52(kernpart):
     """
@@ -40,6 +45,7 @@ class periodic_Matern52(kernpart):
             return alpha*np.cos(omega*x+phase)
         return f
 
+    @silence_errors
     def _cos_factorization(self,alpha,omega,phase):
         r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
         r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
@@ -57,6 +63,7 @@ class periodic_Matern52(kernpart):
     def _get_params(self):
         """return the value of the parameters."""
         return np.hstack((self.variance,self.lengthscale,self.period))
+    
     def _set_params(self,x):
         """set the value of the parameters."""
         assert x.size==3
@@ -105,6 +112,7 @@ class periodic_Matern52(kernpart):
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
 
+    @silence_errors
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
         if X2 is None: X2 = X
@@ -184,6 +192,7 @@ class periodic_Matern52(kernpart):
         #np.add(target[:,:,2],dK_dper, target[:,:,2])
         target[2] += np.sum(dK_dper*dL_dK)
 
+    @silence_errors
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/periodic_exponential.py
index b966bbef..0018a8f9 100644
--- a/GPy/kern/periodic_exponential.py
+++ b/GPy/kern/periodic_exponential.py
@@ -1,6 +1,11 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
 from kernpart import kernpart
 import numpy as np
 from GPy.util.linalg import mdot, pdinv
+from GPy.util.decorators import silence_errors
 
 class periodic_exponential(kernpart):
     """
@@ -40,6 +45,7 @@ class periodic_exponential(kernpart):
             return alpha*np.cos(omega*x+phase)
         return f
 
+    @silence_errors
     def _cos_factorization(self,alpha,omega,phase):
         r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
         r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
@@ -57,6 +63,7 @@ class periodic_exponential(kernpart):
     def _get_params(self):
         """return the value of the parameters."""
         return np.hstack((self.variance,self.lengthscale,self.period))
+    
     def _set_params(self,x):
         """set the value of the parameters."""
         assert x.size==3
@@ -101,6 +108,7 @@ class periodic_exponential(kernpart):
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
         np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
 
+    @silence_errors
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
         if X2 is None: X2 = X
@@ -166,6 +174,7 @@ class periodic_exponential(kernpart):
         target[1] += np.sum(dK_dlen*dL_dK)
         target[2] += np.sum(dK_dper*dL_dK)
 
+    @silence_errors
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters"""
         FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@@ -225,4 +234,3 @@ class periodic_exponential(kernpart):
         target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
         target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
         target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
-

From 525ef56dcac62a99109cf7217472130af1f58521 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 17:04:48 +0000
Subject: [PATCH 104/105] increased stability of _computations in sparse_GP

---
 GPy/models/sparse_GP.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index a348c9f4..3d44ad6b 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -153,14 +153,26 @@ class sparse_GP(GP):
             #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
         else:
             #likelihood is not heterscedatic
-            beta = self.likelihood.precision
-            dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
-            dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
-            dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta
-            dbeta += np.trace(self.Cpsi1VVpsi1)/beta - 0.5 * trace_dot(np.dot(self.C,self.psi2_beta_scaled) , self.Cpsi1VVpsi1 )/beta
-            self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
+            self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
+            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
+            self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
 
 
+
+    def log_likelihood(self):
+        """ Compute the (lower bound on the) log marginal likelihood """
+        sf2 = self.scale_factor**2
+        if self.likelihood.is_heteroscedastic:
+            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
+            B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
+        else:
+            A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
+            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
+        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
+        D = 0.5*np.trace(self.Cpsi1VVpsi1)
+        return A+B+C+D
+
     def _set_params(self, p):
         self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
         self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
@@ -188,18 +200,6 @@ class sparse_GP(GP):
             #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
             self._set_params(self._get_params()) # update the GP
 
-    def log_likelihood(self):
-        """ Compute the (lower bound on the) log marginal likelihood """
-        sf2 = self.scale_factor**2
-        if self.likelihood.is_heteroscedastic:
-            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
-            B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
-        else:
-            A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
-            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
-        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
-        D = 0.5*np.trace(self.Cpsi1VVpsi1)
-        return A+B+C+D
 
     def _log_likelihood_gradients(self):
         return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))

From 4ada0587f64241cc61745d4d6f45f12dfbd8d92a Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Wed, 13 Mar 2013 09:27:36 +0000
Subject: [PATCH 105/105] changed version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index b14c907e..ef5ff58d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ import os
 from setuptools import setup
 
 # Version number
-version = '0.1.3'
+version = '0.2'
 
 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()