From 688d6ac7a51547e2906f5956912c20a1a1bca3db Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Wed, 16 Jan 2013 11:21:06 +0000
Subject: [PATCH 01/61] fixed up dK_dX in the exponential and Matern kerns

---
 GPy/kern/Matern32.py    |  6 +++---
 GPy/kern/Matern52.py    | 11 +++++------
 GPy/kern/exponential.py |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py
index 8223b37a..377b687f 100644
--- a/GPy/kern/Matern32.py
+++ b/GPy/kern/Matern32.py
@@ -78,14 +78,14 @@ class Matern32(kernpart):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         target[0] += np.sum(partial)
 
-    def dK_dX(self,X,X2,target):
+    def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
-        dK_dX += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
+        dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*partial.T[:,:,None],0)
-
+
     def dKdiag_dX(self,X,target):
         pass
diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py
index 65059a5b..172c0117 100644
--- a/GPy/kern/Matern52.py
+++ b/GPy/kern/Matern52.py
@@ -33,7 +33,6 @@ class Matern52(kernpart):
         self.Nparam = self.D + 1
         self.name = 'Mat52'
         self.set_param(np.hstack((variance,lengthscales)))
-
 
     def get_param(self):
         """return the value of the parameters."""
@@ -77,12 +76,12 @@ class Matern52(kernpart):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         target[0] += np.sum(partial)
 
-    def dK_dX(self,X,X2,target):
+    def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
-        dK_dX += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
+        dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*partial.T[:,:,None],0)
 
     def dKdiag_dX(self,X,target):
@@ -97,11 +96,11 @@ class Matern52(kernpart):
         :param F1: vector of derivatives of F
         :type F1: np.array
         :param F2: vector of second derivatives of F
-        :type F2: np.array 
+        :type F2: np.array
         :param F3: vector of third derivatives of F
-        :type F3: np.array 
+        :type F3: np.array
         :param lower,upper: boundaries of the input domain
-        :type lower,upper: floats 
+        :type lower,upper: floats
         """
         assert self.D == 1
         def L(x,i):
diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py
index ba97881e..8ad5b813 100644
--- a/GPy/kern/exponential.py
+++ b/GPy/kern/exponential.py
@@ -77,7 +77,7 @@ class exponential(kernpart):
         #NB: derivative of diagonal elements wrt lengthscale is 0
         target[0] += np.sum(partial)
 
-    def dK_dX(self,X,X2,target):
+    def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]

From 8da741f7257ca1ad8103935289cd8988dc4d7389 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 18 Jan 2013 11:36:58 +0000
Subject: [PATCH 02/61] Adding travis.yml file for Travis
 continuous integration service, may not work yet

---
 .travis.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..bc8eb3ab
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,12 @@
+language: python
+python:
+  - "2.7"
+# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install:
+  - "pip install scipy>=0.10.1"
+  - "pip install numpy>=1.6.2"
+  - "pip install sphinx"
+  - "pip install ./"
+# command to run tests, e.g. python setup.py test
+script:
+  - "nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing"

From 52a97763300b7926c5ceb2a97ffe328af0759705 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 18 Jan 2013 11:42:46 +0000
Subject: [PATCH 03/61] Changing travis installation

---
 .travis.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index bc8eb3ab..a4faeaee 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,10 +3,10 @@ python:
   - "2.7"
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
 install:
-  - "pip install scipy>=0.10.1"
-  - "pip install numpy>=1.6.2"
-  - "pip install sphinx"
-  - "pip install ./"
+  - pip install scipy>=0.10.1
+  - pip install numpy>=1.6.2
+  - pip install sphinx
+  - pip install . --use-mirrors
 # command to run tests, e.g. python setup.py test
 script:
-  - "nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing"
+  - nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing

From 1aca88d27fb59a5e10c9a61dc9555c8cd9417507 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 18 Jan 2013 12:14:31 +0000
Subject: [PATCH 04/61] Fixed version setting for numpy scipy installation

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a4faeaee..921cf083 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,8 +3,8 @@ python:
   - "2.7"
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
 install:
-  - pip install scipy>=0.10.1
-  - pip install numpy>=1.6.2
+  - pip install "scipy>=0.10.1"
+  - pip install "numpy>=1.6.2"
   - pip install sphinx
   - pip install . --use-mirrors
 # command to run tests, e.g.
python setup.py test From 3c700dfa503f9e6fbd38b8da7cced139f2c4619f Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Fri, 18 Jan 2013 12:31:37 +0000 Subject: [PATCH 05/61] Changed get_param and set_param to _get_params and _set_params --- GPy/core/model.py | 36 ++++----- GPy/core/parameterised.py | 34 ++++----- GPy/examples/GPLVM_demo.py | 28 ------- GPy/examples/GP_regression_demo.py | 51 ------------- GPy/examples/GP_regression_kern_demo.py | 33 --------- GPy/examples/classification.py | 6 +- GPy/examples/oil_flow_demo.py | 53 -------------- GPy/examples/regression.py | 97 ++++++++++++++++++++++++- GPy/examples/warped_GP_demo.py | 45 ------------ GPy/inference/samplers.py | 6 +- GPy/kern/Brownian.py | 8 +- GPy/kern/Matern32.py | 8 +- GPy/kern/Matern52.py | 8 +- GPy/kern/bias.py | 8 +- GPy/kern/exponential.py | 8 +- GPy/kern/finite_dimensional.py | 8 +- GPy/kern/kern.py | 12 +-- GPy/kern/kernpart.py | 6 +- GPy/kern/linear.py | 8 +- GPy/kern/linear_ARD.py | 8 +- GPy/kern/rbf.py | 8 +- GPy/kern/rbf_ARD.py | 18 ++--- GPy/kern/spline.py | 8 +- GPy/kern/sympykern.py | 8 +- GPy/kern/white.py | 8 +- GPy/models/GPLVM.py | 8 +- GPy/models/GP_EP.py | 10 +-- GPy/models/GP_regression.py | 6 +- GPy/models/generalized_FITC.py | 10 +-- GPy/models/sparse_GPLVM.py | 12 +-- GPy/models/sparse_GP_regression.py | 8 +- GPy/models/warped_GP.py | 18 ++--- GPy/util/warping_functions.py | 4 +- 33 files changed, 239 insertions(+), 358 deletions(-) delete mode 100644 GPy/examples/GPLVM_demo.py delete mode 100644 GPy/examples/GP_regression_demo.py delete mode 100644 GPy/examples/GP_regression_kern_demo.py delete mode 100644 GPy/examples/oil_flow_demo.py delete mode 100644 GPy/examples/warped_GP_demo.py diff --git a/GPy/core/model.py b/GPy/core/model.py index 46cf6ac9..d3c6e582 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -14,14 +14,14 @@ from ..inference import optimization class model(parameterised): def __init__(self): parameterised.__init__(self) - self.priors = [None for i in range(self.get_param().size)] + self.priors = [None for i in range(self._get_params().size)] self.optimization_runs = [] self.sampling_runs = [] - self.set_param(self.get_param()) + self._set_params(self._get_params()) self.preferred_optimizer = 'tnc' - def get_param(self): + def _get_params(self): raise NotImplementedError, "this needs to be implemented to utilise the model class" - def set_param(self,x): + def _set_params(self,x): raise NotImplementedError, "this needs to be implemented to utilise the model class" def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to utilise the model class" @@ -67,7 +67,7 @@ class model(parameterised): unconst = np.setdiff1d(which, self.constrained_positive_indices) if len(unconst): print "Warning: constraining parameters to be positive:" - print '\n'.join([n for i,n in enumerate(self.get_param_names()) if i in unconst]) + print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst]) print '\n' self.constrain_positive(unconst) elif isinstance(what,priors.Gaussian): @@ -86,7 +86,7 @@ class model(parameterised): """ matches = self.grep_param_names(name) if len(matches): - return self.get_param()[matches] + return self._get_params()[matches] else: raise AttributeError, "no parameter matches %s"%name @@ -96,9 +96,9 @@ class model(parameterised): """ matches = self.grep_param_names(name) if len(matches): - x = self.get_param() + x = self._get_params() x[matches] = val - self.set_param(x) + self._set_params(x) else: raise AttributeError, "no 
parameter matches %s"%name @@ -106,11 +106,11 @@ class model(parameterised): def log_prior(self): """evaluate the prior""" - return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self.get_param()) if p is not None]) + return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None]) def log_prior_gradients(self): """evaluate the gradients of the priors""" - x = self.get_param() + x = self._get_params() ret = np.zeros(x.size) [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None] return ret @@ -121,7 +121,7 @@ class model(parameterised): Adjust the gradient for constraints and ties, return. """ g = self.log_likelihood_gradients() + self.log_prior_gradients() - x = self.get_param() + x = self._get_params() g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices] g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices] [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)] @@ -142,9 +142,9 @@ class model(parameterised): x = np.random.randn(x.size) self.expand_param(x) #now draw from prior where possible - x = self.get_param() + x = self._get_params() [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None] - self.set_param(x) + self._set_params(x) self.expand_param(self.extract_param())#makes sure all of the tied parameters get the same init (since there's only one prior object...) @@ -194,7 +194,7 @@ class model(parameterised): for s in positive_strings: for i in self.grep_param_names(s): if not (i in self.all_constrained_indices()): - name = self.get_param_names()[i] + name = self._get_param_names()[i] self.constrain_positive(name) if warn: print "Warning! constraining %s postive"%name @@ -248,13 +248,13 @@ class model(parameterised): else: print "numerically calculating hessian. please be patient!" - x = self.get_param() + x = self._get_params() def f(x): - self.set_param(x) + self._set_params(x) return self.log_likelihood() h = ndt.Hessian(f) A = -h(x) - self.set_param(x) + self._set_params(x) # check for almost zero components on the diagonal which screw up the cholesky aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0] A[aa,aa] = 0. @@ -268,7 +268,7 @@ class model(parameterised): hld = np.sum(np.log(np.diag(jitchol(A)[0]))) except: return np.nan - return 0.5*self.get_param().size*np.log(2*np.pi) + self.log_likelihood() - hld + return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld def __str__(self): s = parameterised.__str__(self).split('\n') diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index da0b6056..738bde5b 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -87,7 +87,7 @@ class parameterised(object): Returns ------- - the indices of self.get_param_names which match the regular expression. + the indices of self._get_param_names which match the regular expression. 
Notes ----- @@ -96,9 +96,9 @@ class parameterised(object): if type(expr) is str: expr = re.compile(expr) - return np.nonzero([expr.search(name) for name in self.get_param_names()])[0] + return np.nonzero([expr.search(name) for name in self._get_param_names()])[0] elif type(expr) is re._pattern_type: - return np.nonzero([expr.search(name) for name in self.get_param_names()])[0] + return np.nonzero([expr.search(name) for name in self._get_param_names()])[0] else: return expr @@ -115,11 +115,11 @@ class parameterised(object): assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained" self.constrained_positive_indices = np.hstack((self.constrained_positive_indices, matches)) #check to ensure constraint is in place - x = self.get_param() + x = self._get_params() for i,xx in enumerate(x): if (xx<0) & (i in matches): x[i] = -xx - self.set_param(x) + self._set_params(x) def unconstrain(self,which): @@ -163,11 +163,11 @@ class parameterised(object): assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained" self.constrained_negative_indices = np.hstack((self.constrained_negative_indices, matches)) #check to ensure constraint is in place - x = self.get_param() + x = self._get_params() for i,xx in enumerate(x): if (xx>0.) and (i in matches): x[i] = -xx - self.set_param(x) + self._set_params(x) @@ -187,11 +187,11 @@ class parameterised(object): self.constrained_bounded_uppers.append(upper) self.constrained_bounded_lowers.append(lower) #check to ensure constraint is in place - x = self.get_param() + x = self._get_params() for i,xx in enumerate(x): if ((xx<=lower)|(xx>=upper)) & (i in matches): x[i] = sigmoid(xx)*(upper-lower) + lower - self.set_param(x) + self._set_params(x) def constrain_fixed(self, which, value = None): @@ -213,14 +213,14 @@ class parameterised(object): if value != None: self.constrained_fixed_values.append(value) else: - self.constrained_fixed_values.append(self.get_param()[self.constrained_fixed_indices[-1]]) + self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]]) #self.constrained_fixed_values.append(value) self.expand_param(self.extract_param()) def extract_param(self): - """use self.get_param to get the 'true' parameters of the model, which are then tied, constrained and fixed""" - x = self.get_param() + """use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed""" + x = self._get_params() x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices]) x[self.constrained_negative_indices] = np.log(-x[self.constrained_negative_indices]) [np.put(x,i,np.log(np.clip(x[i]-l,1e-10,np.inf)/np.clip(h-x[i],1e-10,np.inf))) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)] @@ -233,7 +233,7 @@ class parameterised(object): def expand_param(self,x): - """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self.set_param""" + """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params""" #work out how many places are fixed, and where they are. tricky logic! Nfix_places = 0. 
@@ -257,14 +257,14 @@ class parameterised(object): xx[self.constrained_positive_indices] = np.exp(xx[self.constrained_positive_indices]) xx[self.constrained_negative_indices] = -np.exp(xx[self.constrained_negative_indices]) [np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)] - self.set_param(xx) + self._set_params(xx) def extract_param_names(self): """ Returns the parameter names as propagated after constraining, tying or fixing, i.e. a list of the same length as extract_param() """ - n = self.get_param_names() + n = self._get_param_names() #remove/concatenate the tied parameter names if len(self.tied_indices): @@ -294,13 +294,13 @@ class parameterised(object): """ Return a string describing the parameter names and their ties and constraints """ - names = self.get_param_names() + names = self._get_param_names() N = len(names) if not N: return "This object has no free parameters." header = ['Name','Value','Constraints','Ties'] - values = self.get_param() #map(str,self.get_param()) + values = self._get_params() #map(str,self._get_params()) #sort out the constraints constraints = ['']*len(names) for i in self.constrained_positive_indices: diff --git a/GPy/examples/GPLVM_demo.py b/GPy/examples/GPLVM_demo.py deleted file mode 100644 index 651af6be..00000000 --- a/GPy/examples/GPLVM_demo.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -import GPy -np.random.seed(1) -print "GPLVM with RBF kernel" - -N = 100 -Q = 1 -D = 2 -X = np.random.rand(N, Q) -k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001) -K = k.K(X) -Y = np.random.multivariate_normal(np.zeros(N),K,D).T - -m = GPy.models.GPLVM(Y, Q) -m.constrain_positive('(rbf|bias|white)') - -pb.figure() -m.plot() -pb.title('PCA initialisation') -pb.figure() -m.optimize(messages = 1) -m.plot() -pb.title('After optimisation') diff --git a/GPy/examples/GP_regression_demo.py b/GPy/examples/GP_regression_demo.py deleted file mode 100644 index 0fe2fabd..00000000 --- a/GPy/examples/GP_regression_demo.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -""" -Simple Gaussian Processes regression with an RBF kernel -""" -import pylab as pb -import numpy as np -import GPy -pb.ion() -pb.close('all') - - -###################################### -## 1 dimensional example - -# sample inputs and outputs -X = np.random.uniform(-3.,3.,(20,1)) -Y = np.sin(X)+np.random.randn(20,1)*0.05 - -# create simple GP model -m = GPy.models.GP_regression(X,Y) - -# contrain all parameters to be positive -m.constrain_positive('') - -# optimize and plot -m.optimize('tnc', max_f_eval = 1000) -m.plot() -print(m) - -###################################### -## 2 dimensional example - -# sample inputs and outputs -X = np.random.uniform(-3.,3.,(40,2)) -Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(40,1)*0.05 - -# create simple GP model -m = GPy.models.GP_regression(X,Y) - -# contrain all parameters to be positive -m.constrain_positive('') -# optimize and plot -pb.figure() -m.optimize('tnc', max_f_eval = 1000) -m.plot() -print(m) - - diff --git a/GPy/examples/GP_regression_kern_demo.py b/GPy/examples/GP_regression_kern_demo.py deleted file mode 100644 index 8f0b9226..00000000 --- a/GPy/examples/GP_regression_kern_demo.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -""" -Simple one-dimensional Gaussian Processes with assorted kernel functions -""" -import pylab as pb -import numpy as np -import GPy - -# sample inputs and outputs -D = 1 -X = np.random.randn(10,D)*2 -X = np.linspace(-1.5,1.5,5)[:,None] -X = np.append(X,[[5]],0) -Y = np.sin(np.pi*X/2) #+np.random.randn(X.shape[0],1)*0.05 - -models = [GPy.models.GP_regression(X,Y, k) for k in (GPy.kern.rbf(D), GPy.kern.Matern52(D), GPy.kern.Matern32(D), GPy.kern.exponential(D), GPy.kern.linear(D) + GPy.kern.white(D), GPy.kern.bias(D) + GPy.kern.white(D))] - -pb.figure(figsize=(12,8)) -for i,m in enumerate(models): - m.constrain_positive('') - m.optimize() - pb.subplot(3,2,i+1) - m.plot() - #pb.title(m.kern.parts[0].name) - -GPy.util.plot.align_subplots(3,2,(-3,6),(-2.5,2.5)) - -pb.show() - - diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index eb7a887e..989ed08a 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -8,8 +8,6 @@ Simple Gaussian Processes classification import pylab as pb import numpy as np import GPy -pb.ion() -pb.close('all') default_seed=10000 ###################################### @@ -27,7 +25,7 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): likelihood = GPy.inference.likelihoods.probit(data['Y']) if model_type=='Full': - m = GPy.models.simple_GP_EP(data['X'],likelihood) + m = GPy.models.GP_EP(data['X'],likelihood) else: # create sparse GP EP model m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type) @@ -49,7 +47,7 @@ def oil(): likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) # create simple GP model - m = GPy.models.simple_GP_EP(data['X'],likelihood) + m = GPy.models.GP_EP(data['X'],likelihood) # contrain all parameters to be positive m.constrain_positive('') diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py deleted file mode 100644 index eee74461..00000000 --- a/GPy/examples/oil_flow_demo.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import cPickle as pickle
-import numpy as np
-import pylab as pb
-import GPy
-import pylab as plt
-np.random.seed(1)
-
-def plot_oil(X, theta, labels, label):
-    plt.figure()
-    X = X[:,np.argsort(theta)[:2]]
-    flow_type = (X[labels[:,0]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
-    flow_type = (X[labels[:,1]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
-    flow_type = (X[labels[:,2]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
-    plt.title(label)
-
-data = pickle.load(open('../util/datasets/oil_flow_3classes.pickle', 'r'))
-
-Y = data['DataTrn']
-N, D = Y.shape
-selected = np.random.permutation(N)[:200]
-labels = data['DataTrnLbls'][selected]
-Y = Y[selected]
-N, D = Y.shape
-Y -= Y.mean(axis=0)
-Y /= Y.std(axis=0)
-
-Q = 2
-m1 = GPy.models.sparse_GPLVM(Y, Q, M = 15)
-m1.constrain_positive('(rbf|bias|noise)')
-m1.constrain_bounded('white', 1e-6, 1.0)
-
-plot_oil(m1.X, np.array([1,1]), labels, 'PCA initialization')
-# m.optimize(messages = True)
-m1.optimize('bfgs', messages = True)
-plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
-# pb.figure()
-# m.plot()
-# pb.title('PCA initialisation')
-# pb.figure()
-# m.optimize(messages = 1)
-# m.plot()
-# pb.title('After optimisation')
-m = GPy.models.GPLVM(Y, Q)
-m.constrain_positive('(white|rbf|bias|noise)')
-m.optimize()
-plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index d61c8b10..79763504 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -8,8 +8,6 @@ Gaussian Processes regression examples
 import pylab as pb
 import numpy as np
 import GPy
-pb.ion()
-pb.close('all')
 
 
 def toy_rbf_1d():
@@ -48,6 +46,10 @@ def rogers_girolami_olympics():
     print(m)
     return m
 
+def della_gatta_TRP63_gene_expression(number=942):
+    """Run a standard Gaussian process regression on the della Gatta et al TRP63 Gene Expression data set for a given gene number."""
+
+
 def toy_rbf_1d_50():
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d_50()
@@ -81,3 +83,94 @@ def silhouette():
     print(m)
     return m
+
+
+def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
+    """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisy mode is higher."""
+
+    # Contour over a range of length scales and signal/noise ratios.
+    length_scales = np.linspace(0.1, 60., resolution)
+    log_SNRs = np.linspace(-3., 4., resolution)
+
+    data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
+    # Sub sample the data to ensure multiple optima
+    #data['Y'] = data['Y'][0::2, :]
+    #data['X'] = data['X'][0::2, :]
+
+    # Remove the mean (no bias kernel to ensure signal/noise is in RBF/white)
+    data['Y'] = data['Y'] - np.mean(data['Y'])
+
+    lls = GPy.examples.regression.contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
+    pb.contour(length_scales, log_SNRs, np.exp(lls), 20)
+    ax = pb.gca()
+    pb.xlabel('length scale')
+    pb.ylabel('log_10 SNR')
+
+    xlim = ax.get_xlim()
+    ylim = ax.get_ylim()
+
+    # Now run a few optimizations
+    models = []
+    optim_point_x = np.empty(2)
+    optim_point_y = np.empty(2)
+    np.random.seed(seed=seed)
+    for i in range(0, model_restarts):
+        kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
+
+        m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
+        params = m._get_params()
+        optim_point_x[0] = params[1]
+        optim_point_y[0] = np.log10(params[0]) - np.log10(params[2]);
+
+        # constrain all parameters to be positive
+        m.constrain_positive('')
+
+        # optimize
+        m.optimize(xtol=1e-6,ftol=1e-6)
+
+        params = m._get_params()
+        optim_point_x[1] = params[1]
+        optim_point_y[1] = np.log10(params[0]) - np.log10(params[2]);
+        print(m)
+
+        pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
+        models.append(m)
+
+    ax.set_xlim(xlim)
+    ax.set_ylim(ylim)
+    return (models, lls)
+
+def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf):
+    """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
+
+    :data_set: A data set from the utils.datasets directory.
+    :length_scales: a list of length scales to explore for the contour plot.
+    :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot.
+    :signal_kernel: a kernel to use for the 'signal' portion of the data."""
+
+    lls = []
+    total_var = np.var(data['Y'])
+    for log_SNR in log_SNRs:
+        SNR = 10**log_SNR
+        length_scale_lls = []
+        for length_scale in length_scales:
+            noise_var = 1.
+            signal_var = SNR
+            noise_var = noise_var/(noise_var + signal_var)*total_var
+            signal_var = signal_var/(noise_var + signal_var)*total_var
+
+            signal_kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale)
+            noise_kernel = GPy.kern.white(1, variance=noise_var)
+            kernel = signal_kernel + noise_kernel
+            K = kernel.K(data['X'])
+            total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
+            noise_var *= total_var
+            signal_var *= total_var
+
+            kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
+
+            model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
+            model.constrain_positive('')
+            length_scale_lls.append(model.log_likelihood())
+        lls.append(length_scale_lls)
+    return np.array(lls)
diff --git a/GPy/examples/warped_GP_demo.py b/GPy/examples/warped_GP_demo.py
deleted file mode 100644
index 3b75694a..00000000
--- a/GPy/examples/warped_GP_demo.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import scipy as sp -import pdb, sys, pickle -import matplotlib.pylab as plt -import GPy -np.random.seed(1) - -N = 100 -# sample inputs and outputs -X = np.random.uniform(-np.pi,np.pi,(N,1)) -Y = np.sin(X)+np.random.randn(N,1)*0.05 -# Y += np.abs(Y.min()) + 0.5 -Z = np.exp(Y)# Y**(1/3.0) - -# rescaling targets? -Zmax = Z.max() -Zmin = Z.min() -Z = (Z-Zmin)/(Zmax-Zmin) - 0.5 - -m = GPy.models.warpedGP(X, Z, warping_terms = 2) -m.constrain_positive('(tanh_a|tanh_b|rbf|white|bias)') -m.randomize() -plt.figure() -plt.xlabel('predicted f(Z)') -plt.ylabel('actual f(Z)') -plt.plot(m.Y, Y, 'o', alpha = 0.5, label = 'before training') -m.optimize(messages = True) -plt.plot(m.Y, Y, 'o', alpha = 0.5, label = 'after training') -plt.legend(loc = 0) -m.plot_warping() -plt.figure() -plt.title('warped GP fit') -m.plot() - -m1 = GPy.models.GP_regression(X, Z) -m1.constrain_positive('(rbf|white|bias)') -m1.randomize() -m1.optimize(messages = True) -plt.figure() -plt.title('GP fit') -m1.plot() diff --git a/GPy/inference/samplers.py b/GPy/inference/samplers.py index 1216f1eb..da159c23 100644 --- a/GPy/inference/samplers.py +++ b/GPy/inference/samplers.py @@ -73,12 +73,12 @@ class Metropolis_Hastings: def predict(self,function,args): """Make a prediction for the function, to which we will pass the additional arguments""" - param = self.model.get_param() + param = self.model._get_params() fs = [] for p in self.chain: - self.model.set_param(p) + self.model._set_params(p) fs.append(function(*args)) - self.model.set_param(param)# reset model to starting state + self.model._set_params(param)# reset model to starting state return fs diff --git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py index 07d5fbd6..fd19137c 100644 --- a/GPy/kern/Brownian.py +++ b/GPy/kern/Brownian.py @@ -23,16 +23,16 @@ class Brownian(kernpart): assert self.D==1, "Brownian motion in 1D only" self.Nparam = 1. 
self.name = 'Brownian' - self.set_param(np.array([variance]).flatten()) + self._set_params(np.array([variance]).flatten()) - def get_param(self): + def _get_params(self): return self.variance - def set_param(self,x): + def _set_params(self,x): assert x.shape==(1,) self.variance = x - def get_param_names(self): + def _get_param_names(self): return ['variance'] def K(self,X,X2,target): diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index 377b687f..6517ac2c 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -34,19 +34,19 @@ class Matern32(kernpart): lengthscales = np.ones(self.D) self.Nparam = self.D + 1 self.name = 'Mat32' - self.set_param(np.hstack((variance,lengthscales))) + self._set_params(np.hstack((variance,lengthscales))) - def get_param(self): + def _get_params(self): """return the value of the parameters.""" return np.hstack((self.variance,self.lengthscales)) - def set_param(self,x): + def _set_params(self,x): """set the value of the parameters.""" assert x.size==(self.D+1) self.variance = x[0] self.lengthscales = x[1:] - def get_param_names(self): + def _get_param_names(self): """return parameter names.""" if self.D==1: return ['variance','lengthscale'] diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index 172c0117..056da32d 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -32,19 +32,19 @@ class Matern52(kernpart): lengthscales = np.ones(self.D) self.Nparam = self.D + 1 self.name = 'Mat52' - self.set_param(np.hstack((variance,lengthscales))) + self._set_params(np.hstack((variance,lengthscales))) - def get_param(self): + def _get_params(self): """return the value of the parameters.""" return np.hstack((self.variance,self.lengthscales)) - def set_param(self,x): + def _set_params(self,x): """set the value of the parameters.""" assert x.size==(self.D+1) self.variance = x[0] self.lengthscales = x[1:] - def get_param_names(self): + def _get_param_names(self): """return parameter names.""" if self.D==1: return ['variance','lengthscale'] diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py index a89f56c5..0a6872a5 100644 --- a/GPy/kern/bias.py +++ b/GPy/kern/bias.py @@ -17,16 +17,16 @@ class bias(kernpart): self.D = D self.Nparam = 1 self.name = 'bias' - self.set_param(np.array([variance]).flatten()) + self._set_params(np.array([variance]).flatten()) - def get_param(self): + def _get_params(self): return self.variance - def set_param(self,x): + def _set_params(self,x): assert x.shape==(1,) self.variance = x - def get_param_names(self): + def _get_param_names(self): return ['variance'] def K(self,X,X2,target): diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 8ad5b813..2df6a958 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -32,19 +32,19 @@ class exponential(kernpart): lengthscales = np.ones(self.D) self.Nparam = self.D + 1 self.name = 'exp' - self.set_param(np.hstack((variance,lengthscales))) + self._set_params(np.hstack((variance,lengthscales))) - def get_param(self): + def _get_params(self): """return the value of the parameters.""" return np.hstack((self.variance,self.lengthscales)) - def set_param(self,x): + def _set_params(self,x): """set the value of the parameters.""" assert x.size==(self.D+1) self.variance = x[0] self.lengthscales = x[1:] - def get_param_names(self): + def _get_param_names(self): """return parameter names.""" if self.D==1: return ['variance','lengthscale'] diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/finite_dimensional.py index 98c99628..36afd1dc 100644 --- 
a/GPy/kern/finite_dimensional.py +++ b/GPy/kern/finite_dimensional.py @@ -27,15 +27,15 @@ class finite_dimensional(kernpart): weights = np.ones(self.n) self.Nparam = self.n + 1 self.name = 'finite_dim' - self.set_param(np.hstack((variance,weights))) + self._set_params(np.hstack((variance,weights))) - def get_param(self): + def _get_params(self): return np.hstack((self.variance,self.weights)) - def set_param(self,x): + def _set_params(self,x): assert x.size == (self.Nparam) self.variance = x[0] self.weights = x[1:] - def get_param_names(self): + def _get_param_names(self): if self.n==1: return ['variance','weight'] else: diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 3ba7d97b..5f259f55 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -133,20 +133,20 @@ class kern(parameterised): newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] return newkern - def get_param(self): - return np.hstack([p.get_param() for p in self.parts]) + def _get_params(self): + return np.hstack([p._get_params() for p in self.parts]) - def set_param(self,x): - [p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)] + def _set_params(self,x): + [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)] - def get_param_names(self): + def _get_param_names(self): #this is a bit nasty: we wat to distinguish between parts with the same name by appending a count part_names = np.array([k.name for k in self.parts],dtype=np.str) counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)] cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)] names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)] - return sum([[name+'_'+n for n in k.get_param_names()] for name,k in zip(names,self.parts)],[]) + return sum([[name+'_'+n for n in k._get_param_names()] for name,k in zip(names,self.parts)],[]) def K(self,X,X2=None,slices1=None,slices2=None): assert X.shape[1]==self.D diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py index d06749b8..3a5486de 100644 --- a/GPy/kern/kernpart.py +++ b/GPy/kern/kernpart.py @@ -16,11 +16,11 @@ class kernpart(object): self.Nparam = 1 self.name = 'unnamed' - def get_param(self): + def _get_params(self): raise NotImplementedError - def set_param(self,x): + def _set_params(self,x): raise NotImplementedError - def get_param_names(self): + def _get_param_names(self): raise NotImplementedError def K(self,X,X2,target): raise NotImplementedError diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 7246244e..14c0ece3 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -20,16 +20,16 @@ class linear(kernpart): variance = 1.0 self.Nparam = 1 self.name = 'linear' - self.set_param(variance) + self._set_params(variance) self._Xcache, self._X2cache = np.empty(shape=(2,)) - def get_param(self): + def _get_params(self): return self.variance - def set_param(self,x): + def _set_params(self,x): self.variance = x - def get_param_names(self): + def _get_param_names(self): return ['variance'] def K(self,X,X2,target): diff --git a/GPy/kern/linear_ARD.py b/GPy/kern/linear_ARD.py index 149bfb4d..b9112044 100644 --- a/GPy/kern/linear_ARD.py +++ b/GPy/kern/linear_ARD.py @@ -23,16 +23,16 @@ class linear_ARD(kernpart): variances = np.ones(self.D) self.Nparam = int(self.D) self.name = 'linear' - self.set_param(variances) + self._set_params(variances) - def get_param(self): + def _get_params(self): return self.variances - def set_param(self,x): + def 
_set_params(self,x): assert x.size==(self.Nparam) self.variances = x - def get_param_names(self): + def _get_param_names(self): if self.D==1: return ['variance'] else: diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 9e2bb509..c929369f 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -30,23 +30,23 @@ class rbf(kernpart): self.D = D self.Nparam = 2 self.name = 'rbf' - self.set_param(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale))) #initialize cache self._Z, self._mu, self._S = np.empty(shape=(3,1)) self._X, self._X2, self._params = np.empty(shape=(3,1)) - def get_param(self): + def _get_params(self): return np.hstack((self.variance,self.lengthscale)) - def set_param(self,x): + def _set_params(self,x): self.variance, self.lengthscale = x self.lengthscale2 = np.square(self.lengthscale) #reset cached results self._X, self._X2, self._params = np.empty(shape=(3,1)) self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S - def get_param_names(self): + def _get_param_names(self): return ['variance','lengthscale'] def K(self,X,X2,target): diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py index 732be590..12924b3f 100644 --- a/GPy/kern/rbf_ARD.py +++ b/GPy/kern/rbf_ARD.py @@ -22,16 +22,16 @@ class rbf_ARD(kernpart): lengthscales = np.ones(self.D) self.Nparam = self.D + 1 self.name = 'rbf_ARD' - self.set_param(np.hstack((variance,lengthscales))) + self._set_params(np.hstack((variance,lengthscales))) #initialize cache self._Z, self._mu, self._S = np.empty(shape=(3,1)) self._X, self._X2, self._params = np.empty(shape=(3,1)) - def get_param(self): + def _get_params(self): return np.hstack((self.variance,self.lengthscales)) - def set_param(self,x): + def _set_params(self,x): assert x.size==(self.D+1) self.variance = x[0] self.lengthscales = x[1:] @@ -39,7 +39,7 @@ class rbf_ARD(kernpart): #reset cached results self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S - def get_param_names(self): + def _get_param_names(self): if self.D==1: return ['variance','lengthscale'] else: @@ -135,8 +135,8 @@ class rbf_ARD(kernpart): if X2 is None: X2 = X self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy self._params = np.empty(shape=(1,0))#ensure the next section gets called - if not np.all(self._params == self.get_param()): - self._params == self.get_param() + if not np.all(self._params == self._get_params()): + self._params == self._get_params() self._K_dist2 = np.square(self._K_dist/self.lengthscales) self._K_exponent = -0.5*self._K_dist2.sum(-1) self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) @@ -189,7 +189,7 @@ if __name__=='__main__': from checkgrad import checkgrad def k_theta_test(param,k): - k.set_param(param) + k._set_params(param) K = k.K(Z) dK_dtheta = k.dK_dtheta(Z) f = np.sum(K) @@ -216,7 +216,7 @@ if __name__=='__main__': checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,)) def psi1_theta_test(theta,k): - k.set_param(theta) + k._set_params(theta) f = np.sum(k.psi1(Z,mu,S)) df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)]) return f,df @@ -241,7 +241,7 @@ if __name__=='__main__': checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,)) def psi2_theta_test(theta,k): - k.set_param(theta) + k._set_params(theta) f = np.sum(k.psi2(Z,mu,S)) df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)]) return f,df diff --git a/GPy/kern/spline.py b/GPy/kern/spline.py index 44033eea..030b2f02 100644 --- a/GPy/kern/spline.py +++ b/GPy/kern/spline.py @@ -25,15 +25,15 @@ class 
spline(kernpart): assert self.D==1 self.Nparam = 1 self.name = 'spline' - self.set_param(np.squeeze(variance)) + self._set_params(np.squeeze(variance)) - def get_param(self): + def _get_params(self): return self.variance - def set_param(self,x): + def _set_params(self,x): self.variance = x - def get_param_names(self): + def _get_param_names(self): return ['variance'] def K(self,X,X2,target): diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py index d9f89f5b..db3cc976 100644 --- a/GPy/kern/sympykern.py +++ b/GPy/kern/sympykern.py @@ -44,7 +44,7 @@ class spkern(kernpart): if param is None: param = np.ones(self.Nparam) assert param.size==self.Nparam - self.set_param(param) + self._set_params(param) #Differentiate! self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] @@ -247,12 +247,12 @@ class spkern(kernpart): Z = X weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs) - def set_param(self,param): + def _set_params(self,param): #print param.flags['C_CONTIGUOUS'] self._param = param.copy() - def get_param(self): + def _get_params(self): return self._param - def get_param_names(self): + def _get_param_names(self): return [x.name for x in self._sp_theta] diff --git a/GPy/kern/white.py b/GPy/kern/white.py index 587a2b4a..b3b00c48 100644 --- a/GPy/kern/white.py +++ b/GPy/kern/white.py @@ -17,16 +17,16 @@ class white(kernpart): self.D = D self.Nparam = 1 self.name = 'white' - self.set_param(np.array([variance]).flatten()) + self._set_params(np.array([variance]).flatten()) - def get_param(self): + def _get_params(self): return self.variance - def set_param(self,x): + def _set_params(self,x): assert x.shape==(1,) self.variance = x - def get_param_names(self): + def _get_param_names(self): return ['variance'] def K(self,X,X2,target): diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index 44147b73..0a168f25 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -33,16 +33,16 @@ class GPLVM(GP_regression): else: return np.random.randn(Y.shape[0], Q) - def get_param_names(self): + def _get_param_names(self): return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) + self.kern.extract_param_names()) - def get_param(self): + def _get_params(self): return np.hstack((self.X.flatten(), self.kern.extract_param())) - def set_param(self,x): + def _set_params(self,x): self.X = x[:self.X.size].reshape(self.N,self.Q).copy() - GP_regression.set_param(self, x[self.X.size:]) + GP_regression._set_params(self, x[self.X.size:]) def log_likelihood_gradients(self): dL_dK = self.dL_dK() diff --git a/GPy/models/GP_EP.py b/GPy/models/GP_EP.py index bb582674..5cb2ba45 100644 --- a/GPy/models/GP_EP.py +++ b/GPy/models/GP_EP.py @@ -41,13 +41,13 @@ class GP_EP(model): self.K = self.kernel.K(self.X) model.__init__(self) - def set_param(self,p): + def _set_params(self,p): self.kernel.expand_param(p) - def get_param(self): + def _get_params(self): return self.kernel.extract_param() - def get_param_names(self): + def _get_param_names(self): return self.kernel.extract_param_names() def approximate_likelihood(self): @@ -138,7 +138,7 @@ class GP_EP(model): """ self.epsilon_em = epsilon log_likelihood_change = self.epsilon_em + 1. 
- self.parameters_path = [self.kernel.get_param()] + self.parameters_path = [self.kernel._get_params()] self.approximate_likelihood() self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]] self.log_likelihood_path = [self.log_likelihood()] @@ -155,6 +155,6 @@ class GP_EP(model): else: self.approximate_likelihood() self.log_likelihood_path.append(self.log_likelihood()) - self.parameters_path.append(self.kernel.get_param()) + self.parameters_path.append(self.kernel._get_params()) self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde]) iteration += 1 diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index ee2bb448..da657950 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -70,15 +70,15 @@ class GP_regression(model): model.__init__(self) - def set_param(self,p): + def _set_params(self,p): self.kern.expand_param(p) self.K = self.kern.K(self.X,slices1=self.Xslices) self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) - def get_param(self): + def _get_params(self): return self.kern.extract_param() - def get_param_names(self): + def _get_param_names(self): return self.kern.extract_param_names() def _model_fit_term(self): diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index 4ef36f0e..40f18ff9 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -42,14 +42,14 @@ class generalized_FITC(model): self.jitter = 1e-12 model.__init__(self) - def set_param(self,p): + def _set_params(self,p): self.kernel.expand_param(p[0:-self.Z.size]) self.Z = p[-self.Z.size:].reshape(self.M,self.D) - def get_param(self): + def _get_params(self): return np.hstack([self.kernel.extract_param(),self.Z.flatten()]) - def get_param_names(self): + def _get_param_names(self): return self.kernel.extract_param_names()+['iip_%i'%i for i in range(self.Z.size)] def approximate_likelihood(self): @@ -214,7 +214,7 @@ class generalized_FITC(model): """ self.epsilon_em = epsilon log_likelihood_change = self.epsilon_em + 1. 
- self.parameters_path = [self.kernel.get_param()] + self.parameters_path = [self.kernel._get_params()] self.approximate_likelihood() self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]] self.inducing_inputs_path = [self.Z] @@ -235,7 +235,7 @@ class generalized_FITC(model): else: self.approximate_likelihood() self.log_likelihood_path.append(self.log_likelihood()) - self.parameters_path.append(self.kernel.get_param()) + self.parameters_path.append(self.kernel._get_params()) self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde]) self.inducing_inputs_path.append(self.Z) iteration += 1 diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py index c5125c85..1c6a03cf 100644 --- a/GPy/models/sparse_GPLVM.py +++ b/GPy/models/sparse_GPLVM.py @@ -27,16 +27,16 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM): X = self.initialise_latent(init, Q, Y) sparse_GP_regression.__init__(self, X, Y, **kwargs) - def get_param_names(self): + def _get_param_names(self): return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) - + sparse_GP_regression.get_param_names(self)) + + sparse_GP_regression._get_param_names(self)) - def get_param(self): - return np.hstack((self.X.flatten(), sparse_GP_regression.get_param(self))) + def _get_params(self): + return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self))) - def set_param(self,x): + def _set_params(self,x): self.X = x[:self.X.size].reshape(self.N,self.Q).copy() - sparse_GP_regression.set_param(self, x[self.X.size:]) + sparse_GP_regression._set_params(self, x[self.X.size:]) def log_likelihood(self): return sparse_GP_regression.log_likelihood(self) diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index fe5f7cc1..fc04d5ed 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -59,10 +59,10 @@ class sparse_GP_regression(GP_regression): if self.has_uncertain_inputs: self.X_uncertainty /= np.square(self._Xstd) - def set_param(self, p): + def _set_params(self, p): self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) self.beta = p[self.M*self.Q] - self.kern.set_param(p[self.Z.size + 1:]) + self.kern._set_params(p[self.Z.size + 1:]) self.beta2 = self.beta**2 self._compute_kernel_matrices() self._computations() @@ -106,10 +106,10 @@ class sparse_GP_regression(GP_regression): self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE - def get_param(self): + def _get_params(self): return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()]) - def get_param_names(self): + def _get_param_names(self): return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names() def log_likelihood(self): diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_GP.py index bf5af21f..ea3c44cc 100644 --- a/GPy/models/warped_GP.py +++ b/GPy/models/warped_GP.py @@ -13,7 +13,7 @@ from GP_regression import GP_regression class warpedGP(GP_regression): """ - TODO: fucking docstrings! + TODO: fecking docstrings! @nfusi: I'#ve hacked a little on this, but no guarantees. J. 
""" @@ -30,17 +30,17 @@ class warpedGP(GP_regression): self.transform_data() GP_regression.__init__(self, X, self.Y, **kwargs) - def set_param(self, x): + def _set_params(self, x): self.warping_params = x[:self.warping_function.num_parameters].reshape(self.warp_params_shape).copy() self.transform_data() - GP_regression.set_param(self, x[self.warping_function.num_parameters:].copy()) + GP_regression._set_params(self, x[self.warping_function.num_parameters:].copy()) - def get_param(self): - return np.hstack((self.warping_params.flatten().copy(), GP_regression.get_param(self).copy())) + def _get_params(self): + return np.hstack((self.warping_params.flatten().copy(), GP_regression._get_params(self).copy())) - def get_param_names(self): - warping_names = self.warping_function.get_param_names() - param_names = GP_regression.get_param_names(self) + def _get_param_names(self): + warping_names = self.warping_function._get_param_names() + param_names = GP_regression._get_param_names(self) return warping_names + param_names def transform_data(self): @@ -81,7 +81,7 @@ class warpedGP(GP_regression): def predict(self, X, in_unwarped_space = False, **kwargs): mu, var = GP_regression.predict(self, X, **kwargs) - # The plot() function calls set_param() before calling predict() + # The plot() function calls _set_params() before calling predict() # this is causing the observations to be plotted in the transformed # space (where Y lives), making the plot looks very wrong # if the predictions are made in the untransformed space diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index 5a6dc6b3..a87deb5b 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -33,7 +33,7 @@ class WarpingFunction(object): """inverse function transformation""" raise NotImplementedError - def get_param_names(self): + def _get_param_names(self): raise NotImplementedError def plot(self, psi, xmin, xmax): @@ -151,7 +151,7 @@ class TanhWarpingFunction(WarpingFunction): return gradients - def get_param_names(self): + def _get_param_names(self): variables = ['a', 'b', 'c'] names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[]) return names From 377cc383be80852416a630d35d48e3c860cd7425 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 12:39:54 +0000 Subject: [PATCH 06/61] Added apt-get scipy installation for travis, need to ensure version --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 921cf083..af20bec4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,7 @@ python: - "2.7" # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors install: - - pip install "scipy>=0.10.1" - - pip install "numpy>=1.6.2" + - sudo apt-get install python-scipy - pip install sphinx - pip install . --use-mirrors # command to run tests, e.g. 
python setup.py test From 1a795227dff689f0b3cee9de0ffca365074319a7 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 13:07:14 +0000 Subject: [PATCH 07/61] Removed latent force model fortran code from setup.py from github code --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 076af82c..497429d0 100644 --- a/setup.py +++ b/setup.py @@ -24,8 +24,8 @@ setup(name = 'GPy', package_data = {'GPy': ['GPy/examples']}, py_modules = ['GPy.__init__'], long_description=read('README.md'), - ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', - sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], + #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', + # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], setup_requires=['sphinx'], cmdclass = {'build_sphinx': BuildDoc}, From f40b31bea72b4864753baccbc61754d705c20d60 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 13:33:49 +0000 Subject: [PATCH 08/61] Removed version specification of scipy --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 497429d0..ee44dde0 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup(name = 'GPy', long_description=read('README.md'), #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], - install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], + install_requires=['numpy>=1.6', 'scipy','matplotlib>=1.1'], setup_requires=['sphinx'], cmdclass = {'build_sphinx': BuildDoc}, classifiers=[ From 1674bc529bc49949fc39fe992ad5c487f942251d Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Fri, 18 Jan 2013 13:37:17 +0000 Subject: [PATCH 09/61] expand_param and extract_param replaced with set_params_transformed and get_params_transformed --- GPy/core/model.py | 56 +++++++++++++++--------------- GPy/core/parameterised.py | 12 +++---- GPy/inference/samplers.py | 8 ++--- GPy/models/GPLVM.py | 4 +-- GPy/models/GP_EP.py | 10 +++--- GPy/models/GP_regression.py | 6 ++-- GPy/models/generalized_FITC.py | 8 ++--- GPy/models/sparse_GP_regression.py | 4 +-- 8 files changed, 54 insertions(+), 54 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index d3c6e582..2bfb02a5 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -115,7 +115,7 @@ class model(parameterised): [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None] return ret - def extract_gradients(self): + def _log_likelihood_gradients_transformed(self): """ Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model. Adjust the gradient for constraints and ties, return. @@ -138,14 +138,14 @@ class model(parameterised): Make this draw from the prior if one exists, else draw from N(0,1) """ #first take care of all parameters (from N(0,1)) - x = self.extract_param() + x = self._get_params_transformed() x = np.random.randn(x.size) - self.expand_param(x) + self._set_params_transformed(x) #now draw from prior where possible x = self._get_params() [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None] self._set_params(x) - self.expand_param(self.extract_param())#makes sure all of the tied parameters get the same init (since there's only one prior object...) + self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...) 
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, **kwargs): @@ -165,7 +165,7 @@ class model(parameterised): :verbose: whether to show informations about the current restart """ - initial_parameters = self.extract_param() + initial_parameters = self._get_params_transformed() for i in range(Nrestarts): try: self.randomize() @@ -182,9 +182,9 @@ class model(parameterised): raise e if len(self.optimization_runs): i = np.argmax([o.f_opt for o in self.optimization_runs]) - self.expand_param(self.optimization_runs[i].x_opt) + self._set_params_transformed(self.optimization_runs[i].x_opt) else: - self.expand_param(initial_parameters) + self._set_params_transformed(initial_parameters) def ensure_default_constraints(self,warn=False): """ @@ -214,24 +214,24 @@ class model(parameterised): optimizer = self.preferred_optimizer def f(x): - self.expand_param(x) + self._set_params_transformed(x) return -self.log_likelihood()-self.log_prior() def fp(x): - self.expand_param(x) - return -self.extract_gradients() + self._set_params_transformed(x) + return -self._log_likelihood_gradients_transformed() def f_fp(x): - self.expand_param(x) - return -self.log_likelihood()-self.log_prior(),-self.extract_gradients() + self._set_params_transformed(x) + return -self.log_likelihood()-self.log_prior(),-self._log_likelihood_gradients_transformed() if start == None: - start = self.extract_param() + start = self._get_params_transformed() optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model = self, **kwargs) opt.run(f_fp=f_fp, f=f, fp=fp) self.optimization_runs.append(opt) - self.expand_param(opt.x_opt) + self._set_params_transformed(opt.x_opt) def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs): # assert self.Y.shape[1] > 1, "SGD only works with D > 1" @@ -292,18 +292,18 @@ class model(parameterised): If the overall gradient fails, invividual components are tested. """ - x = self.extract_param().copy() + x = self._get_params_transformed().copy() #choose a random direction to step in: dx = step*np.sign(np.random.uniform(-1,1,x.size)) #evaulate around the point x - self.expand_param(x+dx) - f1,g1 = self.log_likelihood() + self.log_prior(), self.extract_gradients() - self.expand_param(x-dx) - f2,g2 = self.log_likelihood() + self.log_prior(), self.extract_gradients() - self.expand_param(x) - gradient = self.extract_gradients() + self._set_params_transformed(x+dx) + f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() + self._set_params_transformed(x-dx) + f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() + self._set_params_transformed(x) + gradient = self._log_likelihood_gradients_transformed() numerical_gradient = (f1-f2)/(2*dx) ratio = (f1-f2)/(2*np.dot(dx,gradient)) @@ -319,7 +319,7 @@ class model(parameterised): print "Global check failed. 
Testing individual gradients\n" try: - names = self.extract_param_names() + names = self._get_param_names_transformed() except NotImplementedError: names = ['Variable %i'%i for i in range(len(x))] @@ -338,13 +338,13 @@ class model(parameterised): for i in range(len(x)): xx = x.copy() xx[i] += step - self.expand_param(xx) - f1,g1 = self.log_likelihood() + self.log_prior(), self.extract_gradients()[i] + self._set_params_transformed(xx) + f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i] xx[i] -= 2.*step - self.expand_param(xx) - f2,g2 = self.log_likelihood() + self.log_prior(), self.extract_gradients()[i] - self.expand_param(x) - gradient = self.extract_gradients()[i] + self._set_params_transformed(xx) + f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i] + self._set_params_transformed(x) + gradient = self._log_likelihood_gradients_transformed()[i] numerical_gradient = (f1-f2)/(2*step) diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index 738bde5b..81b6aa8a 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -66,7 +66,7 @@ class parameterised(object): if hasattr(self,'prior'): pass - self.expand_param(self.extract_param())# sets tied parameters to single value + self._set_params_transformed(self._get_params_transformed())# sets tied parameters to single value def untie_everything(self): """Unties all parameters by setting tied_indices to an empty list.""" @@ -216,9 +216,9 @@ class parameterised(object): self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]]) #self.constrained_fixed_values.append(value) - self.expand_param(self.extract_param()) + self._set_params_transformed(self._get_params_transformed()) - def extract_param(self): + def _get_params_transformed(self): """use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed""" x = self._get_params() x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices]) @@ -232,7 +232,7 @@ class parameterised(object): return x - def expand_param(self,x): + def _set_params_transformed(self,x): """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params""" #work out how many places are fixed, and where they are. tricky logic! @@ -259,10 +259,10 @@ class parameterised(object): [np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)] self._set_params(xx) - def extract_param_names(self): + def _get_param_names_transformed(self): """ Returns the parameter names as propagated after constraining, - tying or fixing, i.e. a list of the same length as extract_param() + tying or fixing, i.e. a list of the same length as _get_params_transformed() """ n = self._get_param_names() diff --git a/GPy/inference/samplers.py b/GPy/inference/samplers.py index da159c23..c2b47bce 100644 --- a/GPy/inference/samplers.py +++ b/GPy/inference/samplers.py @@ -17,7 +17,7 @@ class Metropolis_Hastings: def __init__(self,model,cov=None): """Metropolis Hastings, with tunings according to Gelman et al. 
""" self.model = model - current = self.model.extract_param() + current = self.model._get_params_transformed() self.D = current.size self.chains = [] if cov is None: @@ -32,19 +32,19 @@ class Metropolis_Hastings: if start is None: self.model.randomize() else: - self.model.expand_param(start) + self.model._set_params_transformed(start) def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400): - current = self.model.extract_param() + current = self.model._get_params_transformed() fcurrent = self.model.log_likelihood() + self.model.log_prior() accepted = np.zeros(Ntotal,dtype=np.bool) for it in range(Ntotal): print "sample %d of %d\r"%(it,Ntotal), sys.stdout.flush() prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) - self.model.expand_param(prop) + self.model._set_params_transformed(prop) fprop = self.model.log_likelihood() + self.model.log_prior() if fprop>fcurrent:#sample accepted, going 'uphill' diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index 0a168f25..f4723396 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -35,10 +35,10 @@ class GPLVM(GP_regression): def _get_param_names(self): return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) - + self.kern.extract_param_names()) + + self.kern._get_param_names_transformed()) def _get_params(self): - return np.hstack((self.X.flatten(), self.kern.extract_param())) + return np.hstack((self.X.flatten(), self.kern._get_params_transformed())) def _set_params(self,x): self.X = x[:self.X.size].reshape(self.N,self.Q).copy() diff --git a/GPy/models/GP_EP.py b/GPy/models/GP_EP.py index 5cb2ba45..6f5aad6e 100644 --- a/GPy/models/GP_EP.py +++ b/GPy/models/GP_EP.py @@ -42,13 +42,13 @@ class GP_EP(model): model.__init__(self) def _set_params(self,p): - self.kernel.expand_param(p) + self.kernel._set_params_transformed(p) def _get_params(self): - return self.kernel.extract_param() + return self.kernel._get_params_transformed() def _get_param_names(self): - return self.kernel.extract_param_names() + return self.kernel._get_param_names_transformed() def approximate_likelihood(self): self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta]) @@ -150,8 +150,8 @@ class GP_EP(model): log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1] if log_likelihood_change < 0: print 'log_likelihood decrement' - self.kernel.expand_param(self.parameters_path[-1]) - self.kernM.expand_param(self.parameters_path[-1]) + self.kernel._set_params_transformed(self.parameters_path[-1]) + self.kernM._set_params_transformed(self.parameters_path[-1]) else: self.approximate_likelihood() self.log_likelihood_path.append(self.log_likelihood()) diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index da657950..f34802bf 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -71,15 +71,15 @@ class GP_regression(model): model.__init__(self) def _set_params(self,p): - self.kern.expand_param(p) + self.kern._set_params_transformed(p) self.K = self.kern.K(self.X,slices1=self.Xslices) self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) def _get_params(self): - return self.kern.extract_param() + return self.kern._get_params_transformed() def _get_param_names(self): - return self.kern.extract_param_names() + return self.kern._get_params_names_transformed() def _model_fit_term(self): """ diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index 40f18ff9..a20666d0 
100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -43,14 +43,14 @@ class generalized_FITC(model): model.__init__(self) def _set_params(self,p): - self.kernel.expand_param(p[0:-self.Z.size]) + self.kernel._set_params_transformed(p[0:-self.Z.size]) self.Z = p[-self.Z.size:].reshape(self.M,self.D) def _get_params(self): - return np.hstack([self.kernel.extract_param(),self.Z.flatten()]) + return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()]) def _get_param_names(self): - return self.kernel.extract_param_names()+['iip_%i'%i for i in range(self.Z.size)] + return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)] def approximate_likelihood(self): self.Kmm = self.kernel.K(self.Z) @@ -227,7 +227,7 @@ class generalized_FITC(model): log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1] if log_likelihood_change < 0: print 'log_likelihood decrement' - self.kernel.expand_param(self.parameters_path[-1]) + self.kernel._set_params_transformed(self.parameters_path[-1]) self.kernM = self.kernel.copy() self.kernM.expand_X(self.iducing_inputs_path[-1]) self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em) diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index fc04d5ed..59e6ebbb 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -107,10 +107,10 @@ class sparse_GP_regression(GP_regression): self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE def _get_params(self): - return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()]) + return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()]) def _get_param_names(self): - return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names() + return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed() def log_likelihood(self): """ From 333d8d3e5ffd05aeb4e99602c5512b5e17cd627e Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Fri, 18 Jan 2013 13:39:51 +0000 Subject: [PATCH 10/61] Fix error introduced into GP_regression when doing name changes. --- GPy/models/GP_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index f34802bf..91074269 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -79,7 +79,7 @@ class GP_regression(model): return self.kern._get_params_transformed() def _get_param_names(self): - return self.kern._get_params_names_transformed() + return self.kern._get_param_names_transformed() def _model_fit_term(self): """ From b43db3f8da8add0ef7fcd2d1a49b96412951b2e7 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Fri, 18 Jan 2013 13:47:37 +0000 Subject: [PATCH 11/61] Modified log_like_gradients to make it _log_like_gradients and moved extract_gradient to _log_like_gradients_transformed.
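A sketch of the resulting call chain (illustrative; m stands for any model instance):

    g = m._log_likelihood_gradients()               # raw gradients, implemented per model
    gp = m._log_prior_gradients()                   # gradient contribution of any priors
    gt = m._log_likelihood_gradients_transformed()  # (g + gp), adjusted for constraints and ties

The transformed accessor is the one consumed by the optimizers and by checkgrad.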
--- GPy/core/model.py | 6 +++--- GPy/inference/likelihoods.py | 2 +- GPy/models/GPLVM.py | 2 +- GPy/models/GP_EP.py | 2 +- GPy/models/GP_regression.py | 2 +- GPy/models/generalized_FITC.py | 2 +- GPy/models/sparse_GPLVM.py | 4 ++-- GPy/models/sparse_GP_regression.py | 2 +- GPy/models/warped_GP.py | 4 ++-- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 2bfb02a5..063eaf7d 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -25,7 +25,7 @@ class model(parameterised): raise NotImplementedError, "this needs to be implemented to utilise the model class" def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to utilise the model class" - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): raise NotImplementedError, "this needs to be implemented to utilise the model class" def set_prior(self,which,what): @@ -108,7 +108,7 @@ class model(parameterised): """evaluate the prior""" return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None]) - def log_prior_gradients(self): + def _log_prior_gradients(self): """evaluate the gradients of the priors""" x = self._get_params() ret = np.zeros(x.size) @@ -120,7 +120,7 @@ class model(parameterised): Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model. Adjust the gradient for constraints and ties, return. """ - g = self.log_likelihood_gradients() + self.log_prior_gradients() + g = self._log_likelihood_gradients() + self._log_prior_gradients() x = self._get_params() g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices] g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices] diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 6f5e81fa..53dd4248 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -99,6 +99,6 @@ class probit(likelihood): def predictive_mean(self,mu,variance): return stats.norm.cdf(mu/np.sqrt(1+variance)) - def log_likelihood_gradients(): + def _log_likelihood_gradients(): raise NotImplementedError diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index f4723396..3749ee32 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -44,7 +44,7 @@ class GPLVM(GP_regression): self.X = x[:self.X.size].reshape(self.N,self.Q).copy() GP_regression._set_params(self, x[self.X.size:]) - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): dL_dK = self.dL_dK() dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X) diff --git a/GPy/models/GP_EP.py b/GPy/models/GP_EP.py index 6f5aad6e..51d69d0a 100644 --- a/GPy/models/GP_EP.py +++ b/GPy/models/GP_EP.py @@ -78,7 +78,7 @@ class GP_EP(model): L3 = sum(np.log(self.ep_approx.Z_hat)) return L1 + L2A + L2B + L3 - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): dK_dp = self.kernel.dK_dtheta(self.X) self.dK_dp = dK_dp aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1) diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index 91074269..eee0fb58 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -103,7 +103,7 @@ class GP_regression(model): return dL_dK - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X) def predict(self,Xnew, slices=None, 
full_cov=False): diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index a20666d0..d8e9c23d 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -99,7 +99,7 @@ class generalized_FITC(model): E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde)) return A + B + C + D + E - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): dKmm_dtheta = self.kernel.dK_dtheta(self.Z) dKnn_dtheta = self.kernel.dK_dtheta(self.X) dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X) diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py index 1c6a03cf..84a04bfd 100644 --- a/GPy/models/sparse_GPLVM.py +++ b/GPy/models/sparse_GPLVM.py @@ -49,8 +49,8 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM): return dL_dX - def log_likelihood_gradients(self): - return np.hstack((self.dL_dX().flatten(), sparse_GP_regression.log_likelihood_gradients(self))) + def _log_likelihood_gradients(self): + return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self))) def plot(self): GPLVM.plot(self) diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index 59e6ebbb..870828ca 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -168,7 +168,7 @@ class sparse_GP_regression(GP_regression): dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) return dL_dZ - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()]) def _raw_predict(self, Xnew, slices, full_cov=False): diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_GP.py index ea3c44cc..ff6267d2 100644 --- a/GPy/models/warped_GP.py +++ b/GPy/models/warped_GP.py @@ -59,8 +59,8 @@ class warpedGP(GP_regression): jacobian = self.warping_function.fgrad_y(self.Z, self.warping_params) return ll + np.log(jacobian).sum() - def log_likelihood_gradients(self): - ll_grads = GP_regression.log_likelihood_gradients(self) + def _log_likelihood_gradients(self): + ll_grads = GP_regression._log_likelihood_gradients(self) alpha = np.dot(self.Ki, self.Y.flatten()) warping_grads = self.warping_function_gradients(alpha) return np.hstack((warping_grads.flatten(), ll_grads.flatten())) From 68d7e2364862b83213a5824ae51b49bd83780fa7 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 13:58:41 +0000 Subject: [PATCH 12/61] rbf kernel now has an ARD flag --- GPy/examples/GP_regression_demo.py | 12 ++++-- GPy/examples/sparse_GPLVM_demo.py | 4 +- GPy/kern/constructors.py | 4 +- GPy/kern/rbf.py | 65 ++++++++++++++++++++---------- 4 files changed, 57 insertions(+), 28 deletions(-) diff --git a/GPy/examples/GP_regression_demo.py b/GPy/examples/GP_regression_demo.py index 0fe2fabd..96bd6dc3 100644 --- a/GPy/examples/GP_regression_demo.py +++ b/GPy/examples/GP_regression_demo.py @@ -1,7 +1,6 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) - """ Simple Gaussian Processes regression with an RBF kernel """ @@ -19,8 +18,11 @@ pb.close('all') X = np.random.uniform(-3.,3.,(20,1)) Y = np.sin(X)+np.random.randn(20,1)*0.05 +# define kernel +ker = GPy.kern.rbf(1,ARD=False) + GPy.kern.white(1) + # create simple GP model -m = GPy.models.GP_regression(X,Y) +m = GPy.models.GP_regression(X,Y,ker) # constrain all parameters to be positive m.constrain_positive('') @@ -30,6 +32,7 @@ m.optimize('tnc', max_f_eval = 1000) m.plot() print(m) + ###################################### ## 2 dimensional example @@ -37,8 +40,11 @@ print(m) X = np.random.uniform(-3.,3.,(40,2)) Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(40,1)*0.05 +# define kernel +ker = GPy.kern.rbf(2,ARD=True) + GPy.kern.white(2) + # create simple GP model -m = GPy.models.GP_regression(X,Y) +m = GPy.models.GP_regression(X,Y,ker) # constrain all parameters to be positive m.constrain_positive('') diff --git a/GPy/examples/sparse_GPLVM_demo.py b/GPy/examples/sparse_GPLVM_demo.py index 6ca6c941..3f1969fe 100644 --- a/GPy/examples/sparse_GPLVM_demo.py +++ b/GPy/examples/sparse_GPLVM_demo.py @@ -10,11 +10,11 @@ print "sparse GPLVM with RBF kernel" N = 100 M = 4 -Q = 1 +Q = 2 D = 2 #generate GPLVM-like data X = np.random.rand(N, Q) -k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001) +k = GPy.kern.rbf(Q,1.,2*np.ones((1,))) + GPy.kern.white(Q, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 0ddc09e3..e1304f11 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -22,7 +22,7 @@ from Brownian import Brownian as Brownianpart #using meta-classes to make the objects construct properly without them. -def rbf(D,variance=1., lengthscale=1.): +def rbf(D,variance=1., lengthscale=None,ARD=False): """ Construct an RBF kernel :param D: dimensionality of the kernel, obligatory :type D: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel :type lengthscale: float """ - part = rbfpart(D,variance,lengthscale) + part = rbfpart(D,variance,lengthscale,ARD) return kern(D, [part]) def rbf_ARD(D,variance=1., lengthscales=None): diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 9e2bb509..775bb797 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -20,17 +20,33 @@ class rbf(kernpart): :type D: int :param variance: the variance of the kernel :type variance: float - :param lengthscale: the lengthscale of the kernel - :type lengthscale: float + :param lengthscale: the vector of lengthscale of the kernel + :type lengthscale: np.ndarray + :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. + :type ARD: Boolean ..
Note: for rbf with different lengthscale on each dimension, see rbf_ARD """ - def __init__(self,D,variance=1.,lengthscale=1.): + def __init__(self,D,variance=1.,lengthscale=None,ARD=False): self.D = D - self.Nparam = 2 - self.name = 'rbf' - self.set_param(np.hstack((variance,lengthscale))) + self.ARD = ARD + if ARD == False: + self.Nparam = 2 + self.name = 'rbf' + if lengthscale is not None: + assert lengthscale.shape == (1,) + else: + lengthscale = np.ones(1) + + else: + self.Nparam = self.D + 1 + self.name = 'rbf_ARD' + if lengthscale is not None: + assert lengthscale.shape == (self.D,) + else: + lengthscale = np.ones(self.D) + + self.set_param(np.hstack((variance,lengthscale))) #initialize cache self._Z, self._mu, self._S = np.empty(shape=(3,1)) @@ -40,14 +56,19 @@ class rbf(kernpart): return np.hstack((self.variance,self.lengthscale)) def set_param(self,x): - self.variance, self.lengthscale = x + assert x.size==(self.Nparam) + self.variance = x[0] + self.lengthscale = x[1:] self.lengthscale2 = np.square(self.lengthscale) #reset cached results self._X, self._X2, self._params = np.empty(shape=(3,1)) self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S def get_param_names(self): - return ['variance','lengthscale'] + if self.Nparam == 2: + return ['variance','lengthscale'] + else: + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] def K(self,X,X2,target): if X2 is None: @@ -61,7 +82,12 @@ class rbf(kernpart): def dK_dtheta(self,partial,X,X2,target): self._K_computations(X,X2) target[0] += np.sum(self._K_dvar*partial) - target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial) + if self.ARD == True: + dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale + target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + else: + target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial) + #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial) def dKdiag_dtheta(self,partial,X,target): #NB: derivative of diagonal elements wrt lengthscale is 0 @@ -81,15 +107,13 @@ class rbf(kernpart): self._X = X self._X2 = X2 if X2 is None: X2 = X - XXT = np.dot(X,X2.T) - if X is X2: - self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2 - else: - self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2 - # TODO Remove comments if this is fine. - # Commented out by Neil as doesn't seem to be used elsewhere. 
- #self._K_exponent = -0.5*self._K_dist2 - self._K_dvar = np.exp(-0.5*self._K_dist2) + self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy + self._params = np.empty(shape=(1,0))#ensure the next section gets called + if not np.all(self._params == self.get_param()): + self._params = self.get_param() + self._K_dist2 = np.square(self._K_dist/self.lengthscale) + #self._K_exponent = -0.5*self._K_dist2.sum(-1) #ND: commented out because seems not to be used + self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) def psi0(self,Z,mu,S,target): target += self.variance @@ -132,7 +156,7 @@ class rbf(kernpart): d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) d_length = d_length.sum(0) target[0] += np.sum(partial*d_var) - target[1] += np.sum(d_length*partial) + target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0) def dpsi2_dZ(self,partial,Z,mu,S,target): """Returns shape N,M,M,Q""" @@ -175,4 +199,3 @@ class rbf(kernpart): self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M self._Z, self._mu, self._S = Z, mu,S - From 17ec4d6275c6887b0a45ee7c7b26f1fefa2d6b35 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 14:33:52 +0000 Subject: [PATCH 13/61] change in unit_test to take into account the ARD changes in rbf --- GPy/testing/unit_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index ff9aba0e..d2ef87f7 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -121,7 +121,7 @@ class GradientTests(unittest.TestCase): """ Testing GPLVM with rbf + bias and white kernel """ N, Q, D = 50, 1, 2 X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q, 0.5, 0.9) + GPy.kern.bias(Q, 0.1) + GPy.kern.white(Q, 0.05) + k = GPy.kern.rbf(Q, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(Q, 0.1) + GPy.kern.white(Q, 0.05) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T m = GPy.models.GPLVM(Y, Q, kernel = k) From 11d088cf903191254b53a47a6647e5512a8511ec Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 15:14:23 +0000 Subject: [PATCH 14/61] added ARD flag to exponential --- GPy/kern/__init__.py | 2 +- GPy/kern/constructors.py | 22 +++------------- GPy/kern/exponential.py | 54 +++++++++++++++++++++++++--------------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index cd893bac..4a36d6d0 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy, sympykern +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, linear_ARD, rbf_sympy, sympykern from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index e1304f11..5f676d9b 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -36,20 +36,6 @@ def rbf(D,variance=1., lengthscale=None,ARD=False): :type variance: float :param lengthscale: the lengthscale of the kernel :type lengthscale: float """ part = rbfpart(D,variance,lengthscale,ARD) return kern(D, [part]) -def rbf_ARD(D,variance=1., lengthscales=None): - """ - Construct an RBF kernel with Automatic Relevance Determination (ARD) - - :param D: dimensionality of the kernel, obligatory - :type D: int - :param variance: the variance of the
kernel - :type variance: float - :param lengthscales: the lengthscales of the kernel - :type lengthscales: None|np.ndarray - """ - part = rbf_ARD_part(D,variance,lengthscales) - return kern(D, [part]) - def linear(D,lengthscales=None): """ Construct a linear kernel. @@ -86,7 +72,7 @@ def white(D,variance=1.): part = whitepart(D,variance) return kern(D, [part]) -def exponential(D,variance=1., lengthscales=None): +def exponential(D,variance=1., lengthscale=None, ARD=False): """ Construct a exponential kernel. @@ -96,10 +82,10 @@ def exponential(D,variance=1., lengthscales=None): variance (float) lengthscales (np.ndarray) """ - part = exponentialpart(D,variance, lengthscales) + part = exponentialpart(D,variance, lengthscale, ARD) return kern(D, [part]) -def Matern32(D,variance=1., lengthscales=None): +def Matern32(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 3/2 kernel. @@ -109,7 +95,7 @@ def Matern32(D,variance=1., lengthscales=None): variance (float) lengthscales (np.ndarray) """ - part = Matern32part(D,variance, lengthscales) + part = Matern32part(D,variance, lengthscale, ARD) return kern(D, [part]) def Matern52(D,variance=1., lengthscales=None): diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 2df6a958..21bb8398 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -24,37 +24,46 @@ class exponential(kernpart): :rtype: kernel object """ - def __init__(self,D,variance=1.,lengthscales=None): + def __init__(self,D,variance=1.,lengthscale=None,ARD=False): self.D = D - if lengthscales is not None: - assert lengthscales.shape==(self.D,) + self.ARD = ARD + if ARD == False: + self.Nparam = 2 + self.name = 'exp' + if lengthscale is not None: + assert lengthscale.shape == (1,) + else: + lengthscale = np.ones(1) else: - lengthscales = np.ones(self.D) - self.Nparam = self.D + 1 - self.name = 'exp' - self._set_params(np.hstack((variance,lengthscales))) + self.Nparam = self.D + 1 + self.name = 'exp_ARD' + if lengthscale is not None: + assert lengthscale.shape == (self.D,) + else: + lengthscale = np.ones(self.D) + self._set_params(np.hstack((variance,lengthscale))) def _get_params(self): """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscales)) + return np.hstack((self.variance,self.lengthscale)) def _set_params(self,x): """set the value of the parameters.""" assert x.size==(self.D+1) self.variance = x[0] - self.lengthscales = x[1:] + self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.D==1: + if self.Nparam==2: return ['variance','lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] def K(self,X,X2,target): """Compute the covariance matrix between X and X2.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) np.add(self.variance*np.exp(-dist), target,target) def Kdiag(self,X,target): @@ -64,13 +73,18 @@ class exponential(kernpart): def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = 
np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 + dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dvar = np.exp(-dist) - dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] target[0] += np.sum(dvar*partial) - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + if self.ARD == True: + dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] + target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + else: + dl = self.variance*dvar*dist2M.sum(-1)*invdist + target[1] += np.sum(dl*partial) + #foo def dKdiag_dtheta(self,partial,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" @@ -80,8 +94,8 @@ class exponential(kernpart): def dK_dX(self,partial,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] + ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2)) target += np.sum(dK_dX*partial.T[:,:,None],0) @@ -101,14 +115,14 @@ class exponential(kernpart): """ assert self.D == 1 def L(x,i): - return(1./self.lengthscales*F[i](x) + F1[i](x)) + return(1./self.lengthscale*F[i](x) + F1[i](x)) n = F.shape[0] G = np.zeros((n,n)) for i in range(n): for j in range(i,n): G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] Flower = np.array([f(lower) for f in F])[:,None] - return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) + return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) From 99034d0fb0a96dc522d6c82ee0abba12e0cf5718 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Fri, 18 Jan 2013 15:31:20 +0000 Subject: [PATCH 15/61] Mods to regression.py now that we have get to get parameters. Moved Youter to YYT. --- GPy/__init__.py | 1 + GPy/core/model.py | 25 +++++++++++++++++++++---- GPy/examples/regression.py | 31 +++++++++---------------------- GPy/models/GP_regression.py | 16 ++++++++-------- GPy/models/generalized_FITC.py | 4 ++-- GPy/models/warped_GP.py | 4 ++-- 6 files changed, 43 insertions(+), 38 deletions(-) diff --git a/GPy/__init__.py b/GPy/__init__.py index 6993d5c2..381d6232 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -6,5 +6,6 @@ import kern import models import inference import util +import examples #import examples TODO: discuss! from core import priors diff --git a/GPy/core/model.py b/GPy/core/model.py index 063eaf7d..77e66600 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -80,19 +80,22 @@ class model(parameterised): for w in which: self.priors[w] = what - def get(self,name): + def get(self,name, return_names=False): """ - get a model parameter by name + Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. 
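+ For example (an illustrative call), m.get('rbf_variance') returns the current value of every parameter whose name matches the regular expression 'rbf_variance', and m.get('rbf', return_names=True) additionally returns the matching parameter names.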
""" matches = self.grep_param_names(name) if len(matches): - return self._get_params()[matches] + if return_names: + return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist() + else: + return self._get_params()[matches] else: raise AttributeError, "no parameter matches %s"%name def set(self,name,val): """ - Set a model parameter by name + Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value. """ matches = self.grep_param_names(name) if len(matches): @@ -102,6 +105,20 @@ class model(parameterised): else: raise AttributeError, "no parameter matches %s"%name + def get_gradient(self,name, return_names=False): + """ + Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. + """ + matches = self.grep_param_names(name) + if len(matches): + if return_names: + return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist() + else: + return self._log_likelihood_gradients()[matches] + else: + raise AttributeError, "no parameter matches %s"%name + + def log_prior(self): diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 79763504..9444e899 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -17,10 +17,8 @@ def toy_rbf_1d(): # create simple GP model m = GPy.models.GP_regression(data['X'],data['Y']) - # contrain all parameters to be positive - m.constrain_positive('') - # optimize + m.ensure_default_constraints() m.optimize() # plot @@ -35,10 +33,8 @@ def rogers_girolami_olympics(): # create simple GP model m = GPy.models.GP_regression(data['X'],data['Y']) - # contrain all parameters to be positive - m.constrain_positive('') - # optimize + m.ensure_default_constraints() m.optimize() # plot @@ -57,10 +53,8 @@ def toy_rbf_1d_50(): # create simple GP model m = GPy.models.GP_regression(data['X'],data['Y']) - # contrain all parameters to be positive - m.constrain_positive('') - # optimize + m.ensure_default_constraints() m.optimize() # plot @@ -75,10 +69,8 @@ def silhouette(): # create simple GP model m = GPy.models.GP_regression(data['X'],data['Y']) - # contrain all parameters to be positive - m.constrain_positive('') - # optimize + m.ensure_default_constraints() m.optimize() print(m) @@ -118,20 +110,15 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.)) m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern) - params = m._get_params() - optim_point_x[0] = params[1] - optim_point_y[0] = np.log10(params[0]) - np.log10(params[2]); - - # contrain all parameters to be positive - m.constrain_positive('') + optim_point_x[0] = m.get('rbf_lengthscale') + optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); # optimize + m.ensure_default_constraints() m.optimize(xtol=1e-6,ftol=1e-6) - params = m._get_params() - optim_point_x[1] = params[1] - optim_point_y[1] = np.log10(params[0]) - np.log10(params[2]); - print(m) + optim_point_x[1] = m.get('rbf_lengthscale') + optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, 
head_width=0.5, fc='k', ec='k') models.append(m) diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index eee0fb58..72a24307 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -63,10 +63,10 @@ class GP_regression(model): self._Ystd = np.ones((1,self.Y.shape[1])) if self.D > self.N: - # then it's more efficient to store Youter - self.Youter = np.dot(self.Y, self.Y.T) + # then it's more efficient to store YYT + self.YYT = np.dot(self.Y, self.Y.T) else: - self.Youter = None + self.YYT = None model.__init__(self) @@ -83,23 +83,23 @@ class GP_regression(model): def _model_fit_term(self): """ - Computes the model fit using Youter if it's available + Computes the model fit using YYT if it's available """ - if self.Youter is None: + if self.YYT is None: return -0.5*np.sum(np.square(np.dot(self.Li,self.Y))) else: - return -0.5*np.sum(np.multiply(self.Ki, self.Youter)) + return -0.5*np.sum(np.multiply(self.Ki, self.YYT)) def log_likelihood(self): complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet return complexity_term + self._model_fit_term() def dL_dK(self): - if self.Youter is None: + if self.YYT is None: alpha = np.dot(self.Ki,self.Y) dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki) else: - dL_dK = 0.5*(mdot(self.Ki, self.Youter, self.Ki) - self.D*self.Ki) + dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki) return dL_dK diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index d8e9c23d..a5ed8d0a 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -91,9 +91,9 @@ class generalized_FITC(model): def log_likelihood(self): self.posterior_param() - self.Youter = np.dot(self.mu_tilde,self.mu_tilde.T) + self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T) A = -self.hld - B = -.5*np.sum(self.Qi*self.Youter) + B = -.5*np.sum(self.Qi*self.YYT) C = sum(np.log(self.ep_approx.Z_hat)) D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_)) E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde)) diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_GP.py index ff6267d2..8ce80c76 100644 --- a/GPy/models/warped_GP.py +++ b/GPy/models/warped_GP.py @@ -48,9 +48,9 @@ class warpedGP(GP_regression): # this supports the 'smart' behaviour in GP_regression if self.D > self.N: - self.Youter = np.dot(self.Y, self.Y.T) + self.YYT = np.dot(self.Y, self.Y.T) else: - self.Youter = None + self.YYT = None return self.Y From 0c30048e572568bfe6bace4f73fcd95c8796658d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 15:34:06 +0000 Subject: [PATCH 16/61] More ARD flags in exponential and Matern32 --- GPy/kern/Matern32.py | 60 ++++++++++++++++++++++++++--------------- GPy/kern/exponential.py | 5 ++-- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index 6517ac2c..1270e3f9 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -20,43 +20,52 @@ class Matern32(kernpart): :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float - :param lengthscale: the lengthscales :math:`\ell_i` + :param lengthscale: the lengthscale :math:`\ell_i` :type lengthscale: np.ndarray of size (D,) :rtype: kernel object """ - def __init__(self,D,variance=1.,lengthscales=None): + def __init__(self,D,variance=1.,lengthscale=None,ARD=False): self.D = D - if lengthscales is not None: - assert 
lengthscales.shape==(self.D,) + self.ARD = ARD + if ARD == False: + self.Nparam = 2 + self.name = 'Mat32' + if lengthscale is not None: + assert lengthscale.shape == (1,) + else: + lengthscale = np.ones(1) else: - lengthscales = np.ones(self.D) - self.Nparam = self.D + 1 - self.name = 'Mat32' - self._set_params(np.hstack((variance,lengthscales))) + self.Nparam = self.D + 1 + self.name = 'Mat32_ARD' + if lengthscale is not None: + assert lengthscale.shape == (self.D,) + else: + lengthscale = np.ones(self.D) + self._set_params(np.hstack((variance,lengthscale))) def _get_params(self): """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscales)) + return np.hstack((self.variance,self.lengthscale)) def _set_params(self,x): """set the value of the parameters.""" - assert x.size==(self.D+1) + assert x.size == self.Nparam self.variance = x[0] - self.lengthscales = x[1:] + self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.D==1: + if self.Nparam == 2: return ['variance','lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] def K(self,X,X2,target): """Compute the covariance matrix between X and X2.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target) def Kdiag(self,X,target): @@ -66,13 +75,20 @@ class Matern32(kernpart): def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist) invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 - dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 + #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] target[0] += np.sum(dvar*partial) - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + if self.ARD == True: + dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] + target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + else: + dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist + #dl = self.variance*dvar*dist2M.sum(-1)*invdist + target[1] += np.sum(dl*partial) def dKdiag_dtheta(self,partial,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" @@ -81,8 +97,8 @@ class Matern32(kernpart): def dK_dX(self,partial,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] + ddist_dX = 
(X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) target += np.sum(dK_dX*partial.T[:,:,None],0) @@ -104,7 +120,7 @@ class Matern32(kernpart): """ assert self.D == 1 def L(x,i): - return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x)) + return(3./self.lengthscale**2*F[i](x) + 2*np.sqrt(3)/self.lengthscale*F1[i](x) + F2[i](x)) n = F.shape[0] G = np.zeros((n,n)) for i in range(n): @@ -114,5 +130,5 @@ class Matern32(kernpart): F1lower = np.array([f(lower) for f in F1])[:,None] #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" #return(G) - return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) + return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 21bb8398..0ea1e922 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -49,13 +49,13 @@ class exponential(kernpart): def _set_params(self,x): """set the value of the parameters.""" - assert x.size==(self.D+1) + assert x.size == self.Nparam self.variance = x[0] self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.Nparam==2: + if self.Nparam == 2: return ['variance','lengthscale'] else: return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] @@ -84,7 +84,6 @@ class exponential(kernpart): else: dl = self.variance*dvar*dist2M.sum(-1)*invdist target[1] += np.sum(dl*partial) - #foo def dKdiag_dtheta(self,partial,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" From 121563cbf693f77267a77bc3f1a1274b1a5a80dc Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:00:10 +0000 Subject: [PATCH 17/61] Trying to get ReadTheDocs to recompile for us --- doc/conf.py | 66 ++++++++++++++++++++++++++++++++++++++++++++------- doc/index.rst | 9 +++++-- 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 7e1ec813..80cd5602 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # GPy documentation build configuration file, created by -# sphinx-quickstart on Wed Jan 9 15:21:20 2013. +# sphinx-quickstart on Fri Jan 18 15:30:28 2013. # # This file is execfile()d with the current directory set to its containing dir. # @@ -25,7 +25,7 @@ import sys, os # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.pngmath', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -41,16 +41,16 @@ master_doc = 'index' # General information about the project. project = u'GPy' -copyright = u'2013, The GPy authors' +copyright = u'2013, Author' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. 
-version = '0.00001' +version = '' # The full version, including alpha/beta/rc tags. -release = '0.00001' +release = '' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -184,7 +184,7 @@ latex_elements = { # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'GPy.tex', u'GPy Documentation', - u'The GPy authors', 'manual'), + u'Author', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -214,7 +214,7 @@ latex_documents = [ # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'gpy', u'GPy Documentation', - [u'The GPy authors'], 1) + [u'Author'], 1) ] # If true, show URL addresses after external links. @@ -228,7 +228,7 @@ man_pages = [ # dir menu entry, description, category) texinfo_documents = [ ('index', 'GPy', u'GPy Documentation', - u'The GPy authors', 'GPy', 'One line description of project.', + u'Author', 'GPy', 'One line description of project.', 'Miscellaneous'), ] @@ -240,3 +240,53 @@ texinfo_documents = [ # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' + + +# -- Options for Epub output --------------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = u'GPy' +epub_author = u'Author' +epub_publisher = u'Author' +epub_copyright = u'2013, Author' + +# The language of the text. It defaults to the language option +# or en if the language is not set. +#epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +#epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +#epub_identifier = '' + +# A unique identification for the text. +#epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +#epub_cover = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_pre_files = [] + +# HTML files shat should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_post_files = [] + +# A list of files that should not be packed into the epub file. +#epub_exclude_files = [] + +# The depth of the table of contents in toc.ncx. +#epub_tocdepth = 3 + +# Allow duplicate toc entries. +#epub_tocdup = True + +# ----------------------- READTHEDOCS ------------------ +on_rtd = os.environ.get('READTHEDOCS', None) == 'True' + +if on_rtd: + os.system("sphinx-apidoc -f -o . ../GPy") +#sphinx-apidoc . -o doc -H 'GPy' -f diff --git a/doc/index.rst b/doc/index.rst index 46327bb7..34d3e59c 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,16 +1,21 @@ .. GPy documentation master file, created by - sphinx-quickstart on Wed Jan 9 15:21:20 2013. + sphinx-quickstart on Fri Jan 18 15:30:28 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to GPy's documentation! =============================== +Now what if I add this other hting +Just checking another thing + Contents: .. 
toctree:: - :maxdepth: 2 + :maxdepth: 4 + GPy + setup Indices and tables From 69743be33e207ea145d63231f96b1e6d156930cd Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 16:03:20 +0000 Subject: [PATCH 18/61] last ARD flag changes to kernels --- GPy/__init__.py | 2 +- GPy/kern/Matern32.py | 6 ++-- GPy/kern/Matern52.py | 67 +++++++++++++++++++++++++--------------- GPy/kern/constructors.py | 47 +++++++++++++++++----------- GPy/kern/exponential.py | 6 ++-- GPy/kern/rbf.py | 2 +- 6 files changed, 81 insertions(+), 49 deletions(-) diff --git a/GPy/__init__.py b/GPy/__init__.py index 381d6232..0afa92e4 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -6,6 +6,6 @@ import kern import models import inference import util -import examples +#import examples #import examples TODO: discuss! from core import priors diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index 1270e3f9..cfad17c9 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -20,8 +20,10 @@ class Matern32(kernpart): :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float - :param lengthscale: the lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (D,) + :param lengthscale: the vector of lengthscale :math:`\ell_i` + :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. + :type ARD: Boolean :rtype: kernel object """ diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index 056da32d..84c71089 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -19,42 +19,53 @@ class Matern52(kernpart): :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float - :param lengthscale: the lengthscales :math:`\ell_i` - :type lengthscale: np.ndarray of size (D,) + :param lengthscale: the vector of lengthscale :math:`\ell_i` + :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
+ :type ARD: Boolean :rtype: kernel object """ - def __init__(self,D,variance=1.,lengthscales=None): + def __init__(self,D,variance=1.,lengthscale=None,ARD=False): self.D = D - if lengthscales is not None: - assert lengthscales.shape==(self.D,) + self.ARD = ARD + if ARD == False: + self.Nparam = 2 + self.name = 'Mat52' + if lengthscale is not None: + assert lengthscale.shape == (1,) + else: + lengthscale = np.ones(1) else: - lengthscales = np.ones(self.D) - self.Nparam = self.D + 1 - self.name = 'Mat52' - self._set_params(np.hstack((variance,lengthscales))) + self.Nparam = self.D + 1 + self.name = 'Mat52_ARD' + if lengthscale is not None: + assert lengthscale.shape == (self.D,) + else: + lengthscale = np.ones(self.D) + self._set_params(np.hstack((variance,lengthscale))) def _get_params(self): """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscales)) + return np.hstack((self.variance,self.lengthscale)) def _set_params(self,x): """set the value of the parameters.""" - assert x.size==(self.D+1) + assert x.size == self.Nparam self.variance = x[0] - self.lengthscales = x[1:] + self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.D==1: + if self.Nparam == 2: return ['variance','lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] def K(self,X,X2,target): """Compute the covariance matrix between X and X2.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target) def Kdiag(self,X,target): @@ -64,13 +75,19 @@ class Matern52(kernpart): def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 + dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] target[0] += np.sum(dvar*partial) + if self.ARD: + dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + else: + dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist + #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist + target[1] += np.sum(dl*partial) def dKdiag_dtheta(self,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" @@ -79,8 +96,8 @@ class Matern52(kernpart): def dK_dX(self,partial,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X - dist =
np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] + ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) target += np.sum(dK_dX*partial.T[:,:,None],0) @@ -104,18 +121,18 @@ class Matern52(kernpart): """ assert self.D == 1 def L(x,i): - return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x)) + return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x)) n = F.shape[0] G = np.zeros((n,n)) for i in range(n): for j in range(i,n): G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] - G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5)) + G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5)) Flower = np.array([f(lower) for f in F])[:,None] F1lower = np.array([f(lower) for f in F1])[:,None] F2lower = np.array([f(lower) for f in F2])[:,None] - orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T) - orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) + orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T) + orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) return(1./self.variance* (G_coef*G + orig + orig2)) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 5f676d9b..883357aa 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -32,6 +32,8 @@ def rbf(D,variance=1., lengthscale=None,ARD=False): :type variance: float :param lengthscale: the lengthscale of the kernel :type lengthscale: float + :param ARD: Auto Relevance Determination (one lengthscale per dimension) + :type ARD: Boolean """ part = rbfpart(D,variance,lengthscale,ARD) return kern(D, [part]) @@ -74,13 +76,16 @@ def white(D,variance=1.): def exponential(D,variance=1., lengthscale=None, ARD=False): """ - Construct a exponential kernel. + Construct an exponential kernel - Arguments - --------- - D (int), obligatory - variance (float) - lengthscales (np.ndarray) + :param D: dimensionality of the kernel, obligatory + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + :param ARD: Auto Relevance Determination (one lengthscale per dimension) + :type ARD: Boolean """ part = exponentialpart(D,variance, lengthscale, ARD) return kern(D, [part]) @@ -89,26 +94,32 @@ def Matern32(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 3/2 kernel. 
- Arguments - --------- - D (int), obligatory - variance (float) - lengthscales (np.ndarray) + :param D: dimensionality of the kernel, obligatory + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + :param ARD: Auto Relevance Determination (one lengthscale per dimension) + :type ARD: Boolean """ part = Matern32part(D,variance, lengthscale, ARD) return kern(D, [part]) -def Matern52(D,variance=1., lengthscales=None): +def Matern52(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 5/2 kernel. - Arguments - --------- - D (int), obligatory - variance (float) - lengthscales (np.ndarray) + :param D: dimensionality of the kernel, obligatory + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + :param ARD: Auto Relevance Determination (one lengthscale per dimension) + :type ARD: Boolean """ - part = Matern52part(D,variance, lengthscales) + part = Matern52part(D,variance, lengthscale, ARD) return kern(D, [part]) def bias(D,variance=1.): diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 0ea1e922..6c463a63 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -19,8 +19,10 @@ class exponential(kernpart): :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float - :param lengthscale: the lengthscales :math:`\ell_i` - :type lengthscale: np.ndarray of size (D,) + :param lengthscale: the vector of lengthscale :math:`\ell_i` + :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. + :type ARD: Boolean :rtype: kernel object """ diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 13ecaf86..62a9c46d 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -21,7 +21,7 @@ class rbf(kernpart): :param variance: the variance of the kernel :type variance: float :param lengthscale: the vector of lengthscale of the kernel - :type lengthscale: np.ndarray + :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :type ARD: Boolean From f3617eb07184f1926e0bb756ab92014582ee385e Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:05:50 +0000 Subject: [PATCH 19/61] Testing docs more --- doc/conf.py | 2 +- doc/index.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 80cd5602..268f4d0a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -288,5 +288,5 @@ epub_copyright = u'2013, Author' on_rtd = os.environ.get('READTHEDOCS', None) == 'True' if on_rtd: - os.system("sphinx-apidoc -f -o . ../GPy") + os.system("sphinx-apidoc -f -o docs .") #sphinx-apidoc . -o doc -H 'GPy' -f diff --git a/doc/index.rst b/doc/index.rst index 34d3e59c..f7ce7e1f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -6,6 +6,7 @@ Welcome to GPy's documentation! 
=============================== +And another thing Now what if I add this other hting Just checking another thing From 49530b07ee58cd095db53a93df723594172800ba Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:06:42 +0000 Subject: [PATCH 20/61] Adding fake kernel to test docs --- GPy/kern/rbf-testing.py | 178 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 GPy/kern/rbf-testing.py diff --git a/GPy/kern/rbf-testing.py b/GPy/kern/rbf-testing.py new file mode 100644 index 00000000..785e8fb5 --- /dev/null +++ b/GPy/kern/rbf-testing.py @@ -0,0 +1,178 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +from kernpart import kernpart +import numpy as np +import hashlib + +class rbf_testing(kernpart): + """ + Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. + + .. math:: + + k(r) = \sigma^2 \exp(- \frac{r^2}{2\ell}) \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}} + + where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input. + + :param D: the number of input dimensions + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + + .. Note: for rbf_testing with different lengthscale on each dimension, see rbf_testing_ARD + """ + + def __init__(self,D,variance=1.,lengthscale=1.): + self.D = D + self.Nparam = 2 + self.name = 'rbf_testing' + self._set_params(np.hstack((variance,lengthscale))) + + #initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3,1)) + self._X, self._X2, self._params = np.empty(shape=(3,1)) + + def _get_params(self): + return np.hstack((self.variance,self.lengthscale)) + + def _set_params(self,x): + self.variance, self.lengthscale = x + self.lengthscale2 = np.square(self.lengthscale) + #reset cached results + self._X, self._X2, self._params = np.empty(shape=(3,1)) + self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S + + def _get_param_names(self): + return ['variance','lengthscale'] + + def K(self,X,X2,target): + if X2 is None: + X2 = X + self._K_computations(X,X2) + np.add(self.variance*self._K_dvar, target,target) + + def Kdiag(self,X,target): + np.add(target,self.variance,target) + + def dK_dtheta(self,partial,X,X2,target): + self._K_computations(X,X2) + target[0] += np.sum(self._K_dvar*partial) + target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial) + + def dKdiag_dtheta(self,partial,X,target): + #NB: derivative of diagonal elements wrt lengthscale is 0 + target[0] += np.sum(partial) + + def dK_dX(self,partial,X,X2,target): + self._K_computations(X,X2) + _K_dist = X[:,None,:]-X2[None,:,:] + dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) + target += np.sum(dK_dX*partial.T[:,:,None],0) + + def dKdiag_dX(self,partial,X,target): + pass + + def _K_computations(self,X,X2): + if not (np.all(X==self._X) and np.all(X2==self._X2)): + self._X = X + self._X2 = X2 + if X2 is None: X2 = X + XXT = np.dot(X,X2.T) + if X is X2: + self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2 + else: + self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2 + # TODO Remove comments if this is fine. 
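The cached _K_computations above avoids forming the N x M x D difference tensor by expanding the squared distance through a dot product. A quick standalone check of that identity, independent of GPy:

import numpy as np

X = np.random.randn(6, 3)
X2 = np.random.randn(4, 3)

# direct broadcast computation of the pairwise squared distances
d2_direct = np.sum(np.square(X[:, None, :] - X2[None, :, :]), -1)

# dot-product expansion: ||x - y||^2 = x.x + y.y - 2*x.y
d2_dot = (-2. * np.dot(X, X2.T)
          + np.sum(np.square(X), 1)[:, None]
          + np.sum(np.square(X2), 1)[None, :])

assert np.allclose(d2_direct, d2_dot)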
+ # Commented out by Neil as doesn't seem to be used elsewhere. + #self._K_exponent = -0.5*self._K_dist2 + self._K_dvar = np.exp(-0.5*self._K_dist2) + + def psi0(self,Z,mu,S,target): + target += self.variance + + def dpsi0_dtheta(self,partial,Z,mu,S,target): + target[0] += 1. + + def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + pass + + def psi1(self,Z,mu,S,target): + self._psi_computations(Z,mu,S) + target += self._psi1 + + def dpsi1_dtheta(self,partial,Z,mu,S,target): + self._psi_computations(Z,mu,S) + denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:]) + d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv) + target[0] += np.sum(partial*self._psi1/self.variance) + target[1] += np.sum(d_length*partial[:,:,None]) + + def dpsi1_dZ(self,partial,Z,mu,S,target): + self._psi_computations(Z,mu,S) + target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0) + + def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): + self._psi_computations(Z,mu,S) + tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom + target_mu += np.sum(partial*tmp*self._psi1_dist,1) + target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) + + def psi2(self,Z,mu,S,target): + self._psi_computations(Z,mu,S) + target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise) + + def dpsi2_dtheta(self,partial,Z,mu,S,target): + """Shape N,M,M,Ntheta""" + self._psi_computations(Z,mu,S) + d_var = np.sum(2.*self._psi2/self.variance,0) + d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) + d_length = d_length.sum(0) + target[0] += np.sum(partial*d_var) + target[1] += np.sum(d_length*partial) + + def dpsi2_dZ(self,partial,Z,mu,S,target): + """Returns shape N,M,M,Q""" + self._psi_computations(Z,mu,S) + dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) + target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) + + def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + """Think N,M,M,Q """ + self._psi_computations(Z,mu,S) + tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom + target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) + target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) + + def _psi_computations(self,Z,mu,S): + #here are the "statistics" for psi1 and psi2 + if not np.all(Z==self._Z): + #Z has changed, compute Z specific stuff + self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q + self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q + self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscale2 # M,M,Q + self._Z = Z + + if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): + #something's changed. recompute EVERYTHING + + #TODO: make more efficient for large Q (using NDL's dot product trick) + #psi1 + self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1. + self._psi1_dist = Z[None,:,:]-mu[:,None,:] + self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscale2/self._psi1_denom + self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1) + self._psi1 = self.variance*np.exp(self._psi1_exponent) + + #psi2 + self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. 
# N,M,M,Q + self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q + self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom) + self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M + self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M + + self._Z, self._mu, self._S = Z, mu,S + From ad41c63d08cc250436ff9cb4530f2e178d24e7cf Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:10:51 +0000 Subject: [PATCH 21/61] Adding old command I read on the internet for own build --- doc/conf.py | 2 +- doc/index.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 268f4d0a..80cd5602 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -288,5 +288,5 @@ epub_copyright = u'2013, Author' on_rtd = os.environ.get('READTHEDOCS', None) == 'True' if on_rtd: - os.system("sphinx-apidoc -f -o docs .") + os.system("sphinx-apidoc -f -o . ../GPy") #sphinx-apidoc . -o doc -H 'GPy' -f diff --git a/doc/index.rst b/doc/index.rst index f7ce7e1f..fb721e15 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -6,6 +6,7 @@ Welcome to GPy's documentation! =============================== +Added old command again And another thing Now what if I add this other hting Just checking another thing From ffd05027cbdc4213a670c337859cd783001278ba Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 16:14:13 +0000 Subject: [PATCH 22/61] More tests in unit_tests --- GPy/kern/rbf.py | 1 + GPy/testing/unit_tests.py | 41 ++++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 62a9c46d..1e9b4379 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -24,6 +24,7 @@ class rbf(kernpart): :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
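In use, the ARD flag documented here only changes the shape of the lengthscale parameter and the parameter naming; a usage sketch, assuming this revision of GPy is importable (the constructor signature and the ARD keyword are taken from the diffs above):

import numpy as np
import GPy

k_iso = GPy.kern.rbf(2)            # Nparam == 2: ['variance', 'lengthscale']
k_ard = GPy.kern.rbf(2, ARD=True)  # Nparam == D + 1: ['variance', 'lengthscale_0', 'lengthscale_1']

X = np.random.randn(10, 2)
K = k_iso.K(X)  # the kern object returned by the constructor wraps the kernpart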
:type ARD: Boolean + :rtype: kernel object """ diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index d2ef87f7..c5db80bd 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -42,51 +42,66 @@ class GradientTests(unittest.TestCase): # contrain all parameters to be positive self.assertTrue(m.checkgrad()) - def test_gp_regression_rbf_white_kern_1d(self): + def test_gp_regression_rbf_1d(self): ''' Testing the GP regression with rbf kernel with white kernel on 1d data ''' rbf = GPy.kern.rbf(1) self.check_model_with_white(rbf, model_type='GP_regression', dimension=1) - def test_GP_regression_rbf_ARD_white_kern_2D(self): - ''' Testing the GP regression with rbf and white kernel on 2d data ''' - k = GPy.kern.rbf_ARD(2) - self.check_model_with_white(k, model_type='GP_regression', dimension=2) - - def test_GP_regression_rbf_white_kern_2D(self): + def test_GP_regression_rbf_2D(self): ''' Testing the GP regression with rbf and white kernel on 2d data ''' rbf = GPy.kern.rbf(2) self.check_model_with_white(rbf, model_type='GP_regression', dimension=2) - def test_GP_regression_matern52_kern_1D(self): + def test_GP_regression_rbf_ARD_2D(self): + ''' Testing the GP regression with rbf and white kernel on 2d data ''' + k = GPy.kern.rbf(2,ARD=True) + self.check_model_with_white(k, model_type='GP_regression', dimension=2) + + def test_GP_regression_matern52_1D(self): ''' Testing the GP regression with matern52 kernel on 1d data ''' matern52 = GPy.kern.Matern52(1) self.check_model_with_white(matern52, model_type='GP_regression', dimension=1) - def test_GP_regression_matern52_kern_2D(self): + def test_GP_regression_matern52_2D(self): ''' Testing the GP regression with matern52 kernel on 2d data ''' matern52 = GPy.kern.Matern52(2) self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) - def test_GP_regression_matern32_kern_1D(self): + def test_GP_regression_matern52_ARD_2D(self): + ''' Testing the GP regression with matern52 kernel on 2d data ''' + matern52 = GPy.kern.Matern52(2,ARD=True) + self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) + + def test_GP_regression_matern32_1D(self): ''' Testing the GP regression with matern32 kernel on 1d data ''' matern32 = GPy.kern.Matern32(1) self.check_model_with_white(matern32, model_type='GP_regression', dimension=1) - def test_GP_regression_matern32_kern_2D(self): + def test_GP_regression_matern32_2D(self): ''' Testing the GP regression with matern32 kernel on 2d data ''' matern32 = GPy.kern.Matern32(2) self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) - def test_GP_regression_exponential_kern_1D(self): + def test_GP_regression_matern32_ARD_2D(self): + ''' Testing the GP regression with matern32 kernel on 2d data ''' + matern32 = GPy.kern.Matern32(2,ARD=True) + self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) + + def test_GP_regression_exponential_1D(self): ''' Testing the GP regression with exponential kernel on 1d data ''' exponential = GPy.kern.exponential(1) self.check_model_with_white(exponential, model_type='GP_regression', dimension=1) - def test_GP_regression_exponential_kern_2D(self): + def test_GP_regression_exponential_2D(self): ''' Testing the GP regression with exponential kernel on 2d data ''' exponential = GPy.kern.exponential(2) self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) + def test_GP_regression_exponential_ARD_2D(self): + ''' Testing the GP regression with exponential 
kernel on 2d data ''' exponential = GPy.kern.exponential(2,ARD=True) self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) def test_GP_regression_bias_kern_1D(self): ''' Testing the GP regression with bias kernel on 1d data ''' bias = GPy.kern.bias(1) From 8696de241ac86e527dae8168068cceafd41fb247 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:27:58 +0000 Subject: [PATCH 23/61] Tried changing the location of the apidoc compilation --- doc/conf.py | 4 +- doc/make.bat | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 doc/make.bat diff --git a/doc/conf.py b/doc/conf.py index 80cd5602..bd8420c4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -288,5 +288,5 @@ epub_copyright = u'2013, Author' on_rtd = os.environ.get('READTHEDOCS', None) == 'True' if on_rtd: - os.system("sphinx-apidoc -f -o . ../GPy") -#sphinx-apidoc . -o doc -H 'GPy' -f + os.system("pwd") + os.system("sphinx-apidoc -f -o . ../") diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 00000000..40a74901 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files.
+ goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\GPy.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\GPy.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end From e061e1a638a22f4d00b5c63e86fb3e2d71656d02 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:33:59 +0000 Subject: [PATCH 24/61] Tried fixing build call --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index bd8420c4..fa6f8c34 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -289,4 +289,4 @@ on_rtd = os.environ.get('READTHEDOCS', None) == 'True' if on_rtd: os.system("pwd") - os.system("sphinx-apidoc -f -o . ../") + os.system("sphinx-apidoc -f -o . 
./GPy") From b64d6e4acfdca47b23d57741a4977b7b9e1b93d6 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:36:08 +0000 Subject: [PATCH 25/61] The below again --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index fa6f8c34..cf9f7ca7 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -289,4 +289,4 @@ on_rtd = os.environ.get('READTHEDOCS', None) == 'True' if on_rtd: os.system("pwd") - os.system("sphinx-apidoc -f -o . ./GPy") + os.system("sphinx-apidoc -f -o . ../GPy") From 3045ee72c8c6049e5adc90e43840a9685329e5f8 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Fri, 18 Jan 2013 16:38:41 +0000 Subject: [PATCH 26/61] Added unsupervised.py examples file and datasets.py@ --- GPy/examples/unsupervised.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 GPy/examples/unsupervised.py diff --git a/GPy/examples/unsupervised.py b/GPy/examples/unsupervised.py new file mode 100644 index 00000000..08d81e05 --- /dev/null +++ b/GPy/examples/unsupervised.py @@ -0,0 +1,25 @@ +""" +Usupervised learning with Gaussian Processes. +""" +import pylab as pb +import numpy as np +import GPy + + +###################################### +## Oil data subsampled to 100 points. +def oil_100(): + data = GPy.util.datasets.oil_100() + + # create simple GP model + m = GPy.models.GPLVM(data['X'], 2) + + + # optimize + m.ensure_default_constraints() + m.optimize() + + # plot + print(m) + return m + From 08ecb2f97f40272a1598f8cc46553ab92c15d6e8 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 16:41:18 +0000 Subject: [PATCH 27/61] Added pylab to requirements --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ee44dde0..4934e072 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup(name = 'GPy', long_description=read('README.md'), #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], - install_requires=['numpy>=1.6', 'scipy','matplotlib>=1.1'], + install_requires=['pylab, numpy>=1.6', 'scipy','matplotlib>=1.1'], setup_requires=['sphinx'], cmdclass = {'build_sphinx': BuildDoc}, classifiers=[ From e4e05ea5e5082a73b8e018ee54867b612d0898a7 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 17:05:57 +0000 Subject: [PATCH 28/61] Trying to give pylab dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4934e072..b8b8b24b 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup(name = 'GPy', long_description=read('README.md'), #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], - install_requires=['pylab, numpy>=1.6', 'scipy','matplotlib>=1.1'], + install_requires=['pylab', 'numpy>=1.6', 'scipy','matplotlib>=1.1'], setup_requires=['sphinx'], cmdclass = {'build_sphinx': BuildDoc}, classifiers=[ From 6089a34d8d96123de29cde40b2ff65aafe3d179c Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 17:32:46 +0000 Subject: [PATCH 29/61] added sympy dependency and scipy version dependency again --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b8b8b24b..432b8b13 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup(name = 'GPy', long_description=read('README.md'), #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], - install_requires=['pylab', 
'numpy>=1.6', 'scipy','matplotlib>=1.1'], + install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], setup_requires=['sphinx'], cmdclass = {'build_sphinx': BuildDoc}, classifiers=[ From a76c53e6996712295b4f98b5ce21564f7165a769 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 17:43:32 +0000 Subject: [PATCH 30/61] bug fixed in example (in regression.py) --- GPy/kern/rbf.py | 14 ++++++-------- GPy/util/datasets.py | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 1e9b4379..b1134e25 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -8,13 +8,13 @@ import hashlib class rbf(kernpart): """ - Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. + Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: .. math:: - k(r) = \sigma^2 \exp(- \frac{r^2}{2\ell}) \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}} + k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2} - where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input. + where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. :param D: the number of input dimensions :type D: int @@ -37,8 +37,7 @@ class rbf(kernpart): if lengthscale is not None: assert lengthscale.shape == (1,) else: - lengthscale = np.ones(1) - + lengthscale = np.ones(1) else: self.Nparam = self.D + 1 self.name = 'rbf_ARD' @@ -109,11 +108,10 @@ class rbf(kernpart): self._X2 = X2 if X2 is None: X2 = X self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy - self._params = np.empty(shape=(1,0))#ensure the next section gets called + self._params = np.empty(shape=(1,0)) #ensure the next section gets called if not np.all(self._params == self._get_params()): self._params == self._get_params() - self._K_dist2 = np.square(self._K_dist/self.lengthscale) - #self._K_exponent = -0.5*self._K_dist2.sum(-1) #ND: commented out because seems not to be used + self._K_dist2 = np.square(self._K_dist/self.lengthscale) self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) def psi0(self,Z,mu,S,target): diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index bc7bf546..071db5d6 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -90,7 +90,7 @@ def toy_rbf_1d(seed=default_seed): N = 500 X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn)) X.sort(axis=0) - rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=0.25) + rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,))) white = GPy.kern.white(numIn, variance=1e-2) kernel = rbf + white K = kernel.K(X) From 3cac7c5094a6fae14f6ad7968905e7a453acff7f Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 17:44:25 +0000 Subject: [PATCH 31/61] Updated index.rst --- GPy/kern/rbf-testing.py | 178 ---------------------------------------- doc/index.rst | 8 +- 2 files changed, 1 insertion(+), 185 deletions(-) delete mode 100644 GPy/kern/rbf-testing.py diff --git a/GPy/kern/rbf-testing.py b/GPy/kern/rbf-testing.py deleted file mode 100644 index 785e8fb5..00000000 --- a/GPy/kern/rbf-testing.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import kernpart -import numpy as np -import hashlib - -class rbf_testing(kernpart): - """ - Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. - - .. math:: - - k(r) = \sigma^2 \exp(- \frac{r^2}{2\ell}) \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}} - - where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input. - - :param D: the number of input dimensions - :type D: int - :param variance: the variance of the kernel - :type variance: float - :param lengthscale: the lengthscale of the kernel - :type lengthscale: float - - .. Note: for rbf_testing with different lengthscale on each dimension, see rbf_testing_ARD - """ - - def __init__(self,D,variance=1.,lengthscale=1.): - self.D = D - self.Nparam = 2 - self.name = 'rbf_testing' - self._set_params(np.hstack((variance,lengthscale))) - - #initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3,1)) - self._X, self._X2, self._params = np.empty(shape=(3,1)) - - def _get_params(self): - return np.hstack((self.variance,self.lengthscale)) - - def _set_params(self,x): - self.variance, self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - #reset cached results - self._X, self._X2, self._params = np.empty(shape=(3,1)) - self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S - - def _get_param_names(self): - return ['variance','lengthscale'] - - def K(self,X,X2,target): - if X2 is None: - X2 = X - self._K_computations(X,X2) - np.add(self.variance*self._K_dvar, target,target) - - def Kdiag(self,X,target): - np.add(target,self.variance,target) - - def dK_dtheta(self,partial,X,X2,target): - self._K_computations(X,X2) - target[0] += np.sum(self._K_dvar*partial) - target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial) - - def dKdiag_dtheta(self,partial,X,target): - #NB: derivative of diagonal elements wrt lengthscale is 0 - target[0] += np.sum(partial) - - def dK_dX(self,partial,X,X2,target): - self._K_computations(X,X2) - _K_dist = X[:,None,:]-X2[None,:,:] - dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) - target += np.sum(dK_dX*partial.T[:,:,None],0) - - def dKdiag_dX(self,partial,X,target): - pass - - def _K_computations(self,X,X2): - if not (np.all(X==self._X) and np.all(X2==self._X2)): - self._X = X - self._X2 = X2 - if X2 is None: X2 = X - XXT = np.dot(X,X2.T) - if X is X2: - self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2 - else: - self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2 - # TODO Remove comments if this is fine. - # Commented out by Neil as doesn't seem to be used elsewhere. - #self._K_exponent = -0.5*self._K_dist2 - self._K_dvar = np.exp(-0.5*self._K_dist2) - - def psi0(self,Z,mu,S,target): - target += self.variance - - def dpsi0_dtheta(self,partial,Z,mu,S,target): - target[0] += 1. 
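The psi statistics being removed here have a direct probabilistic reading: psi1[n, m] is the expectation of k(x_n, z_m) under x_n ~ N(mu_n, diag(S_n)). A standalone Monte Carlo check of the closed form used in the code above (a sketch with made-up numbers, not GPy code):

import numpy as np

np.random.seed(0)
variance, lengthscale = 2.0, 0.7
l2 = lengthscale ** 2
mu = np.array([0.3, -0.5])  # mean of q(x)
S = np.array([0.4, 0.1])    # diagonal variance of q(x)
z = np.array([1.0, 0.2])    # one inducing input

# closed form, matching _psi1 above
denom = S / l2 + 1.
psi1 = variance * np.exp(-0.5 * np.sum(np.square(z - mu) / l2 / denom + np.log(denom)))

# Monte Carlo estimate of E_{x ~ N(mu, diag(S))}[ k(x, z) ]
x = mu + np.sqrt(S) * np.random.randn(100000, 2)
k_xz = variance * np.exp(-0.5 * np.sum(np.square(x - z), -1) / l2)
print psi1, k_xz.mean()  # the two should agree to two or three decimal places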
- - def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): - pass - - def psi1(self,Z,mu,S,target): - self._psi_computations(Z,mu,S) - target += self._psi1 - - def dpsi1_dtheta(self,partial,Z,mu,S,target): - self._psi_computations(Z,mu,S) - denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:]) - d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv) - target[0] += np.sum(partial*self._psi1/self.variance) - target[1] += np.sum(d_length*partial[:,:,None]) - - def dpsi1_dZ(self,partial,Z,mu,S,target): - self._psi_computations(Z,mu,S) - target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0) - - def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): - self._psi_computations(Z,mu,S) - tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom - target_mu += np.sum(partial*tmp*self._psi1_dist,1) - target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) - - def psi2(self,Z,mu,S,target): - self._psi_computations(Z,mu,S) - target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise) - - def dpsi2_dtheta(self,partial,Z,mu,S,target): - """Shape N,M,M,Ntheta""" - self._psi_computations(Z,mu,S) - d_var = np.sum(2.*self._psi2/self.variance,0) - d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) - d_length = d_length.sum(0) - target[0] += np.sum(partial*d_var) - target[1] += np.sum(d_length*partial) - - def dpsi2_dZ(self,partial,Z,mu,S,target): - """Returns shape N,M,M,Q""" - self._psi_computations(Z,mu,S) - dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) - target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) - - def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): - """Think N,M,M,Q """ - self._psi_computations(Z,mu,S) - tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom - target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) - target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) - - def _psi_computations(self,Z,mu,S): - #here are the "statistics" for psi1 and psi2 - if not np.all(Z==self._Z): - #Z has changed, compute Z specific stuff - self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q - self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q - self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscale2 # M,M,Q - self._Z = Z - - if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): - #something's changed. recompute EVERYTHING - - #TODO: make more efficient for large Q (using NDL's dot product trick) - #psi1 - self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1. - self._psi1_dist = Z[None,:,:]-mu[:,None,:] - self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscale2/self._psi1_denom - self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1) - self._psi1 = self.variance*np.exp(self._psi1_exponent) - - #psi2 - self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q - self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q - self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom) - self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. 
-self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M - self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M - - self._Z, self._mu, self._S = Z, mu,S - diff --git a/doc/index.rst b/doc/index.rst index fb721e15..818a71db 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,23 +1,17 @@ .. GPy documentation master file, created by - sphinx-quickstart on Fri Jan 18 15:30:28 2013. + sphinx-quickstart on Fri Jan 18 17:36:01 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to GPy's documentation! =============================== -Added old command again -And another thing -Now what if I add this other hting -Just checking another thing - Contents: .. toctree:: :maxdepth: 4 GPy - setup Indices and tables From 6d98a230771a210a1144d1b7f6732655a8e1bb6a Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 18 Jan 2013 17:38:50 +0000 Subject: [PATCH 32/61] fixed optimize_restarts --- GPy/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 46cf6ac9..f21db4be 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -181,7 +181,7 @@ class model(parameterised): else: raise e if len(self.optimization_runs): - i = np.argmax([o.f_opt for o in self.optimization_runs]) + i = np.argmin([o.f_opt for o in self.optimization_runs]) self.expand_param(self.optimization_runs[i].x_opt) else: self.expand_param(initial_parameters) From fddbaeac77c55068f62593ea18ee0ef7ced87d18 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 17:55:39 +0000 Subject: [PATCH 33/61] delete unnecessary rbf_ARD.py --- GPy/kern/rbf_ARD.py | 251 -------------------------------------------- 1 file changed, 251 deletions(-) delete mode 100644 GPy/kern/rbf_ARD.py diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py deleted file mode 100644 index 12924b3f..00000000 --- a/GPy/kern/rbf_ARD.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import kernpart -import numpy as np -import hashlib - -class rbf_ARD(kernpart): - def __init__(self,D,variance=1.,lengthscales=None): - """ - Arguments - ---------- - D: int - the number of input dimensions - variance: float - lengthscales : np.ndarray of shape (D,) - """ - self.D = D - if lengthscales is not None: - assert lengthscales.shape==(self.D,) - else: - lengthscales = np.ones(self.D) - self.Nparam = self.D + 1 - self.name = 'rbf_ARD' - self._set_params(np.hstack((variance,lengthscales))) - - #initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3,1)) - self._X, self._X2, self._params = np.empty(shape=(3,1)) - - def _get_params(self): - return np.hstack((self.variance,self.lengthscales)) - - def _set_params(self,x): - assert x.size==(self.D+1) - self.variance = x[0] - self.lengthscales = x[1:] - self.lengthscales2 = np.square(self.lengthscales) - #reset cached results - self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S - - def _get_param_names(self): - if self.D==1: - return ['variance','lengthscale'] - else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] - - def K(self,X,X2,target): - self._K_computations(X,X2) - np.add(self.variance*self._K_dvar, target,target) - - def Kdiag(self,X,target): - np.add(target,self.variance,target) - - def dK_dtheta(self,partial,X,X2,target): - self._K_computations(X,X2) - dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales - target[0] += np.sum(self._K_dvar*partial) - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) - - def dKdiag_dtheta(self,X,target): - target[0] += np.sum(partial) - - def dK_dX(self,partial,X,X2,target): - self._K_computations(X,X2) - dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2 - dK_dX = -dZ.transpose(1,0,2) - target += np.sum(dK_dX*partial.T[:,:,None],0) - - def dKdiag_dX(self,partial,X,target): - pass - - def psi0(self,Z,mu,S,target): - target += self.variance - - def dpsi0_dtheta(self,partial,Z,mu,S,target): - target[0] += 1. - - def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): - pass - - def psi1(self,Z,mu,S,target): - self._psi_computations(Z,mu,S) - np.add(target, self._psi1,target) - - def dpsi1_dtheta(self,partial,Z,mu,S,target): - self._psi_computations(Z,mu,S) - denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:]) - d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv) - target[0] += np.sum(partial*self._psi1/self.variance) - target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0) - - def dpsi1_dZ(self,partial,Z,mu,S,target): - self._psi_computations(Z,mu,S) - np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target) - target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0) - - def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): - """return shapes are N,M,Q""" - self._psi_computations(Z,mu,S) - tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom - target_mu += np.sum(partial*tmp*self._psi1_dist,1) - target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) - - def psi2(self,Z,mu,S,target): - self._psi_computations(Z,mu,S) - target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. 
noise) - - def dpsi2_dtheta(self,Z,mu,S,target): - """Shape N,M,M,Ntheta""" - self._psi_computations(Z,mu,S) - d_var = np.sum(2.*self._psi2/self.variance,0) - d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom) - d_length = d_length.sum(0) - target[0] += np.sum(partial*d_var) - target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0) - - def dpsi2_dZ(self,Z,mu,S,target): - """Returns shape N,M,M,Q""" - self._psi_computations(Z,mu,S) - dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) - target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) - - def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): - """Think N,M,M,Q """ - self._psi_computations(Z,mu,S) - tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom - target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) - target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) - - def _K_computations(self,X,X2): - if not (np.all(X==self._X) and np.all(X2==self._X2)): - self._X = X - self._X2 = X2 - if X2 is None: X2 = X - self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy - self._params = np.empty(shape=(1,0))#ensure the next section gets called - if not np.all(self._params == self._get_params()): - self._params == self._get_params() - self._K_dist2 = np.square(self._K_dist/self.lengthscales) - self._K_exponent = -0.5*self._K_dist2.sum(-1) - self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) - - def _psi_computations(self,Z,mu,S): - #here are the "statistics" for psi1 and psi2 - if not np.all(Z==self._Z): - #Z has changed, compute Z specific stuff - self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q - self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q - self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q - self._Z = Z - - if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): - #something's changed. recompute EVERYTHING - - #psi1 - self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1. - self._psi1_dist = Z[None,:,:]-mu[:,None,:] - self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom - self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1) - self._psi1 = self.variance*np.exp(self._psi1_exponent) - - #psi2 - self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q - self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q - self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom) - self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M - self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M - - self._Z, self._mu, self._S = Z, mu,S - - -if __name__=='__main__': - #run some simple tests on the kernel (TODO:move these to unititest) - #TODO: these are broken in this new structure! 
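These deleted self-tests lean on a checkgrad helper that compares an analytic gradient against finite differences, the same idea m.checkgrad() applies at the model level in the unit tests. A minimal standalone version (a sketch; GPy's own implementation differs in detail):

import numpy as np

def checkgrad(f, x, step=1e-6, tol=1e-4):
    # f returns (objective, analytic gradient); compare against central differences
    _, grad = f(x)
    fd = np.zeros_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e[i] = step
        fd[i] = (f(x + e)[0] - f(x - e)[0]) / (2. * step)
    return np.allclose(grad, fd, atol=tol)

# e.g. f(x) = sum(x^2) has gradient 2x
print checkgrad(lambda x: (np.sum(x ** 2), 2. * x), np.random.randn(5))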
- N = 10 - M = 5 - Q = 3 - - Z = np.random.randn(M,Q) - mu = np.random.randn(N,Q) - S = np.random.rand(N,Q) - - var = 2.5 - lengthscales = np.ones(Q)*0.7 - - k = rbf(Q,var,lengthscales) - - from checkgrad import checkgrad - - def k_theta_test(param,k): - k._set_params(param) - K = k.K(Z) - dK_dtheta = k.dK_dtheta(Z) - f = np.sum(K) - df = dK_dtheta.sum(0).sum(0) - return f,np.array(df) - print "dk_dtheta_test" - checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,)) - - - def psi1_mu_test(mu,k): - mu = mu.reshape(N,Q) - f = np.sum(k.psi1(Z,mu,S)) - df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1) - return f,df.flatten() - print "psi1_mu_test" - checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,)) - - def psi1_S_test(S,k): - S = S.reshape(N,Q) - f = np.sum(k.psi1(Z,mu,S)) - df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1) - return f,df.flatten() - print "psi1_S_test" - checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,)) - - def psi1_theta_test(theta,k): - k._set_params(theta) - f = np.sum(k.psi1(Z,mu,S)) - df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)]) - return f,df - print "psi1_theta_test" - checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,)) - - - def psi2_mu_test(mu,k): - mu = mu.reshape(N,Q) - f = np.sum(k.psi2(Z,mu,S)) - df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1) - return f,df.flatten() - print "psi2_mu_test" - checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,)) - - def psi2_S_test(S,k): - S = S.reshape(N,Q) - f = np.sum(k.psi2(Z,mu,S)) - df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1) - return f,df.flatten() - print "psi2_S_test" - checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,)) - - def psi2_theta_test(theta,k): - k._set_params(theta) - f = np.sum(k.psi2(Z,mu,S)) - df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)]) - return f,df - print "psi2_theta_test" - checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,)) - - From a908664996713ff17b6f7014bc6043553f24ea73 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 18 Jan 2013 18:10:03 +0000 Subject: [PATCH 34/61] Added path for RTD --- doc/conf.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index cf9f7ca7..474836a2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -27,6 +27,14 @@ import sys, os # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +# ----------------------- READTHEDOCS ------------------ +on_rtd = os.environ.get('READTHEDOCS', None) == 'True' + +if on_rtd: + sys.path.append("../GPy") + os.system("pwd") + os.system("sphinx-apidoc -f -o . ../GPy") + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -284,9 +292,3 @@ epub_copyright = u'2013, Author' # Allow duplicate toc entries. #epub_tocdup = True -# ----------------------- READTHEDOCS ------------------ -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' - -if on_rtd: - os.system("pwd") - os.system("sphinx-apidoc -f -o . ../GPy") From e8383c2d71ddfa91f52f805d266e251868a57f23 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Jan 2013 18:15:36 +0000 Subject: [PATCH 35/61] restored examples folder --- GPy/__init__.py | 2 +- GPy/kern/constructors.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/GPy/__init__.py b/GPy/__init__.py index 0afa92e4..381d6232 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -6,6 +6,6 @@ import kern import models import inference import util -#import examples +import examples #import examples TODO: discuss! 
from core import priors diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 883357aa..ab39fcb2 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -6,7 +6,6 @@ import numpy as np from kern import kern from rbf import rbf as rbfpart -from rbf_ARD import rbf_ARD as rbf_ARD_part from white import white as whitepart from linear import linear as linearpart from linear_ARD import linear_ARD as linear_ARD_part From eedf1f99d1f94967a82efb559992ff0d265aeb4c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 21 Jan 2013 12:16:55 +0000 Subject: [PATCH 36/61] added some documentation and example files --- GPy/examples/__init__.py | 0 doc/GPy.core.rst | 35 ++++++++++ doc/GPy.inference.rst | 35 ++++++++++ doc/GPy.kern.rst | 139 +++++++++++++++++++++++++++++++++++++++ doc/GPy.models.rst | 75 +++++++++++++++++++++ doc/GPy.rst | 22 +++++++ doc/GPy.util.rst | 67 +++++++++++++++++++ doc/modules.rst | 7 ++ 8 files changed, 380 insertions(+) create mode 100644 GPy/examples/__init__.py create mode 100644 doc/GPy.core.rst create mode 100644 doc/GPy.inference.rst create mode 100644 doc/GPy.kern.rst create mode 100644 doc/GPy.models.rst create mode 100644 doc/GPy.rst create mode 100644 doc/GPy.util.rst create mode 100644 doc/modules.rst diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/doc/GPy.core.rst b/doc/GPy.core.rst new file mode 100644 index 00000000..e02aaa2a --- /dev/null +++ b/doc/GPy.core.rst @@ -0,0 +1,35 @@ +core Package +============ + +:mod:`core` Package +------------------- + +.. automodule:: GPy.core + :members: + :undoc-members: + :show-inheritance: + +:mod:`model` Module +------------------- + +.. automodule:: GPy.core.model + :members: + :undoc-members: + :show-inheritance: + +:mod:`parameterised` Module +--------------------------- + +.. automodule:: GPy.core.parameterised + :members: + :undoc-members: + :show-inheritance: + +:mod:`priors` Module +-------------------- + +.. automodule:: GPy.core.priors + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst new file mode 100644 index 00000000..6f4ab691 --- /dev/null +++ b/doc/GPy.inference.rst @@ -0,0 +1,35 @@ +inference Package +================= + +:mod:`Expectation_Propagation` Module +------------------------------------- + +.. automodule:: GPy.inference.Expectation_Propagation + :members: + :undoc-members: + :show-inheritance: + +:mod:`likelihoods` Module +------------------------- + +.. automodule:: GPy.inference.likelihoods + :members: + :undoc-members: + :show-inheritance: + +:mod:`optimization` Module +-------------------------- + +.. automodule:: GPy.inference.optimization + :members: + :undoc-members: + :show-inheritance: + +:mod:`samplers` Module +---------------------- + +.. automodule:: GPy.inference.samplers + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst new file mode 100644 index 00000000..95943178 --- /dev/null +++ b/doc/GPy.kern.rst @@ -0,0 +1,139 @@ +kern Package +============ + +:mod:`kern` Package +------------------- + +.. automodule:: GPy.kern + :members: + :undoc-members: + :show-inheritance: + +:mod:`Brownian` Module +---------------------- + +.. automodule:: GPy.kern.Brownian + :members: + :undoc-members: + :show-inheritance: + +:mod:`Matern32` Module +---------------------- + +.. 
automodule:: GPy.kern.Matern32 + :members: + :undoc-members: + :show-inheritance: + +:mod:`Matern52` Module +---------------------- + +.. automodule:: GPy.kern.Matern52 + :members: + :undoc-members: + :show-inheritance: + +:mod:`bias` Module +------------------ + +.. automodule:: GPy.kern.bias + :members: + :undoc-members: + :show-inheritance: + +:mod:`constructors` Module +-------------------------- + +.. automodule:: GPy.kern.constructors + :members: + :undoc-members: + :show-inheritance: + +:mod:`exponential` Module +------------------------- + +.. automodule:: GPy.kern.exponential + :members: + :undoc-members: + :show-inheritance: + +:mod:`finite_dimensional` Module +-------------------------------- + +.. automodule:: GPy.kern.finite_dimensional + :members: + :undoc-members: + :show-inheritance: + +:mod:`kern` Module +------------------ + +.. automodule:: GPy.kern.kern + :members: + :undoc-members: + :show-inheritance: + +:mod:`kernpart` Module +---------------------- + +.. automodule:: GPy.kern.kernpart + :members: + :undoc-members: + :show-inheritance: + +:mod:`linear` Module +-------------------- + +.. automodule:: GPy.kern.linear + :members: + :undoc-members: + :show-inheritance: + +:mod:`linear_ARD` Module +------------------------ + +.. automodule:: GPy.kern.linear_ARD + :members: + :undoc-members: + :show-inheritance: + +:mod:`rbf-testing` Module +------------------------- + +.. automodule:: GPy.kern.rbf-testing + :members: + :undoc-members: + :show-inheritance: + +:mod:`rbf` Module +----------------- + +.. automodule:: GPy.kern.rbf + :members: + :undoc-members: + :show-inheritance: + +:mod:`spline` Module +-------------------- + +.. automodule:: GPy.kern.spline + :members: + :undoc-members: + :show-inheritance: + +:mod:`sympykern` Module +----------------------- + +.. automodule:: GPy.kern.sympykern + :members: + :undoc-members: + :show-inheritance: + +:mod:`white` Module +------------------- + +.. automodule:: GPy.kern.white + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst new file mode 100644 index 00000000..47af78ab --- /dev/null +++ b/doc/GPy.models.rst @@ -0,0 +1,75 @@ +models Package +============== + +:mod:`models` Package +--------------------- + +.. automodule:: GPy.models + :members: + :undoc-members: + :show-inheritance: + +:mod:`GPLVM` Module +------------------- + +.. automodule:: GPy.models.GPLVM + :members: + :undoc-members: + :show-inheritance: + +:mod:`GP_EP` Module +------------------- + +.. automodule:: GPy.models.GP_EP + :members: + :undoc-members: + :show-inheritance: + +:mod:`GP_regression` Module +--------------------------- + +.. automodule:: GPy.models.GP_regression + :members: + :undoc-members: + :show-inheritance: + +:mod:`generalized_FITC` Module +------------------------------ + +.. automodule:: GPy.models.generalized_FITC + :members: + :undoc-members: + :show-inheritance: + +:mod:`sparse_GPLVM` Module +-------------------------- + +.. automodule:: GPy.models.sparse_GPLVM + :members: + :undoc-members: + :show-inheritance: + +:mod:`sparse_GP_regression` Module +---------------------------------- + +.. automodule:: GPy.models.sparse_GP_regression + :members: + :undoc-members: + :show-inheritance: + +:mod:`uncollapsed_sparse_GP` Module +----------------------------------- + +.. automodule:: GPy.models.uncollapsed_sparse_GP + :members: + :undoc-members: + :show-inheritance: + +:mod:`warped_GP` Module +----------------------- + +.. 
automodule:: GPy.models.warped_GP + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.rst b/doc/GPy.rst new file mode 100644 index 00000000..61a4242c --- /dev/null +++ b/doc/GPy.rst @@ -0,0 +1,22 @@ +GPy Package +=========== + +:mod:`GPy` Package +------------------ + +.. automodule:: GPy.__init__ + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + + GPy.core + GPy.inference + GPy.kern + GPy.models + GPy.util + diff --git a/doc/GPy.util.rst b/doc/GPy.util.rst new file mode 100644 index 00000000..5bec990b --- /dev/null +++ b/doc/GPy.util.rst @@ -0,0 +1,67 @@ +util Package +============ + +:mod:`util` Package +------------------- + +.. automodule:: GPy.util + :members: + :undoc-members: + :show-inheritance: + +:mod:`Tango` Module +------------------- + +.. automodule:: GPy.util.Tango + :members: + :undoc-members: + :show-inheritance: + +:mod:`datasets` Module +---------------------- + +.. automodule:: GPy.util.datasets + :members: + :undoc-members: + :show-inheritance: + +:mod:`linalg` Module +-------------------- + +.. automodule:: GPy.util.linalg + :members: + :undoc-members: + :show-inheritance: + +:mod:`misc` Module +------------------ + +.. automodule:: GPy.util.misc + :members: + :undoc-members: + :show-inheritance: + +:mod:`plot` Module +------------------ + +.. automodule:: GPy.util.plot + :members: + :undoc-members: + :show-inheritance: + +:mod:`squashers` Module +----------------------- + +.. automodule:: GPy.util.squashers + :members: + :undoc-members: + :show-inheritance: + +:mod:`warping_functions` Module +------------------------------- + +.. automodule:: GPy.util.warping_functions + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/modules.rst b/doc/modules.rst new file mode 100644 index 00000000..9c698ca2 --- /dev/null +++ b/doc/modules.rst @@ -0,0 +1,7 @@ +GPy +=== + +.. toctree:: + :maxdepth: 4 + + GPy From 20cc336a7a20ec3f6cec5bfba67242f13342babf Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 21 Jan 2013 13:29:25 +0000 Subject: [PATCH 37/61] added periodic kernels --- GPy/kern/__init__.py | 2 +- GPy/kern/constructors.py | 57 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 4a36d6d0..eae717be 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, linear_ARD, rbf_sympy, sympykern +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, linear_ARD, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52 from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index ab39fcb2..49639711 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -16,6 +16,9 @@ from bias import bias as biaspart from finite_dimensional import finite_dimensional as finite_dimensionalpart from spline import spline as splinepart from Brownian import Brownian as Brownianpart +from periodic_exponential import periodic_exponential as periodic_exponentialpart +from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part +from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part #TODO these s=constructors are not as clean as we'd like. 
Tidy the code up #using meta-classes to make the objects construct properly without them. @@ -196,3 +199,57 @@ def sympykern(D,k): A kernel from a symbolic sympy representation """ return kern(D,[spkern(D,k)]) + +def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): + """ + Construct a periodic exponential kernel + + :param D: dimensionality, only defined for D=1 + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + :param period: the period + :type period: float + :param n_freq: the number of frequencies considered for the periodic subspace + :type n_freq: int + """ + part = periodic_exponentialpart(D,variance, lengthscale, ARD) + return kern(D, [part]) + +def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): + """ + Construct a periodic Matern 3/2 kernel. + + :param D: dimensionality, only defined for D=1 + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + :param period: the period + :type period: float + :param n_freq: the number of frequencies considered for the periodic subspace + :type n_freq: int + """ + part = periodic_Matern32part(D,variance, lengthscale, ARD) + return kern(D, [part]) + +def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): + """ + Construct a periodic Matern 5/2 kernel. + + :param D: dimensionality, only defined for D=1 + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + :param period: the period + :type period: float + :param n_freq: the number of frequencies considered for the periodic subspace + :type n_freq: int + """ + part = periodic_Matern52part(D,variance, lengthscale, ARD) + return kern(D, [part]) From 8d98e09b9eaabbcada161d98d293b3f2f999ba60 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 21 Jan 2013 13:31:27 +0000 Subject: [PATCH 38/61] added some more files for periodic kernels --- GPy/kern/periodic_Matern32.py | 168 +++++++++++++++++++++++++++++ GPy/kern/periodic_Matern52.py | 177 +++++++++++++++++++++++++++++++ GPy/kern/periodic_exponential.py | 164 ++++++++++++++++++++++ 3 files changed, 509 insertions(+) create mode 100644 GPy/kern/periodic_Matern32.py create mode 100644 GPy/kern/periodic_Matern52.py create mode 100644 GPy/kern/periodic_exponential.py diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py new file mode 100644 index 00000000..8f616baf --- /dev/null +++ b/GPy/kern/periodic_Matern32.py @@ -0,0 +1,168 @@ +from kernpart import kernpart +import numpy as np +from GPy.util.linalg import mdot, pdinv + +class periodic_Matern32(kernpart): + """ + Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for D=1.
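+    The covariance is computed as K(X,X2) = FX Gi FX2.T, where FX stacks cosines at the first n_freq harmonics of the period and Gi is the inverse of their Gram matrix in the Matern 3/2 RKHS (a summary of the K and Gram_matrix methods below).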
+ + :param D: the number of input dimensions + :type D: int + :param variance: the variance of the Matern kernel + :type variance: float + :param lengthscale: the lengthscale of the Matern kernel + :type lengthscale: np.ndarray of size (D,) + :param period: the period + :type period: float + :param n_freq: the number of frequencies considered for the periodic subspace + :type n_freq: int + :rtype: kernel object + + """ + + def __init__(self,D=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): + assert D==1 + self.name = 'periodic_Mat32' + self.D = D + if lengthscale is not None: + assert lengthscale.shape==(self.D,) + else: + lengthscale = np.ones(self.D) + self.lower,self.upper = lower, upper + self.Nparam = 3 + self.n_freq = n_freq + self.n_basis = 2*n_freq + self.set_param(np.hstack((variance,lengthscale,period))) + + def _cos(self,alpha,omega,phase): + def f(x): + return alpha*np.cos(omega*x+phase) + return f + def _cos_factorization(self,alpha,omega,phase): + r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None] + r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None] + r = np.sqrt(r1**2 + r2**2) + psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) + return r,omega[:,0:1], psi + def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): + Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) + Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) + Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) + Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1) + return Gint + + def get_param(self): + """return the value of the parameters.""" + return np.hstack((self.variance,self.lengthscale,self.period)) + def set_param(self,x): + """set the value of the parameters.""" + assert x.size==3 + self.variance = x[0] + self.lengthscale = x[1] + self.period = x[2] + + self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.] + self.b = [1,self.lengthscale**2/3] + + self.basis_alpha = np.ones((self.n_basis,)) + self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[])) + self.basis_phi = np.array(sum([[-np.pi/2, 0.] 
for i in range(1,self.n_freq+1)],[])) + + self.G = self.Gram_matrix() + self.Gi = np.linalg.inv(self.G) + + def get_param_names(self): + """return parameter names.""" + return ['variance','lengthscale','period'] + + def Gram_matrix(self): + La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2)) + Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega)) + Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi)) + r,omega,phi = self._cos_factorization(La,Lo,Lp) + Gint = self._int_computation( r,omega,phi, r,omega,phi) + + Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] + F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) + + def K(self,X,X2,target): + """Compute the covariance matrix between X and X2.""" + if X2 is None: X2 = X + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + np.add(mdot(FX,self.Gi,FX2.T), target,target) + + def dK_dtheta(self,partial,X,X2,target): + """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" + if X2 is None: X2 = X + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + + La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2)) + Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega)) + Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi)) + r,omega,phi = self._cos_factorization(La,Lo,Lp) + Gint = self._int_computation( r,omega,phi, r,omega,phi) + + Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] + F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + + #dK_dvar + dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T) + + #dK_dlen + da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.] + db_dlen = [0.,2*self.lengthscale/3.] 
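+        # da_dlen and db_dlen above are the derivatives, with respect to the lengthscale,
+        # of the operator coefficients a and boundary-term weights b defined in set_param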
+ dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2)) + r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp) + dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi) + dGint_dlen = dGint_dlen + dGint_dlen.T + dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T) + dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T) + + #dK_dper + dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X) + dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2) + + dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period)) + dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2)) + r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) + + IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2)) + IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) + IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) + IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) + IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) + + IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi) + IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi) + IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T) + IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T) + IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1) + + dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period)) + dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi)) + r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2) + + dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi) + dGint_dper = dGint_dper + dGint_dper.T + + dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + + dG_dper = 
1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T))) + + dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) + + # np.add(target[:,:,0],dK_dvar, target[:,:,0]) + target[0] += np.sum(dK_dvar*partial) + #np.add(target[:,:,1],dK_dlen, target[:,:,1]) + target[1] += np.sum(dK_dlen*partial) + #np.add(target[:,:,2],dK_dper, target[:,:,2]) + target[2] += np.sum(dK_dper*partial) + + + diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py new file mode 100644 index 00000000..13540435 --- /dev/null +++ b/GPy/kern/periodic_Matern52.py @@ -0,0 +1,177 @@ +from kernpart import kernpart +import numpy as np +from GPy.util.linalg import mdot, pdinv + +class periodic_Matern52(kernpart): + """ + Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for D=1. + + :param D: the number of input dimensions + :type D: int + :param variance: the variance of the Matern kernel + :type variance: float + :param lengthscale: the lengthscale of the Matern kernel + :type lengthscale: np.ndarray of size (D,) + :param period: the period + :type period: float + :param n_freq: the number of frequencies considered for the periodic subspace + :type n_freq: int + :rtype: kernel object + + """ + + def __init__(self,D=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): + assert D==1 + self.name = 'periodic_Mat52' + self.D = D + if lengthscale is not None: + assert lengthscale.shape==(self.D,) + else: + lengthscale = np.ones(self.D) + self.lower,self.upper = lower, upper + self.Nparam = 3 + self.n_freq = n_freq + self.n_basis = 2*n_freq + self.set_param(np.hstack((variance,lengthscale,period))) + + def _cos(self,alpha,omega,phase): + def f(x): + return alpha*np.cos(omega*x+phase) + return f + def _cos_factorization(self,alpha,omega,phase): + r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None] + r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None] + r = np.sqrt(r1**2 + r2**2) + psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) + return r,omega[:,0:1], psi + def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): + Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) + Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) + Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) + Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1) + return Gint + + def get_param(self): + """return the value of the parameters.""" + return np.hstack((self.variance,self.lengthscale,self.period)) + def set_param(self,x): + """set the value of the parameters.""" + assert x.size==3 + self.variance = x[0] + self.lengthscale = x[1] + self.period = x[2] + + self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.] 
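+        # a: coefficients of the differential operator (derivative orders 0 to 3) entering the
+        # Matern 5/2 RKHS inner product; b: weights of the boundary terms evaluated at self.lower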
+ self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)] + + self.basis_alpha = np.ones((2*self.n_freq,)) + self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[])) + self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[])) + + self.G = self.Gram_matrix() + self.Gi = np.linalg.inv(self.G) + + def get_param_names(self): + """return parameter names.""" + return ['variance','lengthscale','period'] + + def Gram_matrix(self): + La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3)) + Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega)) + Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2)) + r,omega,phi = self._cos_factorization(La,Lo,Lp) + Gint = self._int_computation( r,omega,phi, r,omega,phi) + + Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] + F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None] + lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T) + return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms) + + def K(self,X,X2,target): + """Compute the covariance matrix between X and X2.""" + if X2 is None: X2 = X + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + np.add(mdot(FX,self.Gi,FX2.T), target,target) + + def dK_dtheta(self,partial,X,X2,target): + """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" + if X2 is None: X2 = X + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + + La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3)) + Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega)) + Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2)) + r,omega,phi = self._cos_factorization(La,Lo,Lp) + Gint = self._int_computation( r,omega,phi, r,omega,phi) + + Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] + F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None] + + #dK_dvar + dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T) + + #dK_dlen + da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.] 
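+        # a[i] is proportional to lengthscale**(i-3), so da[i]/dlengthscale = (i-3)*a[i]/lengthscale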
+ db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale] + dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3)) + r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp) + dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi) + dGint_dlen = dGint_dlen + dGint_dlen.T + dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T) + dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen + dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T) + + #dK_dper + dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X) + dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2) + + dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period)) + dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi)) + r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) + + IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2)) + IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) + IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) + IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) + IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) + + IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi) + IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi) + IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T) + IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T) + IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1) + + dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period)) + dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2)) + r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2) + + dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi) + dGint_dper = dGint_dper + 
dGint_dper.T + + dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None] + + dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower.T,dFlower_dper)) + dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T) + dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T) + dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T) + dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T) + + dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper) + dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) + + # np.add(target[:,:,0],dK_dvar, target[:,:,0]) + target[0] += np.sum(dK_dvar*partial) + #np.add(target[:,:,1],dK_dlen, target[:,:,1]) + target[1] += np.sum(dK_dlen*partial) + #np.add(target[:,:,2],dK_dper, target[:,:,2]) + target[2] += np.sum(dK_dper*partial) + + diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/periodic_exponential.py new file mode 100644 index 00000000..11716e13 --- /dev/null +++ b/GPy/kern/periodic_exponential.py @@ -0,0 +1,164 @@ +from kernpart import kernpart +import numpy as np +from GPy.util.linalg import mdot, pdinv + +class periodic_exponential(kernpart): + """ + Kernel of the periodic subspace (up to a given frequency) of an exponential (Matern 1/2) RKHS. Only defined for D=1.
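+    The construction mirrors periodic_Matern32, here using the first-order operator f -> f/lengthscale + f' that enters the exponential RKHS inner product (see the coefficients a in set_param below).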
+ + :param D: the number of input dimensions + :type D: int + :param variance: the variance of the Matern kernel + :type variance: float + :param lengthscale: the lengthscale of the Matern kernel + :type lengthscale: np.ndarray of size (D,) + :param period: the period + :type period: float + :param n_freq: the number of frequencies considered for the periodic subspace + :type n_freq: int + :rtype: kernel object + + """ + + def __init__(self,D=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): + assert D==1 + self.name = 'periodic_exp' + self.D = D + if lengthscale is not None: + assert lengthscale.shape==(self.D,) + else: + lengthscale = np.ones(self.D) + self.lower,self.upper = lower, upper + self.Nparam = 3 + self.n_freq = n_freq + self.n_basis = 2*n_freq + self.set_param(np.hstack((variance,lengthscale,period))) + + def _cos(self,alpha,omega,phase): + def f(x): + return alpha*np.cos(omega*x+phase) + return f + def _cos_factorization(self,alpha,omega,phase): + r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None] + r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None] + r = np.sqrt(r1**2 + r2**2) + psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) + return r,omega[:,0:1], psi + def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): + Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) + Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) + Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) + Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1) + return Gint + + def get_param(self): + """return the value of the parameters.""" + return np.hstack((self.variance,self.lengthscale,self.period)) + def set_param(self,x): + """set the value of the parameters.""" + assert x.size==3 + self.variance = x[0] + self.lengthscale = x[1] + self.period = x[2] + + self.a = [1./self.lengthscale, 1.] + self.b = [1] + + self.basis_alpha = np.ones((self.n_basis,)) + self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[])) + self.basis_phi = np.array(sum([[-np.pi/2, 0.] 
for i in range(1,self.n_freq+1)],[])) + + self.G = self.Gram_matrix() + self.Gi = np.linalg.inv(self.G) + + def get_param_names(self): + """return parameter names.""" + return ['variance','lengthscale','period'] + + def Gram_matrix(self): + La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega)) + Lo = np.column_stack((self.basis_omega,self.basis_omega)) + Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2)) + r,omega,phi = self._cos_factorization(La,Lo,Lp) + Gint = self._int_computation( r,omega,phi, r,omega,phi) + + Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] + + return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T)) + + def K(self,X,X2,target): + """Compute the covariance matrix between X and X2.""" + if X2 is None: X2 = X + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + np.add(mdot(FX,self.Gi,FX2.T), target,target) + + def dK_dtheta(self,partial,X,X2,target): + """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" + if X2 is None: X2 = X + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + + La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega)) + Lo = np.column_stack((self.basis_omega,self.basis_omega)) + Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2)) + r,omega,phi = self._cos_factorization(La,Lo,Lp) + Gint = self._int_computation( r,omega,phi, r,omega,phi) + + Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] + + #dK_dvar + dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T) + + #dK_dlen + da_dlen = [-1./self.lengthscale**2,0.] 
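+        # da_dlen above is the derivative of a = [1./lengthscale, 1.] with respect to the lengthscale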
+ dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega)) + r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp) + dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi) + dGint_dlen = dGint_dlen + dGint_dlen.T + dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen + dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T) + + #dK_dper + dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X) + dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2) + + dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period)) + dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi)) + r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) + + IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2)) + IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) + IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) + IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) + IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) + + IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi) + IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi) + IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T) + IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T) + IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1) + + dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period)) + dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2)) + r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T + + dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi) + dGint_dper = dGint_dper + dGint_dper.T + + dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + + dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T))) + + dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) + + # np.add(target[:,:,0],dK_dvar, target[:,:,0]) + target[0] += np.sum(dK_dvar*partial) + #np.add(target[:,:,1],dK_dlen, target[:,:,1]) + target[1] += np.sum(dK_dlen*partial) + #np.add(target[:,:,2],dK_dper, target[:,:,2]) + target[2] += np.sum(dK_dper*partial) + + From b66eab22fbeac48c2b738845fc5a9125446fc06c Mon Sep 17 00:00:00 2001 From: Nicolas 
Date: Mon, 21 Jan 2013 18:03:24 +0000 Subject: [PATCH 39/61] few bugs fixed in periodic kernels --- GPy/kern/constructors.py | 6 ++-- GPy/kern/periodic_Matern32.py | 50 +++++++++++++++++-------------- GPy/kern/periodic_Matern52.py | 49 +++++++++++++++++------------- GPy/kern/periodic_exponential.py | 51 ++++++++++++++++++-------------- 4 files changed, 86 insertions(+), 70 deletions(-) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 49639711..1bd241e7 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -215,7 +215,7 @@ def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_fre :param n_freq: the number of frequencies considered for the periodic subspace :type n_freq: int """ - part = periodic_exponentialpart(D,variance, lengthscale, ARD) + part = periodic_exponentialpart(D,variance, lengthscale, period, n_freq, lower, upper) return kern(D, [part]) def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): @@ -233,7 +233,7 @@ def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, :param n_freq: the number of frequencies considered for the periodic subspace :type n_freq: int """ - part = periodic_Matern32part(D,variance, lengthscale, ARD) + part = periodic_Matern32part(D,variance, lengthscale, period, n_freq, lower, upper) return kern(D, [part]) def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): @@ -251,5 +251,5 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, :param n_freq: the number of frequencies considered for the periodic subspace :type n_freq: int """ - part = periodic_Matern52part(D,variance, lengthscale, ARD) + part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper) return kern(D, [part]) diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py index 8f616baf..3e81044c 100644 --- a/GPy/kern/periodic_Matern32.py +++ b/GPy/kern/periodic_Matern32.py @@ -32,7 +32,7 @@ class periodic_Matern32(kernpart): self.Nparam = 3 self.n_freq = n_freq self.n_basis = 2*n_freq - self.set_param(np.hstack((variance,lengthscale,period))) + self._set_params(np.hstack((variance,lengthscale,period))) def _cos(self,alpha,omega,phase): def f(x): @@ -45,16 +45,16 @@ class periodic_Matern32(kernpart): psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) return r,omega[:,0:1], psi def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): - Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) + Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) - Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) + #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1) return Gint - def get_param(self): + def _get_params(self): """return the value of 
the parameters.""" return np.hstack((self.variance,self.lengthscale,self.period)) - def set_param(self,x): + def _set_params(self,x): """set the value of the parameters.""" assert x.size==3 self.variance = x[0] @@ -67,15 +67,15 @@ class periodic_Matern32(kernpart): self.basis_alpha = np.ones((self.n_basis,)) self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[])) self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[])) - - self.G = self.Gram_matrix() - self.Gi = np.linalg.inv(self.G) - def get_param_names(self): + self.G = self.Gram_matrix() + self.Gi = np.linalg.inv(self.G) + + def _get_param_names(self): """return parameter names.""" return ['variance','lengthscale','period'] - def Gram_matrix(self): + def Gram_matrix(self): La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2)) Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega)) Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi)) @@ -88,11 +88,18 @@ class periodic_Matern32(kernpart): def K(self,X,X2,target): """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) - FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + if X2 is None: + FX2 = FX + else: + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) np.add(mdot(FX,self.Gi,FX2.T), target,target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) + def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" if X2 is None: X2 = X @@ -127,34 +134,34 @@ class periodic_Matern32(kernpart): dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period)) dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2)) - r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) + r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2)) IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) - IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) - IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) + #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 
1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi) IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi) IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T) IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T) - IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1) dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period)) dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi)) - r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2) + r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2) dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi) dGint_dper = dGint_dper + dGint_dper.T dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] - dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] + dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T))) - + dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) # np.add(target[:,:,0],dK_dvar, target[:,:,0]) @@ -163,6 +170,3 @@ class periodic_Matern32(kernpart): target[1] += np.sum(dK_dlen*partial) #np.add(target[:,:,2],dK_dper, target[:,:,2]) target[2] += np.sum(dK_dper*partial) - - - diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py index 13540435..abdd6a7d 100644 --- a/GPy/kern/periodic_Matern52.py +++ b/GPy/kern/periodic_Matern52.py @@ -32,29 +32,31 @@ class periodic_Matern52(kernpart): self.Nparam = 3 self.n_freq = n_freq self.n_basis = 2*n_freq - self.set_param(np.hstack((variance,lengthscale,period))) + self._set_params(np.hstack((variance,lengthscale,period))) def _cos(self,alpha,omega,phase): def f(x): return alpha*np.cos(omega*x+phase) return f + def _cos_factorization(self,alpha,omega,phase): r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None] r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None] r = np.sqrt(r1**2 + r2**2) psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) return r,omega[:,0:1], psi + def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): - Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - 
np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) + Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) - Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) + #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1) return Gint - def get_param(self): + def _get_params(self): """return the value of the parameters.""" return np.hstack((self.variance,self.lengthscale,self.period)) - def set_param(self,x): + def _set_params(self,x): """set the value of the parameters.""" assert x.size==3 self.variance = x[0] @@ -67,15 +69,15 @@ class periodic_Matern52(kernpart): self.basis_alpha = np.ones((2*self.n_freq,)) self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[])) self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[])) - - self.G = self.Gram_matrix() - self.Gi = np.linalg.inv(self.G) - def get_param_names(self): + self.G = self.Gram_matrix() + self.Gi = np.linalg.inv(self.G) + + def _get_param_names(self): """return parameter names.""" return ['variance','lengthscale','period'] - def Gram_matrix(self): + def Gram_matrix(self): La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3)) Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega)) Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2)) @@ -85,16 +87,23 @@ class periodic_Matern52(kernpart): Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None] - lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T) + lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T) return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms) def K(self,X,X2,target): """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) - FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + if X2 is None: + FX2 = FX + else: + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) np.add(mdot(FX,self.Gi,FX2.T), target,target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + FX = 
self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) + def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" if X2 is None: X2 = X @@ -131,25 +140,25 @@ class periodic_Matern52(kernpart): dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period)) dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi)) - r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) + r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2)) IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) - IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) - IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) + #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi) IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi) IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T) IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T) - IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1) dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period)) dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2)) - r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2) + r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2) dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi) dGint_dper = dGint_dper + dGint_dper.T @@ -173,5 +182,3 @@ class periodic_Matern52(kernpart): target[1] += np.sum(dK_dlen*partial) #np.add(target[:,:,2],dK_dper, target[:,:,2]) target[2] += np.sum(dK_dper*partial) - - diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/periodic_exponential.py index 11716e13..2637afe5 100644 --- a/GPy/kern/periodic_exponential.py +++ b/GPy/kern/periodic_exponential.py @@ -32,29 +32,31 @@ class periodic_exponential(kernpart): self.Nparam = 3 self.n_freq = n_freq self.n_basis = 
2*n_freq - self.set_param(np.hstack((variance,lengthscale,period))) + self._set_params(np.hstack((variance,lengthscale,period))) def _cos(self,alpha,omega,phase): def f(x): return alpha*np.cos(omega*x+phase) return f + def _cos_factorization(self,alpha,omega,phase): r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None] r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None] r = np.sqrt(r1**2 + r2**2) psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) return r,omega[:,0:1], psi + def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): - Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) + Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) - Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) + #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0]) Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1) return Gint - def get_param(self): + def _get_params(self): """return the value of the parameters.""" return np.hstack((self.variance,self.lengthscale,self.period)) - def set_param(self,x): + def _set_params(self,x): """set the value of the parameters.""" assert x.size==3 self.variance = x[0] @@ -67,32 +69,37 @@ class periodic_exponential(kernpart): self.basis_alpha = np.ones((self.n_basis,)) self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[])) self.basis_phi = np.array(sum([[-np.pi/2, 0.] 
for i in range(1,self.n_freq+1)],[])) - - self.G = self.Gram_matrix() - self.Gi = np.linalg.inv(self.G) - def get_param_names(self): + self.G = self.Gram_matrix() + self.Gi = np.linalg.inv(self.G) + + def _get_param_names(self): """return parameter names.""" return ['variance','lengthscale','period'] - def Gram_matrix(self): + def Gram_matrix(self): La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega)) Lo = np.column_stack((self.basis_omega,self.basis_omega)) Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2)) r,omega,phi = self._cos_factorization(La,Lo,Lp) Gint = self._int_computation( r,omega,phi, r,omega,phi) - Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] - return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T)) def K(self,X,X2,target): """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) - FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + if X2 is None: + FX2 = FX + else: + FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) np.add(mdot(FX,self.Gi,FX2.T), target,target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) + np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) + def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" if X2 is None: X2 = X @@ -125,24 +132,24 @@ class periodic_exponential(kernpart): dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period)) dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi)) - r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) + r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper) IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2)) IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) - IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) - IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) + #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1) IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi) IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi) IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 
1./2*self.upper**2*np.cos(phi-phi1.T) IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T) - IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) + #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0]) IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1) dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period)) - dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2)) + dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2)) r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi) @@ -151,7 +158,7 @@ class periodic_exponential(kernpart): dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None] dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T))) - + dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) # np.add(target[:,:,0],dK_dvar, target[:,:,0]) @@ -160,5 +167,3 @@ class periodic_exponential(kernpart): target[1] += np.sum(dK_dlen*partial) #np.add(target[:,:,2],dK_dper, target[:,:,2]) target[2] += np.sum(dK_dper*partial) - - From 82d5479438f300f46783c3b90c1981f4dfac4f7b Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 23 Jan 2013 11:22:20 +0000 Subject: [PATCH 40/61] Poisson likelihood. --- GPy/inference/likelihoods.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 53dd4248..098e525a 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -102,3 +102,5 @@ class probit(likelihood): def _log_likelihood_gradients(): raise NotImplementedError +class poisson(likelihood): + pass From f949e4a2c8a1463b035398d10035bc6c49a3b9b2 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 23 Jan 2013 11:25:41 +0000 Subject: [PATCH 41/61] Poisson and Gaussian likelihood. --- GPy/inference/likelihoods.py | 98 +++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 098e525a..0eeeb982 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -103,4 +103,100 @@ class probit(likelihood): raise NotImplementedError class poisson(likelihood): - pass + """ + Poisson likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(Y_i) = \exp(-\lambda) * \lambda**Y_i / Y_i!
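+ where the rate is linked to the latent function f through \lambda = \exp(scale*f + location)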
+ $$ + """ + def moments_match(self,i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + mu = v_i/tau_i + sigma = np.sqrt(1./tau_i) + def poisson_norm(f): + pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma) + rate = np.exp( (f*self.scale)+self.location) + poisson = stats.poisson.pmf(float(self.Y[i]),rate) + return pdf_norm_f*poisson + + def log_pnm(f): + return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i]) + + golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() + golden_B = np.array([np.log(self.Y[i]),mu]).max() + golden_A = (golden_A - self.location)/self.scale + golden_B = (golden_B - self.location)/self.scale + opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) + width = 3./np.log(max(self.Y[i],2)) + + # Simpson's approximamtion + A = opt - width + B = opt + width + K = 10*int(np.log(max(self.Y[i],150))) + h = (B-A)/K + grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) + x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) + zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) + first = zeroth*x + second = first*x + Z_hat = sum(zeroth)*h/3 + mu_hat = sum(first)*h/(3*Z_hat) + m2 = sum(second)*h/(3*Z_hat) + sigma2_hat = m2 - mu_hat**2 + return float(Z_hat), float(mu_hat), float(sigma2_hat) + + def plot1Db(self,X,X_new,F_new,F2_new=None,U=None): + pb.subplot(212) + #gpplot(X_new,F_new,np.sqrt(F2_new)) + pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME + pb.plot(X,self.Y,'kx',mew=1.5) + if U is not None: + pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) + def predictive_mean(self,mu,variance): + return np.exp(mu*self.scale + self.location) + def predictive_variance(self,mu,variance): + return mu + def _log_likelihood_gradients(): + raise NotImplementedError + +class gaussian(likelihood): + """ + Gaussian likelihood + Y is expected to take values in (-inf,inf) + """ + def moments_match(self,i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + mu = v_i/tau_i + sigma = np.sqrt(1./tau_i) + s = 1. if self.Y[i] == 0 else 1./self.Y[i] + sigma2_hat = 1./(1./sigma**2 + 1./s**2) + mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2) + Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2)) + return Z_hat, mu_hat, sigma2_hat + + def plot1Db(self,X,X_new,F_new,U=None): + assert X.shape[1] == 1, 'Number of dimensions must be 1' + gpplot(X_new,F_new,np.zeros(X_new.shape[0])) + pb.plot(X,self.Y,'kx',mew=1.5) + if U is not None: + pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) + + def predictive_mean(self,mu,Sigma): + return mu #return stats.poisson.pmf(h) + + def _log_likelihood_gradients(): + raise NotImplementedError From 163f12ac471cfff448d3eb37618816d2d764ee5f Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Wed, 23 Jan 2013 13:02:21 +0000 Subject: [PATCH 42/61] __init__.py for Examples directory (see comments in code). 
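The gaussian likelihood added in PATCH 41 above has closed-form EP moments because the product of two Gaussians is again Gaussian: the zeroth moment is N(y; mu, sigma^2 + s^2), and mu_hat, sigma2_hat follow from the usual precision-weighted combination. A minimal numerical sanity check of the Z_hat expression, assuming a fixed noise standard deviation s and illustrative cavity parameters (the class itself sets s per observation)::

    import numpy as np
    from scipy import stats
    from scipy.integrate import quad

    tau_i, v_i, y, s = 2.0, 1.0, 0.3, 0.5        # illustrative values only
    mu, sigma = v_i / tau_i, np.sqrt(1. / tau_i)  # cavity mean and std

    # closed-form moments, as in gaussian.moments_match
    sigma2_hat = 1. / (1. / sigma**2 + 1. / s**2)
    mu_hat = sigma2_hat * (mu / sigma**2 + y / s**2)
    Z_hat = stats.norm.pdf(y, loc=mu, scale=np.sqrt(sigma**2 + s**2))

    # numerical check: Z = int N(y | f, s^2) N(f | mu, sigma^2) df
    Z_num, _ = quad(lambda f: stats.norm.pdf(y, f, s) * stats.norm.pdf(f, mu, sigma), -10., 10.)
    assert np.allclose(Z_hat, Z_num)
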
--- GPy/examples/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py index e69de29b..f02fc132 100644 --- a/GPy/examples/__init__.py +++ b/GPy/examples/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import classification +import regression +import unsupervised From 4dea5c45fbe4056caa138c3d337414c2bc378201 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Wed, 23 Jan 2013 13:04:20 +0000 Subject: [PATCH 43/61] __init__.py for Examples directory (see comments in code). --- GPy/examples/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py index f02fc132..2f3cf0f4 100644 --- a/GPy/examples/__init__.py +++ b/GPy/examples/__init__.py @@ -1,6 +1,8 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) +# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run: +# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it! import classification import regression import unsupervised From b2bc978767a5fa80c33f234b7ed46ab9e6cc9150 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 23 Jan 2013 16:12:04 +0000 Subject: [PATCH 44/61] TODO added --- GPy/inference/likelihoods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 0eeeb982..d789830f 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -138,6 +138,7 @@ class poisson(likelihood): width = 3./np.log(max(self.Y[i],2)) # Simpson's approximamtion + #TODO explain this algorithm A = opt - width B = opt + width K = 10*int(np.log(max(self.Y[i],150))) From 54c4d6df145434c3c736f8aa517710b0749c120e Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 23 Jan 2013 16:53:27 +0000 Subject: [PATCH 45/61] Deleted line --- GPy/inference/likelihoods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index d789830f..3aa20d3f 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -197,7 +197,7 @@ class gaussian(likelihood): pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) def predictive_mean(self,mu,Sigma): - return mu #return stats.poisson.pmf(h) + return mu def _log_likelihood_gradients(): raise NotImplementedError From 8c3aa5b64dd29db894ccffc141a77b9cba841eb4 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Wed, 23 Jan 2013 17:02:28 +0000 Subject: [PATCH 46/61] test --- GPy/inference/likelihoods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 3aa20d3f..023f00ae 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -201,3 +201,4 @@ class gaussian(likelihood): def _log_likelihood_gradients(): raise NotImplementedError +#This is just a test From d286ffe63363e00fcf2a038581b40ad95e8186e4 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 23 Jan 2013 22:54:04 +0000 Subject: [PATCH 47/61] Golden serach and Simpson's rule explained. 
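The golden-search-plus-Simpson machinery this patch documents is not specific to the Poisson case: the tilted density being integrated is the Gaussian cavity times the Poisson pmf exp(-lambda) * lambda**y / y! with lambda = exp(f*scale + location), but any likelihood without closed-form moments could reuse the same recipe — which is what the TODO added below suggests. A sketch of such a generic helper if it were lifted into the base likelihood class; the function name, bracket, window width and grid size are illustrative, not GPy API::

    import numpy as np
    from scipy import optimize, stats

    def numeric_moments(log_p, brack=(-5., 5.), width=3., K=20):
        """Zeroth/first/second central moments of exp(log_p), via golden search
        plus Simpson's rule on K (even) intervals around the mode."""
        mode = optimize.golden(lambda f: -log_p(f), brack=brack)  # golden minimises, so negate
        x = np.linspace(mode - width, mode + width, K + 1)
        h = x[1] - x[0]
        w = np.ones(K + 1)
        w[1:-1:2], w[2:-1:2] = 4., 2.                             # Simpson weights 1,4,2,...,2,4,1
        p = np.exp(log_p(x))
        Z = h / 3. * np.sum(w * p)                                # zeroth moment
        m1 = h / 3. * np.sum(w * p * x) / Z                       # first moment
        m2 = h / 3. * np.sum(w * p * x**2) / Z                    # second (non-central) moment
        return Z, m1, m2 - m1**2

    # e.g. a standard-normal cavity times a Poisson likelihood with rate exp(f), y = 3
    log_p = lambda f: stats.norm.logpdf(f, 0., 1.) + stats.poisson.logpmf(3, np.exp(f))
    print numeric_moments(log_p)
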
--- GPy/inference/likelihoods.py | 49 +++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 023f00ae..5f0eb7ff 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -122,36 +122,50 @@ class poisson(likelihood): mu = v_i/tau_i sigma = np.sqrt(1./tau_i) def poisson_norm(f): + """ + Product of the likelihood and the cavity distribution + """ pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma) rate = np.exp( (f*self.scale)+self.location) poisson = stats.poisson.pmf(float(self.Y[i]),rate) return pdf_norm_f*poisson def log_pnm(f): + """ + Log of poisson_norm + """ return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i]) - golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() - golden_B = np.array([np.log(self.Y[i]),mu]).max() + """ + Golden Search and Simpson's Rule + -------------------------------- + Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input. + Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule + """ + #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case. + + #Golden search + golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit + golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit golden_A = (golden_A - self.location)/self.scale golden_B = (golden_B - self.location)/self.scale - opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) - width = 3./np.log(max(self.Y[i],2)) + opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm - # Simpson's approximamtion - #TODO explain this algorithm - A = opt - width - B = opt + width - K = 10*int(np.log(max(self.Y[i],150))) - h = (B-A)/K - grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) - x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) - zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) + # Simpson's approximation + width = 3./np.log(max(self.Y[i],2)) + A = opt - width #Lower limit + B = opt + width #Upper limit + K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case + h = (B-A)/K # length of the intervals + grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) + x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier + zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x first = zeroth*x second = first*x - Z_hat = sum(zeroth)*h/3 - mu_hat = sum(first)*h/(3*Z_hat) - m2 = sum(second)*h/(3*Z_hat) - sigma2_hat = m2 - mu_hat**2 + Z_hat = sum(zeroth)*h/3 # Zero-th moment + mu_hat = sum(first)*h/(3*Z_hat) # First moment + m2 = sum(second)*h/(3*Z_hat) # Second moment + sigma2_hat = m2 - mu_hat**2 # Second central moment return float(Z_hat), float(mu_hat), float(sigma2_hat) def plot1Db(self,X,X_new,F_new,F2_new=None,U=None): @@ -201,4 +215,3 @@ 
class gaussian(likelihood): def _log_likelihood_gradients(): raise NotImplementedError -#This is just a test From 4304c1dbe2f627f6adbbd5329a3baa22b9267997 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 24 Jan 2013 16:55:15 +0000 Subject: [PATCH 48/61] Missing scale and location arguments. --- GPy/inference/likelihoods.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py index 5f0eb7ff..c9b36e10 100644 --- a/GPy/inference/likelihoods.py +++ b/GPy/inference/likelihoods.py @@ -9,7 +9,7 @@ import pylab as pb from ..util.plot import gpplot class likelihood: - def __init__(self,Y): + def __init__(self,Y,location=0,scale=1): """ Likelihood class for doing Expectation propagation @@ -18,6 +18,8 @@ class likelihood: """ self.Y = Y self.N = self.Y.shape[0] + self.location = location + self.scale = scale def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u): """ From c43904c3bfc64cd19b28d60ccef7e78aeeabf79d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Fusi?= Date: Sun, 27 Jan 2013 18:12:29 +0000 Subject: [PATCH 49/61] untabified priors.py --- GPy/core/priors.py | 206 ++++++++++++++++++++++----------------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/GPy/core/priors.py b/GPy/core/priors.py index 7ad02e48..96c87ceb 100644 --- a/GPy/core/priors.py +++ b/GPy/core/priors.py @@ -8,120 +8,120 @@ from scipy.special import gammaln, digamma from ..util.linalg import pdinv class prior: - def pdf(self,x): - return np.exp(self.lnpdf(x)) - def plot(self): - rvs = self.rvs(1000) - pb.hist(rvs,100,normed=True) - xmin,xmax = pb.xlim() - xx = np.linspace(xmin,xmax,1000) - pb.plot(xx,self.pdf(xx),'r',linewidth=2) + def pdf(self,x): + return np.exp(self.lnpdf(x)) + def plot(self): + rvs = self.rvs(1000) + pb.hist(rvs,100,normed=True) + xmin,xmax = pb.xlim() + xx = np.linspace(xmin,xmax,1000) + pb.plot(xx,self.pdf(xx),'r',linewidth=2) class Gaussian(prior): - """ - Implementation of the univariate Gaussian probability function, coupled with random variables, since scipy.stats sucks. - Using Bishop 2006 notation""" - def __init__(self,mu,sigma): - self.mu = float(mu) - self.sigma = float(sigma) - self.sigma2 = np.square(self.sigma) - self.constant = -0.5*np.log(2*np.pi*self.sigma2) - def __str__(self): - return "N("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')' - def lnpdf(self,x): - return self.constant - 0.5*np.square(x-self.mu)/self.sigma2 - def lnpdf_grad(self,x): - return -(x-self.mu)/self.sigma2 - def rvs(self,n): - return np.random.randn(n)*self.sigma + self.mu + """ + Implementation of the univariate Gaussian probability function, coupled with random variables, since scipy.stats sucks. 
+ Using Bishop 2006 notation""" + def __init__(self,mu,sigma): + self.mu = float(mu) + self.sigma = float(sigma) + self.sigma2 = np.square(self.sigma) + self.constant = -0.5*np.log(2*np.pi*self.sigma2) + def __str__(self): + return "N("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')' + def lnpdf(self,x): + return self.constant - 0.5*np.square(x-self.mu)/self.sigma2 + def lnpdf_grad(self,x): + return -(x-self.mu)/self.sigma2 + def rvs(self,n): + return np.random.randn(n)*self.sigma + self.mu class log_Gaussian(prior): - """ - """ - def __init__(self,mu,sigma): - self.mu = float(mu) - self.sigma = float(sigma) - self.sigma2 = np.square(self.sigma) - self.constant = -0.5*np.log(2*np.pi*self.sigma2) - def __str__(self): - return "lnN("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')' - def lnpdf(self,x): - return self.constant - 0.5*np.square(np.log(x)-self.mu)/self.sigma2 -np.log(x) - def lnpdf_grad(self,x): - return -((np.log(x)-self.mu)/self.sigma2+1.)/x - def rvs(self,n): - return np.exp(np.random.randn(n)*self.sigma + self.mu) + """ + """ + def __init__(self,mu,sigma): + self.mu = float(mu) + self.sigma = float(sigma) + self.sigma2 = np.square(self.sigma) + self.constant = -0.5*np.log(2*np.pi*self.sigma2) + def __str__(self): + return "lnN("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')' + def lnpdf(self,x): + return self.constant - 0.5*np.square(np.log(x)-self.mu)/self.sigma2 -np.log(x) + def lnpdf_grad(self,x): + return -((np.log(x)-self.mu)/self.sigma2+1.)/x + def rvs(self,n): + return np.exp(np.random.randn(n)*self.sigma + self.mu) class multivariate_Gaussian: - """ - Implementation of the multivariate Gaussian probability function, coupled with random variables, since scipy.stats sucks. - Using Bishop 2006 notation""" - def __init__(self,mu,var): - self.mu = np.array(mu).flatten() - self.var = np.array(var) - assert len(self.var.shape)==2 - assert self.var.shape[0]==self.var.shape[1] - assert self.var.shape[0]==self.mu.size - self.D = self.mu.size - self.inv, self.hld = pdinv(self.var) - self.constant = -0.5*self.D*np.log(2*np.pi) - self.hld + """ + Implementation of the multivariate Gaussian probability function, coupled with random variables, since scipy.stats sucks. 
+ Using Bishop 2006 notation""" + def __init__(self,mu,var): + self.mu = np.array(mu).flatten() + self.var = np.array(var) + assert len(self.var.shape)==2 + assert self.var.shape[0]==self.var.shape[1] + assert self.var.shape[0]==self.mu.size + self.D = self.mu.size + self.inv, self.hld = pdinv(self.var) + self.constant = -0.5*self.D*np.log(2*np.pi) - self.hld - def summary(self): - pass #TODO - def pdf(self,x): - return np.exp(self.lnpdf(x)) - def lnpdf(self,x): - d = x-self.mu - return self.constant - 0.5*np.sum(d*np.dot(d,self.inv),1) - def lnpdf_grad(self,x): - d = x-self.mu - return -np.dot(self.inv,d) - def rvs(self,n): - return np.random.multivariate_normal(self.mu, self.var,n) - def plot(self): - if self.D==2: - rvs = self.rvs(200) - pb.plot(rvs[:,0],rvs[:,1], 'kx', mew=1.5) - xmin,xmax = pb.xlim() - ymin,ymax = pb.ylim() - xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j] - xflat = np.vstack((xx.flatten(),yy.flatten())).T - zz = self.pdf(xflat).reshape(100,100) - pb.contour(xx,yy,zz,linewidths=2) + def summary(self): + pass #TODO + def pdf(self,x): + return np.exp(self.lnpdf(x)) + def lnpdf(self,x): + d = x-self.mu + return self.constant - 0.5*np.sum(d*np.dot(d,self.inv),1) + def lnpdf_grad(self,x): + d = x-self.mu + return -np.dot(self.inv,d) + def rvs(self,n): + return np.random.multivariate_normal(self.mu, self.var,n) + def plot(self): + if self.D==2: + rvs = self.rvs(200) + pb.plot(rvs[:,0],rvs[:,1], 'kx', mew=1.5) + xmin,xmax = pb.xlim() + ymin,ymax = pb.ylim() + xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j] + xflat = np.vstack((xx.flatten(),yy.flatten())).T + zz = self.pdf(xflat).reshape(100,100) + pb.contour(xx,yy,zz,linewidths=2) def gamma_from_EV(E,V): - """create an instance of a gamma prior by specifying the Expected value(s) and Variance(s) of the distribution""" - a = np.square(E)/V - b = E/V - return gamma(a,b) + """create an instance of a gamma prior by specifying the Expected value(s) and Variance(s) of the distribution""" + a = np.square(E)/V + b = E/V + return gamma(a,b) class gamma(prior): - """ - Implementation of the Gamma probability function, coupled with random variables, since scipy.stats sucks. - Using Bishop 2006 notation - """ - def __init__(self,a,b): - self.a = float(a) - self.b = float(b) - self.constant = -gammaln(self.a) + a*np.log(b) - def __str__(self): - return "Ga("+str(np.round(self.a))+', '+str(np.round(self.b))+')' - def summary(self): - ret = {"E[x]": self.a/self.b,\ - "E[ln x]": digamma(self.a) - np.log(self.b),\ - "var[x]": self.a/self.b/self.b,\ - "Entropy": gammaln(self.a) - (self.a-1.)*digamma(self.a) - np.log(self.b) + self.a} - if self.a >1: - ret['Mode'] = (self.a-1.)/self.b - else: - ret['mode'] = np.nan - return ret - def lnpdf(self,x): - return self.constant + (self.a-1)*np.log(x) - self.b*x - def lnpdf_grad(self,x): - return (self.a-1.)/x - self.b - def rvs(self,n): - return np.random.gamma(scale=1./self.b,shape=self.a,size=n) + """ + Implementation of the Gamma probability function, coupled with random variables, since scipy.stats sucks. 
+ Using Bishop 2006 notation + """ + def __init__(self,a,b): + self.a = float(a) + self.b = float(b) + self.constant = -gammaln(self.a) + a*np.log(b) + def __str__(self): + return "Ga("+str(np.round(self.a))+', '+str(np.round(self.b))+')' + def summary(self): + ret = {"E[x]": self.a/self.b,\ + "E[ln x]": digamma(self.a) - np.log(self.b),\ + "var[x]": self.a/self.b/self.b,\ + "Entropy": gammaln(self.a) - (self.a-1.)*digamma(self.a) - np.log(self.b) + self.a} + if self.a >1: + ret['Mode'] = (self.a-1.)/self.b + else: + ret['mode'] = np.nan + return ret + def lnpdf(self,x): + return self.constant + (self.a-1)*np.log(x) - self.b*x + def lnpdf_grad(self,x): + return (self.a-1.)/x - self.b + def rvs(self,n): + return np.random.gamma(scale=1./self.b,shape=self.a,size=n) From b73cb33a9421bb80198ec18b706d27548646633b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Fusi?= Date: Sun, 27 Jan 2013 18:23:19 +0000 Subject: [PATCH 50/61] some love for the priors class --- GPy/core/priors.py | 76 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 9 deletions(-) diff --git a/GPy/core/priors.py b/GPy/core/priors.py index 96c87ceb..a5eae5b2 100644 --- a/GPy/core/priors.py +++ b/GPy/core/priors.py @@ -10,6 +10,7 @@ from ..util.linalg import pdinv class prior: def pdf(self,x): return np.exp(self.lnpdf(x)) + def plot(self): rvs = self.rvs(1000) pb.hist(rvs,100,normed=True) @@ -17,45 +18,79 @@ class prior: xx = np.linspace(xmin,xmax,1000) pb.plot(xx,self.pdf(xx),'r',linewidth=2) + class Gaussian(prior): """ - Implementation of the univariate Gaussian probability function, coupled with random variables, since scipy.stats sucks. - Using Bishop 2006 notation""" + Implementation of the univariate Gaussian probability function, coupled with random variables. + + :param mu: mean + :param sigma: standard deviation + + + .. Note:: Bishop 2006 notation is used throughout the code + + """ + def __init__(self,mu,sigma): self.mu = float(mu) self.sigma = float(sigma) self.sigma2 = np.square(self.sigma) self.constant = -0.5*np.log(2*np.pi*self.sigma2) + def __str__(self): return "N("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')' + def lnpdf(self,x): return self.constant - 0.5*np.square(x-self.mu)/self.sigma2 + def lnpdf_grad(self,x): return -(x-self.mu)/self.sigma2 + def rvs(self,n): return np.random.randn(n)*self.sigma + self.mu + class log_Gaussian(prior): """ + Implementation of the univariate *log*-Gaussian probability function, coupled with random variables. + + :param mu: mean + :param sigma: standard deviation + + .. Note:: Bishop 2006 notation is used throughout the code + """ + def __init__(self,mu,sigma): self.mu = float(mu) self.sigma = float(sigma) self.sigma2 = np.square(self.sigma) self.constant = -0.5*np.log(2*np.pi*self.sigma2) + def __str__(self): return "lnN("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')' + def lnpdf(self,x): return self.constant - 0.5*np.square(np.log(x)-self.mu)/self.sigma2 -np.log(x) + def lnpdf_grad(self,x): return -((np.log(x)-self.mu)/self.sigma2+1.)/x + def rvs(self,n): return np.exp(np.random.randn(n)*self.sigma + self.mu) + class multivariate_Gaussian: """ - Implementation of the multivariate Gaussian probability function, coupled with random variables, since scipy.stats sucks. - Using Bishop 2006 notation""" + Implementation of the multivariate Gaussian probability function, coupled with random variables. + + :param mu: mean (N-dimensional array) + :param var: covariance matrix (NxN) + + .. 
Note:: Bishop 2006 notation is used throughout the code + + """ + def __init__(self,mu,var): self.mu = np.array(mu).flatten() self.var = np.array(var) @@ -67,17 +102,22 @@ class multivariate_Gaussian: self.constant = -0.5*self.D*np.log(2*np.pi) - self.hld def summary(self): - pass #TODO + raise NotImplementedError + def pdf(self,x): return np.exp(self.lnpdf(x)) + def lnpdf(self,x): d = x-self.mu return self.constant - 0.5*np.sum(d*np.dot(d,self.inv),1) + def lnpdf_grad(self,x): d = x-self.mu return -np.dot(self.inv,d) + def rvs(self,n): return np.random.multivariate_normal(self.mu, self.var,n) + def plot(self): if self.D==2: rvs = self.rvs(200) @@ -91,7 +131,15 @@ class multivariate_Gaussian: def gamma_from_EV(E,V): - """create an instance of a gamma prior by specifying the Expected value(s) and Variance(s) of the distribution""" + """ + Creates an instance of a gamma prior by specifying the Expected value(s) + and Variance(s) of the distribution. + + :param E: expected value + :param V: variance + + """ + a = np.square(E)/V b = E/V return gamma(a,b) @@ -99,15 +147,23 @@ def gamma_from_EV(E,V): class gamma(prior): """ - Implementation of the Gamma probability function, coupled with random variables, since scipy.stats sucks. - Using Bishop 2006 notation + Implementation of the Gamma probability function, coupled with random variables. + + :param a: shape parameter + :param b: rate parameter (warning: it's the *inverse* of the scale) + + .. Note:: Bishop 2006 notation is used throughout the code + """ + def __init__(self,a,b): self.a = float(a) self.b = float(b) self.constant = -gammaln(self.a) + a*np.log(b) + def __str__(self): return "Ga("+str(np.round(self.a))+', '+str(np.round(self.b))+')' + def summary(self): ret = {"E[x]": self.a/self.b,\ "E[ln x]": digamma(self.a) - np.log(self.b),\ @@ -118,10 +174,12 @@ class gamma(prior): else: ret['mode'] = np.nan return ret + def lnpdf(self,x): return self.constant + (self.a-1)*np.log(x) - self.b*x + def lnpdf_grad(self,x): return (self.a-1.)/x - self.b + def rvs(self,n): return np.random.gamma(scale=1./self.b,shape=self.a,size=n) - From 1680c71445fd1b5cc91ec1873f3b863863ba7f99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Fusi?= Date: Sun, 27 Jan 2013 18:34:40 +0000 Subject: [PATCH 51/61] added unit tests for priors --- GPy/testing/prior_tests.py | 61 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 GPy/testing/prior_tests.py diff --git a/GPy/testing/prior_tests.py b/GPy/testing/prior_tests.py new file mode 100644 index 00000000..b3e3a440 --- /dev/null +++ b/GPy/testing/prior_tests.py @@ -0,0 +1,61 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import unittest +import numpy as np +import GPy + +class PriorTests(unittest.TestCase): + def test_lognormal(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.GP_regression(X, y) + m.ensure_default_constraints() + lognormal = GPy.priors.log_Gaussian(1, 2) + m.set_prior('rbf', lognormal) + m.randomize() + self.assertTrue(m.checkgrad()) + + def test_gamma(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.GP_regression(X, y) + m.ensure_default_constraints() + gamma = GPy.priors.gamma(1, 1) + m.set_prior('rbf', gamma) + m.randomize() + self.assertTrue(m.checkgrad()) + + def test_incompatibility(self): + xmin, xmax = 1, 2.5*np.pi + b, C, SNR = 1, 0, 0.1 + X = np.linspace(xmin, xmax, 500) + y = b*X + C + 1*np.sin(X) + y += 0.05*np.random.randn(len(X)) + X, y = X[:, None], y[:, None] + m = GPy.models.GP_regression(X, y) + m.ensure_default_constraints() + gaussian = GPy.priors.Gaussian(1, 1) + success = False + + # setting a Gaussian prior on non-negative parameters + # should raise an assertionerror. + try: + m.set_prior('rbf', gaussian) + except AssertionError: + success = True + + self.assertTrue(success) + + +if __name__ == "__main__": + print "Running unit tests, please be (very) patient..." + unittest.main() From 2f68f6de86424fab2bf83ecb33c1bc732ed8bf23 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 28 Jan 2013 16:21:32 +0000 Subject: [PATCH 52/61] linear kernel now has an ARD flag --- GPy/kern/__init__.py | 2 +- GPy/kern/constructors.py | 20 +---- GPy/kern/linear.py | 177 ++++++++++++++++++++----------------- GPy/kern/linear_ARD.py | 108 ---------------------- doc/tuto_GP_regression.rst | 105 ++++++++++++++++++++++ 5 files changed, 208 insertions(+), 204 deletions(-) delete mode 100644 GPy/kern/linear_ARD.py create mode 100644 doc/tuto_GP_regression.rst diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index eae717be..20d84949 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, linear_ARD, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52 +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52 from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 1bd241e7..b483c823 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -8,7 +8,6 @@ from kern import kern from rbf import rbf as rbfpart from white import white as whitepart from linear import linear as linearpart -from linear_ARD import linear_ARD as linear_ARD_part from exponential import exponential as exponentialpart from Matern32 import Matern32 as Matern32part from Matern52 import Matern52 as Matern52part @@ -40,28 +39,17 @@ def rbf(D,variance=1., lengthscale=None,ARD=False): part = rbfpart(D,variance,lengthscale,ARD) return kern(D, [part]) -def linear(D,lengthscales=None): +def linear(D,variances=None,ARD=True): """ 
Construct a linear kernel. Arguments --------- D (int), obligatory - lengthscales (np.ndarray) + variances (np.ndarray) + ARD (boolean) """ - part = linearpart(D,lengthscales) - return kern(D, [part]) - -def linear_ARD(D,lengthscales=None): - """ - Construct a linear ARD kernel. - - Arguments - --------- - D (int), obligatory - lengthscales (np.ndarray) - """ - part = linear_ARD_part(D,lengthscales) + part = linearpart(D,variances,ARD) return kern(D, [part]) def white(D,variance=1.): diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 14c0ece3..4aaaa60d 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -1,124 +1,143 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) - from kernpart import kernpart import numpy as np + class linear(kernpart): """ Linear kernel + .. math:: + + k(x,y) = \sum_{i=1}^D \sigma^2_i x_iy_i + :param D: the number of input dimensions :type D: int - :param variance: variance - :type variance: None|float + :param variances: the vector of variances :math:`\sigma^2_i` + :type variances: np.ndarray of size (1,) or (D,) depending on ARD + :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension. + :type ARD: Boolean + :rtype: kernel object """ - def __init__(self, D, variance=None): + def __init__(self,D,variances=None,ARD=True): self.D = D - if variance is None: - variance = 1.0 - self.Nparam = 1 - self.name = 'linear' - self._set_params(variance) - self._Xcache, self._X2cache = np.empty(shape=(2,)) + self.ARD = ARD + if ARD == False: + self.Nparam = 1 + self.name = 'linear' + if variances is not None: + assert variances.shape == (1,) + else: + variances = np.ones(1) + self._Xcache, self._X2cache = np.empty(shape=(2,)) + else: + self.Nparam = self.D + self.name = 'linear_ARD' + if variances is not None: + assert variances.shape == (self.D,) + else: + variances = np.ones(self.D) + self._set_params(variances) def _get_params(self): - return self.variance + return self.variances def _set_params(self,x): - self.variance = x + assert x.size==(self.Nparam) + self.variances = x def _get_param_names(self): - return ['variance'] + if self.Nparam == 1: + return ['variance'] + else: + return ['variance_%i'%i for i in range(self.variances.size)] def K(self,X,X2,target): - self._K_computations(X, X2) - target += self.variance * self._dot_product - - def Kdiag(self,X,target): - np.add(target,np.sum(self.variance*np.square(X),-1),target) - - def dK_dtheta(self,partial,X,X2,target): - """ - Computes the derivatives wrt theta - Return shape is NxMx(Ntheta) - """ - self._K_computations(X, X2) - product = self._dot_product - # product = np.dot(X, X2.T) - target += np.sum(product*partial) - - def dK_dX(self,partial,X,X2,target): - target += self.variance * np.sum(partial[:,None,:]*X2.T[None,:,:],-1) - - def dKdiag_dtheta(self,partial,X,target): - target += np.sum(partial*np.square(X).sum(1)) + if self.ARD: + XX = X*np.sqrt(self.variances) + XX2 = X2*np.sqrt(self.variances) + target += np.dot(XX, XX2.T) + else: + self._K_computations(X, X2) + target += self.variances * self._dot_product def _K_computations(self,X,X2): - # (Nicolo) changed the logic here. If X2 is None, we want to cache - # (X,X). In practice X2 should always be passed. 
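# A note on the cache being reshuffled here: np.dot(X, X2.T) dominates the cost
# of K() for the linear kernel, so the inputs are memoised and the product is
# reused when K() is re-evaluated at unchanged X, X2 during optimisation. A
# minimal standalone sketch of the same pattern (_DotCache is an illustrative
# name, not GPy API):
import numpy as np

class _DotCache(object):
    def __init__(self):
        self._X = self._X2 = np.empty(shape=(1, 0))
        self._product = None
    def dot(self, X, X2):
        # shapes are compared first so np.all() never broadcasts a stale cache
        if (self._product is None or X.shape != self._X.shape
                or X2.shape != self._X2.shape
                or not (np.all(X == self._X) and np.all(X2 == self._X2))):
            self._X, self._X2 = X.copy(), X2.copy()
            self._product = np.dot(X, X2.T)
        return self._product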
if X2 is None: X2 = X if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)): self._Xcache = X self._X2cache = X2 - self._dot_product = np.dot(X,X2.T) + self._dot_product = np.dot(X,X2.T) else: # print "Cache hit!" pass # TODO: insert debug message here (logging framework) + def Kdiag(self,X,target): + np.add(target,np.sum(self.variances*np.square(X),-1),target) - # def psi0(self,Z,mu,S,target): - # expected = np.square(mu) + S - # np.add(target,np.sum(self.variance*expected),target) + def dK_dtheta(self,partial,X,X2,target): + if self.ARD: + product = X[:,None,:]*X2[None,:,:] + target += (partial[:,:,None]*product).sum(0).sum(0) + else: + self._K_computations(X, X2) + target += np.sum(self._dot_product*partial) - # def dpsi0_dtheta(self,Z,mu,S,target): - # expected = np.square(mu) + S - # return -2.*np.sum(expected,0) + def dK_dX(self,partial,X,X2,target): + target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0) - # def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): - # np.add(target_mu,2*mu*self.variances,target_mu) - # np.add(target_S,self.variances,target_S) + def psi0(self,Z,mu,S,target): + expected = np.square(mu) + S + target += np.sum(self.variances*expected) - # def dpsi0_dZ(self,Z,mu,S,target): - # pass + def dpsi0_dtheta(self,Z,mu,S,target): + expected = np.square(mu) + S + return -2.*np.sum(expected,0) - # def psi1(self,Z,mu,S,target): - # """the variance, it does nothing""" - # self.K(mu,Z,target) + def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + np.add(target_mu,2*mu*self.variances,target_mu) + np.add(target_S,self.variances,target_S) - # def dpsi1_dtheta(self,Z,mu,S,target): - # """the variance, it does nothing""" - # self.dK_dtheta(mu,Z,target) + def dpsi0_dZ(self,Z,mu,S,target): + pass - # def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): - # """Do nothing for S, it does not affect psi1""" - # np.add(target_mu,Z/self.variances2,target_mu) + def psi1(self,Z,mu,S,target): + """the variance, it does nothing""" + self.K(mu,Z,target) - # def dpsi1_dZ(self,Z,mu,S,target): - # self.dK_dX(mu,Z,target) + def dpsi1_dtheta(self,Z,mu,S,target): + """the variance, it does nothing""" + self.dK_dtheta(mu,Z,target) - # def psi2(self,Z,mu,S,target): - # """Think N,M,M,Q """ - # mu2_S = np.square(mu)+SN,Q, - # ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q - # psi2 = ZZ*np.square(self.variances)*mu2_S - # np.add(target, psi2.sum(-1),target) M,M + def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): + """Do nothing for S, it does not affect psi1""" + np.add(target_mu,Z/self.variances2,target_mu) - # def dpsi2_dtheta(self,Z,mu,S,target): - # mu2_S = np.square(mu)+SN,Q, - # ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q - # target += 2.*ZZ*mu2_S*self.variances + def dpsi1_dZ(self,Z,mu,S,target): + self.dK_dX(mu,Z,target) - # def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): - # """Think N,M,M,Q """ - # mu2_S = np.sum(np.square(mu)+S,0)Q, - # ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q - # tmp = ZZ*np.square(self.variances) M,M,Q - # np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) N,M,M,Q - # np.add(target_S, tmp, target_S) N,M,M,Q + def psi2(self,Z,mu,S,target): + """Think N,M,M,Q """ + mu2_S = np.square(mu)+S# N,Q, + ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + psi2 = ZZ*np.square(self.variances)*mu2_S + np.add(target, psi2.sum(-1),target) # M,M - # def dpsi2_dZ(self,Z,mu,S,target): - # mu2_S = np.sum(np.square(mu)+S,0)Q, - # target += Z[:,None,:]*np.square(self.variances)*mu2_S + def dpsi2_dtheta(self,Z,mu,S,target): + mu2_S = np.square(mu)+S# N,Q, + ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + target += 
2.*ZZ*mu2_S*self.variances + + def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + """Think N,M,M,Q """ + mu2_S = np.sum(np.square(mu)+S,0)# Q, + ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + tmp = ZZ*np.square(self.variances) # M,M,Q + np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q + np.add(target_S, tmp, target_S) #N,M,M,Q + + def dpsi2_dZ(self,Z,mu,S,target): + mu2_S = np.sum(np.square(mu)+S,0)# Q, + target += Z[:,None,:]*np.square(self.variances)*mu2_S diff --git a/GPy/kern/linear_ARD.py b/GPy/kern/linear_ARD.py deleted file mode 100644 index b9112044..00000000 --- a/GPy/kern/linear_ARD.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import kernpart -import numpy as np - -class linear_ARD(kernpart): - """ - Linear ARD kernel - - :param D: the number of input dimensions - :type D: int - :param variances: ARD variances - :type variances: None|np.ndarray - """ - - def __init__(self,D,variances=None): - self.D = D - if variances is not None: - assert variances.shape==(self.D,) - else: - variances = np.ones(self.D) - self.Nparam = int(self.D) - self.name = 'linear' - self._set_params(variances) - - def _get_params(self): - return self.variances - - def _set_params(self,x): - assert x.size==(self.Nparam) - self.variances = x - - def _get_param_names(self): - if self.D==1: - return ['variance'] - else: - return ['variance_%i'%i for i in range(self.variances.size)] - - def K(self,X,X2,target): - XX = X*np.sqrt(self.variances) - XX2 = X2*np.sqrt(self.variances) - target += np.dot(XX, XX2.T) - - def Kdiag(self,X,target): - np.add(target,np.sum(self.variances*np.square(X),-1),target) - - def dK_dtheta(self,partial,X,X2,target): - product = X[:,None,:]*X2[None,:,:] - target += (partial[:,:,None]*product).sum(0).sum(0) - - def dK_dX(self,partial,X,X2,target): - target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0) - - def psi0(self,Z,mu,S,target): - expected = np.square(mu) + S - np.add(target,np.sum(self.variances*expected),target) - - def dpsi0_dtheta(self,Z,mu,S,target): - expected = np.square(mu) + S - return -2.*np.sum(expected,0) - - def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): - np.add(target_mu,2*mu*self.variances,target_mu) - np.add(target_S,self.variances,target_S) - - def dpsi0_dZ(self,Z,mu,S,target): - pass - - def psi1(self,Z,mu,S,target): - """the variance, it does nothing""" - self.K(mu,Z,target) - - def dpsi1_dtheta(self,Z,mu,S,target): - """the variance, it does nothing""" - self.dK_dtheta(mu,Z,target) - - def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): - """Do nothing for S, it does not affect psi1""" - np.add(target_mu,Z/self.variances2,target_mu) - - def dpsi1_dZ(self,Z,mu,S,target): - self.dK_dX(mu,Z,target) - - def psi2(self,Z,mu,S,target): - """Think N,M,M,Q """ - mu2_S = np.square(mu)+S# N,Q, - ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q - psi2 = ZZ*np.square(self.variances)*mu2_S - np.add(target, psi2.sum(-1),target) # M,M - - def dpsi2_dtheta(self,Z,mu,S,target): - mu2_S = np.square(mu)+S# N,Q, - ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q - target += 2.*ZZ*mu2_S*self.variances - - def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): - """Think N,M,M,Q """ - mu2_S = np.sum(np.square(mu)+S,0)# Q, - ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q - tmp = ZZ*np.square(self.variances) # M,M,Q - np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q - np.add(target_S, tmp, target_S) #N,M,M,Q - - def dpsi2_dZ(self,Z,mu,S,target): - mu2_S = 
np.sum(np.square(mu)+S,0)# Q, - target += Z[:,None,:]*np.square(self.variances)*mu2_S diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst new file mode 100644 index 00000000..e1c795ed --- /dev/null +++ b/doc/tuto_GP_regression.rst @@ -0,0 +1,105 @@ + +************************************* +Gaussian process regression tutorial +************************************* + +We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process model, also known as a kriging model. + +We first import the libraries we will need: :: + + import pylab as pb + pb.ion() + import numpy as np + import GPy + +1 dimensional model +=================== + +For this toy example, we assume we have the following inputs and outputs:: + + X = np.random.uniform(-3.,3.,(20,1)) + Y = np.sin(X) + np.random.randn(20,1)*0.05 + +Note that the observations Y include some noise. + +The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential) plus some white noise:: + + Gaussian = GPy.kern.rbf(D=1) + noise = GPy.kern.white(D=1) + kernel = Gaussian + noise + +The parameter D stands for the dimension of the input space. Note that many other kernels are implemented such as: + +* linear (``GPy.kern.linear``) +* exponential kernel (``GPy.kern.exponential``) +* Matern 3/2 (``GPy.kern.Matern32``) +* Matern 5/2 (``GPy.kern.Matern52``) +* spline (``GPy.kern.spline``) +* and many others... + +The inputs required for building the model are the observations and the kernel:: + + m = GPy.models.GP_regression(X,Y,kernel) + +The functions ``print`` and ``plot`` can help us understand the model we have just build:: + + print m + m.plot() + +The default values of the kernel parameters may not be relevant for the current data (for example, the confidence intervals seems too wide on the previous figure). A common approach is find the values of the parameters that maximize the likelihood of the data. There are two steps for doing that with GPy: + +* Constrain the parameters of the kernel to ensure the kernel will always be a valid covariance structure (For example, we don\'t want some variances to be negative!). +* Run the optimization + +There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive:: + + m.constrain_positive('') + +but it is also possible to set a range on to constrain one parameter to be fixed. The parameter of ``m.constrain_positive`` is a regular expression that matches the name of the parameters to be constrained (as seen in ``print m``). For example, if we want the variance to be positive, the lengthscale to be in [1,10] and the noise variance to be fixed we can write:: + + #m.unconstrain('') # Required if the model has been previously constrained + m.constrain_positive('rbf_variance') + m.constrain_bounded('lengthscale',1.,10. 
) + m.constrain_fixed('white',0.0025) + +Once the constrains have bee imposed, the model can be optimized:: + + m.optimize() + +If we want to perform some restarts to try to improve the result of the optimization, we can use the optimize_restart function:: + + m.optimize_restarts(Nrestarts = 10) + m.plot() + print(m) + +2 dimensional example +===================== + +Here is a 2 dimensional example:: + + import pylab as pb + pb.ion() + import numpy as np + import GPy + + # sample inputs and outputs + X = np.random.uniform(-3.,3.,(50,2)) + Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05 + + # define kernel + ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2) + + # create simple GP model + m = GPy.models.GP_regression(X,Y,ker) + + # contrain all parameters to be positive + m.constrain_positive('') + + # optimize and plot + pb.figure() + m.optimize('tnc', max_f_eval = 1000) + + m.plot() + print(m) + +The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). From 062cd0bc0ad295b652c2fa387f1f5ae23b44d13c Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Tue, 29 Jan 2013 17:27:29 +0000 Subject: [PATCH 53/61] added links to readme --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 02d6af0e..1137b985 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ GPy - === -Gaussian processes framework in python \ No newline at end of file +A Gaussian processes framework in python. + +[Online documentation](https://gpy.readthedocs.org/en/latest/) +[Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) \ No newline at end of file From f815cb154e56f8577beb9478b1bf02f64673eb35 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Tue, 29 Jan 2013 17:28:04 +0000 Subject: [PATCH 54/61] added links to readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1137b985..c69352b8 100644 --- a/README.md +++ b/README.md @@ -3,5 +3,5 @@ GPy A Gaussian processes framework in python. -[Online documentation](https://gpy.readthedocs.org/en/latest/) -[Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) \ No newline at end of file +* [Online documentation](https://gpy.readthedocs.org/en/latest/) +* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) \ No newline at end of file From ce3e2e41f4d98aafc37dc02dfdf4800ccf0128b4 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 30 Jan 2013 15:14:54 +0000 Subject: [PATCH 55/61] trying to get psi2 cross terms to work --- GPy/kern/kern.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 89c02bc2..393554bf 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -284,6 +284,8 @@ class kern(parameterised): # 1. get all the psi1 statistics psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts] [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)] + partial1 = np.zeros_like(partial1) + # 2. 
get all the dpsi1/dtheta gradients psi1_gradients = [np.zeros(self.Nparam) for p in self.parts] [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)] @@ -292,7 +294,7 @@ class kern(parameterised): for a,b in itertools.combinations(range(len(psi1_matrices)), 2): gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0) - target += 0#(gne[None] + gne[:, None]).sum(0) + target += (gne[None] + gne[:, None]).sum(0) return target def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): From 079a20620a22b2a3b3aa678069224d07194d8dae Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 30 Jan 2013 15:32:19 +0000 Subject: [PATCH 56/61] removed imports from __init__.py --- GPy/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index ce7a0d0a..9d5b1d00 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -3,7 +3,7 @@ from GP_regression import GP_regression -from sparse_GP_regression import sparse_GP_regression, sgp_debugB, sgp_debugC, sgp_debugE +from sparse_GP_regression import sparse_GP_regression from GPLVM import GPLVM from warped_GP import warpedGP from GP_EP import GP_EP From 2493e2d336ea3a93a105565579932c008fdae8f0 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 30 Jan 2013 15:51:36 +0000 Subject: [PATCH 57/61] integrated sparse GP regression and BGPLVM classes --- GPy/kern/rbf.py | 2 +- GPy/models/BGPLVM.py | 14 ++++++------- GPy/models/sparse_GP_regression.py | 32 ++++++++++++++++-------------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 208a9209..a7c52180 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -32,7 +32,7 @@ class rbf(kernpart): def __init__(self,D,variance=1.,lengthscale=None,ARD=False): self.D = D self.ARD = ARD - if ARD == False: + if not ARD: self.Nparam = 2 self.name = 'rbf' if lengthscale is not None: diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py index 3fc257e9..05dad318 100644 --- a/GPy/models/BGPLVM.py +++ b/GPy/models/BGPLVM.py @@ -25,12 +25,12 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): S = np.ones_like(X) * 1e-2# sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs) - def get_param_names(self): + def _get_param_names(self): X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) - return (X_names + S_names + sparse_GP_regression.get_param_names(self)) + return (X_names + S_names + sparse_GP_regression._get_param_names(self)) - def get_param(self): + def _get_params(self): """ Horizontally stacks the parameters in order to present them to the optimizer. 
The resulting 1-D array has this structure: @@ -40,13 +40,13 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): =============================================================== """ - return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self))) + return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self))) - def set_param(self,x): + def _set_params(self,x): N, Q = self.N, self.Q self.X = x[:self.X.size].reshape(N,Q).copy() self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() - sparse_GP_regression.set_param(self, x[(2*N*Q):]) + sparse_GP_regression._set_params(self, x[(2*N*Q):]) def dL_dmuS(self): dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty) @@ -57,6 +57,6 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): return np.hstack((dL_dmu.flatten(), dL_dS.flatten())) - def log_likelihood_gradients(self): + def _log_likelihood_gradients(self): return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self))) diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index 9d9649ad..07ce4d97 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -107,6 +107,20 @@ class sparse_GP_regression(GP_regression): self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD + + def _set_params(self, p): + self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) + self.beta = p[self.M*self.Q] + self.kern._set_params(p[self.Z.size + 1:]) + self._computations() + + def _get_params(self): + return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()]) + + def _get_param_names(self): + return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed() + + def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ sf2 = self.scale_factor**2 @@ -116,18 +130,9 @@ class sparse_GP_regression(GP_regression): D = +0.5*np.sum(self.psi1VVpsi1 * self.C) return A+B+C+D - def set_param(self, p): - self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) - self.beta = p[self.M*self.Q] - self.kern.set_param(p[self.Z.size + 1:]) - self._computations() - - def get_param(self): - return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()]) - - def get_param_names(self): - return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names() - + def _log_likelihood_gradients(self): + return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()]) + def dL_dbeta(self): """ Compute the gradient of the log likelihood wrt beta. 
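Refactorings like this one — renaming the parameter hooks and regrouping the gradients so that _log_likelihood_gradients() stacks [dL_dZ, dL_dbeta, dL_dtheta] in the same order as _get_params() — are easy to get subtly wrong, and the checkgrad() calls used throughout this series are the guard. Underneath, checkgrad is essentially a central finite-difference comparison; a sketch of the idea, assuming a model exposing the hooks above with parameters and gradients in matching order and parameterisation::

    import numpy as np

    def finite_diff_check(m, eps=1e-6, rtol=1e-3):
        p0 = m._get_params().copy()
        analytic = m._log_likelihood_gradients()
        numeric = np.empty_like(p0)
        for i in range(p0.size):
            p_hi, p_lo = p0.copy(), p0.copy()
            p_hi[i] += eps
            p_lo[i] -= eps
            m._set_params(p_hi); f_hi = m.log_likelihood()
            m._set_params(p_lo); f_lo = m.log_likelihood()
            numeric[i] = (f_hi - f_lo) / (2. * eps)
        m._set_params(p0)  # restore the model's state
        return np.allclose(analytic, numeric, rtol=rtol)
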
@@ -172,9 +177,6 @@ class sparse_GP_regression(GP_regression): dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) return dL_dZ - def log_likelihood_gradients(self): - return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()]) - def _raw_predict(self, Xnew, slices, full_cov=False): """Internal helper function for making predictions, does not account for normalisation""" From 6cde99c4866adb4df3ce2c61fae4106d4de6d020 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 30 Jan 2013 15:52:49 +0000 Subject: [PATCH 58/61] now skipping FITC test --- GPy/testing/unit_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index c5db80bd..a302b25f 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -166,6 +166,7 @@ class GradientTests(unittest.TestCase): m.approximate_likelihood() self.assertTrue(m.checkgrad()) + @unittest.skip("FITC will be broken for a while") def test_generalized_FITC(self): N = 20 X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] From be5b775729df10645df1d40079c1c82869ff4ce4 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 30 Jan 2013 15:54:47 +0000 Subject: [PATCH 59/61] added BGPLVM demo (not working yet) --- GPy/examples/BGPLVM_demo.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 GPy/examples/BGPLVM_demo.py diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py new file mode 100644 index 00000000..08e994f8 --- /dev/null +++ b/GPy/examples/BGPLVM_demo.py @@ -0,0 +1,33 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import pylab as pb +import GPy +np.random.seed(123344) + +N = 10 +M = 3 +Q = 2 +D = 3 +#generate GPLVM-like data +X = np.random.rand(N, Q) +k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) +K = k.K(X) +Y = np.random.multivariate_normal(np.zeros(N),K,D).T + +k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) +# k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q, 0.00001) +m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) +m.constrain_positive('(rbf|bias|noise|white|S)') +# m.constrain_fixed('S', 1) + +# pb.figure() +# m.plot() +# pb.title('PCA initialisation') +# pb.figure() +# m.optimize(messages = 1) +# m.plot() +# pb.title('After optimisation') +m.randomize() +m.checkgrad(verbose = 1) From 8a5f075ef033d227ac6e2035db191a77bb7e7166 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Wed, 30 Jan 2013 16:27:45 +0000 Subject: [PATCH 60/61] RBF (both ARD and non-ARD) kernels working nicely with psi statistics --- GPy/examples/BGPLVM_demo.py | 10 +++--- GPy/kern/rbf.py | 64 ++++++++++++++++++++++++------------- GPy/models/BGPLVM.py | 2 +- 3 files changed, 47 insertions(+), 29 deletions(-) diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py index 08e994f8..02092dbf 100644 --- a/GPy/examples/BGPLVM_demo.py +++ b/GPy/examples/BGPLVM_demo.py @@ -7,17 +7,17 @@ import GPy np.random.seed(123344) N = 10 -M = 3 -Q = 2 -D = 3 +M = 5 +Q = 3 +D = 4 #generate GPLVM-like data X = np.random.rand(N, Q) k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T -k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) -# k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q, 0.00001) +# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) +k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) m = 
GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) m.constrain_positive('(rbf|bias|noise|white|S)') # m.constrain_fixed('S', 1) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index a7c52180..ea76e4f5 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -38,7 +38,7 @@ class rbf(kernpart): if lengthscale is not None: assert lengthscale.shape == (1,) else: - lengthscale = np.ones(1) + lengthscale = np.ones(1) else: self.Nparam = self.D + 1 self.name = 'rbf_ARD' @@ -47,7 +47,7 @@ class rbf(kernpart): else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale))) #initialize cache self._Z, self._mu, self._S = np.empty(shape=(3,1)) @@ -69,7 +69,7 @@ class rbf(kernpart): if self.Nparam == 2: return ['variance','lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] def K(self,X,X2,target): if X2 is None: @@ -103,17 +103,10 @@ class rbf(kernpart): def dKdiag_dX(self,partial,X,target): pass - def _K_computations(self,X,X2): - if not (np.all(X==self._X) and np.all(X2==self._X2)): - self._X = X - self._X2 = X2 - if X2 is None: X2 = X - self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy - self._params = np.empty(shape=(1,0)) #ensure the next section gets called - if not np.all(self._params == self._get_params()): - self._params == self._get_params() - self._K_dist2 = np.square(self._K_dist/self.lengthscale) - self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) + + #---------------------------------------# + # PSI statistics # + #---------------------------------------# def psi0(self,Z,mu,S,target): target += self.variance @@ -133,7 +126,11 @@ class rbf(kernpart): denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:]) d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv) target[0] += np.sum(partial*self._psi1/self.variance) - target[1] += np.sum(d_length*partial[:,:,None]) + dpsi1_dlength = d_length*partial[:,:,None] + if not self.ARD: + target[1] += dpsi1_dlength.sum() + else: + target[1:] += dpsi1_dlength.sum(0).sum(0) def dpsi1_dZ(self,partial,Z,mu,S,target): self._psi_computations(Z,mu,S) @@ -149,30 +146,52 @@ class rbf(kernpart): def psi2(self,Z,mu,S,target): self._psi_computations(Z,mu,S) - target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. 
noise) + target += self._psi2 def dpsi2_dtheta(self,partial,Z,mu,S,target): """Shape N,M,M,Ntheta""" self._psi_computations(Z,mu,S) - d_var = np.sum(2.*self._psi2/self.variance,0) + d_var = 2.*self._psi2/self.variance d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) d_length = d_length.sum(0) target[0] += np.sum(partial*d_var) - target[1] += np.sum(d_length*partial[:,:,None]) + dpsi2_dlength = d_length*partial[:,:,:,None] + if not self.ARD: + target[1] += dpsi2_dlength.sum() + else: + target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0) def dpsi2_dZ(self,partial,Z,mu,S,target): self._psi_computations(Z,mu,S) term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q - dZ = self._psi2[:,:,:,None] * (term1[None] + term2) - target += (partial[None,:,:,None]*dZ).sum(0).sum(0) + dZ = self._psi2[:,:,:,None] * (term1[None] + term2) + target += (partial[:,:,:,None]*dZ).sum(0).sum(0) # <----------------- TODO not sure about the first ':' here, should be a None (WAS a none in the debug branch) + def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): """Think N,M,M,Q """ self._psi_computations(Z,mu,S) tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom - target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) - target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) + target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) + target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) + + + #---------------------------------------# + # Precomputations # + #---------------------------------------# + + def _K_computations(self,X,X2): + if not (np.all(X==self._X) and np.all(X2==self._X2)): + self._X = X + self._X2 = X2 + if X2 is None: X2 = X + self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy + self._params = np.empty(shape=(1,0)) #ensure the next section gets called + if not np.all(self._params == self._get_params()): + self._params == self._get_params() + self._K_dist2 = np.square(self._K_dist/self.lengthscale) + self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) def _psi_computations(self,Z,mu,S): #here are the "statistics" for psi1 and psi2 @@ -202,4 +221,3 @@ class rbf(kernpart): self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M self._Z, self._mu, self._S = Z, mu,S - diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py index 05dad318..db147944 100644 --- a/GPy/models/BGPLVM.py +++ b/GPy/models/BGPLVM.py @@ -58,5 +58,5 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): return np.hstack((dL_dmu.flatten(), dL_dS.flatten())) def _log_likelihood_gradients(self): - return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self))) + return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self))) From 0224e700f403aa875dd298ed66506c16ae0756ea Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Thu, 31 Jan 2013 09:54:27 +0000 Subject: [PATCH 61/61] first broken port of the psi stats to the linear kernel --- GPy/examples/BGPLVM_demo.py | 2 +- GPy/kern/linear.py | 75 +++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py index 02092dbf..a5912462 100644 --- a/GPy/examples/BGPLVM_demo.py +++ 
From 0224e700f403aa875dd298ed66506c16ae0756ea Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Thu, 31 Jan 2013 09:54:27 +0000
Subject: [PATCH 61/61] first broken port of the psi stats to the linear
 kernel

---
 GPy/examples/BGPLVM_demo.py |  2 +-
 GPy/kern/linear.py          | 75 +++++++++++++++++++++----------------
 2 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py
index 02092dbf..a5912462 100644
--- a/GPy/examples/BGPLVM_demo.py
+++ b/GPy/examples/BGPLVM_demo.py
@@ -17,7 +17,7 @@
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T
 
 # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.linear(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
 m.constrain_positive('(rbf|bias|noise|white|S)')
 # m.constrain_fixed('S', 1)
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 4aaaa60d..89e87314 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -47,6 +47,7 @@ class linear(kernpart):
     def _set_params(self,x):
         assert x.size==(self.Nparam)
         self.variances = x
+        self.variances2 = np.square(self.variances)
 
     def _get_param_names(self):
         if self.Nparam == 1:
@@ -63,17 +64,6 @@
         self._K_computations(X, X2)
         target += self.variances * self._dot_product
 
-    def _K_computations(self,X,X2):
-        if X2 is None:
-            X2 = X
-        if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
-            self._Xcache = X
-            self._X2cache = X2
-            self._dot_product = np.dot(X,X2.T)
-        else:
-            # print "Cache hit!"
-            pass # TODO: insert debug message here (logging framework)
-
     def Kdiag(self,X,target):
         np.add(target,np.sum(self.variances*np.square(X),-1),target)
@@ -88,17 +78,21 @@
     def dK_dX(self,partial,X,X2,target):
         target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
 
+    #---------------------------------------#
+    #            PSI statistics             #
+    #---------------------------------------#
+
     def psi0(self,Z,mu,S,target):
         expected = np.square(mu) + S
         target += np.sum(self.variances*expected)
 
-    def dpsi0_dtheta(self,Z,mu,S,target):
+    def dpsi0_dtheta(self,partial,Z,mu,S,target):
         expected = np.square(mu) + S
-        return -2.*np.sum(expected,0)
+        target += (partial[:, None] * (-2.*np.sum(expected,0))).sum()
 
-    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
-        np.add(target_mu,2*mu*self.variances,target_mu)
-        np.add(target_S,self.variances,target_S)
+    def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S):
+        target_mu += partial[:, None] * (2*mu*self.variances)
+        target_S += partial[:, None] * self.variances
 
     def dpsi0_dZ(self,Z,mu,S,target):
         pass
@@ -107,37 +101,54 @@
         """the variance, it does nothing"""
         self.K(mu,Z,target)
 
-    def dpsi1_dtheta(self,Z,mu,S,target):
+    def dpsi1_dtheta(self,partial,Z,mu,S,target):
         """the variance, it does nothing"""
-        self.dK_dtheta(mu,Z,target)
+        self.dK_dtheta(partial,mu,Z,target)
 
-    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+    def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Do nothing for S, it does not affect psi1"""
-        np.add(target_mu,Z/self.variances2,target_mu)
+        target_mu += (partial.T[:,:, None]*(Z/self.variances)).sum(1)
 
-    def dpsi1_dZ(self,Z,mu,S,target):
-        self.dK_dX(mu,Z,target)
+    def dpsi1_dZ(self,partial,Z,mu,S,target):
+        self.dK_dX(partial.T,Z,mu,target)
 
     def psi2(self,Z,mu,S,target):
-        """Think N,M,M,Q """
+        """
+        returns N,M,M matrix
+        """
         mu2_S = np.square(mu)+S# N,Q,
         ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-        psi2 = ZZ*np.square(self.variances)*mu2_S
-        np.add(target, psi2.sum(-1),target) # M,M
+        psi2 = ZZ*np.square(self.variances)*mu2_S[:, None, None, :]
+        target += psi2.sum(-1)
 
-    def dpsi2_dtheta(self,Z,mu,S,target):
+    def dpsi2_dtheta(self,partial,Z,mu,S,target):
         mu2_S = np.square(mu)+S# N,Q,
         ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-        target += 2.*ZZ*mu2_S*self.variances
+        target += (partial[:,:,:,None]*(2.*ZZ*mu2_S[:,None,None,:]*self.variances)).sum()
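# --- Reference for debugging the port above: with k(x,z) = sum_q v_q x_q z_q
# --- and x_n ~ N(mu_n, diag(S_n)), writing w_m = v * Z_m gives
# ---   psi2[n,m,mp] = E[k(x_n,Z_m) k(x_n,Z_mp)]
# ---               = (w_m . mu_n)(w_mp . mu_n) + sum_q w_mq w_mpq S_nq,
# --- which carries cross-dimension terms that a purely elementwise
# --- ZZ * variances**2 * (mu**2 + S) sum does not (consistent with the
# --- commit's "broken" caveat). `reference_psi2_linear` is a hypothetical
# --- standalone helper, not GPy API.
import numpy as np

def reference_psi2_linear(variances, Z, mu, S):
    W = variances*Z                                  # (M,Q): w_m = v * z_m, elementwise
    m = mu.dot(W.T)                                  # (N,M): w_m . mu_n
    mean_part = m[:,:,None]*m[:,None,:]              # (N,M,M): outer product of the means
    cov_part = np.einsum('nq,mq,pq->nmp', S, W, W)   # (N,M,M): diagonal-covariance term
    return mean_part + cov_part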
-    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         mu2_S = np.sum(np.square(mu)+S,0)# Q,
         ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
         tmp = ZZ*np.square(self.variances) # M,M,Q
-        np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q
-        np.add(target_S, tmp, target_S) #N,M,M,Q
+        target_mu += (partial[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
+        target_S += (partial[:,:,:,None]*tmp).sum(1).sum(1)
 
-    def dpsi2_dZ(self,Z,mu,S,target):
+    def dpsi2_dZ(self,partial,Z,mu,S,target):
         mu2_S = np.sum(np.square(mu)+S,0)# Q,
-        target += Z[:,None,:]*np.square(self.variances)*mu2_S
+        target += (partial[:,:,:,None]* (Z * mu2_S * np.square(self.variances))).sum(0).sum(0)
+
+    #---------------------------------------#
+    #            Precomputations            #
+    #---------------------------------------#
+
+    def _K_computations(self,X,X2):
+        if X2 is None:
+            X2 = X
+        if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
+            self._Xcache = X
+            self._X2cache = X2
+            self._dot_product = np.dot(X,X2.T)
+        else:
+            # print "Cache hit!"
+            pass # TODO: insert debug message here (logging framework)
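Since this port is flagged as broken, a cheap way to pin down which statistic is at fault is a Monte Carlo cross-check: draw x_n ~ N(mu_n, diag(S_n)), evaluate the kernel exactly, and average. Below is a rough sketch against the kernpart-style interface used throughout, where `K` and `Kdiag` accumulate into preallocated targets; `mc_psi_stats` is illustrative, not part of GPy:

import numpy as np

def mc_psi_stats(kern, Z, mu, S, n_samples=20000, seed=0):
    """Monte Carlo estimates of psi0[n], psi1[n,m] and psi2[n,m,mp]."""
    rng = np.random.RandomState(seed)
    N, Q = mu.shape
    M = Z.shape[0]
    psi0 = np.zeros(N)
    psi1 = np.zeros((N, M))
    psi2 = np.zeros((N, M, M))
    for _ in range(n_samples):
        X = mu + np.sqrt(S)*rng.randn(N, Q)   # one draw of every latent point
        KXZ = np.zeros((N, M))
        kern.K(X, Z, KXZ)                     # kernpart.K accumulates into its target
        Kdiag = np.zeros(N)
        kern.Kdiag(X, Kdiag)
        psi0 += Kdiag
        psi1 += KXZ
        psi2 += KXZ[:,:,None]*KXZ[:,None,:]
    return psi0/n_samples, psi1/n_samples, psi2/n_samples

Comparing these estimates against the closed-form psi0/psi1/psi2 above (with tolerances loosened for Monte Carlo noise) localises an error to a single statistic before any gradient code needs touching.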