From d9f895603b437f607db062fe5abf48708e81ea17 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Fri, 20 Sep 2013 14:20:58 +0100
Subject: [PATCH 01/17] Add eq_ode1 covariance.

---
 GPy/kern/constructors.py        | 45 ++++++++++++++++++++-----
 GPy/kern/parts/__init__.py      |  3 +-
 GPy/kern/parts/coregionalize.py | 58 ++++++++++++++++-----------------
 3 files changed, 67 insertions(+), 39 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 8a334278..f9d7a99a 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -140,6 +140,33 @@ def white(input_dim,variance=1.):
     part = parts.white.White(input_dim,variance)
     return kern(input_dim, [part])
 
+def eq_ode1(output_dim, W=None, rank=1,  kappa=None, length_scale=1., decay=None, delay=None):
+    """Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
+
+    This outputs of this kernel have the form
+    .. math::
+       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
+
+    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
+    
+    :param output_dim: number of outputs driven by latent function.
+    :type output_dim: int
+    :param W: sensitivities of each output to the latent driving function. 
+    :type W: ndarray (output_dim x rank).
+    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
+    :type rank: int
+    :param decay: decay rates for the first order system. 
+    :type decay: array of length output_dim.
+    :param delay: delay between latent force and output response.
+    :type delay: array of length output_dim.
+    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
+    :type kappa: array of length output_dim.
+    
+    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
+    """
+    part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
+    return kern(2, [part])
+
 
 def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
     """
@@ -346,7 +373,7 @@ def symmetric(k):
     k_.parts = [symmetric.Symmetric(p) for p in k.parts]
     return k_
 
-def coregionalize(num_outputs,W_columns=1, W=None, kappa=None):
+def coregionalize(output_dim,W_columns=1, W=None, kappa=None):
     """
     Coregionlization matrix B, of the form:
     .. math::
@@ -358,18 +385,18 @@ def coregionalize(num_outputs,W_columns=1, W=None, kappa=None):
 
     it is obtainded as the tensor product between a kernel k(x,y) and B.
 
-    :param num_outputs: the number of outputs to corregionalize
-    :type num_outputs: int
+    :param output_dim: the number of outputs to corregionalize
+    :type output_dim: int
     :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
     :type W_colunns: int
     :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
     :type W: numpy array of dimensionality (num_outpus, W_columns)
     :param kappa: a vector which allows the outputs to behave independently
-    :type kappa: numpy array of dimensionality  (num_outputs,)
+    :type kappa: numpy array of dimensionality  (output_dim,)
     :rtype: kernel object
 
     """
-    p = parts.coregionalize.Coregionalize(num_outputs,W_columns,W,kappa)
+    p = parts.coregionalize.Coregionalize(output_dim,W_columns,W,kappa)
     return kern(1,[p])
 
 
@@ -429,12 +456,12 @@ def hierarchical(k):
     _parts = [parts.hierarchical.Hierarchical(k.parts)]
     return kern(k.input_dim+len(k.parts),_parts)
 
-def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa=None):
+def build_lcm(input_dim, output_dim, kernel_list = [], W_columns=1,W=None,kappa=None):
     """
     Builds a kernel of a linear coregionalization model
 
     :input_dim: Input dimensionality
-    :num_outputs: Number of outputs
+    :output_dim: Number of outputs
     :kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different corregionalization matrix
     :type kernel_list: list of GPy kernels
     :param W_columns: number tuples of the corregionalization parameters 'coregion_W'
@@ -448,11 +475,11 @@ def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa
             k.input_dim = input_dim
             warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
 
-    k_coreg = coregionalize(num_outputs,W_columns,W,kappa)
+    k_coreg = coregionalize(output_dim,W_columns,W,kappa)
     kernel = kernel_list[0]**k_coreg.copy()
 
     for k in kernel_list[1:]:
-        k_coreg = coregionalize(num_outputs,W_columns,W,kappa)
+        k_coreg = coregionalize(output_dim,W_columns,W,kappa)
         kernel += k**k_coreg.copy()
 
     return kernel
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 643483f5..fff24d04 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -2,10 +2,11 @@ import bias
 import Brownian
 import coregionalize
 import exponential
+import eq_ode1
 import finite_dimensional
 import fixed
 import gibbs
-#import hetero #hetero.py is not commited: omitting for now. JH. 
+import hetero #hetero.py is not commited: omitting for now. JH. 
 import hierarchical
 import independent_outputs
 import linear
diff --git a/GPy/kern/parts/coregionalize.py b/GPy/kern/parts/coregionalize.py
index 4dc8d909..8e960038 100644
--- a/GPy/kern/parts/coregionalize.py
+++ b/GPy/kern/parts/coregionalize.py
@@ -13,7 +13,7 @@ class Coregionalize(Kernpart):
 
     This covariance has the form
     .. math::
-       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)
 
     An intrinsic/linear coregionalization covariance function of the form
     .. math::
@@ -22,33 +22,33 @@ class Coregionalize(Kernpart):
     it is obtained as the tensor product between a covariance function
     k(x,y) and B.
 
-    :param num_outputs: number of outputs to coregionalize
-    :type num_outputs: int
+    :param output_dim: number of outputs to coregionalize
+    :type output_dim: int
     :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
     :type W_colunns: int
     :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
     :type W: numpy array of dimensionality (num_outpus, W_columns)
     :param kappa: a vector which allows the outputs to behave independently
-    :type kappa: numpy array of dimensionality  (num_outputs,)
+    :type kappa: numpy array of dimensionality  (output_dim,)
 
     .. Note: see coregionalization examples in GPy.examples.regression for some usage.
     """
-    def __init__(self,num_outputs,W_columns=1, W=None, kappa=None):
+    def __init__(self, output_dim, rank=1, W=None, kappa=None):
         self.input_dim = 1
         self.name = 'coregion'
-        self.num_outputs = num_outputs
-        self.W_columns = W_columns
+        self.output_dim = output_dim
+        self.rank = rank
         if W is None:
-            self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns)
+            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
         else:
-            assert W.shape==(self.num_outputs,self.W_columns)
+            assert W.shape==(self.output_dim,self.rank)
             self.W = W
         if kappa is None:
-            kappa = 0.5*np.ones(self.num_outputs)
+            kappa = 0.5*np.ones(self.output_dim)
         else:
-            assert kappa.shape==(self.num_outputs,)
+            assert kappa.shape==(self.output_dim,)
         self.kappa = kappa
-        self.num_params = self.num_outputs*(self.W_columns + 1)
+        self.num_params = self.output_dim*(self.rank + 1)
         self._set_params(np.hstack([self.W.flatten(),self.kappa]))
 
     def _get_params(self):
@@ -56,12 +56,12 @@ class Coregionalize(Kernpart):
 
     def _set_params(self,x):
         assert x.size == self.num_params
-        self.kappa = x[-self.num_outputs:]
-        self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
+        self.kappa = x[-self.output_dim:]
+        self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
         self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
 
     def _get_param_names(self):
-        return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]
+        return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]
 
     def K(self,index,index2,target):
         index = np.asarray(index,dtype=np.int)
@@ -79,26 +79,26 @@ class Coregionalize(Kernpart):
         if index2 is None:
             code="""
             for(int i=0;i<N; i++){
-              target[i+i*N] += B[index[i]+num_outputs*index[i]];
+              target[i+i*N] += B[index[i]+output_dim*index[i]];
               for(int j=0; j<i; j++){
-                  target[j+i*N] += B[index[i]+num_outputs*index[j]];
+                  target[j+i*N] += B[index[i]+output_dim*index[j]];
                   target[i+j*N] += target[j+i*N];
                 }
               }
             """
-            N,B,num_outputs = index.size, self.B, self.num_outputs
-            weave.inline(code,['target','index','N','B','num_outputs'])
+            N,B,output_dim = index.size, self.B, self.output_dim
+            weave.inline(code,['target','index','N','B','output_dim'])
         else:
             index2 = np.asarray(index2,dtype=np.int)
             code="""
             for(int i=0;i<num_inducing; i++){
               for(int j=0; j<N; j++){
-                  target[i+j*num_inducing] += B[num_outputs*index[j]+index2[i]];
+                  target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
                 }
               }
             """
-            N,num_inducing,B,num_outputs = index.size,index2.size, self.B, self.num_outputs
-            weave.inline(code,['target','index','index2','N','num_inducing','B','num_outputs'])
+            N,num_inducing,B,output_dim = index.size,index2.size, self.B, self.output_dim
+            weave.inline(code,['target','index','index2','N','num_inducing','B','output_dim'])
 
 
     def Kdiag(self,index,target):
@@ -115,12 +115,12 @@ class Coregionalize(Kernpart):
         code="""
         for(int i=0; i<num_inducing; i++){
           for(int j=0; j<N; j++){
-            dL_dK_small[index[j] + num_outputs*index2[i]] += dL_dK[i+j*num_inducing];
+            dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
           }
         }
         """
-        N, num_inducing, num_outputs = index.size, index2.size, self.num_outputs
-        weave.inline(code, ['N','num_inducing','num_outputs','dL_dK','dL_dK_small','index','index2'])
+        N, num_inducing, output_dim = index.size, index2.size, self.output_dim
+        weave.inline(code, ['N','num_inducing','output_dim','dL_dK','dL_dK_small','index','index2'])
 
         dkappa = np.diag(dL_dK_small)
         dL_dK_small += dL_dK_small.T
@@ -137,8 +137,8 @@ class Coregionalize(Kernpart):
         ii,jj = ii.T, jj.T
 
         dL_dK_small = np.zeros_like(self.B)
-        for i in range(self.num_outputs):
-            for j in range(self.num_outputs):
+        for i in range(self.output_dim):
+            for j in range(self.output_dim):
                 tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
                 dL_dK_small[i,j] = tmp
 
@@ -150,8 +150,8 @@ class Coregionalize(Kernpart):
 
     def dKdiag_dtheta(self,dL_dKdiag,index,target):
         index = np.asarray(index,dtype=np.int).flatten()
-        dL_dKdiag_small = np.zeros(self.num_outputs)
-        for i in range(self.num_outputs):
+        dL_dKdiag_small = np.zeros(self.output_dim)
+        for i in range(self.output_dim):
             dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
         dW = 2.*self.W*dL_dKdiag_small[:,None]
         dkappa = dL_dKdiag_small

From 7f2472fa22acfa52329c9054f95286623de1b493 Mon Sep 17 00:00:00 2001
From: James McMurray <jamesmcm@Hostname>
Date: Fri, 20 Sep 2013 17:43:56 +0100
Subject: [PATCH 02/17] Fixed more errors in docs

---
 doc/tuto_interacting_with_models.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 342fb24f..5bd0511e 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -20,7 +20,7 @@ All of the examples included in GPy return an instance
 of a model class, and therefore they can be called in 
 the following way: ::
 
-	import numpy as np
+    import numpy as np
     import pylab as pb
     pb.ion()
     import GPy

From aa5ebcbd8fc18f71f161bb1d576c040e4d01ebf0 Mon Sep 17 00:00:00 2001
From: James McMurray <jamesmcm@Hostname>
Date: Fri, 20 Sep 2013 17:46:23 +0100
Subject: [PATCH 03/17] Fixed more errors in docs 2

---
 GPy/core/gp.py                | 20 +++++++++----------
 GPy/core/model.py             |  6 ++++--
 GPy/core/sparse_gp.py         |  3 +--
 GPy/inference/sgd.py          |  9 ++++-----
 GPy/kern/constructors.py      | 37 ++++++++++++++++++-----------------
 GPy/kern/kern.py              |  3 ++-
 GPy/likelihoods/ep.py         |  2 ++
 GPy/likelihoods/likelihood.py | 18 +++++++++--------
 GPy/models/bayesian_gplvm.py  |  7 ++++---
 GPy/util/datasets.py          | 13 +++++++++---
 GPy/util/linalg.py            |  2 +-
 GPy/util/misc.py              | 10 ++++------
 GPy/util/mocap.py             | 13 ++++++------
 GPy/util/visualize.py         | 13 +++++++-----
 GPy/util/warping_functions.py | 27 +++++++++++++------------
 15 files changed, 99 insertions(+), 84 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 2d826ac2..a3ef6c80 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -15,7 +15,7 @@ class GP(GPBase):
 
     :param X: input observations
     :param kernel: a GPy kernel, defaults to rbf+white
-    :parm likelihood: a GPy likelihood
+    :param likelihood: a GPy likelihood
     :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_X: False|True
     :rtype: model object
@@ -132,17 +132,16 @@ class GP(GPBase):
     def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
         """
         Predict the function(s) at the new point(s) Xnew.
-        Arguments
-        ---------
+
         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.input_dim
         :param which_parts:  specifies which outputs kernel(s) to use in prediction
         :type which_parts: ('all', list of bools)
         :param full_cov: whether to return the full covariance matrix, or just the diagonal
         :type full_cov: bool
-        :rtype: posterior mean,  a Numpy array, Nnew x self.input_dim
-        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
+        :returns: mean: posterior mean,  a Numpy array, Nnew x self.input_dim
+        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
 
 
            If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
@@ -160,8 +159,7 @@ class GP(GPBase):
     def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
         """
         For a specific output, predict the function at the new point(s) Xnew.
-        Arguments
-        ---------
+
         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.input_dim
         :param output: output to predict
@@ -170,9 +168,9 @@ class GP(GPBase):
         :type which_parts: ('all', list of bools)
         :param full_cov: whether to return the full covariance matrix, or just the diagonal
         :type full_cov: bool
-        :rtype: posterior mean,  a Numpy array, Nnew x self.input_dim
-        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
+        :returns: posterior mean,  a Numpy array, Nnew x self.input_dim
+        :returns: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
 
         .. Note:: For multiple output models only
         """
diff --git a/GPy/core/model.py b/GPy/core/model.py
index cb13378c..7aff8f4d 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -47,6 +47,7 @@ class Model(Parameterized):
 
         :param state: the state of the model.
         :type state: list as returned from getstate.
+
         """
         self.preferred_optimizer = state.pop()
         self.sampling_runs = state.pop()
@@ -543,10 +544,11 @@ class Model(Parameterized):
         """
         EM - like algorithm  for Expectation Propagation and Laplace approximation
 
-        :stop_crit: convergence criterion
+        :param stop_crit: convergence criterion
         :type stop_crit: float
 
-        ..Note: kwargs are passed to update_likelihood and optimize functions.        """
+        .. Note: kwargs are passed to update_likelihood and optimize functions. 
+        """
         assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
         ll_change = stop_crit + 1.
         iteration = 0
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index f6a7b885..88bd36e6 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -393,8 +393,7 @@ class SparseGP(GPBase):
     def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
         """
         For a specific output, predict the function at the new point(s) Xnew.
-        Arguments
-        ---------
+
         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.input_dim
         :param output: output to predict
diff --git a/GPy/inference/sgd.py b/GPy/inference/sgd.py
index e443f45a..5cd144e8 100644
--- a/GPy/inference/sgd.py
+++ b/GPy/inference/sgd.py
@@ -10,11 +10,10 @@ class opt_SGD(Optimizer):
     """
     Optimize using stochastic gradient descent.
 
-    *** Parameters ***
-    Model: reference to the Model object
-    iterations: number of iterations
-    learning_rate: learning rate
-    momentum: momentum
+    :param Model: reference to the Model object
+    :param iterations: number of iterations
+    :param learning_rate: learning rate
+    :param momentum: momentum
 
     """
 
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 28066413..ddaf8d54 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -80,29 +80,30 @@ def gibbs(input_dim,variance=1., mapping=None):
 
     .. math::
 
-       r = sqrt((x_i - x_j)'*(x_i - x_j))
+       r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}
 
-       k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
+       k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
 
-       Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
+       Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
 
-       where :math:`l(x)` is a function giving the length scale as a function of space.
-       This is the non stationary kernel proposed by Mark Gibbs in his 1997
-        thesis. It is similar to an RBF but has a length scale that varies
-        with input location. This leads to an additional term in front of
-        the kernel.
+    Where :math:`l(x)` is a function giving the length scale as a function of space.
 
-        The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
+    This is the non stationary kernel proposed by Mark Gibbs in his 1997
+    thesis. It is similar to an RBF but has a length scale that varies
+    with input location. This leads to an additional term in front of
+    the kernel.
 
-        :param input_dim: the number of input dimensions
-        :type input_dim: int
-        :param variance: the variance :math:`\sigma^2`
-        :type variance: float
-        :param mapping: the mapping that gives the lengthscale across the input space.
-        :type mapping: GPy.core.Mapping
-        :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
-        :type ARD: Boolean
-        :rtype: Kernpart object
+    The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
+
+    :param input_dim: the number of input dimensions
+    :type input_dim: int
+    :param variance: the variance :math:`\\sigma^2`
+    :type variance: float
+    :param mapping: the mapping that gives the lengthscale across the input space.
+    :type mapping: GPy.core.Mapping
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
+    :type ARD: Boolean
+    :rtype: Kernpart object
 
     """
     part = parts.gibbs.Gibbs(input_dim,variance,mapping)
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 6a72ac8d..9e930417 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -222,7 +222,8 @@ class kern(Parameterized):
 
     def prod(self, other, tensor=False):
         """
-        multiply two kernels (either on the same space, or on the tensor product of the input space).
+        Multiply two kernels (either on the same space, or on the tensor product of the input space).
+
         :param other: the other kernel to be added
         :type other: GPy.kern
         :param tensor: whether or not to use the tensor space (default is false).
diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py
index 67bb79fc..8fdf423f 100644
--- a/GPy/likelihoods/ep.py
+++ b/GPy/likelihoods/ep.py
@@ -95,6 +95,7 @@ class EP(likelihood):
         :type epsilon: float
         :param power_ep: Power EP parameters
         :type power_ep: list of floats
+
         """
         self.epsilon = epsilon
         self.eta, self.delta = power_ep
@@ -165,6 +166,7 @@ class EP(likelihood):
         :type epsilon: float
         :param power_ep: Power EP parameters
         :type power_ep: list of floats
+
         """
         self.epsilon = epsilon
         self.eta, self.delta = power_ep
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index cda62bfc..ca187305 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -10,14 +10,16 @@ class likelihood(Parameterized):
     (Gaussian) inherits directly from this, as does the EP algorithm
 
     Some things must be defined for this to work properly:
-    self.Y : the effective Gaussian target of the GP
-    self.N, self.D : Y.shape
-    self.covariance_matrix : the effective (noise) covariance of the GP targets
-    self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
-    self.is_heteroscedastic : enables significant computational savings in GP
-    self.precision : a scalar or vector representation of the effective target precision
-    self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
-    self.V : self.precision * self.Y
+
+        - self.Y : the effective Gaussian target of the GP
+        - self.N, self.D : Y.shape
+        - self.covariance_matrix : the effective (noise) covariance of the GP targets
+        - self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
+        - self.is_heteroscedastic : enables significant computational savings in GP
+        - self.precision : a scalar or vector representation of the effective target precision
+        - self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
+        - self.V : self.precision * self.Y
+
     """
     def __init__(self):
         Parameterized.__init__(self)
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index e094d915..d4d29711 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -245,12 +245,13 @@ class BayesianGPLVM(SparseGP, GPLVM):
         """
         Plot latent space X in 1D:
 
-            -if fig is given, create input_dim subplots in fig and plot in these
-            -if ax is given plot input_dim 1D latent space plots of X into each `axis`
-            -if neither fig nor ax is given create a figure with fignum and plot in there
+            - if fig is given, create input_dim subplots in fig and plot in these
+            - if ax is given plot input_dim 1D latent space plots of X into each `axis`
+            - if neither fig nor ax is given create a figure with fignum and plot in there
 
         colors:
             colors of different latent space dimensions input_dim
+
         """
         import pylab
         if ax is None:
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 8afd1470..1164d7e6 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -524,11 +524,14 @@ def simulation_BGPLVM():
             'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
 
 def toy_rbf_1d(seed=default_seed, num_samples=500):
-    """Samples values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1.
+    """
+    Samples values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1.
+
     :param seed: seed to use for random sampling.
     :type seed: int
     :param num_samples: number of samples to sample in the function (default 500).
     :type num_samples: int
+
     """
     np.random.seed(seed=seed)
     num_in = 1
@@ -631,11 +634,15 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
 
 
 def crescent_data(num_data=200, seed=default_seed):
-    """Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
+    """
+Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
+
     :param num_data_part: number of data to be sampled (default is 200).
     :type num_data: int
     :param seed: random seed to be used for data generation.
-    :type seed: int"""
+    :type seed: int
+
+    """
     np.random.seed(seed=seed)
     sqrt2 = np.sqrt(2)
     # Rotation matrix
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 3415c198..4e7f7fff 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -123,7 +123,7 @@ def jitchol(A, maxtries=5):
 
 def jitchol_old(A, maxtries=5):
     """
-    :param A : An almost pd square matrix
+    :param A: An almost pd square matrix
 
     :rval L: the Cholesky decomposition of A
 
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index 29d69848..72edf99f 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -17,12 +17,9 @@ def linear_grid(D, n = 100, min_max = (-100, 100)):
     """
     Creates a D-dimensional grid of n linearly spaced points
 
-    Parameters:
-
-    D:        dimension of the grid
-    n:        number of points
-    min_max:  (min, max) list
-
+    :param D: dimension of the grid
+    :param n: number of points
+    :param min_max: (min, max) list
 
     """
 
@@ -39,6 +36,7 @@ def kmm_init(X, m = 10):
 
     :param X: data
     :param m: number of inducing points
+
     """
 
     # compute the distances
diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py
index 1446512d..78f00955 100644
--- a/GPy/util/mocap.py
+++ b/GPy/util/mocap.py
@@ -120,13 +120,14 @@ class tree:
 def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
 
     """
+
     Compute the rotation matrix for an angle in each direction.
     This is a helper function for computing the rotation matrix for a given set of angles in a given order.
 
-     :param xangle: rotation for x-axis.
-     :param yangle: rotation for y-axis.
-     :param zangle: rotation for z-axis.
-     :param order: the order for the rotations.
+    :param xangle: rotation for x-axis.
+    :param yangle: rotation for y-axis.
+    :param zangle: rotation for z-axis.
+    :param order: the order for the rotations.
 
      """
     if degrees:
@@ -309,10 +310,8 @@ class acclaim_skeleton(skeleton):
 
         """
         Loads an ASF file into a skeleton structure.
-        loads skeleton structure from an acclaim skeleton file.
 
-         :param file_name: the file name to load in.
-         :rval skel: the skeleton for the file. - TODO isn't returning this?
+        :param file_name: The file name to load in.
 
          """         
 
diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index 4c3dbe2b..7a519555 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -502,11 +502,14 @@ def data_play(Y, visualizer, frame_rate=30):
 
     This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object.
     
-    data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01'])
-    Y = data['Y']
-    Y[:, 0:3] = 0.   # Make figure walk in place
-    visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel'])
-    GPy.util.visualize.data_play(Y, visualize)
+    .. code-block:: python
+
+       data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01'])
+       Y = data['Y']
+       Y[:, 0:3] = 0.   # Make figure walk in place
+       visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel'])
+       GPy.util.visualize.data_play(Y, visualize)
+
     """
     
 
diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index f36805a9..e05f39af 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -53,9 +53,11 @@ class TanhWarpingFunction(WarpingFunction):
         self.num_parameters = 3 * self.n_terms
 
     def f(self,y,psi):
-        """transform y with f using parameter vector psi
+        """
+        transform y with f using parameter vector psi
         psi = [[a,b,c]]
-        f = \sum_{terms} a * tanh(b*(y+c))
+        ::math::`f = \\sum_{terms} a * tanh(b*(y+c))`
+
         """
 
         #1. check that number of params is consistent
@@ -77,8 +79,7 @@ class TanhWarpingFunction(WarpingFunction):
         """
         calculate the numerical inverse of f
 
-        == input ==
-        iterations: number of N.R. iterations
+        :param iterations: number of N.R. iterations
 
         """
 
@@ -165,9 +166,11 @@ class TanhWarpingFunction_d(WarpingFunction):
         self.num_parameters = 3 * self.n_terms + 1
 
     def f(self,y,psi):
-        """transform y with f using parameter vector psi
+        """
+        Transform y with f using parameter vector psi
         psi = [[a,b,c]]
-        f = \sum_{terms} a * tanh(b*(y+c))
+
+        :math:`f = \\sum_{terms} a * tanh(b*(y+c))`
         """
 
         #1. check that number of params is consistent
@@ -189,8 +192,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         """
         calculate the numerical inverse of f
 
-        == input ==
-        iterations: number of N.R. iterations
+        :param max_iterations: maximum number of N.R. iterations
 
         """
 
@@ -214,12 +216,13 @@ class TanhWarpingFunction_d(WarpingFunction):
     def fgrad_y(self, y, psi, return_precalc = False):
         """
         gradient of f w.r.t to y ([N x 1])
-        returns: Nx1 vector of derivatives, unless return_precalc is true,
-        then it also returns the precomputed stuff
+
+        :returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff
+
         """
 
 
-        mpsi = psi.copy()
+        mpsi = psi.coSpy()
         d = psi[-1]
         mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
 
@@ -242,7 +245,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         """
         gradient of f w.r.t to y and psi
 
-        returns: NxIx4 tensor of partial derivatives
+        :returns: NxIx4 tensor of partial derivatives
 
         """
 

From 469cbd815d88463fe67d485a8d2e3d208692c28e Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 20 Sep 2013 18:15:45 +0100
Subject: [PATCH 04/17] adding extra tests for bgplvm

---
 GPy/testing/bgplvm_tests.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index 6b91d999..a8777e11 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -55,7 +55,18 @@ class BGPLVMTests(unittest.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
-    #@unittest.skip('psi2 cross terms are NotImplemented for this combination')
+    def test_rbf_line_kern(self):
+        N, num_inducing, input_dim, D = 10, 3, 2, 4
+        X = np.random.rand(N, input_dim)
+        k = GPy.kern.rbf(input_dim) +  GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
+        Y -= Y.mean(axis=0)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
     def test_linear_bias_kern(self):
         N, num_inducing, input_dim, D = 30, 5, 4, 30
         X = np.random.rand(N, input_dim)

From a6c89b58f082534fe285faca676148c70bd73136 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 23 Sep 2013 10:59:57 +0100
Subject: [PATCH 05/17] missing file

---
 GPy/kern/kern.py          |   3 +-
 GPy/kern/parts/eq_ode1.py | 471 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 473 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/parts/eq_ode1.py

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 9e930417..cf36e16c 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
+import sys
 import numpy as np
 import pylab as pb
 from ..core.parameterized import Parameterized
@@ -577,7 +578,7 @@ class Kern_check_model(Model):
 
     def is_positive_definite(self):
         v = np.linalg.eig(self.kernel.K(self.X))[0]
-        if any(v<-1e-6):
+        if any(v<-sys.float_info.epsilon):
             return False
         else:
             return True
diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/parts/eq_ode1.py
new file mode 100644
index 00000000..1e5345fe
--- /dev/null
+++ b/GPy/kern/parts/eq_ode1.py
@@ -0,0 +1,471 @@
+# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import Kernpart
+import numpy as np
+from GPy.util.linalg import mdot, pdinv
+from GPy.util.ln_diff_erfs import ln_diff_erfs
+import pdb
+from scipy import weave
+
+class Eq_ode1(Kernpart):
+    """
+    Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
+
+    This outputs of this kernel have the form
+    .. math::
+       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
+
+    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
+    
+    :param output_dim: number of outputs driven by latent function.
+    :type output_dim: int
+    :param W: sensitivities of each output to the latent driving function. 
+    :type W: ndarray (output_dim x rank).
+    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
+    :type rank: int
+    :param decay: decay rates for the first order system. 
+    :type decay: array of length output_dim.
+    :param delay: delay between latent force and output response.
+    :type delay: array of length output_dim.
+    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
+    :type kappa: array of length output_dim.
+    
+    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
+    """
+    def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0,  decay=None, delay=None):
+        self.rank = rank
+        self.input_dim = 1
+        self.name = 'eq_ode1'
+        self.output_dim = output_dim
+        self.lengthscale = lengthscale
+        self.num_params = self.output_dim*(1. + self.rank) + 1
+        if kappa is not None:
+            self.num_params+=self.output_dim
+        if delay is not None:
+            self.num_params+=self.output_dim
+        self.rank = rank
+        if W is None:
+            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
+        else:
+            assert W.shape==(self.output_dim,self.rank)
+            self.W = W
+        if decay is None:
+            self.decay = np.ones(self.output_dim)
+        if kappa is not None:
+            assert kappa.shape==(self.output_dim,)
+        self.kappa = kappa
+
+        if delay is not None:
+            assert delay.shape==(self.output_dim,)
+        self.delay = delay
+        self.is_normalized = True
+        self.is_stationary = False
+        self._set_params(self._get_params())
+
+    def _get_params(self):
+        param_list = [self.W.flatten()]
+        if self.kappa is not None:
+            param_list.append(self.kappa)
+        param_list.append(self.decay)
+        if self.delay is not None:
+            param_list.append(self.delay)
+        param_list.append(self.lengthscale)
+        return np.hstack(param_list)
+
+    def _set_params(self,x):
+        assert x.size == self.num_params
+        end = self.output_dim*self.rank
+        self.W = x[:end].reshape(self.output_dim,self.rank)
+        start = end
+        self.B = np.dot(self.W,self.W.T)
+        if self.kappa is not None:
+            end+=self.output_dim
+            self.kappa = x[start:end]
+            self.B += np.diag(self.kappa)
+            start=end
+        end+=self.output_dim
+        self.decay = x[start:end]
+        start=end
+        if self.delay is not None:
+            end+=self.output_dim
+            self.delay = x[start:end]
+            start=end
+        end+=1
+        self.lengthscale = x[start]
+        self.sigma = np.sqrt(2)*self.lengthscale
+
+
+    def _get_param_names(self):
+        param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
+        if self.kappa is not None:
+            param_names += ['kappa_%i'%i for i in range(self.output_dim)]
+        param_names += ['decay_%i'%i for i in range(self.output_dim)]
+        if self.delay is not None:
+            param_names += ['delay_%i'%i for i in range(self.output_dim)]
+        param_names+= ['lengthscale'] 
+        return param_names
+
+    def K(self,X,X2,target):
+        
+        if X.shape[1] > 2:
+            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
+
+        self._K_computations(X, X2)
+        target += self._scales*self._dK_dvar
+
+        if self.gaussian_initial:
+            # Add covariance associated with initial condition.
+            t1_mat = self._t[self._rorder, None]
+            t2_mat = self._t2[None, self._rorder2]
+            target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
+
+
+
+    def Kdiag(self,index,target):
+        #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
+        pass
+    
+    def dK_dtheta(self,dL_dK,index,index2,target):
+        pass
+
+    def dKdiag_dtheta(self,dL_dKdiag,index,target):
+        pass
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        pass
+
+    def _extract_t_indices(self, X, X2=None):
+        """Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
+
+        # TODO: some fast checking here to see if this needs recomputing?
+        self._t = X[:, 0]
+        if X.shape[1]==1:
+            # No index passed, assume single output of ode model.
+            self._index = np.ones_like(X[:, 1],dtype=np.int)
+        self._index = np.asarray(X[:, 1],dtype=np.int)
+        # Sort indices so that outputs are in blocks for computational
+        # convenience.
+        self._order = self._index.argsort()
+        self._index = self._index[self._order]
+        self._t = self._t[self._order]
+        self._rorder = self._order.argsort() # rorder is for reversing the order
+        
+        if X2 is None:
+            self._t2 = None
+            self._index2 = None
+            self._rorder2 = self._rorder
+        else:
+            if X2.shape[1] > 2:
+                raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
+            self._t2 = X2[:, 0]
+            if X.shape[1]==1:
+                # No index passed, assume single output of ode model.
+                self._index2 = np.ones_like(X2[:, 1],dtype=np.int)
+            self._index2 = np.asarray(X2[:, 1],dtype=np.int)
+            self._order2 = self._index2.argsort()
+            slef._index2 = self._index2[self._order2]
+            self._t2 = self._t2[self._order2]
+            self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
+
+    def _K_computations(self, X, X2):
+        """Perform main body of computations for the ode1 covariance function."""
+        # First extract times and indices.
+        self._extract_t_indices(X, X2)
+
+        self._K_compute_eq()
+        self._K_compute_ode_eq()
+        if X2 is None:
+            self._K_eq_ode = self._K_ode_eq.T
+        else:
+            self._K_compute_ode_eq(transpose=True)
+        self._K_compute_ode()
+        
+        # Reorder values of blocks for placing back into _K_dvar.
+        self._K_dvar[self._rorder, :] = np.vstack((np.hstack((self._K_eq, self._Keq_ode)),
+                                                   np.hstack((self._K_ode_eq, self.K_ode))))[:, self._rorder2]
+
+
+    def _K_compute_eq(self):
+        """Compute covariance for latent covariance."""
+        t_eq = self._t[self._index==0]
+        if t_eq.shape[0]==0:
+            self._K_eq = np.zeros((0, 0))
+            return
+        
+        if self._t2 is None:
+            self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
+        else:
+            t2_eq = self._t2[self._index2==0]
+            if t2_eq.shape[0]==0:
+                self._K_eq_eq = np.zeros((0, 0))
+                return
+            self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
+        
+        self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
+        if self.is_normalized:
+            self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
+
+    def _K_compute_ode_eq(self, transpose=False):
+        """Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
+
+        :param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
+        :type transpose: bool"""
+
+        if transpose:
+            if self._t2 is not None:
+                t_ode = self._t2[self._index2>0]
+                index_ode = self._index2[self._index2>0]-1
+                if t_ode.shape[0]==0:
+                    self._K_eq_ode = np.zeros((0, 0))
+                    return
+            else:
+                self._K_eq_ode = np.zeros((0, 0))
+                return
+        
+            t_eq = self._t[self._index==0]
+            if t_eq.shape[0]==0:
+                self._K_eq_ode = np.zeros((0, 0))
+                return
+        else:
+            t_ode = self._t[self._index>0]
+            index_ode = self._index[self._index>0]-1
+            if t_ode.shape[0]==0:
+                self._K_ode_eq = np.zeros((0, 0))
+                return
+            if self._t2 is not None:            
+                t_eq = self._t2[self._index2==0]
+                if t_eq.shape[0]==0:
+                    self._K_ode_eq = np.zeros((0, 0))
+                    return
+            else:
+                self._K_ode_eq = np.zeros((0, 0))
+                return
+        # Matrix giving scales of each output
+        # self._scale = np.zeros((t_ode.shape[0], t_eq.shape[0]))
+        # code="""
+        #     for(int i=0;i<N; i++){
+        #       for(int j=0; j<N2; j++){
+        #           scale_mat[i+j*N] = W[index_ode[i]+index_eq[j]*output_dim];
+        #         }
+        #       }
+        #     """
+        #     scale_mat, B = self._scale, self._B
+        #     N, N2, output_dim = index_ode.size, index_eq.size, self.output_dim
+        #     weave.inline(code,['index_ode', 'index_eq',
+        #                        'scale_mat', 'B',
+        #                        'N', 'N2', 'output_dim'])
+        # else:
+        #     self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
+        #     code = """
+        #     for(int i=0; i<N; i++){
+        #       for(int j=0; j<N2; j++){
+        #         scale_mat[i+j*N] = B[index_ode[i]+output_dim*index2_ode[j]]
+        #       }
+        #     }
+        #     """
+        #     scale_mat, B = self._scale, self._B
+        #     N, N2, output_dim = index_ode.size, index2.size, self.output_dim
+        #     weave.inline(code, ['index_ode', 'index2_ode',
+        #                         'scale_mat', 'B',
+        #                         'N', 'N2', 'output_dim'])
+        t_ode_mat = t_ode[:, None]
+        t_eq_mat = t_eq[None, :]
+        if self.delay is not None:
+            t_ode_mat -= self.delay[index_ode, None]
+        diff_t = (t_ode_mat - t_eq_mat)
+
+        inv_sigma_diff_t = 1./self.sigma*diff_t
+        half_sigma_d_i = 0.5*self.sigma*self.decay
+
+        if self.is_stationary == False:
+            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+        else:
+            ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+        sK = signs*exp(half_sigma_d_i*half_sigma_d_i - self.decay*diff_t + ln_part)
+
+        sK *= 0.5
+
+        if not self.is_normalized:
+            sK *= sqrt(pi)*self.sigma
+
+
+        if transpose:
+            self._K_eq_ode = sK.T
+        else:
+            self._K_ode_eq = sK
+        return K
+        
+    def _K_compute_ode(self):
+        # Compute covariances between outputs of the ODE models.
+
+        t_ode = self._t[self._index>0]
+        index_ode = self._index[self._index>0]-1
+        if t_ode.shape[0]==0:
+            self._K_ode = np.zeros((0, 0))
+            return
+        
+        if self._t2 is None:
+            t2_ode = t_ode
+            index2_ode = index_ode
+        else:
+            t2_ode = self._t2[self._index2>0]
+            index2_ode = self._index2[self._index2>0]-1
+            if t2_eq.shape[0]==0:
+                self._K_ode = np.zeros((0, 0))
+                return
+        
+        if self._index2 is None:
+            # Matrix giving scales of each output
+            self._scale = np.zeros((t_ode.shape[0], t_ode.shape[0]))
+            code="""
+            for(int i=0;i<N; i++){
+              scale_mat[i+i*N] = B[index_ode[i]+output_dim*(index_ode[i])];
+              for(int j=0; j<i; j++){
+                  scale_mat[j+i*N] = B[index_ode[i]+output_dim*index_ode[j]];
+                  scale_mat[i+j*N] = scale_mat[j+i*N];
+                }
+              }
+            """
+            scale_mat, B = self._scale, self.B
+            N, output_dim = index_ode.size, self.output_dim
+            weave.inline(code,['index_ode',
+                               'scale_mat', 'B',
+                               'N', 'output_dim'])
+        else:
+            self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
+            code = """
+            for(int i=0; i<N; i++){
+              for(int j=0; j<N2; j++){
+                scale_mat[i+j*N] = B[index_ode[i]+output_dim*index2_ode[j]]
+              }
+            }
+            """
+            scale_mat, B = self._scale, self.B
+            N, N2, output_dim = index_ode.size, index2.size, self.output_dim
+            weave.inline(code, ['index_ode', 'index2_ode',
+                                'scale_mat', 'B',
+                                'N', 'N2', 'output_dim'])
+        
+        # When index is identical
+        if self.is_stationary:
+            h = self._compute_H_stat(t_ode, index_ode, t2_ode, index2_ode)
+        else:
+            h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode)
+
+        if self._t2 is None:
+            self._K_ode = 0.5 * (h + h.T)
+        else:
+            if self.is_stationary:
+                h2 = self._compute_H_stat(t2_ode, index2_ode, t_ode, index_ode)
+            else:
+                h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode)                
+            self._K_ode += 0.5 * (h + h2.T)
+
+        if not self.is_normalized:
+            self._K_ode *= np.sqrt(np.pi)*sigma
+    
+    def _compute_H(self, t, index, t2, index2, update_derivatives=False):
+        """Helper function for computing part of the ode1 covariance function.
+
+        :param t: first time input.
+        :type t: array
+        :param index: Indices of first output.
+        :type index: array of int
+        :param t2: second time input.
+        :type t2: array
+        :param index2: Indices of second output.
+        :type index2: array of int
+        :param update_derivatives: whether to update derivatives (default is False)
+        :return h : result of this subcomponent of the kernel for the given values.
+        :rtype: ndarray
+"""
+
+
+        # Vector of decays and delays associated with each output.
+        Decay = np.zeros(t.shape[0])
+        Decay2 = np.zeros(t2.shape[0])
+        Delay = np.zeros(t.shape[0])
+        Delay2 = np.zeros(t2.shape[0])
+        code="""
+        for(int i=0;i<N; i++){
+          Decay[i] = decay[index[i]];
+          }
+        for(int i=0; i<N2; i++){
+          Decay2[i] = decay[index2[i]];
+          }
+        """
+        decay = self.decay
+        N, N2 = index.size, index2.size
+        weave.inline(code,['index', 'index2',
+                           'Decay', 'Decay2',
+                           'decay',
+                           'N', 'N2'])
+        t_mat = t[:, None]
+        t2_mat = t2[None, :]
+        if self.delay is not None:
+            code="""
+            for(int i=0;i<N; i++){
+              Delay[i] = delay[index[i]];
+              }
+            for(int i=0; i<N2; i++){
+              Delay2[i] = delay[index2[i]];
+              }
+            """
+            delay=self.delay
+            N, N2 = index.size, index2.size
+            weave.inline(code,['index', 'index2',
+                               'Delay', 'Delay2', 
+                               'delay', 
+                               'N', 'N2'])
+
+            t_mat-=Delay[:, None]
+            t2_mat-=Delay2[None, :]
+
+        diff_t = (t_mat - t2_mat)
+        inv_sigma_diff_t = 1./self.sigma*diff_t
+        half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
+
+        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma, 
+                                        half_sigma_decay_i - inv_sigma_diff_t,
+                                        return_sign=True)
+        ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
+                                        half_sigma_decay_i - t_mat/self.sigma,
+                                        return_sign=True)
+
+        h = sign1*np.exp(half_sigma_decay_i
+                         *half_sigma_decay_i
+                         -Decay[:, None]*diff_t+ln_part_1
+                         -np.log(Decay[:, None] + Decay2[None, :]))
+        h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
+                          -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
+                          -np.log(Decay[:, None] + Decay2[None, :]))
+
+
+        # if update_derivatives:
+        #     sigma2 = self.sigma*self.sigma
+        #     # Update ith decay gradient
+        #     dh_ddecay += (0.5*self.decay[i]*sigma2*(self.decay[i] + decay[j])-1)*h
+        #     + (-diff_t*sign1*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*diff_t+ln_part_1) 
+        #        +t_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*t_mat - decay[j]*t2_mat+ln_part_2)) ...
+        #     +self.sigma/sqrt(pi)*(-np.exp(-diff_t*diff_t/sigma2)
+        #                           +np.exp(-t2_mat*t2_mat/sigma2-self.decay[i]*t_mat) 
+        #                           +np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) ...
+        #                           -np.exp(-(self.decay[i]*t_mat + decay[j]*t2_mat)))
+        #     self._dh_ddecay[i] += real(dh_ddecay/(self.decay[i]+decay[j]))
+
+        #     # Update jth decay gradient
+        #     dh_ddecay = t2_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-(self.decay[i]*t_mat + decay[j]*t2_mat)+ln_part_2)-h
+        #     self._dh_ddecay[j] += real(dh_ddecay/(self.decay[i] + decay[j]))
+            
+        #     # Update sigma gradient
+        #     self._dh_dsigma += 0.5*self.decay[i]*self.decay[i]*self.sigma*h + 2/(np.sqrt(np.pi)*(self.decay[i]+decay[j]))*((-diff_t/sigma2-self.decay[i]/ 
+        #                                                                                                                     2)*np.exp(-diff_t*
+        #                                                                                                                               diff_t/sigma2) 
+        #                                                                                                                    + (-t2_mat/sigma2+self.decay[i]/2)
+        #                                                                                                                    *np.exp(-t2_mat*t2_mat/sigma2 
+        #                                                                                                                            -self.decay[i]*t_mat) 
+        #                                                                                                                    - (-t_mat/sigma2-self.decay[i]/2) 
+        #                                                                                                                    *np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) 
+        #                                                                                                                    - self.decay[i]/2*np.exp(-(self.decay[i]*t_mat+decay[j]*t2_mat)))
+                

From c800e0687f4c924d82120cece8582305d02aad12 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 23 Sep 2013 12:24:26 +0100
Subject: [PATCH 06/17] Added missing file.

---
 GPy/util/ln_diff_erfs.py | 71 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 GPy/util/ln_diff_erfs.py

diff --git a/GPy/util/ln_diff_erfs.py b/GPy/util/ln_diff_erfs.py
new file mode 100644
index 00000000..13b442f7
--- /dev/null
+++ b/GPy/util/ln_diff_erfs.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+#Only works for scipy 0.12+
+try:
+    from scipy.special import erfcx, erf
+except ImportError:
+    from scipy.special import erf
+    from erfcx import erfcx
+
+import numpy as np
+
+def ln_diff_erfs(x1, x2, return_sign=False):
+    """Function for stably computing the log of difference of two erfs in a numerically stable manner.
+    :param x1 : argument of the positive erf
+    :type x1: ndarray
+    :param x2 : argument of the negative erf
+    :type x2: ndarray
+    :return: tuple containing (log(abs(erf(x1) - erf(x2))), sign(erf(x1) - erf(x2)))
+    
+    Based on MATLAB code that was written by Antti Honkela and modified by David Luengo, originally derived from code by Neil Lawrence.
+    """
+    x1 = np.require(x1).real
+    x2 = np.require(x2).real
+
+    v = np.zeros(np.max((x1.size, x2.size)))
+    
+    # if numel(x1) == 1:
+    #     x1 = x1 * ones(size(x2))
+
+    # if numel(x2) == 1:
+    #     x2 = x2 * ones(size(x1))
+
+    sign = np.sign(x1 - x2)
+    I = sign == -1
+    swap = x1[I]
+    x1[I] = x2[I]
+    x2[I] = swap
+
+    # TODO: switch off log of zero warnings.
+    # Case 1: arguments of different sign, no problems with loss of accuracy
+    I1 = np.logical_or(np.logical_and(x1>0, x2<0), np.logical_and(x2>0, x1<0)) # I1=(x1*x2)<0
+    v[I1] = np.log( erf(x1[I1]) - erf(x2[I1]) )
+
+    # Case 2: x1 = x2 so we have log of zero.
+    I2 = (x1 == x2)
+    v[I2] = -np.inf
+
+    # Case 3: Both arguments are non-negative
+    I3 = np.logical_and(x1 > 0, np.logical_and(np.logical_not(I1),
+                                               np.logical_not(I2)))
+    v[I3] = np.log(erfcx(x2[I3])
+                   -erfcx(x1[I3])*np.exp(x2[I3]**2
+                                         -x1[I3]**2)) - x2[I3]**2
+    
+    # Case 4: Both arguments are non-positive
+    I4 = np.logical_and(np.logical_and(np.logical_not(I1),
+                                       np.logical_not(I2)),
+                        np.logical_not(I3))
+    v[I4] = np.log(erfcx(-x1[I4])
+                   -erfcx(-x2[I4])*np.exp(x1[I4]**2
+                                          -x2[I4]**2))-x1[I4]**2
+    
+    # TODO: switch back on log of zero warnings.
+
+    if return_sign:
+        return v, sign
+    else:
+        # Need to add in a complex part because argument is negative.
+        v[I] += np.pi*1j
+

From 0ae9f9aafd89c3c712a509536a394b7fbf7ac5fa Mon Sep 17 00:00:00 2001
From: Ricardo <rick70x7@gmail.com>
Date: Mon, 23 Sep 2013 17:29:33 +0100
Subject: [PATCH 07/17] Fixing W_columns and num_outputs nomenclature

---
 GPy/core/gp_base.py                           | 22 ++++++++++++++++---
 GPy/core/sparse_gp.py                         |  5 ++---
 GPy/models/gp_multioutput_regression.py       | 12 +++++-----
 .../sparse_gp_multioutput_regression.py       | 18 +++++++--------
 4 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index e26deb0f..bd0b877e 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -128,10 +128,9 @@ class GPBase(Model):
             else:
                 raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
         else:
-            assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+            assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
 
             if self.X.shape[1] == 2:
-                assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
                 Xu = self.X[self.X[:,-1]==output ,0:1]
                 Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
 
@@ -263,7 +262,7 @@ class GPBase(Model):
                 raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
         else:
-            assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+            assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
             if self.X.shape[1] == 2:
                 resolution = resolution or 200
                 Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
@@ -287,3 +286,20 @@ class GPBase(Model):
 
             else:
                 raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    """
+    def samples_f(self,X,samples=10, which_data='all', which_parts='all',output=None):
+        if which_data == 'all':
+            which_data = slice(None)
+
+        if hasattr(self,'multioutput'):
+            np.hstack([X,np.ones((X.shape[0],1))*output])
+
+        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=True)
+        v = v.reshape(m.size,-1) if len(v.shape)==3 else v
+        Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+        #gpplot(X, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+        for i in range(samples):
+            ax.plot(X, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+
+    """
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 88bd36e6..834bdd84 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -367,9 +367,8 @@ class SparseGP(GPBase):
                 ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
 
         else:
-            pass
-            """
             if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+                """
                 Xu = self.X[self.X[:,-1]==output,:]
                 if self.has_uncertain_inputs:
                     Xu = self.X * self._Xscale + self._Xoffset  # NOTE self.X are the normalized values now
@@ -380,6 +379,7 @@ class SparseGP(GPBase):
                                 xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                                 ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
 
+                """
                 Zu = self.Z[self.Z[:,-1]==output,:]
                 Zu = self.Z * self._Xscale + self._Xoffset
                 Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
@@ -388,7 +388,6 @@ class SparseGP(GPBase):
 
             else:
                 raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
-            """
 
     def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
         """
diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/models/gp_multioutput_regression.py
index 20d839ce..4ce3dfbc 100644
--- a/GPy/models/gp_multioutput_regression.py
+++ b/GPy/models/gp_multioutput_regression.py
@@ -25,14 +25,14 @@ class GPMultioutputRegression(GP):
     :type normalize_X: False|True
     :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_Y: False|True
-    :param W_columns: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
-    :type W_columns: integer
+    :param rank: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
+    :type rank: integer
     """
 
-    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,W_columns=1):
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,rank=1):
 
-        self.num_outputs = len(Y_list)
-        assert len(X_list) == self.num_outputs, 'Number of outputs do not match length of inputs list.'
+        self.output_dim = len(Y_list)
+        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'
 
         #Inputs indexing
         i = 0
@@ -51,7 +51,7 @@ class GPMultioutputRegression(GP):
         #Coregionalization kernel definition
         if kernel_list is None:
             kernel_list = [kern.rbf(original_dim)]
-        mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
+        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list = kernel_list, rank=rank)
 
         self.multioutput = True
         GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/models/sparse_gp_multioutput_regression.py
index 041204b6..d809610b 100644
--- a/GPy/models/sparse_gp_multioutput_regression.py
+++ b/GPy/models/sparse_gp_multioutput_regression.py
@@ -30,23 +30,23 @@ class SparseGPMultioutputRegression(SparseGP):
     :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list
     :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
     :type num_inducing: integer
-    :param W_columns: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
-    :type W_columns: integer
+    :param rank: number tuples of the corregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
+    :type rank: integer
     """
     #NOTE not tested with uncertain inputs
-    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,W_columns=1):
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,rank=1):
 
-        self.num_outputs = len(Y_list)
-        assert len(X_list) == self.num_outputs, 'Number of outputs do not match length of inputs list.'
+        self.output_dim = len(Y_list)
+        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'
 
         #Inducing inputs list
         if len(Z_list):
-            assert len(Z_list) == self.num_outputs, 'Number of outputs do not match length of inducing inputs list.'
+            assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
         else:
             if isinstance(num_inducing,np.int):
-                num_inducing = [num_inducing] * self.num_outputs
+                num_inducing = [num_inducing] * self.output_dim
             num_inducing = np.asarray(num_inducing)
-            assert num_inducing.size == self.num_outputs, 'Number of outputs do not match length of inducing inputs list.'
+            assert num_inducing.size == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
             for ni,X in zip(num_inducing,X_list):
                 i = np.random.permutation(X.shape[0])[:ni]
                 Z_list.append(X[i].copy())
@@ -72,7 +72,7 @@ class SparseGPMultioutputRegression(SparseGP):
         #Coregionalization kernel definition
         if kernel_list is None:
             kernel_list = [kern.rbf(original_dim)]
-        mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
+        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list = kernel_list, rank=rank)
 
         self.multioutput = True
         SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)

From 0509b6f9d04bf4bb6d73f7e3120d0d3d39d4eaa0 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Tue, 24 Sep 2013 06:15:09 +0200
Subject: [PATCH 08/17] Added missing files.

---
 GPy/FAQ.txt                  |   8 ++
 GPy/coding_style_guide.txt   |  10 ++
 GPy/kern/parts/eq_ode1.py    | 217 +++++++++++++++--------------------
 GPy/testing/bcgplvm_tests.py |  50 ++++++++
 GPy/util/erfcx.py            |  63 ++++++++++
 GPy/util/ln_diff_erfs.py     | 109 ++++++++++++------
 6 files changed, 297 insertions(+), 160 deletions(-)
 create mode 100644 GPy/FAQ.txt
 create mode 100644 GPy/coding_style_guide.txt
 create mode 100644 GPy/testing/bcgplvm_tests.py
 create mode 100644 GPy/util/erfcx.py

diff --git a/GPy/FAQ.txt b/GPy/FAQ.txt
new file mode 100644
index 00000000..66ba4834
--- /dev/null
+++ b/GPy/FAQ.txt
@@ -0,0 +1,8 @@
+Frequently Asked Questions
+--------------------------
+
+Unit tests are run through Travis-Ci. They can be run locally through entering the GPy route diretory and writing
+
+nosetests testing/
+
+Documentation is handled by Sphinx. To build the documentation:
diff --git a/GPy/coding_style_guide.txt b/GPy/coding_style_guide.txt
new file mode 100644
index 00000000..0cc732e4
--- /dev/null
+++ b/GPy/coding_style_guide.txt
@@ -0,0 +1,10 @@
+In this text document we will describe coding conventions to be used in GPy to keep things consistent.
+
+All arrays containing data are two dimensional. The first dimension is the number of data, the second dimension is number of features. This keeps things consistent with the idea of a design matrix.
+
+Input matrices are either X or t, output matrices are Y.
+
+Input dimensionality is input_dim, output dimensionality is output_dim, number of data is num_data.
+
+Data sets are preprocessed in the datasets.py file. This file also records where the data set was obtained from in the dictionary stored in the file. Long term we should move this dictionary to sqlite or similar.
+
diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/parts/eq_ode1.py
index 1e5345fe..6fd5fb91 100644
--- a/GPy/kern/parts/eq_ode1.py
+++ b/GPy/kern/parts/eq_ode1.py
@@ -39,11 +39,12 @@ class Eq_ode1(Kernpart):
         self.name = 'eq_ode1'
         self.output_dim = output_dim
         self.lengthscale = lengthscale
-        self.num_params = self.output_dim*(1. + self.rank) + 1
+        self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
         if kappa is not None:
             self.num_params+=self.output_dim
         if delay is not None:
-            self.num_params+=self.output_dim
+            assert delay.shape==(self.output_dim-1,)
+            self.num_params+=self.output_dim-1
         self.rank = rank
         if W is None:
             self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
@@ -51,18 +52,17 @@ class Eq_ode1(Kernpart):
             assert W.shape==(self.output_dim,self.rank)
             self.W = W
         if decay is None:
-            self.decay = np.ones(self.output_dim)
+            self.decay = np.ones(self.output_dim-1)
         if kappa is not None:
             assert kappa.shape==(self.output_dim,)
         self.kappa = kappa
 
-        if delay is not None:
-            assert delay.shape==(self.output_dim,)
         self.delay = delay
         self.is_normalized = True
         self.is_stationary = False
+        self.gaussian_initial = False
         self._set_params(self._get_params())
-
+        
     def _get_params(self):
         param_list = [self.W.flatten()]
         if self.kappa is not None:
@@ -84,11 +84,11 @@ class Eq_ode1(Kernpart):
             self.kappa = x[start:end]
             self.B += np.diag(self.kappa)
             start=end
-        end+=self.output_dim
+        end+=self.output_dim-1
         self.decay = x[start:end]
         start=end
         if self.delay is not None:
-            end+=self.output_dim
+            end+=self.output_dim-1
             self.delay = x[start:end]
             start=end
         end+=1
@@ -100,9 +100,9 @@ class Eq_ode1(Kernpart):
         param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
         if self.kappa is not None:
             param_names += ['kappa_%i'%i for i in range(self.output_dim)]
-        param_names += ['decay_%i'%i for i in range(self.output_dim)]
+        param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
         if self.delay is not None:
-            param_names += ['delay_%i'%i for i in range(self.output_dim)]
+            param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
         param_names+= ['lengthscale'] 
         return param_names
 
@@ -112,7 +112,7 @@ class Eq_ode1(Kernpart):
             raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
 
         self._K_computations(X, X2)
-        target += self._scales*self._dK_dvar
+        target += self._scale*self._K_dvar
 
         if self.gaussian_initial:
             # Add covariance associated with initial condition.
@@ -140,9 +140,8 @@ class Eq_ode1(Kernpart):
 
         # TODO: some fast checking here to see if this needs recomputing?
         self._t = X[:, 0]
-        if X.shape[1]==1:
-            # No index passed, assume single output of ode model.
-            self._index = np.ones_like(X[:, 1],dtype=np.int)
+        if not X.shape[1] == 2:
+            raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
         self._index = np.asarray(X[:, 1],dtype=np.int)
         # Sort indices so that outputs are in blocks for computational
         # convenience.
@@ -156,15 +155,12 @@ class Eq_ode1(Kernpart):
             self._index2 = None
             self._rorder2 = self._rorder
         else:
-            if X2.shape[1] > 2:
-                raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
+            if not X2.shape[1] == 2:
+                raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
             self._t2 = X2[:, 0]
-            if X.shape[1]==1:
-                # No index passed, assume single output of ode model.
-                self._index2 = np.ones_like(X2[:, 1],dtype=np.int)
             self._index2 = np.asarray(X2[:, 1],dtype=np.int)
             self._order2 = self._index2.argsort()
-            slef._index2 = self._index2[self._order2]
+            self._index2 = self._index2[self._order2]
             self._t2 = self._t2[self._order2]
             self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
 
@@ -180,25 +176,65 @@ class Eq_ode1(Kernpart):
         else:
             self._K_compute_ode_eq(transpose=True)
         self._K_compute_ode()
-        
+
+        if X2 is None:
+            self._K_dvar = np.zeros((self._t.shape[0], self._t.shape[0]))
+        else:
+            self._K_dvar = np.zeros((self._t.shape[0], self._t2.shape[0]))
+
         # Reorder values of blocks for placing back into _K_dvar.
-        self._K_dvar[self._rorder, :] = np.vstack((np.hstack((self._K_eq, self._Keq_ode)),
-                                                   np.hstack((self._K_ode_eq, self.K_ode))))[:, self._rorder2]
+        self._K_dvar = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
+                                                   np.hstack((self._K_ode_eq, self._K_ode))))
+        self._K_dvar = self._K_dvar[self._rorder, :]
+        self._K_dvar = self._K_dvar[:, self._rorder2]
+        
+        
+        if X2 is None:
+            # Matrix giving scales of each output
+            self._scale = np.zeros((self._t.size, self._t.size))
+            code="""
+            for(int i=0;i<N; i++){
+              scale_mat[i+i*N] = B[index[i]+output_dim*(index[i])];
+              for(int j=0; j<i; j++){
+                  scale_mat[j+i*N] = B[index[i]+output_dim*index[j]];
+                  scale_mat[i+j*N] = scale_mat[j+i*N];
+                }
+              }
+            """
+            scale_mat, B, index = self._scale, self.B, self._index
+            N, output_dim = self._t.size, self.output_dim
+            weave.inline(code,['index',
+                               'scale_mat', 'B',
+                               'N', 'output_dim'])
+        else:
+            self._scale = np.zeros((self._t.size, self._t2.size))
+            code = """
+            for(int i=0; i<N; i++){
+              for(int j=0; j<N2; j++){
+                scale_mat[i+j*N] = B[index[i]+output_dim*index2[j]];
+              }
+            }
+            """
+            scale_mat, B, index, index2 = self._scale, self.B, self._index, self._index2
+            N, N2, output_dim = self._t.size, self._t2.size, self.output_dim
+            weave.inline(code, ['index', 'index2',
+                                'scale_mat', 'B',
+                                'N', 'N2', 'output_dim'])
+
 
 
     def _K_compute_eq(self):
         """Compute covariance for latent covariance."""
         t_eq = self._t[self._index==0]
-        if t_eq.shape[0]==0:
-            self._K_eq = np.zeros((0, 0))
-            return
-        
         if self._t2 is None:
+            if t_eq.size==0:
+                self._K_eq = np.zeros((0, 0))
+                return
             self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
         else:
             t2_eq = self._t2[self._index2==0]
-            if t2_eq.shape[0]==0:
-                self._K_eq_eq = np.zeros((0, 0))
+            if t_eq.size==0 or t2_eq.size==0:
+                self._K_eq = np.zeros((t_eq.size, t2_eq.size))
                 return
             self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
         
@@ -212,63 +248,27 @@ class Eq_ode1(Kernpart):
         :param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
         :type transpose: bool"""
 
-        if transpose:
-            if self._t2 is not None:
+        if self._t2 is not None:
+            if transpose:
+                t_eq = self._t[self._index==0]
                 t_ode = self._t2[self._index2>0]
                 index_ode = self._index2[self._index2>0]-1
-                if t_ode.shape[0]==0:
-                    self._K_eq_ode = np.zeros((0, 0))
-                    return
             else:
-                self._K_eq_ode = np.zeros((0, 0))
-                return
-        
-            t_eq = self._t[self._index==0]
-            if t_eq.shape[0]==0:
-                self._K_eq_ode = np.zeros((0, 0))
-                return
+                t_eq = self._t2[self._index2==0]
+                t_ode = self._t[self._index>0]
+                index_ode = self._index[self._index>0]-1
         else:
+            t_eq = self._t[self._index==0]
             t_ode = self._t[self._index>0]
             index_ode = self._index[self._index>0]-1
-            if t_ode.shape[0]==0:
-                self._K_ode_eq = np.zeros((0, 0))
-                return
-            if self._t2 is not None:            
-                t_eq = self._t2[self._index2==0]
-                if t_eq.shape[0]==0:
-                    self._K_ode_eq = np.zeros((0, 0))
-                    return
+
+        if t_ode.size==0 or t_eq.size==0:
+            if transpose:
+                self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
             else:
-                self._K_ode_eq = np.zeros((0, 0))
-                return
-        # Matrix giving scales of each output
-        # self._scale = np.zeros((t_ode.shape[0], t_eq.shape[0]))
-        # code="""
-        #     for(int i=0;i<N; i++){
-        #       for(int j=0; j<N2; j++){
-        #           scale_mat[i+j*N] = W[index_ode[i]+index_eq[j]*output_dim];
-        #         }
-        #       }
-        #     """
-        #     scale_mat, B = self._scale, self._B
-        #     N, N2, output_dim = index_ode.size, index_eq.size, self.output_dim
-        #     weave.inline(code,['index_ode', 'index_eq',
-        #                        'scale_mat', 'B',
-        #                        'N', 'N2', 'output_dim'])
-        # else:
-        #     self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
-        #     code = """
-        #     for(int i=0; i<N; i++){
-        #       for(int j=0; j<N2; j++){
-        #         scale_mat[i+j*N] = B[index_ode[i]+output_dim*index2_ode[j]]
-        #       }
-        #     }
-        #     """
-        #     scale_mat, B = self._scale, self._B
-        #     N, N2, output_dim = index_ode.size, index2.size, self.output_dim
-        #     weave.inline(code, ['index_ode', 'index2_ode',
-        #                         'scale_mat', 'B',
-        #                         'N', 'N2', 'output_dim'])
+                self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
+            return
+
         t_ode_mat = t_ode[:, None]
         t_eq_mat = t_eq[None, :]
         if self.delay is not None:
@@ -276,13 +276,14 @@ class Eq_ode1(Kernpart):
         diff_t = (t_ode_mat - t_eq_mat)
 
         inv_sigma_diff_t = 1./self.sigma*diff_t
-        half_sigma_d_i = 0.5*self.sigma*self.decay
+        decay_vals = self.decay[index_ode][:, None]
+        half_sigma_d_i = 0.5*self.sigma*decay_vals
 
         if self.is_stationary == False:
-            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
         else:
             ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
-        sK = signs*exp(half_sigma_d_i*half_sigma_d_i - self.decay*diff_t + ln_part)
+        sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)
 
         sK *= 0.5
 
@@ -294,58 +295,24 @@ class Eq_ode1(Kernpart):
             self._K_eq_ode = sK.T
         else:
             self._K_ode_eq = sK
-        return K
         
     def _K_compute_ode(self):
         # Compute covariances between outputs of the ODE models.
 
         t_ode = self._t[self._index>0]
         index_ode = self._index[self._index>0]-1
-        if t_ode.shape[0]==0:
-            self._K_ode = np.zeros((0, 0))
-            return
-        
         if self._t2 is None:
+            if t_ode.size==0:
+                self._K_ode = np.zeros((0, 0))
+                return        
             t2_ode = t_ode
             index2_ode = index_ode
         else:
             t2_ode = self._t2[self._index2>0]
-            index2_ode = self._index2[self._index2>0]-1
-            if t2_eq.shape[0]==0:
-                self._K_ode = np.zeros((0, 0))
+            if t_ode.size==0 or t2_ode.size==0:
+                self._K_ode = np.zeros((t_ode.size, t2_ode.size))
                 return
-        
-        if self._index2 is None:
-            # Matrix giving scales of each output
-            self._scale = np.zeros((t_ode.shape[0], t_ode.shape[0]))
-            code="""
-            for(int i=0;i<N; i++){
-              scale_mat[i+i*N] = B[index_ode[i]+output_dim*(index_ode[i])];
-              for(int j=0; j<i; j++){
-                  scale_mat[j+i*N] = B[index_ode[i]+output_dim*index_ode[j]];
-                  scale_mat[i+j*N] = scale_mat[j+i*N];
-                }
-              }
-            """
-            scale_mat, B = self._scale, self.B
-            N, output_dim = index_ode.size, self.output_dim
-            weave.inline(code,['index_ode',
-                               'scale_mat', 'B',
-                               'N', 'output_dim'])
-        else:
-            self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
-            code = """
-            for(int i=0; i<N; i++){
-              for(int j=0; j<N2; j++){
-                scale_mat[i+j*N] = B[index_ode[i]+output_dim*index2_ode[j]]
-              }
-            }
-            """
-            scale_mat, B = self._scale, self.B
-            N, N2, output_dim = index_ode.size, index2.size, self.output_dim
-            weave.inline(code, ['index_ode', 'index2_ode',
-                                'scale_mat', 'B',
-                                'N', 'N2', 'output_dim'])
+            index2_ode = self._index2[self._index2>0]-1
         
         # When index is identical
         if self.is_stationary:
@@ -360,10 +327,10 @@ class Eq_ode1(Kernpart):
                 h2 = self._compute_H_stat(t2_ode, index2_ode, t_ode, index_ode)
             else:
                 h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode)                
-            self._K_ode += 0.5 * (h + h2.T)
+            self._K_ode = 0.5 * (h + h2.T)
 
         if not self.is_normalized:
-            self._K_ode *= np.sqrt(np.pi)*sigma
+            self._K_ode *= np.sqrt(np.pi)*self.sigma
     
     def _compute_H(self, t, index, t2, index2, update_derivatives=False):
         """Helper function for computing part of the ode1 covariance function.
@@ -441,7 +408,7 @@ class Eq_ode1(Kernpart):
                           -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
                           -np.log(Decay[:, None] + Decay2[None, :]))
 
-
+        return h
         # if update_derivatives:
         #     sigma2 = self.sigma*self.sigma
         #     # Update ith decay gradient
diff --git a/GPy/testing/bcgplvm_tests.py b/GPy/testing/bcgplvm_tests.py
new file mode 100644
index 00000000..94282a0b
--- /dev/null
+++ b/GPy/testing/bcgplvm_tests.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import unittest
+import numpy as np
+import GPy
+
+class BCGPLVMTests(unittest.TestCase):
+    def test_kernel_backconstraint(self):
+        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
+        X = np.random.rand(num_data, input_dim)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
+        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
+        bk = GPy.kern.rbf(output_dim)
+        mapping = GPy.mappings.Kernel(output_dim=input_dim, X=Y, kernel=bk)
+        m = GPy.models.BCGPLVM(Y, input_dim, kernel = k, mapping=mapping)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+        
+    def test_linear_backconstraint(self):
+        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
+        X = np.random.rand(num_data, input_dim)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
+        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
+        bk = GPy.kern.rbf(output_dim)
+        mapping = GPy.mappings.Linear(output_dim=input_dim, input_dim=output_dim)
+        m = GPy.models.BCGPLVM(Y, input_dim, kernel = k, mapping=mapping)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+        
+    def test_mlp_backconstraint(self):
+        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
+        X = np.random.rand(num_data, input_dim)
+        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
+        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
+        bk = GPy.kern.rbf(output_dim)
+        mapping = GPy.mappings.MLP(output_dim=input_dim, input_dim=output_dim, hidden_dim=[5, 4, 7])
+        m = GPy.models.BCGPLVM(Y, input_dim, kernel = k, mapping=mapping)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()
diff --git a/GPy/util/erfcx.py b/GPy/util/erfcx.py
new file mode 100644
index 00000000..f42e49f3
--- /dev/null
+++ b/GPy/util/erfcx.py
@@ -0,0 +1,63 @@
+## Copyright (C) 2010 Soren Hauberg
+##
+## Copyright James Hensman 2011
+## 
+## This program is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+import numpy as np
+
+def erfcx (arg):
+	arg = np.atleast_1d(arg)
+	assert(np.all(np.isreal(arg)),"erfcx: input must be real")
+
+	## Get precision dependent thresholds -- or not :p
+	xneg = -26.628;
+	xmax = 2.53e+307;
+
+	## Allocate output
+	result = np.zeros (arg.shape)
+
+	## Find values where erfcx can be evaluated
+	idx_neg = (arg < xneg);
+	idx_max = (arg > xmax);
+	idx = ~(idx_neg | idx_max);
+
+	arg = arg [idx];
+
+	## Perform the actual computation
+	t = 3.97886080735226 / (np.abs (arg) + 3.97886080735226);
+	u = t - 0.5;
+	y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u \
+	    - 0.003963850973605135)   * u - 8.70779635317295828e-4) * u +     \
+	      0.00773672528313526668) * u + 0.00383335126264887303) * u -     \
+	      0.0127223813782122755)  * u - 0.0133823644533460069)  * u +     \
+	      0.0161315329733252248)  * u + 0.0390976845588484035)  * u +     \
+	      0.00249367200053503304;
+	y = ((((((((((((y * u - 0.0838864557023001992) * u -           \
+	      0.119463959964325415) * u + 0.0166207924969367356) * u + \
+	      0.357524274449531043) * u + 0.805276408752910567)  * u + \
+	      1.18902982909273333)  * u + 1.37040217682338167)   * u + \
+	      1.31314653831023098)  * u + 1.07925515155856677)   * u + \
+	      0.774368199119538609) * u + 0.490165080585318424)  * u + \
+	      0.275374741597376782) * t;
+
+	y [arg < 0] = 2 * np.exp (arg [arg < 0]**2) - y [arg < 0];
+
+	## Put the results back into something with the same size is the original input
+	result [idx] = y;
+	result [idx_neg] = np.inf;
+	## result (idx_max) = 0; # not needed as we initialise with zeros
+	return(result)
+
diff --git a/GPy/util/ln_diff_erfs.py b/GPy/util/ln_diff_erfs.py
index 13b442f7..bb9cfe03 100644
--- a/GPy/util/ln_diff_erfs.py
+++ b/GPy/util/ln_diff_erfs.py
@@ -18,54 +18,93 @@ def ln_diff_erfs(x1, x2, return_sign=False):
     :type x2: ndarray
     :return: tuple containing (log(abs(erf(x1) - erf(x2))), sign(erf(x1) - erf(x2)))
     
-    Based on MATLAB code that was written by Antti Honkela and modified by David Luengo, originally derived from code by Neil Lawrence.
+    Based on MATLAB code that was written by Antti Honkela and modified by David Luengo and originally derived from code by Neil Lawrence.
     """
     x1 = np.require(x1).real
     x2 = np.require(x2).real
-
-    v = np.zeros(np.max((x1.size, x2.size)))
+    if x1.size==1:
+        x1 = np.reshape(x1, (1, 1))
+    if x2.size==1:
+        x2 = np.reshape(x2, (1, 1))
+        
+    if x1.shape==x2.shape:
+        v = np.zeros_like(x1)
+    else:
+        if x1.size==1:
+            v = np.zeros(x2.shape)
+        elif x2.size==1:
+            v = np.zeros(x1.shape)
+        else:
+            raise ValueError, "This function does not broadcast unless provided with a scalar."
     
-    # if numel(x1) == 1:
-    #     x1 = x1 * ones(size(x2))
+    if x1.size == 1:
+        x1 = np.tile(x1, x2.shape)
 
-    # if numel(x2) == 1:
-    #     x2 = x2 * ones(size(x1))
+    if x2.size == 1:
+        x2 = np.tile(x2, x1.shape)
 
     sign = np.sign(x1 - x2)
-    I = sign == -1
-    swap = x1[I]
-    x1[I] = x2[I]
-    x2[I] = swap
+    if x1.size == 1:
+        if sign== -1:
+            swap = x1
+            x1 = x2
+            x2 = swap
+    else:
+        I = sign == -1
+        swap = x1[I]
+        x1[I] = x2[I]
+        x2[I] = swap
 
-    # TODO: switch off log of zero warnings.
-    # Case 1: arguments of different sign, no problems with loss of accuracy
-    I1 = np.logical_or(np.logical_and(x1>0, x2<0), np.logical_and(x2>0, x1<0)) # I1=(x1*x2)<0
-    v[I1] = np.log( erf(x1[I1]) - erf(x2[I1]) )
+    with np.errstate(divide='ignore'):
+        # switch off log of zero warnings.
 
-    # Case 2: x1 = x2 so we have log of zero.
-    I2 = (x1 == x2)
-    v[I2] = -np.inf
+        # Case 0: arguments of different sign, no problems with loss of accuracy
+        I0 = np.logical_or(np.logical_and(x1>0, x2<0), np.logical_and(x2>0, x1<0)) # I1=(x1*x2)<0
 
-    # Case 3: Both arguments are non-negative
-    I3 = np.logical_and(x1 > 0, np.logical_and(np.logical_not(I1),
-                                               np.logical_not(I2)))
-    v[I3] = np.log(erfcx(x2[I3])
-                   -erfcx(x1[I3])*np.exp(x2[I3]**2
-                                         -x1[I3]**2)) - x2[I3]**2
-    
-    # Case 4: Both arguments are non-positive
-    I4 = np.logical_and(np.logical_and(np.logical_not(I1),
-                                       np.logical_not(I2)),
-                        np.logical_not(I3))
-    v[I4] = np.log(erfcx(-x1[I4])
-                   -erfcx(-x2[I4])*np.exp(x1[I4]**2
-                                          -x2[I4]**2))-x1[I4]**2
-    
+        # Case 1: x1 = x2 so we have log of zero.
+        I1 = (x1 == x2)
+
+        # Case 2: Both arguments are non-negative
+        I2 = np.logical_and(x1 > 0, np.logical_and(np.logical_not(I0),
+                                                   np.logical_not(I1)))
+        # Case 3: Both arguments are non-positive
+        I3 = np.logical_and(np.logical_and(np.logical_not(I0),
+                                           np.logical_not(I1)),
+                            np.logical_not(I2))
+        _x2 = x2.flatten()
+        _x1 = x1.flatten()
+        for group, flags in zip((0, 1, 2, 3), (I0, I1, I2, I3)):
+
+            if np.any(flags):
+                if not x1.size==1:
+                    _x1 = x1[flags]
+                if not x2.size==1:
+                    _x2 = x2[flags]
+                if group==0:
+                    v[flags] = np.log( erf(_x1) - erf(_x2) )
+                elif group==1:
+                    v[flags] = -np.inf
+                elif group==2:
+                    v[flags] = np.log(erfcx(_x2)
+                                   -erfcx(_x1)*np.exp(_x2**2
+                                                      -_x1**2)) - _x2**2
+                elif group==3:
+                    v[flags] = np.log(erfcx(-_x1)
+                                   -erfcx(-_x2)*np.exp(_x1**2
+                                                          -_x2**2))-_x1**2
+        
     # TODO: switch back on log of zero warnings.
 
     if return_sign:
         return v, sign
     else:
-        # Need to add in a complex part because argument is negative.
-        v[I] += np.pi*1j
+        if v.size==1:
+            if sign==-1:
+                v = v.view('complex64')
+                v += np.pi*1j
+        else:
+            # Need to add in a complex part because argument is negative.
+            v = v.view('complex64')
+            v[I] += np.pi*1j
 
+    return v

From f2094778857cbd39a08fef7d9cbca38421e78fba Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 24 Sep 2013 11:49:47 +0100
Subject: [PATCH 09/17] allowed passing of factr to bfgs algorithm

---
 GPy/inference/optimization.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 589ec4c7..e65b862e 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -29,7 +29,7 @@ class Optimizer():
 
     """
     def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3,
-                 ftol=None, gtol=None, xtol=None):
+                 ftol=None, gtol=None, xtol=None, bfgs_factor=None):
         self.opt_name = None
         self.x_init = x_init
         self.messages = messages
@@ -39,6 +39,7 @@ class Optimizer():
         self.status = None
         self.max_f_eval = int(max_f_eval)
         self.max_iters = int(max_iters)
+        self.bfgs_factor = bfgs_factor
         self.trace = None
         self.time = "Not available"
         self.xtol = xtol
@@ -128,6 +129,8 @@ class opt_lbfgsb(Optimizer):
             print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
         if self.gtol is not None:
             opt_dict['pgtol'] = self.gtol
+        if self.bfgs_factor is not None:
+            opt_dict['factr'] = self.bfgs_factor
 
         opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
                                             maxfun=self.max_iters, **opt_dict)

From 00cbb5f742fda6b1da3886ca6259abd289fbd35a Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 27 Sep 2013 17:39:38 +0100
Subject: [PATCH 10/17] some tidying in the EP likelihood

Changes self.N to self.num_data for consistency with everywhere else
added the factor of 2pi to Z.
---
 GPy/likelihoods/ep.py | 93 ++++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 46 deletions(-)

diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py
index 8fdf423f..d242e583 100644
--- a/GPy/likelihoods/ep.py
+++ b/GPy/likelihoods/ep.py
@@ -16,20 +16,20 @@ class EP(likelihood):
         """
         self.noise_model = noise_model
         self.data = data
-        self.N, self.output_dim = self.data.shape
+        self.num_data, self.output_dim = self.data.shape
         self.is_heteroscedastic = True
         self.Nparams = 0
         self._transf_data = self.noise_model._preprocess_values(data)
 
         #Initial values - Likelihood approximation parameters:
         #p(y|f) = t(f|tau_tilde,v_tilde)
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
+        self.tau_tilde = np.zeros(self.num_data)
+        self.v_tilde = np.zeros(self.num_data)
 
         #initial values for the GP variables
-        self.Y = np.zeros((self.N,1))
-        self.covariance_matrix = np.eye(self.N)
-        self.precision = np.ones(self.N)[:,None]
+        self.Y = np.zeros((self.num_data,1))
+        self.covariance_matrix = np.eye(self.num_data)
+        self.precision = np.ones(self.num_data)[:,None]
         self.Z = 0
         self.YYT = None
         self.V = self.precision * self.Y
@@ -39,11 +39,11 @@ class EP(likelihood):
         super(EP, self).__init__()
 
     def restart(self):
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
-        self.Y = np.zeros((self.N,1))
-        self.covariance_matrix = np.eye(self.N)
-        self.precision = np.ones(self.N)[:,None]
+        self.tau_tilde = np.zeros(self.num_data)
+        self.v_tilde = np.zeros(self.num_data)
+        self.Y = np.zeros((self.num_data,1))
+        self.covariance_matrix = np.eye(self.num_data)
+        self.precision = np.ones(self.num_data)[:,None]
         self.Z = 0
         self.YYT = None
         self.V = self.precision * self.Y
@@ -77,6 +77,7 @@ class EP(likelihood):
         sigma_sum = 1./self.tau_ + 1./self.tau_tilde
         mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
         self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
+        self.Z += 0.5*self.num_data*np.log(2*np.pi)
 
         self.Y =  mu_tilde[:,None]
         self.YYT = np.dot(self.Y,self.Y.T)
@@ -101,7 +102,7 @@ class EP(likelihood):
         self.eta, self.delta = power_ep
 
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
-        mu = np.zeros(self.N)
+        mu = np.zeros(self.num_data)
         Sigma = K.copy()
 
         """
@@ -110,15 +111,15 @@ class EP(likelihood):
         sigma_ = 1./tau_
         mu_ = v_/tau_
         """
-        self.tau_ = np.empty(self.N,dtype=float)
-        self.v_ = np.empty(self.N,dtype=float)
+        self.tau_ = np.empty(self.num_data,dtype=float)
+        self.v_ = np.empty(self.num_data,dtype=float)
 
         #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=float)
-        self.Z_hat = np.empty(self.N,dtype=float)
-        phi = np.empty(self.N,dtype=float)
-        mu_hat = np.empty(self.N,dtype=float)
-        sigma2_hat = np.empty(self.N,dtype=float)
+        z = np.empty(self.num_data,dtype=float)
+        self.Z_hat = np.empty(self.num_data,dtype=float)
+        phi = np.empty(self.num_data,dtype=float)
+        mu_hat = np.empty(self.num_data,dtype=float)
+        sigma2_hat = np.empty(self.num_data,dtype=float)
 
         #Approximation
         epsilon_np1 = self.epsilon + 1.
@@ -127,7 +128,7 @@ class EP(likelihood):
         self.np1 = [self.tau_tilde.copy()]
         self.np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.N)
+            update_order = np.random.permutation(self.num_data)
             for i in update_order:
                 #Cavity distribution parameters
                 self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
@@ -145,13 +146,13 @@ class EP(likelihood):
                 self.iterations += 1
             #Sigma recomptutation with Cholesky decompositon
             Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
-            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
+            B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
             L = jitchol(B)
             V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
             Sigma = K - np.dot(V.T,V)
             mu = np.dot(Sigma,self.v_tilde)
-            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
-            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
+            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
+            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
             self.np1.append(self.tau_tilde.copy())
             self.np2.append(self.v_tilde.copy())
 
@@ -199,7 +200,7 @@ class EP(likelihood):
         Sigma = Diag + P*R.T*R*P.T + K
         mu = w + P*Gamma
         """
-        mu = np.zeros(self.N)
+        mu = np.zeros(self.num_data)
         LLT = Kmm.copy()
         Sigma_diag = Qnn_diag.copy()
 
@@ -209,15 +210,15 @@ class EP(likelihood):
         sigma_ = 1./tau_
         mu_ = v_/tau_
         """
-        self.tau_ = np.empty(self.N,dtype=float)
-        self.v_ = np.empty(self.N,dtype=float)
+        self.tau_ = np.empty(self.num_data,dtype=float)
+        self.v_ = np.empty(self.num_data,dtype=float)
 
         #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=float)
-        self.Z_hat = np.empty(self.N,dtype=float)
-        phi = np.empty(self.N,dtype=float)
-        mu_hat = np.empty(self.N,dtype=float)
-        sigma2_hat = np.empty(self.N,dtype=float)
+        z = np.empty(self.num_data,dtype=float)
+        self.Z_hat = np.empty(self.num_data,dtype=float)
+        phi = np.empty(self.num_data,dtype=float)
+        mu_hat = np.empty(self.num_data,dtype=float)
+        sigma2_hat = np.empty(self.num_data,dtype=float)
 
         #Approximation
         epsilon_np1 = 1
@@ -226,7 +227,7 @@ class EP(likelihood):
         np1 = [self.tau_tilde.copy()]
         np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.N)
+            update_order = np.random.permutation(self.num_data)
             for i in update_order:
                 #Cavity distribution parameters
                 self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
@@ -255,8 +256,8 @@ class EP(likelihood):
             Sigma_diag = np.sum(V*V,-2)
             Knmv_tilde = np.dot(Kmn,self.v_tilde)
             mu = np.dot(V2.T,Knmv_tilde)
-            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
-            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
+            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
+            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
             np1.append(self.tau_tilde.copy())
             np2.append(self.v_tilde.copy())
 
@@ -297,9 +298,9 @@ class EP(likelihood):
         Sigma = Diag + P*R.T*R*P.T + K
         mu = w + P*Gamma
         """
-        self.w = np.zeros(self.N)
+        self.w = np.zeros(self.num_data)
         self.Gamma = np.zeros(num_inducing)
-        mu = np.zeros(self.N)
+        mu = np.zeros(self.num_data)
         P = P0.copy()
         R = R0.copy()
         Diag = Diag0.copy()
@@ -312,15 +313,15 @@ class EP(likelihood):
         sigma_ = 1./tau_
         mu_ = v_/tau_
         """
-        self.tau_ = np.empty(self.N,dtype=float)
-        self.v_ = np.empty(self.N,dtype=float)
+        self.tau_ = np.empty(self.num_data,dtype=float)
+        self.v_ = np.empty(self.num_data,dtype=float)
 
         #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=float)
-        self.Z_hat = np.empty(self.N,dtype=float)
-        phi = np.empty(self.N,dtype=float)
-        mu_hat = np.empty(self.N,dtype=float)
-        sigma2_hat = np.empty(self.N,dtype=float)
+        z = np.empty(self.num_data,dtype=float)
+        self.Z_hat = np.empty(self.num_data,dtype=float)
+        phi = np.empty(self.num_data,dtype=float)
+        mu_hat = np.empty(self.num_data,dtype=float)
+        sigma2_hat = np.empty(self.num_data,dtype=float)
 
         #Approximation
         epsilon_np1 = 1
@@ -329,7 +330,7 @@ class EP(likelihood):
         self.np1 = [self.tau_tilde.copy()]
         self.np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.N)
+            update_order = np.random.permutation(self.num_data)
             for i in update_order:
                 #Cavity distribution parameters
                 self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
@@ -368,8 +369,8 @@ class EP(likelihood):
             self.w = Diag * self.v_tilde
             self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
             mu = self.w + np.dot(P,self.Gamma)
-            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
-            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
+            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
+            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
             self.np1.append(self.tau_tilde.copy())
             self.np2.append(self.v_tilde.copy())
 

From 1581f78412ddf375b01ae3af4d60c80586127d64 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 30 Sep 2013 08:14:25 +0100
Subject: [PATCH 11/17] Updates to sympykern including bug fixes and ability to
 name covariance. Include test for rbf_sympy in kernel tests. Remove
 coregionalization test as it's causing a core dump! Need to chase this up.

---
 GPy/kern/constructors.py         |  45 +++--
 GPy/kern/parts/eq_ode1.py        | 272 ++++++++++++++++++++++---------
 GPy/kern/parts/sympy_helpers.cpp |   3 +-
 GPy/kern/parts/sympykern.py      |  96 +++++++----
 GPy/testing/kernel_tests.py      |   4 +
 GPy/util/datasets.py             |  25 ++-
 6 files changed, 312 insertions(+), 133 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 15a49b77..62ea24f9 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -283,13 +283,13 @@ def Brownian(input_dim, variance=1.):
     part = parts.Brownian.Brownian(input_dim, variance)
     return kern(input_dim, [part])
 
-try:
-    import sympy as sp
-    from sympykern import spkern
-    from sympy.parsing.sympy_parser import parse_expr
-    sympy_available = True
-except ImportError:
-    sympy_available = False
+#try:
+import sympy as sp
+from parts.sympykern import spkern
+from sympy.parsing.sympy_parser import parse_expr
+sympy_available = True
+#except ImportError:
+#    sympy_available = False
 
 if sympy_available:
     def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
@@ -298,24 +298,35 @@ if sympy_available:
         """
         X = [sp.var('x%i' % i) for i in range(input_dim)]
         Z = [sp.var('z%i' % i) for i in range(input_dim)]
-        rbf_variance = sp.var('rbf_variance',positive=True)
+        variance = sp.var('variance',positive=True)
         if ARD:
-            rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(input_dim)]
-            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
+            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
+            dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
             dist = parse_expr(dist_string)
-            f =  rbf_variance*sp.exp(-dist/2.)
+            f =  variance*sp.exp(-dist/2.)
         else:
-            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+            lengthscale = sp.var('lengthscale',positive=True)
             dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
             dist = parse_expr(dist_string)
-            f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-        return kern(input_dim, [spkern(input_dim, f)])
+            f =  variance*sp.exp(-dist/(2*lengthscale**2))
+        return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
 
-    def sympykern(input_dim, k):
+    def sympykern(input_dim, k,name=None):
         """
-        A kernel from a symbolic sympy representation
+        A base kernel object, where all the hard work in done by sympy.
+
+        :param k: the covariance function
+        :type k: a positive definite sympy function of x1, z1, x2, z2...
+
+        To construct a new sympy kernel, you'll need to define:
+         - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
+         - that's it! we'll extract the variables from the function k.
+
+        Note:
+         - to handle multiple inputs, call them x1, z1, etc
+         - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
         """
-        return kern(input_dim, [spkern(input_dim, k)])
+        return kern(input_dim, [spkern(input_dim, k,name)])
 del sympy_available
 
 def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/parts/eq_ode1.py
index 6fd5fb91..70e3c49d 100644
--- a/GPy/kern/parts/eq_ode1.py
+++ b/GPy/kern/parts/eq_ode1.py
@@ -120,14 +120,75 @@ class Eq_ode1(Kernpart):
             t2_mat = self._t2[None, self._rorder2]
             target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
 
-
-
     def Kdiag(self,index,target):
         #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
         pass
     
-    def dK_dtheta(self,dL_dK,index,index2,target):
-        pass
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        
+        # First extract times and indices.
+        self._extract_t_indices(X, X2, dL_dK=dL_dK)
+        self._dK_ode_dtheta(target)
+        
+
+    def _dK_ode_dtheta(self, target):
+        """Do all the computations for the ode parts of the covariance function."""
+        t_ode = self._t[self._index>0]
+        dL_dK_ode = self._dL_dK[self._index>0, :]
+        index_ode = self._index[self._index>0]-1
+        if self._t2 is None:
+            if t_ode.size==0:
+                return        
+            t2_ode = t_ode
+            dL_dK_ode = dL_dK_ode[:, self._index>0]
+            index2_ode = index_ode
+        else:
+            t2_ode = self._t2[self._index2>0]
+            dL_dK_ode = dL_dK_ode[:, self._index2>0]
+            if t_ode.size==0 or t2_ode.size==0:
+                return
+            index2_ode = self._index2[self._index2>0]-1
+
+        h1 = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
+        #self._dK_ddelay = self._dh_ddelay
+        self._dK_dsigma = self._dh_dsigma
+
+        if self._t2 is None:
+            h2 = h1
+        else:
+            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)
+
+        #self._dK_ddelay += self._dh_ddelay.T
+        self._dK_dsigma += self._dh_dsigma.T
+        # C1 = self.sensitivity
+        # C2 = self.sensitivity
+
+        # K = 0.5 * (h1 + h2.T)
+        # var2 = C1*C2
+        # if self.is_normalized:
+        #     dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2
+        #     dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2
+        #     dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma))
+        #     dk_dC1 = C2 * sum(sum(dL_dK.*K))
+        #     dk_dC2 = C1 * sum(sum(dL_dK.*K))
+        # else:
+        #     K = np.sqrt(np.pi) * K
+        #     dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K))
+        #     dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K))
+
+
+        # dk_dSim1Variance = dk_dC1
+        # Last element is the length scale.
+        (dL_dK_ode[:, :, None]*self._dh_ddelay[:, None, :]).sum(2)
+
+        target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()
+
+
+        # # only pass the gradient with respect to the inverse width to one
+        # # of the gradient vectors ... otherwise it is counted twice.
+        # g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance])
+        # g2 = real([dk_dD2 0 dk_dSim2Variance])
+        # return g1, g2"""
 
     def dKdiag_dtheta(self,dL_dKdiag,index,target):
         pass
@@ -135,7 +196,7 @@ class Eq_ode1(Kernpart):
     def dK_dX(self,dL_dK,X,X2,target):
         pass
 
-    def _extract_t_indices(self, X, X2=None):
+    def _extract_t_indices(self, X, X2=None, dL_dK=None):
         """Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
 
         # TODO: some fast checking here to see if this needs recomputing?
@@ -153,6 +214,7 @@ class Eq_ode1(Kernpart):
         if X2 is None:
             self._t2 = None
             self._index2 = None
+            self._order2 = self._order
             self._rorder2 = self._rorder
         else:
             if not X2.shape[1] == 2:
@@ -164,6 +226,10 @@ class Eq_ode1(Kernpart):
             self._t2 = self._t2[self._order2]
             self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
 
+        if dL_dK is not None:
+            self._dL_dK = dL_dK[self._order, :]
+            self._dL_dK = self._dL_dK[:, self._order2]
+            
     def _K_computations(self, X, X2):
         """Perform main body of computations for the ode1 covariance function."""
         # First extract times and indices.
@@ -279,16 +345,16 @@ class Eq_ode1(Kernpart):
         decay_vals = self.decay[index_ode][:, None]
         half_sigma_d_i = 0.5*self.sigma*decay_vals
 
-        if self.is_stationary == False:
-            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
-        else:
+        if self.is_stationary:
             ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+        else:
+            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
         sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)
 
         sK *= 0.5
 
         if not self.is_normalized:
-            sK *= sqrt(pi)*self.sigma
+            sK *= np.sqrt(np.pi)*self.sigma
 
 
         if transpose:
@@ -315,24 +381,84 @@ class Eq_ode1(Kernpart):
             index2_ode = self._index2[self._index2>0]-1
         
         # When index is identical
-        if self.is_stationary:
-            h = self._compute_H_stat(t_ode, index_ode, t2_ode, index2_ode)
-        else:
-            h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode)
+        h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
 
         if self._t2 is None:
             self._K_ode = 0.5 * (h + h.T)
         else:
-            if self.is_stationary:
-                h2 = self._compute_H_stat(t2_ode, index2_ode, t_ode, index_ode)
-            else:
-                h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode)                
+            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)                
             self._K_ode = 0.5 * (h + h2.T)
 
         if not self.is_normalized:
             self._K_ode *= np.sqrt(np.pi)*self.sigma
+    def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
+        """Helper function for computing H for the diagonal only.
+        :param t: time input.
+        :type t: array
+        :param index: first output indices
+        :type index: array of int.
+        :param index: second output indices
+        :type index: array of int.
+        :param update_derivatives: whether or not to update the derivative portions (default False).
+        :type update_derivatives: bool
+        :param stationary: whether to compute the stationary version of the covariance (default False).
+        :type stationary: bool"""
+
+        """if delta_i~=delta_j:
+            [h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
+        else:
+            Decay = self.decay[index]
+            if self.delay is not None:
+                t = t - self.delay[index]
+            
+            t_squared = t*t
+            half_sigma_decay = 0.5*self.sigma*Decay
+            [ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
+                                              half_sigma_decay)
     
-    def _compute_H(self, t, index, t2, index2, update_derivatives=False):
+            [ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
+                                              half_sigma_decay - t/self.sigma)
+            
+            h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+                             + ln_part_1
+                             - log(Decay + D_j)) 
+                 - sign2*np.exp(half_sigma_decay*half_sigma_decay
+                                - (Decay + D_j)*t
+                                + ln_part_2 
+                                - log(Decay + D_j)))
+    
+            sigma2 = self.sigma*self.sigma
+
+        if update_derivatives:
+        
+            dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h 
+                       + t*sign2*np.exp(
+                half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
+                )
+                       + self.sigma/np.sqrt(np.pi)*
+                       (-1 + np.exp(-t_squared/sigma2-Decay*t)
+                        + np.exp(-t_squared/sigma2-D_j*t)
+                        - np.exp(-(Decay + D_j)*t)))
+        
+            dh_dD_i = (dh_dD_i/(Decay+D_j)).real
+        
+        
+        
+            dh_dD_j = (t*sign2*np.exp(
+                half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
+                )
+                       -h)
+            dh_dD_j = (dh_dD_j/(Decay + D_j)).real
+
+            dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
+                        + 2/(np.sqrt(np.pi)*(Decay+D_j))\
+                        *((-Decay/2) \
+                          + (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
+                          - (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
+                          - Decay/2*np.exp(-(Decay+D_j)*t))"""
+        pass
+    
+    def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
         """Helper function for computing part of the ode1 covariance function.
 
         :param t: first time input.
@@ -348,44 +474,16 @@ class Eq_ode1(Kernpart):
         :rtype: ndarray
 """
 
-
+        if stationary:
+            raise NotImplementedError, "Error, stationary version of this covariance not yet implemented."
         # Vector of decays and delays associated with each output.
-        Decay = np.zeros(t.shape[0])
-        Decay2 = np.zeros(t2.shape[0])
-        Delay = np.zeros(t.shape[0])
-        Delay2 = np.zeros(t2.shape[0])
-        code="""
-        for(int i=0;i<N; i++){
-          Decay[i] = decay[index[i]];
-          }
-        for(int i=0; i<N2; i++){
-          Decay2[i] = decay[index2[i]];
-          }
-        """
-        decay = self.decay
-        N, N2 = index.size, index2.size
-        weave.inline(code,['index', 'index2',
-                           'Decay', 'Decay2',
-                           'decay',
-                           'N', 'N2'])
+        Decay = self.decay[index]
+        Decay2 = self.decay[index2]
         t_mat = t[:, None]
         t2_mat = t2[None, :]
         if self.delay is not None:
-            code="""
-            for(int i=0;i<N; i++){
-              Delay[i] = delay[index[i]];
-              }
-            for(int i=0; i<N2; i++){
-              Delay2[i] = delay[index2[i]];
-              }
-            """
-            delay=self.delay
-            N, N2 = index.size, index2.size
-            weave.inline(code,['index', 'index2',
-                               'Delay', 'Delay2', 
-                               'delay', 
-                               'N', 'N2'])
-
+            Delay = self.delay[index]
+            Delay2 = self.delay[index2]
             t_mat-=Delay[:, None]
             t2_mat-=Delay2[None, :]
 
@@ -408,31 +506,51 @@ class Eq_ode1(Kernpart):
                           -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
                           -np.log(Decay[:, None] + Decay2[None, :]))
 
-        return h
-        # if update_derivatives:
-        #     sigma2 = self.sigma*self.sigma
-        #     # Update ith decay gradient
-        #     dh_ddecay += (0.5*self.decay[i]*sigma2*(self.decay[i] + decay[j])-1)*h
-        #     + (-diff_t*sign1*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*diff_t+ln_part_1) 
-        #        +t_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*t_mat - decay[j]*t2_mat+ln_part_2)) ...
-        #     +self.sigma/sqrt(pi)*(-np.exp(-diff_t*diff_t/sigma2)
-        #                           +np.exp(-t2_mat*t2_mat/sigma2-self.decay[i]*t_mat) 
-        #                           +np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) ...
-        #                           -np.exp(-(self.decay[i]*t_mat + decay[j]*t2_mat)))
-        #     self._dh_ddecay[i] += real(dh_ddecay/(self.decay[i]+decay[j]))
+        if update_derivatives:
+            sigma2 = self.sigma*self.sigma
+            # Update ith decay gradient
 
-        #     # Update jth decay gradient
-        #     dh_ddecay = t2_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-(self.decay[i]*t_mat + decay[j]*t2_mat)+ln_part_2)-h
-        #     self._dh_ddecay[j] += real(dh_ddecay/(self.decay[i] + decay[j]))
+            dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
+                         + (-diff_t*sign1*np.exp(
+                half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
+                )
+                            +t_mat*sign2*np.exp(
+                half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
+                - Decay2*t2_mat+ln_part_2))
+                         +self.sigma/np.sqrt(np.pi)*(
+                -np.exp(
+                -diff_t*diff_t/sigma2
+                )+np.exp(
+                -t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
+                )+np.exp(
+                -t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
+                )-np.exp(
+                -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+                )
+                ))
+            self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
             
-        #     # Update sigma gradient
-        #     self._dh_dsigma += 0.5*self.decay[i]*self.decay[i]*self.sigma*h + 2/(np.sqrt(np.pi)*(self.decay[i]+decay[j]))*((-diff_t/sigma2-self.decay[i]/ 
-        #                                                                                                                     2)*np.exp(-diff_t*
-        #                                                                                                                               diff_t/sigma2) 
-        #                                                                                                                    + (-t2_mat/sigma2+self.decay[i]/2)
-        #                                                                                                                    *np.exp(-t2_mat*t2_mat/sigma2 
-        #                                                                                                                            -self.decay[i]*t_mat) 
-        #                                                                                                                    - (-t_mat/sigma2-self.decay[i]/2) 
-        #                                                                                                                    *np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) 
-        #                                                                                                                    - self.decay[i]/2*np.exp(-(self.decay[i]*t_mat+decay[j]*t2_mat)))
+            # Update jth decay gradient
+            dh_ddecay2 = (t2_mat*sign2
+                         *np.exp(
+                half_sigma_decay_i*half_sigma_decay_i
+                -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+                +ln_part_2
+                )
+                         -h)
+            self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real
+            
+            # Update sigma gradient
+            self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
+                               + 2/(np.sqrt(np.pi)
+                                    *(Decay[:, None]+Decay2[None, :]))
+                               *((-diff_t/sigma2-Decay[:, None]/2)
+                                 *np.exp(-diff_t*diff_t/sigma2)
+                                 + (-t2_mat/sigma2+Decay[:, None]/2)
+                                 *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat) 
+                                 - (-t_mat/sigma2-Decay[:, None]/2) 
+                                 *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat) 
+                                 - Decay[:, None]/2
+                                 *np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
                 
+        return h
diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp
index 2af4737a..ec8ac87b 100644
--- a/GPy/kern/parts/sympy_helpers.cpp
+++ b/GPy/kern/parts/sympy_helpers.cpp
@@ -1,6 +1,7 @@
 #include <math.h>
 double DiracDelta(double x){
-    if((x<0.000001) & (x>-0.000001))//go on, laught at my c++ skills
+  // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
+    if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
         return 1.0;
     else
         return 0.0;
diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index def1bc5f..9755e37b 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -26,8 +26,11 @@ class spkern(Kernpart):
      - to handle multiple inputs, call them x1, z1, etc
      - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
     """
-    def __init__(self,input_dim,k,param=None):
-        self.name='sympykern'
+    def __init__(self,input_dim,k,name=None,param=None):
+        if name is None:
+            self.name='sympykern'
+        else:
+            self.name = name
         self._sp_k = k
         sp_vars = [e for e in k.atoms() if e.is_Symbol]
         self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
@@ -56,9 +59,9 @@ class spkern(Kernpart):
 
         self.weave_kwargs = {\
             'support_code':self._function_code,\
-            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
+            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
             'headers':['"sympy_helpers.h"'],\
-            'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
+            'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
             #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
             'extra_compile_args':[],\
             'extra_link_args':['-lgomp'],\
@@ -109,14 +112,15 @@ class spkern(Kernpart):
         f.write(self._function_header)
         f.close()
 
-        #get rid of derivatives of DiracDelta
+        # Substitute any known derivatives which sympy doesn't compute
         self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
 
-        #Here's some code to do the looping for K
-        arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\
-                + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\
-                + ["param[%i]"%i for i in range(self.num_params)])
+        # Here's the code to do the looping for K
+        arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
+                            + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
+                            + ["param[%i]"%i for i in range(self.num_params)])
 
+        
         self._K_code =\
         """
         int i;
@@ -133,9 +137,14 @@ class spkern(Kernpart):
         %s
         """%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
 
+        # Similar code when only X is provided. 
+        self._K_code_X = self._K_code.replace('Z[', 'X[')
+
+        
+        # Code to compute diagonal of covariance.
         diag_arglist = re.sub('Z','X',arglist)
         diag_arglist = re.sub('j','i',diag_arglist)
-        #Here's some code to do the looping for Kdiag
+        # Code to do the looping for Kdiag
         self._Kdiag_code =\
         """
         int i;
@@ -148,8 +157,9 @@ class spkern(Kernpart):
         %s
         """%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
 
-        #here's some code to compute gradients
+        # Code to compute gradients
         funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in  enumerate(self._sp_theta)])
+
         self._dK_dtheta_code =\
         """
         int i;
@@ -164,9 +174,12 @@ class spkern(Kernpart):
             }
         }
         %s
-        """%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
+        """%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
 
-        #here's some code to compute gradients for Kdiag TODO: thius is yucky.
+        # Similar code when only X is provided, change argument lists.
+        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
+
+        # Code to compute gradients for Kdiag TODO: needs clean up
         diag_funclist = re.sub('Z','X',funclist,count=0)
         diag_funclist = re.sub('j','i',diag_funclist)
         diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
@@ -181,8 +194,12 @@ class spkern(Kernpart):
         %s
         """%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
 
-        #Here's some code to do gradients wrt x
+        # Code for gradients wrt X
         gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
+        if False:
+            gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
+            if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""
+
         self._dK_dX_code = \
         """
         int i;
@@ -192,30 +209,34 @@ class spkern(Kernpart):
         int input_dim = X_array->dimensions[1];
         //#pragma omp parallel for private(j)
         for (i=0;i<N; i++){
-            for (j=0; j<num_inducing; j++){
-                %s
-                //if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
-                //if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
-
-            }
+          for (j=0; j<num_inducing; j++){
+            %s
+          }
         }
         %s
         """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
+  
+        # Create code for call when just X is passed as argument.
+        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
 
-        #now for gradients of Kdiag wrt X
+        diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
+        diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
+        diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)
+
+        # Code for gradients of Kdiag wrt X
         self._dKdiag_dX_code= \
         """
-        int i;
-        int j;
         int N = partial_array->dimensions[0];
-        int num_inducing = 0;
         int input_dim = X_array->dimensions[1];
-        for (i=0;i<N; i++){
-            j = i;
+        for (int i=0;i<N; i++){
             %s
         }
         %s
-        """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
+        """%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a
+        # string representation forces recompile when needed Get rid
+        # of Zs in argument for diagonal. TODO: Why wasn't
+        # diag_funclist called here? Need to check that.
+        #self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
 
 
         #TODO: insert multiple functions here via string manipulation
@@ -223,7 +244,10 @@ class spkern(Kernpart):
 
     def K(self,X,Z,target):
         param = self._param
-        weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
+        if Z is None:
+            weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
+        else:
+            weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
 
     def Kdiag(self,X,target):
         param = self._param
@@ -231,21 +255,25 @@ class spkern(Kernpart):
 
     def dK_dtheta(self,partial,X,Z,target):
         param = self._param
-        weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        if Z is None:
+            weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
+        else:
+            weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
 
     def dKdiag_dtheta(self,partial,X,target):
         param = self._param
-        Z = X
-        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
 
     def dK_dX(self,partial,X,Z,target):
         param = self._param
-        weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        if Z is None:
+            weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
+        else:
+            weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
 
     def dKdiag_dX(self,partial,X,target):
         param = self._param
-        Z = X
-        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
 
     def _set_params(self,param):
         #print param.flags['C_CONTIGUOUS']
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index e67649f4..7f59e37c 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -21,6 +21,10 @@ class KernelTests(unittest.TestCase):
         kern = GPy.kern.rbf(5)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
+    def test_rbf_sympykernel(self):
+        kern = GPy.kern.rbf_sympy(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
     def test_rbf_invkernel(self):
         kern = GPy.kern.rbf_inv(5)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 1164d7e6..79bc3fc3 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -7,9 +7,21 @@ import urllib as url
 import zipfile
 import tarfile
 import datetime
-    
+
+ipython_notebook = False
+if ipython_notebook:
+    import IPython.core.display
+    def ipynb_input(varname, prompt=''):
+        """Prompt user for input and assign string val to given variable name."""
+        js_code = ("""
+            var value = prompt("{prompt}","");
+            var py_code = "{varname} = '" + value + "'";
+            IPython.notebook.kernel.execute(py_code);
+        """).format(prompt=prompt, varname=varname)
+        return IPython.core.display.Javascript(js_code)
 
 import sys, urllib
+
 def reporthook(a,b,c): 
     # ',' at the end of the line is important!
     #print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
@@ -130,14 +142,18 @@ The database was created with funding from NSF EIA-0196217.""",
                                         'license' : None,
                                         'size' : 24229368},
                   }
-                  
+
+
 def prompt_user():
     """Ask user for agreeing to data set licenses."""
     # raw_input returns the empty string for "enter"
     yes = set(['yes', 'y'])
     no = set(['no','n'])
-
-    choice = raw_input().lower()
+    choice = ''
+    if ipython_notebook:
+        ipynb_input(choice, prompt='provide your answer here')
+    else:
+        choice = raw_input().lower()
     if choice in yes:
         return True
     elif choice in no:
@@ -146,6 +162,7 @@ def prompt_user():
         sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'")
         return prompt_user()
 
+
 def data_available(dataset_name=None):
     """Check if the data set is available on the local machine already."""
     for file_list in data_resources[dataset_name]['files']:

From eee597f9ab3370704312aff7c9493213d1f23ae4 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 30 Sep 2013 08:16:25 +0100
Subject: [PATCH 12/17] Remove coregionalization test as it's causing a core
 dump! Need to chase this up.

---
 GPy/testing/kernel_tests.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 7f59e37c..87d4a20e 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -83,19 +83,19 @@ class KernelTests(unittest.TestCase):
         kern = GPy.kern.poly(5, degree=4)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
-    def test_coregionalization(self):
-        X1 = np.random.rand(50,1)*8
-        X2 = np.random.rand(30,1)*5
-        index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
-        X = np.hstack((np.vstack((X1,X2)),index))
-        Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
-        Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
-        Y = np.vstack((Y1,Y2))
+    # def test_coregionalization(self):
+    #     X1 = np.random.rand(50,1)*8
+    #     X2 = np.random.rand(30,1)*5
+    #     index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
+    #     X = np.hstack((np.vstack((X1,X2)),index))
+    #     Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
+    #     Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
+    #     Y = np.vstack((Y1,Y2))
 
-        k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
-        k2 = GPy.kern.coregionalize(2,1)
-        kern = k1**k2
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+    #     k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
+    #     k2 = GPy.kern.coregionalize(2,1)
+    #     kern = k1**k2
+    #     self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
 
 if __name__ == "__main__":

From b815dd98d1f82ea99980cb06e73542fce8c90eb4 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 30 Sep 2013 08:23:25 +0100
Subject: [PATCH 13/17] Change to criterion on positive definite check
 (epsilon*10 instead of epsilon).

---
 GPy/kern/kern.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index cf36e16c..5a8882dd 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -578,7 +578,7 @@ class Kern_check_model(Model):
 
     def is_positive_definite(self):
         v = np.linalg.eig(self.kernel.K(self.X))[0]
-        if any(v<-sys.float_info.epsilon):
+        if any(v<-10*sys.float_info.epsilon):
             return False
         else:
             return True

From 7a5c5649c38de28e358c88939caa8280d099c5d3 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 30 Sep 2013 13:35:39 +0100
Subject: [PATCH 14/17] Replaced check for sympy in constructors.py.

---
 GPy/kern/constructors.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 62ea24f9..ca349b59 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -283,15 +283,16 @@ def Brownian(input_dim, variance=1.):
     part = parts.Brownian.Brownian(input_dim, variance)
     return kern(input_dim, [part])
 
-#try:
-import sympy as sp
-from parts.sympykern import spkern
-from sympy.parsing.sympy_parser import parse_expr
-sympy_available = True
-#except ImportError:
-#    sympy_available = False
+try:
+    import sympy as sp
+    sympy_available = True
+except ImportError:
+    sympy_available = False
 
 if sympy_available:
+    from parts.sympykern import spkern
+    from sympy.parsing.sympy_parser import parse_expr
+
     def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
         """
         Radial Basis Function covariance.

From 0d4fd01ae1b174ae09aaa75c10a79dfa1e2184cf Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 30 Sep 2013 17:00:38 +0100
Subject: [PATCH 15/17] Minor changes to della_gatta example (multiple optima).

---
 GPy/examples/regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index c6dc1d9f..888a01d9 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -132,7 +132,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
     length_scales = np.linspace(0.1, 60., resolution)
     log_SNRs = np.linspace(-3., 4., resolution)
 
-    data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
+    data = GPy.util.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
     # data['Y'] = data['Y'][0::2, :]
     # data['X'] = data['X'][0::2, :]
 

From b7f88991aff3fe7411e24ca8a1594ecdfeff8e60 Mon Sep 17 00:00:00 2001
From: mu <mu@mu-DQ67SW.(none)>
Date: Mon, 30 Sep 2013 17:17:38 +0100
Subject: [PATCH 16/17] testing ODE

---
 GPy/kern/constructors.py   |  20 +++++
 GPy/kern/parts/ODE_1.py    | 161 +++++++++++++++++++++++++++++++++++++
 GPy/kern/parts/__init__.py |   1 +
 GPy/kern/parts/odekern1.c  |  38 +++++++++
 4 files changed, 220 insertions(+)
 create mode 100644 GPy/kern/parts/ODE_1.py
 create mode 100644 GPy/kern/parts/odekern1.c

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 046b0205..a61f46b4 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -456,3 +456,23 @@ def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa
         kernel += k**k_coreg.copy()
 
     return kernel
+
+def ODE_1(input_dim=1, varianceU=1.,  varianceY=1., lengthscaleU=None,  lengthscaleY=None):
+    """
+    kernel resultiong from a first order ODE with OU driving GP
+
+    :param input_dim: the number of input dimension, has to be equal to one
+    :type input_dim: int
+    :param varianceU: variance of the driving GP
+    :type varianceU: float
+    :param lengthscaleU: lengthscale of the driving GP
+    :type lengthscaleU: float
+    :param varianceY: 'variance' of the transfer function
+    :type varianceY: float
+    :param lengthscaleY: 'lengthscale' of the transfer function
+    :type lengthscaleY: float
+    :rtype: kernel object
+
+    """
+    part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
+    return kern(input_dim, [part])
\ No newline at end of file
diff --git a/GPy/kern/parts/ODE_1.py b/GPy/kern/parts/ODE_1.py
new file mode 100644
index 00000000..416278e3
--- /dev/null
+++ b/GPy/kern/parts/ODE_1.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import Kernpart
+import numpy as np
+
+class ODE_1(Kernpart):
+    """
+    kernel resultiong from a first order ODE with OU driving GP
+
+    :param input_dim: the number of input dimension, has to be equal to one
+    :type input_dim: int
+    :param varianceU: variance of the driving GP
+    :type varianceU: float
+    :param lengthscaleU: lengthscale of the driving GP  (sqrt(3)/lengthscaleU)
+    :type lengthscaleU: float
+    :param varianceY: 'variance' of the transfer function
+    :type varianceY: float
+    :param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
+    :type lengthscaleY: float
+    :rtype: kernel object
+
+    """
+    def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
+        assert input_dim==1, "Only defined for input_dim = 1"
+        self.input_dim = input_dim
+        self.num_params = 4
+        self.name = 'ODE_1'
+        if lengthscaleU is not None:
+            lengthscaleU = np.asarray(lengthscaleU)
+            assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
+        else:
+            lengthscaleU = np.ones(1)
+        if lengthscaleY is not None:
+            lengthscaleY = np.asarray(lengthscaleY)
+            assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
+        else:
+            lengthscaleY = np.ones(1)
+            #lengthscaleY = 0.5
+        self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))
+
+    def _get_params(self):
+        """return the value of the parameters."""
+        return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))
+
+    def _set_params(self, x):
+        """set the value of the parameters."""
+        assert x.size == self.num_params
+        self.varianceU = x[0]
+        self.varianceY = x[1]
+        self.lengthscaleU = x[2]
+        self.lengthscaleY = x[3]
+
+    def _get_param_names(self):
+        """return parameter names."""
+        return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']
+
+
+    def K(self, X, X2, target):
+        """Compute the covariance matrix between X and X2."""
+        if X2 is None: X2 = X
+       # i1 = X[:,1]
+       # i2 = X2[:,1]
+       # X = X[:,0].reshape(-1,1)
+       # X2 = X2[:,0].reshape(-1,1)
+        dist = np.abs(X - X2.T)
+        
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #ly=self.lengthscaleY
+        #lu=self.lengthscaleU
+
+        k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
+        k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 
+        k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
+
+        np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
+
+    def Kdiag(self, X, target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #ly=self.lengthscaleY
+        #lu=self.lengthscaleU
+        
+        k1 = (2*lu+ly)/(lu+ly)**2
+        k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2 
+        k3 = 1/(lu+ly) + (lu)/(lu+ly)**2 
+
+        np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
+
+    def dK_dtheta(self, dL_dK, X, X2, target):
+        """derivative of the covariance matrix with respect to the parameters."""
+        if X2 is None: X2 = X
+        dist = np.abs(X - X2.T)
+
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #ly=self.lengthscaleY
+        #lu=self.lengthscaleU
+
+        dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
+        #c=np.sqrt(3)
+        #t1=c/lu
+        #t2=1/ly
+        #dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3   )
+        
+        dk2theta1 = 1*( 
+            np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2) 
+            +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3) 
+            +np.exp(-dist*ly)*2*(ly-lu)**(-2)
+            +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
+            )
+      
+        dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
+
+        dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
+
+
+
+
+        dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * (  (-dist)*(2*lu+ly)  +  1  +  (-2)*(2*lu+ly)/(lu+ly)  )
+
+        dk2theta2 = 1*(
+            np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+            +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
+            )
+
+        dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
+
+        dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
+
+
+
+        k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
+        k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 
+        k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
+        dkdvar = k1+k2+k3
+
+        target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
+        target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
+        target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
+        target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
+
+
+    # def dKdiag_dtheta(self, dL_dKdiag, X, target):
+    #     """derivative of the diagonal of the covariance matrix with respect to the parameters."""
+    #     # NB: derivative of diagonal elements wrt lengthscale is 0
+    #     target[0] += np.sum(dL_dKdiag)
+
+    # def dK_dX(self, dL_dK, X, X2, target):
+    #     """derivative of the covariance matrix with respect to X."""
+    #     if X2 is None: X2 = X
+    #     dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
+    #     ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+    #     dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
+    #     target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
+
+    # def dKdiag_dX(self, dL_dKdiag, X, target):
+    #     pass
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 643483f5..bdb60ce5 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -12,6 +12,7 @@ import linear
 import Matern32
 import Matern52
 import mlp
+import ODE_1
 import periodic_exponential
 import periodic_Matern32
 import periodic_Matern52
diff --git a/GPy/kern/parts/odekern1.c b/GPy/kern/parts/odekern1.c
new file mode 100644
index 00000000..5aecf164
--- /dev/null
+++ b/GPy/kern/parts/odekern1.c
@@ -0,0 +1,38 @@
+#include <math.h> 
+
+ double k_uu(t1,t2,theta1,theta2,sig1,sig2)
+ {
+  double kern=0;
+  double dist=0;
+  
+  dist = sqrt(t2*t2-t1*t1) 
+ 
+  kern = sig1*(1+theta1*dist)*exp(-theta1*dist)
+
+ return kern;
+ }
+
+
+
+ double k_yy(t1, t2, theta1,theta2,sig1,sig2)
+ {
+  double kern=0;
+  double dist=0;
+  
+  dist = sqrt(t2*t2-t1*t1) 
+ 
+  kern = sig1*sig2 * (  exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
+  	exp(-dist)  ) / ((theta2-theta1)*(theta2-theta1))
+
+  return kern;
+ } 
+
+
+
+
+
+
+	
+
+
+

From e06d889d3577870d5bcd1c96650c6fdd109ac75a Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 30 Sep 2013 22:51:07 +0100
Subject: [PATCH 17/17] Added capability for sinc covariance via sympy kernel.

---
 GPy/kern/constructors.py         | 24 +++++++++++++++++++++++-
 GPy/kern/parts/sympy_helpers.cpp | 14 ++++++++++++++
 GPy/kern/parts/sympy_helpers.h   |  3 +++
 GPy/util/symbolic.py             | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 GPy/util/symbolic.py

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index ca349b59..98d88107 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -292,7 +292,8 @@ except ImportError:
 if sympy_available:
     from parts.sympykern import spkern
     from sympy.parsing.sympy_parser import parse_expr
-
+    from GPy.util.symbolic import sinc
+    
     def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
         """
         Radial Basis Function covariance.
@@ -312,6 +313,27 @@ if sympy_available:
             f =  variance*sp.exp(-dist/(2*lengthscale**2))
         return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
 
+    def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
+        """
+        TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
+        sinc covariance funciton
+        """
+        X = [sp.var('x%i' % i) for i in range(input_dim)]
+        Z = [sp.var('z%i' % i) for i in range(input_dim)]
+        variance = sp.var('variance',positive=True)
+        if ARD:
+            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
+            dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
+            dist = parse_expr(dist_string)
+            f =  variance*sinc(sp.pi*sp.sqrt(dist))
+        else:
+            lengthscale = sp.var('lengthscale',positive=True)
+            dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
+            dist = parse_expr(dist_string)
+            f =  variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
+            
+        return kern(input_dim, [spkern(input_dim, f, name='sinc')])
+
     def sympykern(input_dim, k,name=None):
         """
         A base kernel object, where all the hard work in done by sympy.
diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp
index ec8ac87b..76dba4eb 100644
--- a/GPy/kern/parts/sympy_helpers.cpp
+++ b/GPy/kern/parts/sympy_helpers.cpp
@@ -9,3 +9,17 @@ double DiracDelta(double x){
 double DiracDelta(double x,int foo){
     return 0.0;
 };
+
+double sinc(double x){
+  if (x==0)
+    return 1.0;
+  else 
+    return sin(x)/x;
+}
+
+double sinc_grad(double x){
+  if (x==0)
+    return 0.0;
+  else 
+    return (x*cos(x) - sin(x))/(x*x);
+}
diff --git a/GPy/kern/parts/sympy_helpers.h b/GPy/kern/parts/sympy_helpers.h
index 29244eca..d5b495ca 100644
--- a/GPy/kern/parts/sympy_helpers.h
+++ b/GPy/kern/parts/sympy_helpers.h
@@ -1,3 +1,6 @@
 #include <math.h>
 double DiracDelta(double x);
 double DiracDelta(double x, int foo);
+
+double sinc(double x);
+double sinc_grad(double x);
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
new file mode 100644
index 00000000..f4f5fda0
--- /dev/null
+++ b/GPy/util/symbolic.py
@@ -0,0 +1,32 @@
+from sympy import Function, S, oo, I, cos, sin
+
+
+class sinc_grad(Function):
+    nargs = 1
+    
+    def fdiff(self, argindex=1):
+        return ((2-x*x)*sin(self.args[0]) - 2*x*cos(x))/(x*x*x)
+    
+    @classmethod
+    def eval(cls, x):
+        if x is S.Zero:
+            return S.Zero
+        else:
+            return (x*cos(x) - sin(x))/(x*x)
+            
+class sinc(Function):
+    
+    nargs = 1
+    
+    def fdiff(self, argindex=1):
+        return sinc_grad(self.args[0])
+    
+    @classmethod
+    def eval(cls, x):
+        if x is S.Zero:
+            return S.One
+        else:
+            return sin(x)/x
+    
+    def _eval_is_real(self):
+        return self.args[0].is_real