Merge branch 'devel' into params

commit 4f56506aa6
Max Zwiessele 2013-10-07 08:20:29 +01:00
60 changed files with 1944 additions and 596 deletions


@ -17,6 +17,7 @@ def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
return kern(input_dim, [part])
@ -33,6 +34,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
return kern(input_dim, [part])
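These constructors return ready-to-use kern objects. A minimal usage sketch with hypothetical data; it assumes these module-level constructors are exposed as GPy.kern.*, as they are elsewhere in GPy at this point:

    import numpy as np
    import GPy

    k = GPy.kern.rbf(input_dim=2, variance=1., lengthscale=0.5)
    X = np.random.randn(10, 2)
    K = k.K(X)                      # 10 x 10 covariance matrix
    assert np.allclose(K, K.T)      # covariance matrices are symmetric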
@ -41,11 +43,13 @@ def linear(input_dim,variances=None,ARD=False):
"""
Construct a linear kernel.
Arguments
---------
input_dimD (int), obligatory
variances (np.ndarray)
ARD (boolean)
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variances:
:type variances: np.ndarray
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.linear.Linear(input_dim,variances,ARD)
return kern(input_dim, [part])
@ -64,39 +68,42 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
:type bias_variance: float
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean
"""
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part])
def gibbs(input_dim,variance=1., mapping=None):
"""
Gibbs and MacKay non-stationary covariance function.
.. math::
r = sqrt((x_i - x_j)'*(x_i - x_j))
r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}
Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}
where :math:`l(x)` is a function giving the length scale as a function of space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
Where :math:`l(x)` is a function giving the length scale as a function of space.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
part = parts.gibbs.Gibbs(input_dim,variance,mapping)
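Since the lengthscale mapping is user-specified, here is a sketch of a plausible construction (hypothetical data and sizes; the MLP mapping signature is taken from the hetero kernel further down in this diff):

    import numpy as np
    import GPy

    mapping = GPy.mappings.MLP(input_dim=1, output_dim=1, hidden_dim=10)
    k = GPy.kern.gibbs(input_dim=1, variance=1., mapping=mapping)
    X = np.linspace(0., 10., 50)[:, None]
    K = k.K(X)   # non-stationary: the effective lengthscale varies over X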
@ -124,6 +131,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
:type degree: int
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean
"""
part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
return kern(input_dim, [part])
@ -132,14 +140,42 @@ def white(input_dim,variance=1.):
"""
Construct a white kernel.
Arguments
---------
input_dimD (int), obligatory
variance (float)
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.white.White(input_dim,variance)
return kern(input_dim, [part])
def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None):
"""Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
return kern(2, [part])
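Note the fixed input dimension of 2: the kernel acts on two-column inputs, times in the first column and output indices in the second (index 0 denotes the latent exponentiated quadratic process and indices 1 and upwards the ODE outputs, as inferred from _extract_t_indices in the new eq_ode1.py below). A hypothetical sketch, assuming this constructor is exposed like its siblings:

    import numpy as np
    import GPy

    k = GPy.kern.eq_ode1(output_dim=2, rank=1)
    t = np.linspace(0., 5., 6)[:, None]
    index = np.ones((6, 1))            # all observations on output 1
    X = np.hstack((t, index))          # column 0: times, column 1: output index
    K = k.K(X)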
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
"""
@ -153,6 +189,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part])
@ -169,6 +206,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part])
@ -185,6 +223,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
return kern(input_dim, [part])
@ -193,10 +232,11 @@ def bias(input_dim, variance=1.):
"""
Construct a bias kernel.
Arguments
---------
input_dim (int), obligatory
variance (float)
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.bias.Bias(input_dim, variance)
return kern(input_dim, [part])
@ -204,10 +244,15 @@ def bias(input_dim, variance=1.):
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
"""
Construct a finite dimensional kernel.
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F
variances : np.ndarray with shape (n,)
:param input_dim: the number of input dimensions
:type input_dim: int
:param F: np.array of functions with shape (n,) - the n basis functions
:type F: np.array
:param G: np.array with shape (n,n) - the Gram matrix associated to F
:type G: np.array
:param variances: np.ndarray with shape (n,)
:type variances: np.ndarray
"""
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
return kern(input_dim, [part])
@ -220,6 +265,7 @@ def spline(input_dim, variance=1.):
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.spline.Spline(input_dim, variance)
return kern(input_dim, [part])
@ -232,43 +278,78 @@ def Brownian(input_dim, variance=1.):
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.Brownian.Brownian(input_dim, variance)
return kern(input_dim, [part])
try:
import sympy as sp
from sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
sympy_available = True
except ImportError:
sympy_available = False
if sympy_available:
from parts.sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
from GPy.util.symbolic import sinc
def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
"""
Radial Basis Function covariance.
"""
X = [sp.var('x%i' % i) for i in range(input_dim)]
Z = [sp.var('z%i' % i) for i in range(input_dim)]
rbf_variance = sp.var('rbf_variance',positive=True)
variance = sp.var('variance',positive=True)
if ARD:
rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/2.)
f = variance*sp.exp(-dist/2.)
else:
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
lengthscale = sp.var('lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
return kern(input_dim, [spkern(input_dim, f)])
f = variance*sp.exp(-dist/(2*lengthscale**2))
return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
def sympykern(input_dim, k):
def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
"""
A kernel from a symbolic sympy representation
TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
sinc covariance function
"""
return kern(input_dim, [spkern(input_dim, k)])
X = [sp.var('x%i' % i) for i in range(input_dim)]
Z = [sp.var('z%i' % i) for i in range(input_dim)]
variance = sp.var('variance',positive=True)
if ARD:
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sinc(sp.pi*sp.sqrt(dist))
else:
lengthscale = sp.var('lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
return kern(input_dim, [spkern(input_dim, f, name='sinc')])
def sympykern(input_dim, k,name=None):
"""
A base kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x1, z1, x2, z2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x1, z1, etc
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
"""
return kern(input_dim, [spkern(input_dim, k,name)])
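For example, a cosine covariance (positive definite by Bochner's theorem) could be passed in symbolically; a sketch assuming sympy is installed and that spkern extracts `variance` as a parameter, as the docstring above describes:

    import sympy as sp
    import GPy

    x0, z0 = sp.var('x0 z0')
    variance = sp.var('variance', positive=True)
    f = variance * sp.cos(x0 - z0)   # k(x, z) written in terms of x0, z0
    k = GPy.kern.sympykern(1, f, name='cosine_sympy')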
del sympy_available
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
@ -285,6 +366,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
@ -303,6 +385,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
@ -321,6 +404,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
@ -334,6 +418,7 @@ def prod(k1,k2,tensor=False):
:param tensor: The kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
part = parts.prod.Prod(k1, k2, tensor)
return kern(part.input_dim, [part])
@ -346,30 +431,32 @@ def symmetric(k):
k_.parts = [symmetric.Symmetric(p) for p in k.parts]
return k_
def coregionalize(num_outputs,W_columns=1, W=None, kappa=None):
def coregionalize(output_dim,rank=1, W=None, kappa=None):
"""
Coregionalization matrix B, of the form:
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)
An intrinsic/linear coregionalization kernel of the form
An intrinsic/linear coregionalization kernel of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a kernel k(x,y) and B.
:param num_outputs: the number of outputs to coregionalize
:type num_outputs: int
:param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type W_colunns: int
:param output_dim: the number of outputs to coregionalize
:type output_dim: int
:param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: numpy array of dimensionality (num_outpus, W_columns)
:type W: numpy array of dimensionality (output_dim, rank)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (num_outputs,)
:type kappa: numpy array of dimensionality (output_dim,)
:rtype: kernel object
"""
p = parts.coregionalize.Coregionalize(num_outputs,W_columns,W,kappa)
p = parts.coregionalize.Coregionalize(output_dim,rank,W,kappa)
return kern(1,[p])
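The coregionalization matrix implied by a given W and kappa is easy to check by hand (hypothetical numbers):

    import numpy as np

    W = np.array([[1.0], [0.5]])            # output_dim=2, rank=1
    kappa = np.array([0.1, 0.2])
    B = np.dot(W, W.T) + np.diag(kappa)     # [[1.1, 0.5], [0.5, 0.45]]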
@ -422,25 +509,26 @@ def independent_outputs(k):
def hierarchical(k):
"""
TODO THis can't be right! Construct a kernel with independent outputs from an existing kernel
TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
"""
# for sl in k.input_slices:
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.hierarchical.Hierarchical(k.parts)]
return kern(k.input_dim+len(k.parts),_parts)
def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa=None):
def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None):
"""
Builds a kernel of a linear coregionalization model
:input_dim: Input dimensionality
:num_outputs: Number of outputs
:output_dim: Number of outputs
:kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different coregionalization matrix
:type kernel_list: list of GPy kernels
:param W_columns: number tuples of the corregionalization parameters 'coregion_W'
:type W_columns: integer
:param rank: number of columns of the coregionalization parameter 'coregion_W'
:type rank: integer
..note the kernels dimensionality is overwritten to fit input_dim
..Note the kernel's dimensionality is overwritten to fit input_dim
"""
for k in kernel_list:
@ -448,11 +536,31 @@ def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa
k.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
k_coreg = coregionalize(num_outputs,W_columns,W,kappa)
k_coreg = coregionalize(output_dim,rank,W,kappa)
kernel = kernel_list[0]**k_coreg.copy()
for k in kernel_list[1:]:
k_coreg = coregionalize(num_outputs,W_columns,W,kappa)
k_coreg = coregionalize(output_dim,rank,W,kappa)
kernel += k**k_coreg.copy()
return kernel
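A sketch of assembling a two-output LCM over a one-dimensional input with the renamed arguments (hypothetical choice of base kernels):

    import GPy

    k_list = [GPy.kern.rbf(1), GPy.kern.Matern32(1)]
    k_lcm = GPy.kern.build_lcm(input_dim=1, output_dim=2, kernel_list=k_list, rank=1)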
def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
"""
kernel resulting from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function
:type lengthscaleY: float
:rtype: kernel object
"""
part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
return kern(input_dim, [part])


@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import pylab as pb
from ..core.parameterized import Parameterized
@ -79,13 +80,15 @@ class kern(Parameterized):
def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
"""If an ARD kernel is present, it bar-plots the ARD parameters,
"""If an ARD kernel is present, it bar-plots the ARD parameters.
:param fignum: figure number of the plot
:param ax: matplotlib axis to plot on
:param title:
title of the plot,
pass '' to not print a title
pass None for a generic title
"""
if ax is None:
fig = pb.figure(fignum)
@ -176,8 +179,10 @@ class kern(Parameterized):
def add(self, other, tensor=False):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
:type other: GPy.kern
"""
if tensor:
D = self.input_dim + other.input_dim
@ -219,11 +224,13 @@ class kern(Parameterized):
def prod(self, other, tensor=False):
"""
multiply two kernels (either on the same space, or on the tensor product of the input space).
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
K1 = self.copy()
K2 = other.copy()
@ -322,6 +329,7 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)
"""
assert X.shape[1] == self.input_dim
target = np.zeros(self.num_params)
@ -341,6 +349,7 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@ -414,6 +423,7 @@ class kern(Parameterized):
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
@ -568,7 +578,7 @@ class Kern_check_model(Model):
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<0):
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
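The loosened tolerance matters because eigenvalues of a matrix that is positive (semi-)definite in exact arithmetic routinely come back very slightly negative in floating point; a minimal illustration with hypothetical data:

    import numpy as np

    X = np.random.randn(200, 1)
    K = np.exp(-0.5 * (X - X.T) ** 2)   # RBF Gram matrix, PSD in exact arithmetic
    v = np.linalg.eigvalsh(K)
    print(v.min())                      # often a tiny negative value such as -1e-16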
@ -657,6 +667,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
@ -683,7 +694,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
@ -694,7 +705,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
@ -705,10 +716,15 @@ def kern_test(kern, X=None, X2=None, verbose=False):
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
@ -719,7 +735,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
@ -730,7 +751,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
@ -738,5 +764,5 @@ def kern_test(kern, X=None, X2=None, verbose=False):
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks
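Typical invocation of the checker on a kernel (a sketch; the import path for kern_test is assumed from this file's location in GPy/kern):

    import GPy
    from GPy.kern.kern import kern_test   # hypothetical import path

    assert kern_test(GPy.kern.rbf(2), verbose=True)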

161 GPy/kern/parts/ODE_1.py Normal file

@ -0,0 +1,161 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class ODE_1(Kernpart):
"""
kernel resulting from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
:type lengthscaleY: float
:rtype: kernel object
"""
def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
assert input_dim==1, "Only defined for input_dim = 1"
self.input_dim = input_dim
self.num_params = 4
self.name = 'ODE_1'
if lengthscaleU is not None:
lengthscaleU = np.asarray(lengthscaleU)
assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
else:
lengthscaleU = np.ones(1)
if lengthscaleY is not None:
lengthscaleY = np.asarray(lengthscaleY)
assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
else:
lengthscaleY = np.ones(1)
#lengthscaleY = 0.5
self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.varianceU = x[0]
self.varianceY = x[1]
self.lengthscaleU = x[2]
self.lengthscaleY = x[3]
def _get_param_names(self):
"""return parameter names."""
return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
# i1 = X[:,1]
# i2 = X2[:,1]
# X = X[:,0].reshape(-1,1)
# X2 = X2[:,0].reshape(-1,1)
dist = np.abs(X - X2.T)
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
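Since Kdiag evaluates the closed form at zero distance, it should agree with the diagonal of K; a quick consistency sketch using the ODE_1 wrapper added earlier in this diff:

    import numpy as np
    import GPy

    k = GPy.kern.ODE_1()
    X = np.linspace(0., 3., 7)[:, None]
    assert np.allclose(np.diag(k.K(X)), k.Kdiag(X))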
def dK_dtheta(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.abs(X - X2.T)
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
#c=np.sqrt(3)
#t1=c/lu
#t2=1/ly
#dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3 )
dk2theta1 = 1*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 = 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
dkdvar = k1+k2+k3
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
# def dKdiag_dtheta(self, dL_dKdiag, X, target):
# """derivative of the diagonal of the covariance matrix with respect to the parameters."""
# # NB: derivative of diagonal elements wrt lengthscale is 0
# target[0] += np.sum(dL_dKdiag)
# def dK_dX(self, dL_dK, X, X2, target):
# """derivative of the covariance matrix with respect to X."""
# if X2 is None: X2 = X
# dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
# ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
# dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
# target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
# def dKdiag_dX(self, dL_dKdiag, X, target):
# pass


@ -2,16 +2,18 @@ import bias
import Brownian
import coregionalize
import exponential
import eq_ode1
import finite_dimensional
import fixed
import gibbs
#import hetero #hetero.py is not commited: omitting for now. JH.
import hetero
import hierarchical
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import ODE_1
import periodic_exponential
import periodic_Matern32
import periodic_Matern52


@ -11,44 +11,47 @@ class Coregionalize(Kernpart):
"""
Covariance function for intrinsic/linear coregionalization models
This covariance has the form
This covariance has the form:
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)
An intrinsic/linear coregionalization covariance function of the form
An intrinsic/linear coregionalization covariance function of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function
k(x,y) and B.
:param num_outputs: number of outputs to coregionalize
:type num_outputs: int
:param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type W_colunns: int
:param output_dim: number of outputs to coregionalize
:type output_dim: int
:param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: numpy array of dimensionality (output_dim, rank)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (num_outputs,)
:type kappa: numpy array of dimensionality (output_dim,)
.. Note: see coregionalization examples in GPy.examples.regression for some usage.
.. note: see coregionalization examples in GPy.examples.regression for some usage.
"""
def __init__(self,num_outputs,W_columns=1, W=None, kappa=None):
def __init__(self, output_dim, rank=1, W=None, kappa=None):
self.input_dim = 1
self.name = 'coregion'
self.num_outputs = num_outputs
self.W_columns = W_columns
self.output_dim = output_dim
self.rank = rank
if self.rank>output_dim-1:
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
if W is None:
self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns)
self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.num_outputs,self.W_columns)
assert W.shape==(self.output_dim,self.rank)
self.W = W
if kappa is None:
kappa = 0.5*np.ones(self.num_outputs)
kappa = 0.5*np.ones(self.output_dim)
else:
assert kappa.shape==(self.num_outputs,)
assert kappa.shape==(self.output_dim,)
self.kappa = kappa
self.num_params = self.num_outputs*(self.W_columns + 1)
self.num_params = self.output_dim*(self.rank + 1)
self._set_params(np.hstack([self.W.flatten(),self.kappa]))
def _get_params(self):
@ -56,12 +59,12 @@ class Coregionalize(Kernpart):
def _set_params(self,x):
assert x.size == self.num_params
self.kappa = x[-self.num_outputs:]
self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
self.kappa = x[-self.output_dim:]
self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
def _get_param_names(self):
return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]
return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]
def K(self,index,index2,target):
index = np.asarray(index,dtype=np.int)
@ -79,26 +82,26 @@ class Coregionalize(Kernpart):
if index2 is None:
code="""
for(int i=0;i<N; i++){
target[i+i*N] += B[index[i]+num_outputs*index[i]];
target[i+i*N] += B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] += B[index[i]+num_outputs*index[j]];
target[j+i*N] += B[index[i]+output_dim*index[j]];
target[i+j*N] += target[j+i*N];
}
}
"""
N,B,num_outputs = index.size, self.B, self.num_outputs
weave.inline(code,['target','index','N','B','num_outputs'])
N,B,output_dim = index.size, self.B, self.output_dim
weave.inline(code,['target','index','N','B','output_dim'])
else:
index2 = np.asarray(index2,dtype=np.int)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] += B[num_outputs*index[j]+index2[i]];
target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
}
}
"""
N,num_inducing,B,num_outputs = index.size,index2.size, self.B, self.num_outputs
weave.inline(code,['target','index','index2','N','num_inducing','B','num_outputs'])
N,num_inducing,B,output_dim = index.size,index2.size, self.B, self.output_dim
weave.inline(code,['target','index','index2','N','num_inducing','B','output_dim'])
def Kdiag(self,index,target):
@ -115,12 +118,12 @@ class Coregionalize(Kernpart):
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + num_outputs*index2[i]] += dL_dK[i+j*num_inducing];
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, num_outputs = index.size, index2.size, self.num_outputs
weave.inline(code, ['N','num_inducing','num_outputs','dL_dK','dL_dK_small','index','index2'])
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N','num_inducing','output_dim','dL_dK','dL_dK_small','index','index2'])
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
@ -137,8 +140,8 @@ class Coregionalize(Kernpart):
ii,jj = ii.T, jj.T
dL_dK_small = np.zeros_like(self.B)
for i in range(self.num_outputs):
for j in range(self.num_outputs):
for i in range(self.output_dim):
for j in range(self.output_dim):
tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
dL_dK_small[i,j] = tmp
@ -150,8 +153,8 @@ class Coregionalize(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,index,target):
index = np.asarray(index,dtype=np.int).flatten()
dL_dKdiag_small = np.zeros(self.num_outputs)
for i in range(self.num_outputs):
dL_dKdiag_small = np.zeros(self.output_dim)
for i in range(self.output_dim):
dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
dW = 2.*self.W*dL_dKdiag_small[:,None]
dkappa = dL_dKdiag_small

556 GPy/kern/parts/eq_ode1.py Normal file

@ -0,0 +1,556 @@
# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.ln_diff_erfs import ln_diff_erfs
import pdb
from scipy import weave
class Eq_ode1(Kernpart):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
self.rank = rank
self.input_dim = 1
self.name = 'eq_ode1'
self.output_dim = output_dim
self.lengthscale = lengthscale
self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
if kappa is not None:
self.num_params+=self.output_dim
if delay is not None:
assert delay.shape==(self.output_dim-1,)
self.num_params+=self.output_dim-1
self.rank = rank
if W is None:
self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim,self.rank)
self.W = W
if decay is None:
self.decay = np.ones(self.output_dim-1)
else:
assert decay.shape==(self.output_dim-1,)
self.decay = decay
if kappa is not None:
assert kappa.shape==(self.output_dim,)
self.kappa = kappa
self.delay = delay
self.is_normalized = True
self.is_stationary = False
self.gaussian_initial = False
self._set_params(self._get_params())
def _get_params(self):
param_list = [self.W.flatten()]
if self.kappa is not None:
param_list.append(self.kappa)
param_list.append(self.decay)
if self.delay is not None:
param_list.append(self.delay)
param_list.append(self.lengthscale)
return np.hstack(param_list)
def _set_params(self,x):
assert x.size == self.num_params
end = self.output_dim*self.rank
self.W = x[:end].reshape(self.output_dim,self.rank)
start = end
self.B = np.dot(self.W,self.W.T)
if self.kappa is not None:
end+=self.output_dim
self.kappa = x[start:end]
self.B += np.diag(self.kappa)
start=end
end+=self.output_dim-1
self.decay = x[start:end]
start=end
if self.delay is not None:
end+=self.output_dim-1
self.delay = x[start:end]
start=end
end+=1
self.lengthscale = x[start]
self.sigma = np.sqrt(2)*self.lengthscale
def _get_param_names(self):
param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
if self.kappa is not None:
param_names += ['kappa_%i'%i for i in range(self.output_dim)]
param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
if self.delay is not None:
param_names += ['delay_%i'%i for i in range(1,self.output_dim)]
param_names+= ['lengthscale']
return param_names
def K(self,X,X2,target):
if X.shape[1] > 2:
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
self._K_computations(X, X2)
target += self._scale*self._K_dvar
if self.gaussian_initial:
# Add covariance associated with initial condition.
t1_mat = self._t[self._rorder, None]
t2_mat = self._t2[None, self._rorder2]
target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
def Kdiag(self,index,target):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass
def dK_dtheta(self,dL_dK,X,X2,target):
# First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK)
self._dK_ode_dtheta(target)
def _dK_ode_dtheta(self, target):
"""Do all the computations for the ode parts of the covariance function."""
t_ode = self._t[self._index>0]
dL_dK_ode = self._dL_dK[self._index>0, :]
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
return
t2_ode = t_ode
dL_dK_ode = dL_dK_ode[:, self._index>0]
index2_ode = index_ode
else:
t2_ode = self._t2[self._index2>0]
dL_dK_ode = dL_dK_ode[:, self._index2>0]
if t_ode.size==0 or t2_ode.size==0:
return
index2_ode = self._index2[self._index2>0]-1
h1 = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
#self._dK_ddelay = self._dh_ddelay
self._dK_dsigma = self._dh_dsigma
if self._t2 is None:
h2 = h1
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)
#self._dK_ddelay += self._dh_ddelay.T
self._dK_dsigma += self._dh_dsigma.T
# C1 = self.sensitivity
# C2 = self.sensitivity
# K = 0.5 * (h1 + h2.T)
# var2 = C1*C2
# if self.is_normalized:
# dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2
# dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2
# dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma))
# dk_dC1 = C2 * sum(sum(dL_dK.*K))
# dk_dC2 = C1 * sum(sum(dL_dK.*K))
# else:
# K = np.sqrt(np.pi) * K
# dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K))
# dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K))
# dk_dSim1Variance = dk_dC1
# Last element is the length scale.
(dL_dK_ode[:, :, None]*self._dh_ddelay[:, None, :]).sum(2)
target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()
# # only pass the gradient with respect to the inverse width to one
# # of the gradient vectors ... otherwise it is counted twice.
# g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance])
# g2 = real([dk_dD2 0 dk_dSim2Variance])
# return g1, g2"""
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass
def dK_dX(self,dL_dK,X,X2,target):
pass
def _extract_t_indices(self, X, X2=None, dL_dK=None):
"""Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
# TODO: some fast checking here to see if this needs recomputing?
self._t = X[:, 0]
if not X.shape[1] == 2:
raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
self._index = np.asarray(X[:, 1],dtype=np.int)
# Sort indices so that outputs are in blocks for computational
# convenience.
self._order = self._index.argsort()
self._index = self._index[self._order]
self._t = self._t[self._order]
self._rorder = self._order.argsort() # rorder is for reversing the order
if X2 is None:
self._t2 = None
self._index2 = None
self._order2 = self._order
self._rorder2 = self._rorder
else:
if not X2.shape[1] == 2:
raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
self._t2 = X2[:, 0]
self._index2 = np.asarray(X2[:, 1],dtype=np.int)
self._order2 = self._index2.argsort()
self._index2 = self._index2[self._order2]
self._t2 = self._t2[self._order2]
self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
if dL_dK is not None:
self._dL_dK = dL_dK[self._order, :]
self._dL_dK = self._dL_dK[:, self._order2]
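The order/rorder bookkeeping works because the argsort of an argsort is the inverse permutation; a sketch:

    import numpy as np

    index = np.array([2, 0, 1, 0])
    order = index.argsort()      # permutation grouping inputs into blocks by output index
    rorder = order.argsort()     # inverse permutation, maps sorted results back to input order
    assert (index[order][rorder] == index).all()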
def _K_computations(self, X, X2):
"""Perform main body of computations for the ode1 covariance function."""
# First extract times and indices.
self._extract_t_indices(X, X2)
self._K_compute_eq()
self._K_compute_ode_eq()
if X2 is None:
self._K_eq_ode = self._K_ode_eq.T
else:
self._K_compute_ode_eq(transpose=True)
self._K_compute_ode()
if X2 is None:
self._K_dvar = np.zeros((self._t.shape[0], self._t.shape[0]))
else:
self._K_dvar = np.zeros((self._t.shape[0], self._t2.shape[0]))
# Reorder values of blocks for placing back into _K_dvar.
self._K_dvar = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
np.hstack((self._K_ode_eq, self._K_ode))))
self._K_dvar = self._K_dvar[self._rorder, :]
self._K_dvar = self._K_dvar[:, self._rorder2]
if X2 is None:
# Matrix giving scales of each output
self._scale = np.zeros((self._t.size, self._t.size))
code="""
for(int i=0;i<N; i++){
scale_mat[i+i*N] = B[index[i]+output_dim*(index[i])];
for(int j=0; j<i; j++){
scale_mat[j+i*N] = B[index[i]+output_dim*index[j]];
scale_mat[i+j*N] = scale_mat[j+i*N];
}
}
"""
scale_mat, B, index = self._scale, self.B, self._index
N, output_dim = self._t.size, self.output_dim
weave.inline(code,['index',
'scale_mat', 'B',
'N', 'output_dim'])
else:
self._scale = np.zeros((self._t.size, self._t2.size))
code = """
for(int i=0; i<N; i++){
for(int j=0; j<N2; j++){
scale_mat[i+j*N] = B[index[i]+output_dim*index2[j]];
}
}
"""
scale_mat, B, index, index2 = self._scale, self.B, self._index, self._index2
N, N2, output_dim = self._t.size, self._t2.size, self.output_dim
weave.inline(code, ['index', 'index2',
'scale_mat', 'B',
'N', 'N2', 'output_dim'])
def _K_compute_eq(self):
"""Compute covariance for latent covariance."""
t_eq = self._t[self._index==0]
if self._t2 is None:
if t_eq.size==0:
self._K_eq = np.zeros((0, 0))
return
self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
else:
t2_eq = self._t2[self._index2==0]
if t_eq.size==0 or t2_eq.size==0:
self._K_eq = np.zeros((t_eq.size, t2_eq.size))
return
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
if self.is_normalized:
self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
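In the normalized case each entry is exactly the Gaussian density of the time difference, so the latent covariance integrates to one over time lags; a quick check (a sketch):

    import numpy as np
    from scipy.stats import norm

    lengthscale = 0.7
    d = np.linspace(-3., 3., 11)
    k = np.exp(-d**2 / (2.*lengthscale**2)) / (np.sqrt(2.*np.pi)*lengthscale)
    assert np.allclose(k, norm.pdf(d, scale=lengthscale))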
def _K_compute_ode_eq(self, transpose=False):
"""Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
:param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
:type transpose: bool"""
if self._t2 is not None:
if transpose:
t_eq = self._t[self._index==0]
t_ode = self._t2[self._index2>0]
index_ode = self._index2[self._index2>0]-1
else:
t_eq = self._t2[self._index2==0]
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
else:
t_eq = self._t[self._index==0]
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if t_ode.size==0 or t_eq.size==0:
if transpose:
self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
else:
self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
return
t_ode_mat = t_ode[:, None]
t_eq_mat = t_eq[None, :]
if self.delay is not None:
t_ode_mat -= self.delay[index_ode, None]
diff_t = (t_ode_mat - t_eq_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
decay_vals = self.decay[index_ode][:, None]
half_sigma_d_i = 0.5*self.sigma*decay_vals
if self.is_stationary:
ln_part, signs = ln_diff_erfs(np.inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
else:
ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)
sK *= 0.5
if not self.is_normalized:
sK *= np.sqrt(np.pi)*self.sigma
if transpose:
self._K_eq_ode = sK.T
else:
self._K_ode_eq = sK
def _K_compute_ode(self):
# Compute covariances between outputs of the ODE models.
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
self._K_ode = np.zeros((0, 0))
return
t2_ode = t_ode
index2_ode = index_ode
else:
t2_ode = self._t2[self._index2>0]
if t_ode.size==0 or t2_ode.size==0:
self._K_ode = np.zeros((t_ode.size, t2_ode.size))
return
index2_ode = self._index2[self._index2>0]-1
# When index is identical
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
if self._t2 is None:
self._K_ode = 0.5 * (h + h.T)
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
self._K_ode = 0.5 * (h + h2.T)
if not self.is_normalized:
self._K_ode *= np.sqrt(np.pi)*self.sigma
def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
"""Helper function for computing H for the diagonal only.
:param t: time input.
:type t: array
:param index: first output indices
:type index: array of int.
:param index: second output indices
:type index: array of int.
:param update_derivatives: whether or not to update the derivative portions (default False).
:type update_derivatives: bool
:param stationary: whether to compute the stationary version of the covariance (default False).
:type stationary: bool"""
"""if delta_i~=delta_j:
[h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
else:
Decay = self.decay[index]
if self.delay is not None:
t = t - self.delay[index]
t_squared = t*t
half_sigma_decay = 0.5*self.sigma*Decay
[ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
half_sigma_decay)
[ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
half_sigma_decay - t/self.sigma)
h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+ ln_part_1
- log(Decay + D_j))
- sign2*np.exp(half_sigma_decay*half_sigma_decay
- (Decay + D_j)*t
+ ln_part_2
- log(Decay + D_j)))
sigma2 = self.sigma*self.sigma
if update_derivatives:
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
+ t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
)
+ self.sigma/np.sqrt(np.pi)*
(-1 + np.exp(-t_squared/sigma2-Decay*t)
+ np.exp(-t_squared/sigma2-D_j*t)
- np.exp(-(Decay + D_j)*t)))
dh_dD_i = (dh_dD_i/(Decay+D_j)).real
dh_dD_j = (t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
)
-h)
dh_dD_j = (dh_dD_j/(Decay + D_j)).real
dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
+ 2/(np.sqrt(np.pi)*(Decay+D_j))\
*((-Decay/2) \
+ (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
- (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
- Decay/2*np.exp(-(Decay+D_j)*t))"""
pass
def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
"""Helper function for computing part of the ode1 covariance function.
:param t: first time input.
:type t: array
:param index: Indices of first output.
:type index: array of int
:param t2: second time input.
:type t2: array
:param index2: Indices of second output.
:type index2: array of int
:param update_derivatives: whether to update derivatives (default is False)
:return h : result of this subcomponent of the kernel for the given values.
:rtype: ndarray
"""
if stationary:
raise NotImplementedError, "Error, stationary version of this covariance not yet implemented."
# Vector of decays and delays associated with each output.
Decay = self.decay[index]
Decay2 = self.decay[index2]
t_mat = t[:, None]
t2_mat = t2[None, :]
if self.delay is not None:
Delay = self.delay[index]
Delay2 = self.delay[index2]
t_mat-=Delay[:, None]
t2_mat-=Delay2[None, :]
diff_t = (t_mat - t2_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
half_sigma_decay_i - inv_sigma_diff_t,
return_sign=True)
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
half_sigma_decay_i - t_mat/self.sigma,
return_sign=True)
h = sign1*np.exp(half_sigma_decay_i
*half_sigma_decay_i
-Decay[:, None]*diff_t+ln_part_1
-np.log(Decay[:, None] + Decay2[None, :]))
h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
-Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
-np.log(Decay[:, None] + Decay2[None, :]))
if update_derivatives:
sigma2 = self.sigma*self.sigma
# Update ith decay gradient
dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
+ (-diff_t*sign1*np.exp(
half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
)
+t_mat*sign2*np.exp(
half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
- Decay2*t2_mat+ln_part_2))
+self.sigma/np.sqrt(np.pi)*(
-np.exp(
-diff_t*diff_t/sigma2
)+np.exp(
-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
)+np.exp(
-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
)-np.exp(
-(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
)
))
self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
# Update jth decay gradient
dh_ddecay2 = (t2_mat*sign2
*np.exp(
half_sigma_decay_i*half_sigma_decay_i
-(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+ln_part_2
)
-h)
self._dh_ddecay2 = (dh_ddecay2/(Decay[:, None] + Decay2[None, :])).real
# Update sigma gradient
self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
+ 2/(np.sqrt(np.pi)
*(Decay[:, None]+Decay2[None, :]))
*((-diff_t/sigma2-Decay[:, None]/2)
*np.exp(-diff_t*diff_t/sigma2)
+ (-t2_mat/sigma2+Decay[:, None]/2)
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
- (-t_mat/sigma2-Decay[:, None]/2)
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
- Decay[:, None]/2
*np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
return h
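The log-space ln_diff_erfs calls are what keep these expressions finite: for large arguments erf saturates at 1.0 in double precision and the difference cancels to zero, while the log of the difference is still representable. A sketch of the idea using scipy's scaled complementary error function (ln_diff_erfs itself lives in GPy.util.ln_diff_erfs):

    import numpy as np
    from scipy.special import erf, erfcx

    a, b = 30.0, 29.0
    naive = erf(a) - erf(b)   # both operands round to 1.0, so this is exactly 0.0
    # erf(a) - erf(b) = erfc(b) - erfc(a), and erfc(x) = exp(-x*x)*erfcx(x), so:
    ln_diff = -b**2 + np.log(erfcx(b) - np.exp(b**2 - a**2)*erfcx(a))
    print(naive, ln_diff)     # 0.0 versus roughly -844.9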


@ -10,9 +10,12 @@ import GPy
class Hetero(Kernpart):
"""
TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output.
TODO: Need to constrain the function outputs
positive (still thinking of best way of doing this!!! Yes, intend to use
transformations, but what's the *best* way). Currently just squaring output.
Heteroscedastic noise which depends on input location. See, for example, this paper by Goldberg et al.
Heteroscedastic noise which depends on input location. See, for example,
this paper by Goldberg et al.
.. math::
@ -20,15 +23,15 @@ class Hetero(Kernpart):
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
The parameters are the parameters of \sigma^2(x) which is a
function that can be specified by the user, by default a
multi-layer perceptron is used.
The parameters are the parameters of \sigma^2(x) which is a
function that can be specified by the user, by default a
multi-layer perceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:rtype: Kernpart object
:param input_dim: the number of input dimensions
:type input_dim: int
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:rtype: Kernpart object
See this paper:
@ -36,7 +39,7 @@ class Hetero(Kernpart):
C. M. (1998) Regression with Input-dependent Noise: a Gaussian
Process Treatment In Advances in Neural Information Processing
Systems, Volume 10, pp. 493-499. MIT Press
for a Gaussian process treatment of this problem.
"""
@ -47,7 +50,7 @@ class Hetero(Kernpart):
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not transform:
transform = GPy.core.transformations.logexp()
self.transform = transform
self.mapping = mapping
self.name='hetero'
@ -66,7 +69,7 @@ class Hetero(Kernpart):
def K(self, X, X2, target):
"""Return covariance between X and X2."""
if X2==None or X2 is X:
if (X2 is None) or (X2 is X):
target[np.diag_indices_from(target)] += self._Kdiag(X)
def Kdiag(self, X, target):
@ -76,26 +79,26 @@ class Hetero(Kernpart):
def _Kdiag(self, X):
"""Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
if X2==None or X2 is X:
if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dtheta(dL_dKdiag, X, target)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X)
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dX(dL_dKdiag, X, target)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)


@ -58,6 +58,8 @@ class Kernpart(object):
raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
@ -97,6 +99,9 @@ class Kernpart_stationary(Kernpart):
# wrt lengthscale is 0.
target[0] += np.sum(dL_dKdiag)
def dKdiag_dX(self, dL_dK, X, target):
pass # true for all stationary kernels
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):


@ -7,11 +7,13 @@ four_over_tau = 2./np.pi
class MLP(Kernpart):
"""
multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
.. math::
k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y \sigma_b^2 +1}} \right)
k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 +1}} \\right )
:param input_dim: the number of input dimensions
:type input_dim: int
@ -24,6 +26,7 @@ class MLP(Kernpart):
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
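For reference, a hedged NumPy transcription of the arc-sine formula above (an illustrative function, not the class's internals):

    import numpy as np

    def mlp_k(x, y, variance=1.0, w_var=1.0, b_var=100.0):
        # k(x,y) = sigma^2 (2/pi) asin( (sw2 x'y + sb2)
        #          / sqrt((sw2 x'x + sb2 + 1)(sw2 y'y + sb2 + 1)) )
        num = w_var * (x @ y) + b_var
        den = np.sqrt((w_var * (x @ x) + b_var + 1.0)
                      * (w_var * (y @ y) + b_var + 1.0))
        return variance * (2.0 / np.pi) * np.arcsin(num / den)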

38
GPy/kern/parts/odekern1.c Normal file
View file

@ -0,0 +1,38 @@
#include <math.h>
double k_uu(double t1, double t2, double theta1, double theta2, double sig1, double sig2)
{
  double kern=0;
  double dist=0;
  /* note: dist = sqrt(t2^2 - t1^2), not |t2 - t1|; requires t2^2 >= t1^2 */
  dist = sqrt(t2*t2-t1*t1);
  kern = sig1*(1+theta1*dist)*exp(-theta1*dist);
  return kern;
}
double k_yy(double t1, double t2, double theta1, double theta2, double sig1, double sig2)
{
  double kern=0;
  double dist=0;
  dist = sqrt(t2*t2-t1*t1);
  kern = sig1*sig2 * ( exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
                       exp(-dist) ) / ((theta2-theta1)*(theta2-theta1));
  return kern;
}
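For quick sanity checks outside the weave pipeline, a Python transcription of k_uu under the same (unusual) distance definition; whether sqrt(t2^2 - t1^2) is intended here rather than |t2 - t1| is worth confirming:

    import math

    def k_uu(t1, t2, theta1, sig1):
        # mirrors the C above; sqrt(t2^2 - t1^2) requires t2^2 >= t1^2
        dist = math.sqrt(t2 * t2 - t1 * t1)
        return sig1 * (1.0 + theta1 * dist) * math.exp(-theta1 * dist)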

View file

@ -7,22 +7,22 @@ four_over_tau = 2./np.pi
class POLY(Kernpart):
"""
polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel,
.. math::
k(x, y) = \sigma^2*(\sigma_w^2 x'y+\sigma_b^b)^d
The kernel parameters are \sigma^2 (variance), \sigma^2_w
(weight_variance), \sigma^2_b (bias_variance) and d
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math::
k(x, y) = \sigma^2 (\sigma_w^2 x'y + \sigma_b^2)^d
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
(degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would
be set by hand.
The kernel is not recommended as it is badly behaved when the
\sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness
:math:`\sigma^2_w x'y + \sigma^2_b` has a magnitude greater than one. For completeness
there is an automatic relevance determination version of this
kernel provided.
kernel provided (NOT YET IMPLEMENTED!).
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
@ -32,7 +32,7 @@ class POLY(Kernpart):
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial.
:type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
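A minimal NumPy rendering of the polynomial kernel above (a sketch with illustrative names, not the class's internals):

    import numpy as np

    def poly_k(x, y, variance=1.0, w_var=1.0, b_var=1.0, degree=2):
        # k(x, y) = sigma^2 (sigma_w^2 x'y + sigma_b^2)^d
        return variance * (w_var * (x @ y) + b_var) ** degree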

View file

@ -1,6 +1,7 @@
#include <math.h>
double DiracDelta(double x){
if((x<0.000001) & (x>-0.000001))//go on, laught at my c++ skills
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
return 1.0;
else
return 0.0;
@ -8,3 +9,17 @@ double DiracDelta(double x){
double DiracDelta(double x,int foo){
return 0.0;
};
double sinc(double x){
if (x==0)
return 1.0;
else
return sin(x)/x;
}
double sinc_grad(double x){
if (x==0)
return 0.0;
else
return (x*cos(x) - sin(x))/(x*x);
}
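Both zero-argument branches follow from the Taylor series sin(x)/x = 1 - x^2/6 + O(x^4): the value tends to 1 and the derivative, -x/3 + O(x^3), tends to 0. A quick numerical check of the same expressions in Python:

    import math
    for x in (1e-4, 1e-6):
        assert abs(math.sin(x)/x - 1.0) < 1e-8
        assert abs((x*math.cos(x) - math.sin(x))/(x*x)) < 1e-3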

View file

@ -1,3 +1,6 @@
#include <math.h>
double DiracDelta(double x);
double DiracDelta(double x, int foo);
double sinc(double x);
double sinc_grad(double x);

View file

@ -26,8 +26,11 @@ class spkern(Kernpart):
- to handle multiple inputs, call them x1, z1, etc
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' covariance function. TODO
"""
def __init__(self,input_dim,k,param=None):
self.name='sympykern'
def __init__(self,input_dim,k,name=None,param=None):
if name is None:
self.name='sympykern'
else:
self.name = name
self._sp_k = k
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
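A hedged sketch of the naming convention this parser relies on: first-argument inputs are symbols x0, x1, ..., second-argument inputs are z0, z1, ..., and any remaining symbol is treated as a parameter. The expression below is illustrative only; check the actual constructor signature before use:

    import sympy as sp

    x0, z0, theta = sp.symbols('x0 z0 theta')
    k = sp.exp(-theta * (x0 - z0)**2)  # a 1-d RBF-like covariance

    sp_vars = [e for e in k.atoms() if e.is_Symbol]
    xs = sorted([e for e in sp_vars if e.name[0] == 'x'],
                key=lambda s: int(s.name[1:]))
    # xs == [x0]; the z's and parameters are extracted the same way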
@ -56,9 +59,9 @@ class spkern(Kernpart):
self.weave_kwargs = {\
'support_code':self._function_code,\
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
'headers':['"sympy_helpers.h"'],\
'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
'extra_compile_args':[],\
'extra_link_args':['-lgomp'],\
@ -109,14 +112,15 @@ class spkern(Kernpart):
f.write(self._function_header)
f.close()
#get rid of derivatives of DiracDelta
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
#Here's some code to do the looping for K
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\
+ ["param[%i]"%i for i in range(self.num_params)])
# Here's the code to do the looping for K
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
+ ["param[%i]"%i for i in range(self.num_params)])
self._K_code =\
"""
int i;
@ -133,9 +137,14 @@ class spkern(Kernpart):
%s
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Similar code when only X is provided.
self._K_code_X = self._K_code.replace('Z[', 'X[')
# Code to compute diagonal of covariance.
diag_arglist = re.sub('Z','X',arglist)
diag_arglist = re.sub('j','i',diag_arglist)
#Here's some code to do the looping for Kdiag
# Code to do the looping for Kdiag
self._Kdiag_code =\
"""
int i;
@ -148,8 +157,9 @@ class spkern(Kernpart):
%s
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#here's some code to compute gradients
# Code to compute gradients
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
self._dK_dtheta_code =\
"""
int i;
@ -164,9 +174,12 @@ class spkern(Kernpart):
}
}
%s
"""%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
"""%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
#here's some code to compute gradients for Kdiag TODO: thius is yucky.
# Similar code when only X is provided, change argument lists.
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_funclist = re.sub('Z','X',funclist,count=0)
diag_funclist = re.sub('j','i',diag_funclist)
diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
@ -181,8 +194,12 @@ class spkern(Kernpart):
%s
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#Here's some code to do gradients wrt x
# Code for gradients wrt X
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
if False:
gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""
self._dK_dX_code = \
"""
int i;
@ -192,30 +209,34 @@ class spkern(Kernpart):
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){
%s
//if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
//if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
}
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Create code for call when just X is passed as argument.
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
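The doubled partial encodes the symmetry when Z is X: in sum_{jl} L_{jl} k(x_j, x_l) with symmetric L and k, each x_i enters through both argument slots, so the full gradient is twice the one-slot term. A finite-difference sketch with a toy RBF (illustrative, not the generated C):

    import numpy as np

    def rbf(a, b):
        return np.exp(-0.5 * np.sum((a - b)**2))

    X = np.random.randn(3, 2)
    L = np.random.randn(3, 3); L = L + L.T  # symmetric weights

    # one-slot gradient wrt X[0] (first argument only)
    g1 = sum(L[0, j] * (X[j] - X[0]) * rbf(X[0], X[j]) for j in range(3))

    obj = lambda Y: sum(L[j, l] * rbf(Y[j], Y[l])
                        for j in range(3) for l in range(3))
    eps = 1e-6
    for q in range(2):
        Xp, Xm = X.copy(), X.copy()
        Xp[0, q] += eps; Xm[0, q] -= eps
        fd = (obj(Xp) - obj(Xm)) / (2*eps)
        assert np.allclose(2*g1[q], fd, atol=1e-4)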
#now for gradients of Kdiag wrt X
diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)
# Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \
"""
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = 0;
int input_dim = X_array->dimensions[1];
for (i=0;i<N; i++){
j = i;
for (int i=0;i<N; i++){
%s
}
%s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
"""%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_funclist called here? Need to check that.
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
#TODO: insert multiple functions here via string manipulation
@ -223,7 +244,10 @@ class spkern(Kernpart):
def K(self,X,Z,target):
param = self._param
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
if Z is None:
weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
else:
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
def Kdiag(self,X,target):
param = self._param
@ -231,21 +255,25 @@ class spkern(Kernpart):
def dK_dtheta(self,partial,X,Z,target):
param = self._param
weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
if Z is None:
weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
else:
weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dtheta(self,partial,X,target):
param = self._param
Z = X
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
def dK_dX(self,partial,X,Z,target):
param = self._param
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
if Z is None:
weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
else:
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dX(self,partial,X,target):
param = self._param
Z = X
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
def _set_params(self,param):
#print param.flags['C_CONTIGUOUS']