tidying in kern

2026-05-24 14:15:14 +02:00 · 2014-02-24 15:56:06 +00:00 · 2014-02-24 15:56:06 +00:00 · 4215f5fb28
commit 4215f5fb28
parent 70ada7fa46
14 changed files with 1 additions and 687 deletions
--- a/GPy/kern/_src/todo/ODE_1.py
+++ b/GPy/kern/_src/todo/ODE_1.py
@ -0,0 +1,161 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import Kernpart
+import numpy as np
+
+class ODE_1(Kernpart):
+    """
+    kernel resulting from a first order ODE with OU driving GP
+
+    :param input_dim: the number of input dimension, has to be equal to one
+    :type input_dim: int
+    :param varianceU: variance of the driving GP
+    :type varianceU: float
+    :param lengthscaleU: lengthscale of the driving GP  (sqrt(3)/lengthscaleU)
+    :type lengthscaleU: float
+    :param varianceY: 'variance' of the transfer function
+    :type varianceY: float
+    :param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
+    :type lengthscaleY: float
+    :rtype: kernel object
+
+    """
+    def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
+        """Set up the kernel and store its four parameters.
+
+        A lengthscale left as None defaults to one; a supplied lengthscale
+        must contain exactly one value.
+        """
+        assert input_dim==1, "Only defined for input_dim = 1"
+        self.input_dim, self.num_params, self.name = input_dim, 4, 'ODE_1'
+
+        if lengthscaleU is None:
+            lengthscaleU = np.ones(1)
+        else:
+            lengthscaleU = np.asarray(lengthscaleU)
+            assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
+
+        if lengthscaleY is None:
+            lengthscaleY = np.ones(1)
+        else:
+            lengthscaleY = np.asarray(lengthscaleY)
+            assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
+
+        # Route the initial values through the setter so all attributes are
+        # created in one place.
+        initial = np.hstack((varianceU, varianceY, lengthscaleU, lengthscaleY))
+        self._set_params(initial)
+
+    def _get_params(self):
+        """Return the parameters as one flat array.
+
+        Order: varianceU, varianceY, lengthscaleU, lengthscaleY.
+        """
+        values = (self.varianceU, self.varianceY, self.lengthscaleU, self.lengthscaleY)
+        return np.hstack(values)
+
+    def _set_params(self, x):
+        """Store the parameters held in x.
+
+        x must contain exactly num_params values, ordered
+        (varianceU, varianceY, lengthscaleU, lengthscaleY).
+        """
+        assert x.size == self.num_params
+        self.varianceU, self.varianceY = x[0], x[1]
+        self.lengthscaleU, self.lengthscaleY = x[2], x[3]
+
+    def _get_param_names(self):
+        """Return the parameter names, in the same order as _get_params."""
+        return [kind + tag for kind in ('variance', 'lengthscale') for tag in ('U', 'Y')]
+
+
+    def K(self, X, X2, target):
+        """Add the covariance matrix between X and X2 to target, in place.
+
+        When X2 is None the covariance of X with itself is used.
+        """
+        if X2 is None:
+            X2 = X
+        # Pairwise absolute differences.
+        # NOTE(review): assumes X and X2 are column vectors of shape (N, 1) -- confirm.
+        r = np.abs(X - X2.T)
+
+        # Rates used by the closed form, derived from the stored lengthscales.
+        ly = 1/self.lengthscaleY
+        lu = np.sqrt(3)/self.lengthscaleU
+
+        decay_y = np.exp(-ly*r)
+        decay_u = np.exp(-lu*r)
+
+        k1 = decay_y*(2*lu+ly)/(lu+ly)**2
+        k2 = (decay_u*(ly-2*lu+lu*ly*r-lu**2*r) + decay_y*(2*lu-ly)) / (ly-lu)**2
+        k3 = decay_u * ((1+lu*r)/(lu+ly) + (lu)/(lu+ly)**2)
+
+        contribution = self.varianceU*self.varianceY*(k1+k2+k3)
+        np.add(contribution, target, target)
+
+    def Kdiag(self, X, target):
+        """Add the diagonal of the covariance matrix of X to target, in place.
+
+        The value is the same for every input (it is K evaluated at zero
+        distance), so X itself is not read.
+        """
+        ly = 1/self.lengthscaleY
+        lu = np.sqrt(3)/self.lengthscaleU
+        rate_sum = lu+ly
+
+        k1 = (2*lu+ly)/rate_sum**2
+        # Zero-distance form of the k2 term in K; the numerator cancels analytically.
+        k2 = (ly-2*lu + 2*lu-ly) / (ly-lu)**2
+        k3 = 1/rate_sum + (lu)/rate_sum**2
+
+        contribution = self.varianceU*self.varianceY*(k1+k2+k3)
+        np.add(contribution, target, target)
+
+    def _param_grad_helper(self, dL_dK, X, X2, target):
+        """Accumulate into target the gradient of the objective with respect to the parameters.
+
+        :param dL_dK: derivative of the objective with respect to each entry of K
+        :param X: first set of inputs
+        :param X2: second set of inputs, or None to use X
+        :param target: length-4 array updated in place, ordered
+            (varianceU, varianceY, lengthscaleU, lengthscaleY)
+        """
+        if X2 is None: X2 = X
+        dist = np.abs(X - X2.T)
+
+        # Rates used by the closed form of K; derivatives below are taken with
+        # respect to lu ("theta1") and ly ("theta2") and chained to the
+        # lengthscales at the end.
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #ly=self.lengthscaleY
+        #lu=self.lengthscaleU
+
+        # --- derivatives of k1, k2, k3 with respect to lu ---
+        dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
+        #c=np.sqrt(3)
+        #t1=c/lu
+        #t2=1/ly
+        #dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3   )
+        
+        dk2theta1 = 1*( 
+            np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2) 
+            +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3) 
+            +np.exp(-dist*ly)*2*(ly-lu)**(-2)
+            +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
+            )
+      
+        dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
+
+        dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
+
+
+
+
+        # --- derivatives of k1, k2, k3 with respect to ly ---
+        dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * (  (-dist)*(2*lu+ly)  +  1  +  (-2)*(2*lu+ly)/(lu+ly)  )
+
+        dk2theta2 = 1*(
+            np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+            +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
+            )
+
+        dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
+
+        dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
+
+
+
+        # Unscaled kernel (same three terms as in K); K is linear in each
+        # variance, so this is what the variance gradients need.
+        k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
+        k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 
+        k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
+        dkdvar = k1+k2+k3
+
+        target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
+        target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
+        # Chain rule: lu = sqrt(3)/lengthscaleU, so d lu/d lengthscaleU = -sqrt(3)/lengthscaleU**2
+        target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
+        # Chain rule: ly = 1/lengthscaleY, so d ly/d lengthscaleY = -1/lengthscaleY**2
+        target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
+
+
+    # def dKdiag_dtheta(self, dL_dKdiag, X, target):
+    #     """derivative of the diagonal of the covariance matrix with respect to the parameters."""
+    #     # NB: derivative of diagonal elements wrt lengthscale is 0
+    #     target[0] += np.sum(dL_dKdiag)
+
+    # def dK_dX(self, dL_dK, X, X2, target):
+    #     """derivative of the covariance matrix with respect to X."""
+    #     if X2 is None: X2 = X
+    #     dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
+    #     ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+    #     dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
+    #     target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
+
+    # def dKdiag_dX(self, dL_dKdiag, X, target):
+    #     pass
--- a/GPy/kern/_src/todo/eq_ode1.py
+++ b/GPy/kern/_src/todo/eq_ode1.py
@ -0,0 +1,556 @@
+# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import Kernpart
+import numpy as np
+from GPy.util.linalg import mdot, pdinv
+from GPy.util.ln_diff_erfs import ln_diff_erfs
+import pdb
+from scipy import weave
+
+class Eq_ode1(Kernpart):
+    """
+    Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
+
+    The outputs of this kernel have the form
+    .. math::
+       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
+
+    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
+    
+    :param output_dim: number of outputs driven by latent function.
+    :type output_dim: int
+    :param W: sensitivities of each output to the latent driving function. 
+    :type W: ndarray (output_dim x rank).
+    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
+    :type rank: int
+    :param decay: decay rates for the first order system. 
+    :type decay: array of length output_dim.
+    :param delay: delay between latent force and output response.
+    :type delay: array of length output_dim.
+    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
+    :type kappa: array of length output_dim.
+    
+    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
+    """
+    def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0,  decay=None, delay=None):
+        """Build the kernel and register its parameters.
+
+        :param output_dim: number of outputs
+        :param W: sensitivities, shape (output_dim, rank); drawn at random when None
+        :param rank: number of latent forces
+        :param kappa: optional diagonal term, shape (output_dim,)
+        :param lengthscale: lengthscale of the exponentiated quadratic
+        :param decay: decay rates, shape (output_dim-1,); ones when None
+        :param delay: optional delays, shape (output_dim-1,)
+        """
+        self.rank = rank
+        self.input_dim = 1
+        self.name = 'eq_ode1'
+        self.output_dim = output_dim
+        self.lengthscale = lengthscale
+
+        # W, the lengthscale and the decays are always parameters; kappa and
+        # delay only count when they are supplied.
+        self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
+        if kappa is not None:
+            assert kappa.shape==(self.output_dim,)
+            self.num_params+=self.output_dim
+        if delay is not None:
+            assert delay.shape==(self.output_dim-1,)
+            self.num_params+=self.output_dim-1
+
+        if W is None:
+            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
+        else:
+            assert W.shape==(self.output_dim,self.rank)
+            self.W = W
+
+        if decay is None:
+            self.decay = np.ones(self.output_dim-1)
+        else:
+            # Previously a user-supplied decay was silently dropped, leaving
+            # self.decay undefined and breaking _get_params below.
+            assert decay.shape==(self.output_dim-1,)
+            self.decay = decay
+
+        self.kappa = kappa
+        self.delay = delay
+        self.is_normalized = True
+        self.is_stationary = False
+        self.gaussian_initial = False
+        # Round-trip through the setter so derived quantities (B, sigma) exist.
+        self._set_params(self._get_params())
+        
+    def _get_params(self):
+        """Return all parameters as one flat array.
+
+        Order: W (flattened), kappa (if used), decay, delay (if used), lengthscale.
+        """
+        kappa_part = [] if self.kappa is None else [self.kappa]
+        delay_part = [] if self.delay is None else [self.delay]
+        pieces = [self.W.flatten()] + kappa_part + [self.decay] + delay_part + [self.lengthscale]
+        return np.hstack(pieces)
+
+    def _set_params(self,x):
+        """Unpack the flat parameter vector x and refresh derived quantities.
+
+        The layout matches _get_params: W, kappa (if used), decay,
+        delay (if used), lengthscale. Also recomputes B = W W^T (+ diag(kappa))
+        and sigma = sqrt(2) * lengthscale.
+        """
+        assert x.size == self.num_params
+        n_out = self.output_dim
+
+        cursor = n_out*self.rank
+        self.W = x[:cursor].reshape(n_out,self.rank)
+        self.B = np.dot(self.W,self.W.T)
+
+        if self.kappa is not None:
+            self.kappa = x[cursor:cursor+n_out]
+            self.B += np.diag(self.kappa)
+            cursor += n_out
+
+        self.decay = x[cursor:cursor+n_out-1]
+        cursor += n_out-1
+
+        if self.delay is not None:
+            self.delay = x[cursor:cursor+n_out-1]
+            cursor += n_out-1
+
+        self.lengthscale = x[cursor]
+        self.sigma = np.sqrt(2)*self.lengthscale
+
+
+    def _get_param_names(self):
+        """Return the parameter names, in the same order as _get_params.
+
+        Decays and delays each have output_dim-1 entries and are numbered
+        from 1 to output_dim-1.
+        """
+        param_names = ['W%i_%i'%(i,j) for i in range(self.output_dim) for j in range(self.rank)]
+        if self.kappa is not None:
+            param_names += ['kappa_%i'%i for i in range(self.output_dim)]
+        param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
+        if self.delay is not None:
+            # The original `1+range(...)` raised TypeError (int + list); the
+            # delays are numbered like the decays so the name count matches
+            # num_params.
+            param_names += ['delay_%i'%i for i in range(1,self.output_dim)]
+        param_names += ['lengthscale']
+        return param_names
+
+    def K(self,X,X2,target):
+        """Add the covariance between X and X2 to target, in place.
+
+        :raises ValueError: if X has more than two columns
+        """
+        if X.shape[1] > 2:
+            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
+
+        self._K_computations(X, X2)
+        target += self._scale*self._K_dvar
+
+        if not self.gaussian_initial:
+            return
+
+        # Add covariance associated with initial condition.
+        # NOTE(review): gaussian_initial is set to False in __init__ and
+        # self.initial_variance is not assigned anywhere in this class, so this
+        # path looks unfinished -- confirm before enabling it.
+        t1_mat = self._t[self._rorder, None]
+        t2_mat = self._t2[None, self._rorder2]
+        target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
+
+    def Kdiag(self,index,target):
+        """Not implemented: leaves target untouched."""
+        #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
+        return None
+    
+    def _param_grad_helper(self,dL_dK,X,X2,target):
+        """Accumulate parameter gradients into target, in place."""
+        # Split the inputs into times and output indices (and reorder dL_dK to
+        # match), then let the ODE part add its contribution.
+        self._extract_t_indices(X, X2, dL_dK)
+        self._dK_ode_dtheta(target)
+        
+
+    def _dK_ode_dtheta(self, target):
+        """Add the ODE-block contribution to the lengthscale gradient.
+
+        Only target[-1] (the lengthscale) is updated.
+        TODO: gradients for the sensitivities, decays and delays are not
+        computed here.
+
+        Expects _extract_t_indices to have been called with dL_dK first.
+        """
+        ode_rows = self._index>0
+        t_ode = self._t[ode_rows]
+        index_ode = self._index[ode_rows]-1
+        dL_dK_ode = self._dL_dK[ode_rows, :]
+        if self._t2 is None:
+            if t_ode.size==0:
+                return
+            t2_ode = t_ode
+            index2_ode = index_ode
+            dL_dK_ode = dL_dK_ode[:, ode_rows]
+        else:
+            ode_cols = self._index2>0
+            t2_ode = self._t2[ode_cols]
+            dL_dK_ode = dL_dK_ode[:, ode_cols]
+            if t_ode.size==0 or t2_ode.size==0:
+                return
+            index2_ode = self._index2[ode_cols]-1
+
+        # _compute_H stores its sigma derivative in self._dh_dsigma.
+        self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
+        dh1_dsigma = self._dh_dsigma
+        if self._t2 is not None:
+            # Second half of the symmetrised kernel; overwrites self._dh_dsigma.
+            self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)
+
+        # Build a fresh array. The original aliased _dK_dsigma to _dh_dsigma
+        # and then did `+= _dh_dsigma.T`, an in-place add of an array to its
+        # own transpose, which is unsafe on NumPy versions without overlap
+        # handling.
+        self._dK_dsigma = dh1_dsigma + self._dh_dsigma.T
+
+        # The original also evaluated (and discarded) an expression using
+        # self._dh_ddelay, which is never set in this class and raised
+        # AttributeError; it has been removed.
+
+        # K_ode = 0.5*(h + h2.T) and sigma = sqrt(2)*lengthscale, hence the
+        # overall factor 0.5*sqrt(2) = 1/sqrt(2).
+        target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()
+
+    def dKdiag_dtheta(self,dL_dKdiag,index,target):
+        """Not implemented: leaves target untouched."""
+        return None
+
+    def gradients_X(self,dL_dK,X,X2,target):
+        """Not implemented: leaves target untouched."""
+        return None
+
+    def _extract_t_indices(self, X, X2=None, dL_dK=None):
+        """Extract times and output indices from the input matrices.
+
+        Column 0 of X holds times and column 1 holds output indices. Times are
+        sorted by output index so that each output forms a contiguous block;
+        the sorting permutation is kept in self._order / self._order2 and its
+        inverse (used to map results back to the ordering of X / X2) in
+        self._rorder / self._rorder2.
+
+        :param X: first input matrix, two columns
+        :param X2: second input matrix, two columns, or None to reuse X
+        :param dL_dK: optional matrix reordered the same way and stored in self._dL_dK
+        :raises ValueError: if X or X2 does not have exactly two columns
+        """
+        # TODO: some fast checking here to see if this needs recomputing?
+        # Validate before touching any state.
+        if not X.shape[1] == 2:
+            raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
+        # Builtin int instead of np.int: the alias was deprecated in NumPy 1.20
+        # and removed in 1.24; both name the same dtype.
+        self._index = np.asarray(X[:, 1],dtype=int)
+        # Sort indices so that outputs are in blocks for computational
+        # convenience.
+        self._order = self._index.argsort()
+        self._index = self._index[self._order]
+        self._t = X[:, 0][self._order]
+        self._rorder = self._order.argsort() # rorder is for reversing the order
+
+        if X2 is None:
+            self._t2 = None
+            self._index2 = None
+            self._order2 = self._order
+            self._rorder2 = self._rorder
+        else:
+            if not X2.shape[1] == 2:
+                raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
+            self._index2 = np.asarray(X2[:, 1],dtype=int)
+            self._order2 = self._index2.argsort()
+            self._index2 = self._index2[self._order2]
+            self._t2 = X2[:, 0][self._order2]
+            self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
+
+        if dL_dK is not None:
+            # Bring dL_dK into the same sorted ordering as the times.
+            self._dL_dK = dL_dK[self._order, :][:, self._order2]
+            
+    def _K_computations(self, X, X2):
+        """Perform main body of computations for the ode1 covariance function.
+
+        Fills self._K_dvar (the unscaled covariance, in the row/column order
+        of X and X2) and self._scale (the matching matrix of entries of B).
+        """
+        # First extract times and indices.
+        self._extract_t_indices(X, X2)
+
+        self._K_compute_eq()
+        self._K_compute_ode_eq()
+        if X2 is None:
+            self._K_eq_ode = self._K_ode_eq.T
+        else:
+            self._K_compute_ode_eq(transpose=True)
+        self._K_compute_ode()
+
+        # Assemble the blocks (latent first, ODE outputs second, i.e. the
+        # sorted ordering), then undo the sort on rows and columns.
+        blocks = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
+                            np.hstack((self._K_ode_eq, self._K_ode))))
+        self._K_dvar = blocks[self._rorder, :][:, self._rorder2]
+
+        # Matrix giving scales of each output: scale[i, j] = B[index_i, index_j].
+        # _K_dvar is back in input order, so the indices must be too; the
+        # original used the sorted indices, which only agrees when X is already
+        # sorted by output index.
+        row_index = self._index[self._rorder]
+        if X2 is None:
+            col_index = row_index
+        else:
+            col_index = self._index2[self._rorder2]
+        # NumPy indexing replaces the inline weave loops. The X2 loop wrote
+        # scale_mat[i+j*N], a column-major offset into a row-major (N, N2)
+        # array, which scrambled the entries.
+        self._scale = self.B[row_index[:, None], col_index[None, :]]
+
+
+
+    def _K_compute_eq(self):
+        """Compute the covariance between latent-function inputs (output index 0).
+
+        The result is stored in self._K_eq; squared time differences are kept
+        in self._dist2.
+        """
+        t_eq = self._t[self._index==0]
+        if self._t2 is None:
+            t2_eq = t_eq
+        else:
+            t2_eq = self._t2[self._index2==0]
+
+        # Nothing to compute when either side has no latent inputs.
+        if t_eq.size==0 or t2_eq.size==0:
+            self._K_eq = np.zeros((t_eq.size, t2_eq.size))
+            return
+
+        self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
+        gram = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
+        if self.is_normalized:
+            gram /= (np.sqrt(2*np.pi)*self.lengthscale)
+        self._K_eq = gram
+
+    def _K_compute_ode_eq(self, transpose=False):
+        """Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
+
+        :param transpose: if False, the ODE outputs come from self._t and form
+            the rows, the latent inputs come from self._t2 (or self._t when
+            there is no second input) and form the columns, and the result is
+            stored in self._K_ode_eq. If True, the latent inputs come from
+            self._t and form the rows, the ODE outputs come from self._t2 and
+            form the columns, and the result is stored in self._K_eq_ode
+            (default=False).
+        :type transpose: bool"""
+
+        if self._t2 is not None:
+            if transpose:
+                t_eq = self._t[self._index==0]
+                t_ode = self._t2[self._index2>0]
+                index_ode = self._index2[self._index2>0]-1
+            else:
+                t_eq = self._t2[self._index2==0]
+                t_ode = self._t[self._index>0]
+                index_ode = self._index[self._index>0]-1
+        else:
+            t_eq = self._t[self._index==0]
+            t_ode = self._t[self._index>0]
+            index_ode = self._index[self._index>0]-1
+
+        if t_ode.size==0 or t_eq.size==0:
+            if transpose:
+                self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
+            else:
+                self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
+            return
+
+        t_ode_mat = t_ode[:, None]
+        t_eq_mat = t_eq[None, :]
+        if self.delay is not None:
+            # Out-of-place so the time vector itself is never modified.
+            t_ode_mat = t_ode_mat - self.delay[index_ode, None]
+        diff_t = (t_ode_mat - t_eq_mat)
+
+        inv_sigma_diff_t = 1./self.sigma*diff_t
+        decay_vals = self.decay[index_ode][:, None]
+        half_sigma_d_i = 0.5*self.sigma*decay_vals
+
+        if self.is_stationary:
+            # np.inf: the bare name `inf` was never defined and raised NameError.
+            ln_part, signs = ln_diff_erfs(np.inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+        else:
+            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
+        # Work in the log domain and restore the sign afterwards.
+        sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)
+
+        sK *= 0.5
+
+        if not self.is_normalized:
+            sK *= np.sqrt(np.pi)*self.sigma
+
+
+        if transpose:
+            self._K_eq_ode = sK.T
+        else:
+            self._K_ode_eq = sK
+        
+    def _K_compute_ode(self):
+        """Compute covariances between outputs of the ODE models.
+
+        The result is stored in self._K_ode.
+        """
+        ode_rows = self._index>0
+        t_ode = self._t[ode_rows]
+        index_ode = self._index[ode_rows]-1
+
+        single_input = self._t2 is None
+        if single_input:
+            if t_ode.size==0:
+                self._K_ode = np.zeros((0, 0))
+                return
+            t2_ode, index2_ode = t_ode, index_ode
+        else:
+            ode_cols = self._index2>0
+            t2_ode = self._t2[ode_cols]
+            if t_ode.size==0 or t2_ode.size==0:
+                self._K_ode = np.zeros((t_ode.size, t2_ode.size))
+                return
+            index2_ode = self._index2[ode_cols]-1
+
+        h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
+        if single_input:
+            # Same inputs on both sides, so the swapped term is just h again.
+            h2 = h
+        else:
+            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
+
+        # Symmetrise the two half-contributions.
+        self._K_ode = 0.5 * (h + h2.T)
+
+        if not self.is_normalized:
+            self._K_ode *= np.sqrt(np.pi)*self.sigma
+    def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
+        """Helper function for computing H for the diagonal only.
+
+        Not implemented yet: the body is a no-op and returns None.
+
+        :param t: time input.
+        :type t: array
+        :param index: output indices
+        :type index: array of int.
+        :param update_derivatives: whether or not to update the derivative portions (default False).
+        :type update_derivatives: bool
+        :param stationary: whether to compute the stationary version of the covariance (default False).
+        :type stationary: bool"""
+
+        # The string below is never executed. It appears to be a sketch of the
+        # intended implementation (it references names such as D_j and
+        # simComputeH that are not defined in this class).
+        """if delta_i~=delta_j:
+            [h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
+        else:
+            Decay = self.decay[index]
+            if self.delay is not None:
+                t = t - self.delay[index]
+            
+            t_squared = t*t
+            half_sigma_decay = 0.5*self.sigma*Decay
+            [ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
+                                              half_sigma_decay)
+    
+            [ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
+                                              half_sigma_decay - t/self.sigma)
+            
+            h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+                             + ln_part_1
+                             - log(Decay + D_j)) 
+                 - sign2*np.exp(half_sigma_decay*half_sigma_decay
+                                - (Decay + D_j)*t
+                                + ln_part_2 
+                                - log(Decay + D_j)))
+    
+            sigma2 = self.sigma*self.sigma
+
+        if update_derivatives:
+        
+            dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h 
+                       + t*sign2*np.exp(
+                half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
+                )
+                       + self.sigma/np.sqrt(np.pi)*
+                       (-1 + np.exp(-t_squared/sigma2-Decay*t)
+                        + np.exp(-t_squared/sigma2-D_j*t)
+                        - np.exp(-(Decay + D_j)*t)))
+        
+            dh_dD_i = (dh_dD_i/(Decay+D_j)).real
+        
+        
+        
+            dh_dD_j = (t*sign2*np.exp(
+                half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
+                )
+                       -h)
+            dh_dD_j = (dh_dD_j/(Decay + D_j)).real
+
+            dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
+                        + 2/(np.sqrt(np.pi)*(Decay+D_j))\
+                        *((-Decay/2) \
+                          + (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
+                          - (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
+                          - Decay/2*np.exp(-(Decay+D_j)*t))"""
+        pass
+    
+    def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
+        """Helper function for computing part of the ode1 covariance function.
+
+        :param t: first time input.
+        :type t: array
+        :param index: Indices of first output.
+        :type index: array of int
+        :param t2: second time input.
+        :type t2: array
+        :param index2: Indices of second output.
+        :type index2: array of int
+        :param update_derivatives: whether to update derivatives (default is False).
+            When True, the derivatives are stored in self._dh_ddecay,
+            self._dh_ddecay2 and self._dh_dsigma.
+        :param stationary: the stationary version is not implemented.
+        :return h : result of this subcomponent of the kernel for the given values.
+        :rtype: ndarray
+        :raises NotImplementedError: if stationary is True.
+        """
+
+        if stationary:
+            # Call form works on Python 2 and 3; the comma form is Python 2 only.
+            raise NotImplementedError("Error, stationary version of this covariance not yet implemented.")
+        # Vector of decays and delays associated with each output.
+        Decay = self.decay[index]
+        Decay2 = self.decay[index2]
+        t_mat = t[:, None]
+        t2_mat = t2[None, :]
+        if self.delay is not None:
+            Delay = self.delay[index]
+            Delay2 = self.delay[index2]
+            # Out-of-place on purpose: t_mat/t2_mat are views of the caller's
+            # arrays, so `-=` shifted the caller's times on every call, and
+            # shifted them twice when t and t2 were the same array.
+            t_mat = t_mat - Delay[:, None]
+            t2_mat = t2_mat - Delay2[None, :]
+
+        diff_t = (t_mat - t2_mat)
+        inv_sigma_diff_t = 1./self.sigma*diff_t
+        half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
+
+        # Differences of error functions are evaluated in the log domain,
+        # with the sign returned separately.
+        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
+                                        half_sigma_decay_i - inv_sigma_diff_t,
+                                        return_sign=True)
+        ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
+                                        half_sigma_decay_i - t_mat/self.sigma,
+                                        return_sign=True)
+
+        h = sign1*np.exp(half_sigma_decay_i
+                         *half_sigma_decay_i
+                         -Decay[:, None]*diff_t+ln_part_1
+                         -np.log(Decay[:, None] + Decay2[None, :]))
+        h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
+                          -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
+                          -np.log(Decay[:, None] + Decay2[None, :]))
+
+        if update_derivatives:
+            sigma2 = self.sigma*self.sigma
+            # Update ith decay gradient
+
+            dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
+                         + (-diff_t*sign1*np.exp(
+                half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
+                )
+                            +t_mat*sign2*np.exp(
+                half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
+                - Decay2[None, :]*t2_mat+ln_part_2))
+                         +self.sigma/np.sqrt(np.pi)*(
+                -np.exp(
+                -diff_t*diff_t/sigma2
+                )+np.exp(
+                -t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
+                )+np.exp(
+                -t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
+                )-np.exp(
+                -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+                )
+                ))
+            self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
+
+            # Update jth decay gradient
+            dh_ddecay2 = (t2_mat*sign2
+                         *np.exp(
+                half_sigma_decay_i*half_sigma_decay_i
+                -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+                +ln_part_2
+                )
+                         -h)
+            # Use dh_ddecay2 here; the original divided dh_ddecay again, so
+            # the jth gradient was a copy of the ith one.
+            self._dh_ddecay2 = (dh_ddecay2/(Decay[:, None] + Decay2[None, :])).real
+
+            # Update sigma gradient
+            self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
+                               + 2/(np.sqrt(np.pi)
+                                    *(Decay[:, None]+Decay2[None, :]))
+                               *((-diff_t/sigma2-Decay[:, None]/2)
+                                 *np.exp(-diff_t*diff_t/sigma2)
+                                 + (-t2_mat/sigma2+Decay[:, None]/2)
+                                 *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
+                                 - (-t_mat/sigma2-Decay[:, None]/2)
+                                 *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
+                                 - Decay[:, None]/2
+                                 *np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
+
+        return h
--- a/GPy/kern/_src/todo/finite_dimensional.py
+++ b/GPy/kern/_src/todo/finite_dimensional.py
@ -0,0 +1,74 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import Kernpart
+import numpy as np
+from ...util.linalg import pdinv,mdot
+
+class FiniteDimensional(Kernpart):
+    def __init__(self, input_dim, F, G, variance=1., weights=None):
+        """
+        Argumnents
+        ----------
+        input_dim: int - the number of input dimensions
+        F: np.array of functions with shape (n,) - the n basis functions
+        G: np.array with shape (n,n) - the Gram matrix associated to F
+        weights : np.ndarray with shape (n,)
+        """
+        self.input_dim = input_dim
+        self.F = F
+        self.G = G
+        self.G_1 ,L,Li,logdet = pdinv(G)
+        self.n = F.shape[0]
+        if weights is not None:
+            assert weights.shape==(self.n,)
+        else:
+            weights = np.ones(self.n)
+        self.num_params = self.n + 1
+        self.name = 'finite_dim'
+        self._set_params(np.hstack((variance,weights)))
+
+    def _get_params(self):
+        return np.hstack((self.variance,self.weights))
+    def _set_params(self,x):
+        assert x.size == (self.num_params)
+        self.variance = x[0]
+        self.weights = x[1:]
+    def _get_param_names(self):
+        if self.n==1:
+            return ['variance','weight']
+        else:
+            return ['variance']+['weight_%i'%i for i in range(self.weights.size)]
+
+    def K(self,X,X2,target):
+        if X2 is None: X2 = X
+        FX = np.column_stack([f(X) for f in self.F])
+        FX2 = np.column_stack([f(X2) for f in self.F])
+        product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
+        np.add(product,target,target)
+    def Kdiag(self,X,target):
+        product = np.diag(self.K(X, X))
+        np.add(target,product,target)
+    def _param_grad_helper(self,X,X2,target):
+        """Return shape is NxMx(Ntheta)"""
+        if X2 is None: X2 = X
+        FX = np.column_stack([f(X) for f in self.F])
+        FX2 = np.column_stack([f(X2) for f in self.F])
+        DER = np.zeros((self.n,self.n,self.n))
+        for i in range(self.n):
+            DER[i,i,i] = np.sqrt(self.weights[i])
+        dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
+        dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
+        np.add(target[:,:,0],np.transpose(dv,(0,2,1)), target[:,:,0])
+        np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])
+    def dKdiag_dtheta(self,X,target):
+        np.add(target[:,0],1.,target[:,0])
+
+
+
+
+
+
+
+
--- a/GPy/kern/_src/todo/fixed.py
+++ b/GPy/kern/_src/todo/fixed.py
@ -0,0 +1,41 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import Kernpart
+import numpy as np
+
+class Fixed(Kernpart):
+    def __init__(self, input_dim, K, variance=1.):
+        """
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        self.input_dim = input_dim
+        self.fixed_K = K
+        self.num_params = 1
+        self.name = 'fixed'
+        self._set_params(np.array([variance]).flatten())
+
+    def _get_params(self):
+        return self.variance
+
+    def _set_params(self, x):
+        assert x.shape == (1,)
+        self.variance = x
+
+    def _get_param_names(self):
+        return ['variance']
+
+    def K(self, X, X2, target):
+        target += self.variance * self.fixed_K
+
+    def _param_grad_helper(self, partial, X, X2, target):
+        target += (partial * self.fixed_K).sum()
+
+    def gradients_X(self, partial, X, X2, target):
+        pass
+
+    def dKdiag_dX(self, partial, X, target):
+        pass
--- a/GPy/kern/_src/todo/gibbs.py
+++ b/GPy/kern/_src/todo/gibbs.py
@ -0,0 +1,154 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import Kernpart
+import numpy as np
+from ...util.linalg import tdot
+from ...core.mapping import Mapping
+import GPy
+
+class Gibbs(Kernpart):
+    """
+    Gibbs non-stationary covariance function. 
+
+    .. math::
+       
+       r = sqrt((x_i - x_j)'*(x_i - x_j))
+       
+       k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
+
+       Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
+
+       where :math:`l(x)` is a function giving the length scale as a function of space and :math:`q` is the dimensionality of the input space.
+       This is the non stationary kernel proposed by Mark Gibbs in his 1997
+        thesis. It is similar to an RBF but has a length scale that varies
+        with input location. This leads to an additional term in front of
+        the kernel.
+
+        The parameters are :math:`\sigma^2`, the process variance, and
+        the parameters of l(x) which is a function that can be
+        specified by the user, by default an multi-layer peceptron is
+        used.
+
+        :param input_dim: the number of input dimensions
+        :type input_dim: int 
+        :param variance: the variance :math:`\sigma^2`
+        :type variance: float
+        :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
+        :type mapping: GPy.core.Mapping
+        :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
+        :type ARD: Boolean
+        :rtype: Kernpart object
+
+    See Mark Gibbs's thesis for more details: Gibbs,
+    M. N. (1997). Bayesian Gaussian Processes for Regression and
+    Classification. PhD thesis, Department of Physics, University of
+    Cambridge. Or also see Page 93 of Gaussian Processes for Machine
+    Learning by Rasmussen and Williams. Although note that we do not
+    constrain the lengthscale to be positive by default. This allows
+    anticorrelation to occur. The positive constraint can be included
+    by the user manually.
+
+    """
+
+    def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
+        self.input_dim = input_dim
+        self.ARD = ARD
+        if not mapping:
+            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
+        if not ARD:
+            self.num_params=1+mapping.num_params
+        else:
+            raise NotImplementedError
+
+        self.mapping = mapping
+        self.name='gibbs'
+        self._set_params(np.hstack((variance, self.mapping._get_params())))
+
+    def _get_params(self):
+        return np.hstack((self.variance, self.mapping._get_params()))
+
+    def _set_params(self, x):
+        assert x.size == (self.num_params)
+        self.variance = x[0]
+        self.mapping._set_params(x[1:])
+
+    def _get_param_names(self):
+        return ['variance'] + self.mapping._get_param_names()
+
+    def K(self, X, X2, target):
+        """Return covariance between X and X2."""
+        self._K_computations(X, X2)
+        target += self.variance*self._K_dvar
+
+    def Kdiag(self, X, target):
+        """Compute the diagonal of the covariance matrix for X."""
+        np.add(target, self.variance, target)
+
+    def _param_grad_helper(self, dL_dK, X, X2, target):
+        """Derivative of the covariance with respect to the parameters."""
+        self._K_computations(X, X2)
+        self._dK_computations(dL_dK)
+        if X2==None:
+            gmapping = self.mapping.df_dtheta(2*self._dL_dl[:, None], X)
+        else:
+            gmapping = self.mapping.df_dtheta(self._dL_dl[:, None], X)
+            gmapping += self.mapping.df_dtheta(self._dL_dl_two[:, None], X2)
+
+        target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
+
+    def gradients_X(self, dL_dK, X, X2, target):
+        """Derivative of the covariance matrix with respect to X."""
+        # First account for gradients arising from presence of X in exponent.
+        self._K_computations(X, X2)
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
+        gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
+        target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
+        # Now account for gradients arising from presence of X in lengthscale.
+        self._dK_computations(dL_dK)
+        if X2 is None:
+            target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
+        else:
+            target += self.mapping.df_dX(self._dL_dl[:, None], X)
+    
+    def dKdiag_dX(self, dL_dKdiag, X, target):
+        """Gradient of diagonal of covariance with respect to X."""
+        pass
+
+    def dKdiag_dtheta(self, dL_dKdiag, X, target):
+        """Gradient of diagonal of covariance with respect to parameters."""
+        target[0] += np.sum(dL_dKdiag)
+
+
+    
+    def _K_computations(self, X, X2=None):
+        """Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
+        self._lengthscales=self.mapping.f(X)
+        self._lengthscales2=np.square(self._lengthscales)
+        if X2==None:
+            self._lengthscales_two = self._lengthscales
+            self._lengthscales_two2 = self._lengthscales2
+            Xsquare = np.square(X).sum(1)
+            self._K_dist2 = -2.*tdot(X) + Xsquare[:, None] + Xsquare[None, :]
+        else:
+            self._lengthscales_two = self.mapping.f(X2)
+            self._lengthscales_two2 = np.square(self._lengthscales_two)
+            self._K_dist2 = -2.*np.dot(X, X2.T) + np.square(X).sum(1)[:, None] + np.square(X2).sum(1)[None, :]
+        self._w2 = self._lengthscales2 + self._lengthscales_two2.T
+        prod_length = self._lengthscales*self._lengthscales_two.T
+        self._K_exponential = np.exp(-self._K_dist2/self._w2)
+        self._K_dvar = np.sign(prod_length)*(2*np.abs(prod_length)/self._w2)**(self.input_dim/2.)*np.exp(-self._K_dist2/self._w2)
+
+    def _dK_computations(self, dL_dK):
+        """Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
+        :param dL_dK: the gradient of the objective with respect to the covariance function.
+        :type dL_dK: ndarray"""
+        
+        self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
+        if self._lengthscales_two is self._lengthscales:
+            self._dL_dl_two = None
+        else:
+            self._dL_dl_two = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales**4 - self._lengthscales_two.T**4 ) + 2*self._lengthscales_two2.T*self._K_dist2)/(self._w2*self._w2*self._lengthscales_two.T)).sum(0)
--- a/GPy/kern/_src/todo/hetero.py
+++ b/GPy/kern/_src/todo/hetero.py
@ -0,0 +1,104 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from IPython.core.debugger import Tracer; debug_here=Tracer()
+from kernpart import Kernpart
+import numpy as np
+from ...util.linalg import tdot
+from ...core.mapping import Mapping
+import GPy
+
+class Hetero(Kernpart):
+    """
+    TODO: Need to constrain the function outputs
+    positive (still thinking of best way of doing this!!! Yes, intend to use
+    transformations, but what's the *best* way). Currently just squaring output.
+
+    Heteroschedastic noise which depends on input location. See, for example,
+    this paper by Goldberg et al.
+
+    .. math::
+
+       k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
+
+       where :math:`\sigma^2(x)` is a function giving the variance  as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
+
+    The parameters are the parameters of \sigma^2(x) which is a
+    function that can be specified by the user, by default an
+    multi-layer peceptron is used.
+
+    :param input_dim: the number of input dimensions
+    :type input_dim: int
+    :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
+    :type mapping: GPy.core.Mapping
+    :rtype: Kernpart object
+
+    See this paper:
+
+    Goldberg, P. W.  Williams, C. K. I. and Bishop,
+    C. M. (1998) Regression with Input-dependent Noise: a Gaussian
+    Process Treatment In Advances in Neural Information Processing
+    Systems, Volume 10, pp.  493-499. MIT Press
+
+    for a Gaussian process treatment of this problem.
+
+    """
+
+    def __init__(self, input_dim, mapping=None, transform=None):
+        self.input_dim = input_dim
+        if not mapping:
+            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
+        if not transform:
+            transform = GPy.core.transformations.logexp()
+
+        self.transform = transform
+        self.mapping = mapping
+        self.name='hetero'
+        self.num_params=self.mapping.num_params
+        self._set_params(self.mapping._get_params())
+
+    def _get_params(self):
+        return self.mapping._get_params()
+
+    def _set_params(self, x):
+        assert x.size == (self.num_params)
+        self.mapping._set_params(x)
+
+    def _get_param_names(self):
+        return self.mapping._get_param_names()
+
+    def K(self, X, X2, target):
+        """Return covariance between X and X2."""
+        if (X2 is None) or (X2 is X):
+            target[np.diag_indices_from(target)] += self._Kdiag(X)
+
+    def Kdiag(self, X, target):
+        """Compute the diagonal of the covariance matrix for X."""
+        target+=self._Kdiag(X)
+
+    def _Kdiag(self, X):
+        """Helper function for computing the diagonal elements of the covariance."""
+        return self.mapping.f(X).flatten()**2
+
+    def _param_grad_helper(self, dL_dK, X, X2, target):
+        """Derivative of the covariance with respect to the parameters."""
+        if (X2 is None) or (X2 is X):
+            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
+            self.dKdiag_dtheta(dL_dKdiag, X, target)
+
+    def dKdiag_dtheta(self, dL_dKdiag, X, target):
+        """Gradient of diagonal of covariance with respect to parameters."""
+        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
+
+    def gradients_X(self, dL_dK, X, X2, target):
+        """Derivative of the covariance matrix with respect to X."""
+        if X2==None or X2 is X:
+            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
+            self.dKdiag_dX(dL_dKdiag, X, target)
+
+    def dKdiag_dX(self, dL_dKdiag, X, target):
+        """Gradient of diagonal of covariance with respect to X."""
+        target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)
+
+
+
--- a/GPy/kern/_src/todo/odekern1.c
+++ b/GPy/kern/_src/todo/odekern1.c
@ -0,0 +1,38 @@
+#include <math.h> 
+
+ /*
+  * Covariance of the driving process between times t1 and t2:
+  *
+  *     sig1 * (1 + theta1*dist) * exp(-theta1*dist),   dist = |t2 - t1|
+  *
+  * theta2 and sig2 are accepted so that the signature matches k_yy, but
+  * they do not enter this formula.
+  *
+  * The distance is |t2 - t1|: the previous sqrt(t2*t2 - t1*t1) was not
+  * symmetric in its arguments and was NaN whenever |t1| > |t2|.
+  * Parameters are declared double explicitly; an untyped K&R parameter
+  * list would make every one of them an int.
+  */
+ double k_uu(double t1, double t2, double theta1, double theta2, double sig1, double sig2)
+ {
+  double kern=0;
+  double dist=0;
+
+  (void)theta2;
+  (void)sig2;
+
+  dist = fabs(t2-t1);
+
+  kern = sig1*(1+theta1*dist)*exp(-theta1*dist);
+
+ return kern;
+ }
+
+
+
+ /*
+  * Covariance of the output process between times t1 and t2, as a function
+  * of dist = |t2 - t1|.
+  *
+  * The distance is |t2 - t1|: the previous sqrt(t2*t2 - t1*t1) was not
+  * symmetric in its arguments and was NaN whenever |t1| > |t2|.
+  *
+  * NOTE(review): the second exponential, exp(-dist), carries no rate
+  * parameter -- possibly exp(-theta2*dist) was intended; confirm against
+  * the derivation before using this function.
+  * NOTE(review): the result is not finite when theta1 == theta2 (division
+  * by zero).
+  */
+ double k_yy(double t1, double t2, double theta1, double theta2, double sig1, double sig2)
+ {
+  double kern=0;
+  double dist=0;
+
+  dist = fabs(t2-t1);
+
+  kern = sig1*sig2 * (  exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
+  	exp(-dist)  ) / ((theta2-theta1)*(theta2-theta1));
+
+  return kern;
+ }
+
+
+
+
+
+
+	
+
+
+
--- a/GPy/kern/_src/todo/poly.py
+++ b/GPy/kern/_src/todo/poly.py
@ -0,0 +1,138 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import Kernpart
+import numpy as np
+# 4/tau with tau = 2*pi, i.e. 2/pi. Not referenced anywhere else in this module.
+four_over_tau = 2./np.pi
+
+class POLY(Kernpart):
+    """
+
+    Polynomial kernel parameter initialisation.  Included for completeness, but generally not recommended, is the polynomial kernel:
+
+    .. math::
+        k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
+
+    The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
+    (weight_variance), :math:`\sigma^2_b` (bias_variance) and d
+    (degree). Only gradients of the first three are provided for
+    kernel optimisation, it is assumed that polynomial degree would
+    be set by hand.
+
+    The kernel is not recommended as it is badly behaved when the
+    :math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
+    there is an automatic relevance determination version of this
+    kernel provided (NOTE YET IMPLEMENTED!).
+    :param input_dim: the number of input dimensions
+    :type input_dim: int 
+    :param variance: the variance :math:`\sigma^2`
+    :type variance: float
+    :param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
+    :type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
+    :param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
+    :param degree: the degree of the polynomial.
+    :type degree: int
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
+    :type ARD: Boolean
+    :rtype: Kernpart object
+
+    """
+
+    def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
+        self.input_dim = input_dim
+        self.ARD = ARD
+        if not ARD:
+            self.num_params=3
+            if weight_variance is not None:
+                weight_variance = np.asarray(weight_variance)
+                assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
+            else:
+                weight_variance = 1.*np.ones(1)
+        else:
+            self.num_params = self.input_dim + 2
+            if weight_variance is not None:
+                weight_variance = np.asarray(weight_variance)
+                assert weight_variance.size == self.input_dim, "bad number of weight variances"
+            else:
+                weight_variance = np.ones(self.input_dim)
+            raise NotImplementedError
+        self.degree=degree
+        self.name='poly_deg' + str(self.degree)
+        self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
+
+    def _get_params(self):
+        return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
+
+    def _set_params(self, x):
+        assert x.size == (self.num_params)
+        self.variance = x[0]
+        self.weight_variance = x[1:-1]
+        self.weight_std = np.sqrt(self.weight_variance)
+        self.bias_variance = x[-1]
+
+    def _get_param_names(self):
+        if self.num_params == 3:
+            return ['variance', 'weight_variance', 'bias_variance']
+        else:
+            return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
+
+    def K(self, X, X2, target):
+        """Return covariance between X and X2."""
+        self._K_computations(X, X2)
+        target += self.variance*self._K_dvar
+
+    def Kdiag(self, X, target):
+        """Compute the diagonal of the covariance matrix for X."""
+        self._K_diag_computations(X)
+        target+= self.variance*self._K_diag_dvar
+
+    def _param_grad_helper(self, dL_dK, X, X2, target):
+        """Derivative of the covariance with respect to the parameters."""
+        self._K_computations(X, X2)
+        base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
+        base_cov_grad = base*dL_dK
+
+
+            
+        target[0] += np.sum(self._K_dvar*dL_dK)
+        target[1] += (self._K_inner_prod*base_cov_grad).sum()
+        target[2] += base_cov_grad.sum()
+
+
+    def gradients_X(self, dL_dK, X, X2, target):
+        """Derivative of the covariance matrix with respect to X"""
+        self._K_computations(X, X2)
+        arg = self._K_poly_arg
+        if X2 is None:
+            target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
+        else:
+            target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
+            
+    def dKdiag_dX(self, dL_dKdiag, X, target):
+        """Gradient of diagonal of covariance with respect to X"""
+        self._K_diag_computations(X)
+        arg = self._K_diag_poly_arg
+        target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
+    
+    
+    def _K_computations(self, X, X2):
+        if self.ARD:
+            pass
+        else:
+            if X2 is None:
+                self._K_inner_prod = np.dot(X,X.T)
+            else:
+                self._K_inner_prod = np.dot(X,X2.T)
+            self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
+        self._K_dvar = self._K_poly_arg**self.degree
+
+    def _K_diag_computations(self, X):
+        if self.ARD:
+            pass
+        else:
+            self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
+        self._K_diag_dvar = self._K_diag_poly_arg**self.degree
+
+  
+
+
--- a/GPy/kern/_src/todo/rbf_inv.py
+++ b/GPy/kern/_src/todo/rbf_inv.py
@ -0,0 +1,336 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from rbf import RBF
+import numpy as np
+from scipy import weave
+from ...util.linalg import tdot
+from ...core.parameterization import Param
+
+class RBFInv(RBF):
+    """
+    Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. It only
+    differs from RBF in that here the parametrization is wrt the inverse lengthscale:
+
+    .. math::
+
+       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \  \\text{ where  } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
+
+    where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
+
+    :param input_dim: the number of input dimensions
+    :type input_dim: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    :param lengthscale: the vector of lengthscale of the kernel
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
+    :type ARD: Boolean
+    :rtype: kernel object
+
+    .. Note: this object implements both the ARD and 'spherical' version of the function
+    """
+
+    def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False, name='inverse rbf'):
+        """
+        Build an RBF kernel whose exposed parameters are the variance and the inverse lengthscale.
+
+        :param input_dim: the number of input dimensions
+        :param variance: the variance of the kernel
+        :param inv_lengthscale: the inverse lengthscale(s); a single value for the
+            non-ARD kernel, input_dim values for the ARD kernel, ones when None
+        :param ARD: whether there is one inverse lengthscale per input dimension
+        :param name: name forwarded to the RBF constructor
+        """
+        #self.input_dim = input_dim
+        #self.name = 'rbf_inv'
+        # The parent RBF is initialised with an ordinary lengthscale, so invert the argument first.
+        if inv_lengthscale is not None: lengthscale = 1./np.array(inv_lengthscale)
+        else: lengthscale = None
+        super(RBFInv, self).__init__(input_dim, variance=variance, lengthscale=lengthscale, ARD=ARD, name=name)
+        self.ARD = ARD
+        # Validate the size of inv_lengthscale (or default it to ones) and count the parameters.
+        if not ARD:
+            self.num_params = 2
+            if inv_lengthscale is not None:
+                inv_lengthscale = np.asarray(inv_lengthscale)
+                assert inv_lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
+            else:
+                inv_lengthscale = np.ones(1)
+        else:
+            self.num_params = self.input_dim + 1
+            if inv_lengthscale is not None:
+                inv_lengthscale = np.asarray(inv_lengthscale)
+                assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales"
+            else:
+                inv_lengthscale = np.ones(self.input_dim)
+        
+        self.variance = Param('variance', variance)
+        # The inverse lengthscale is registered under the parameter name 'sensitivity'.
+        self.inv_lengthscale = Param('sensitivity', inv_lengthscale)
+        # update_inv_lengthscale recomputes the derived lengthscale quantities;
+        # presumably the observer fires whenever the parameter changes -- TODO confirm against Param.add_observer.
+        self.inv_lengthscale.add_observer(self, self.update_inv_lengthscale)
+        # Swap the lengthscale parameter set up by the parent for the inverse lengthscale.
+        self.remove_parameter(self.lengthscale)
+        self.add_parameters(self.variance, self.inv_lengthscale)
+        #self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
+
+        # initialize cache
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))
+
+        # a set of optional args to pass to weave
+        self.weave_options = {'headers'           : ['<omp.h>'],
+                         'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
+                         'extra_link_args'   : ['-lgomp']}
+
+
+
+#     def _get_params(self):
+#         return np.hstack((self.variance, self.inv_lengthscale))
+
+    def update_inv_lengthscale(self, il):
+        """Recompute the quantities derived from self.inv_lengthscale.
+
+        Registered in __init__ as an observer of self.inv_lengthscale. The
+        argument il is not used; the current value is read from the instance.
+        Updates self.inv_lengthscale2, self.lengthscale and self.lengthscale2.
+        """
+        self.inv_lengthscale2 = np.square(self.inv_lengthscale)
+        # TODO: We can rewrite everything with inv_lengthscale and never need to do the below
+        self.lengthscale = 1. / self.inv_lengthscale
+        self.lengthscale2 = np.square(self.lengthscale)
+
+    #def _set_params(self, x):
+    def parameters_changed(self):
+        #assert x.size == (self.num_params)
+        #self.variance = x[0]
+        #self.inv_lengthscale = x[1:]
+        # reset cached results
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
+
+#     def _get_param_names(self):
+#         if self.num_params == 2:
+#             return ['variance', 'inv_lengthscale']
+#         else:
+#             return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
+
+    # TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
+    def _param_grad_helper(self, dL_dK, X, X2, target):
+        """Add the gradient of the objective with respect to the parameters to target, in place.
+
+        target[0] receives the variance gradient and target[1:] the gradient
+        with respect to the inverse lengthscale(s). The ARD case is computed
+        by inline C code compiled through weave.
+
+        :param dL_dK: gradient of the objective with respect to the covariance matrix
+        :param X: first set of inputs
+        :param X2: second set of inputs, or None for the covariance of X with itself
+        :param target: array the gradients are accumulated into
+        """
+        self._K_computations(X, X2)
+        target[0] += np.sum(self._K_dvar * dL_dK)
+        if self.ARD:
+            dvardLdK = self._K_dvar * dL_dK
+            var_len3 = self.variance / np.power(self.lengthscale, 3)
+            len2 = self.lengthscale2
+            if X2 is None:
+                # save computation for the symmetrical case
+                # (after adding the transpose, the C loop only visits pairs with j < i)
+                dvardLdK = dvardLdK + dvardLdK.T
+                code = """
+                int q,i,j;
+                double tmp;
+                for(q=0; q<input_dim; q++){
+                  tmp = 0;
+                  for(i=0; i<num_data; i++){
+                    for(j=0; j<i; j++){
+                      tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
+                    }
+                  }
+                  target(q+1) += var_len3(q)*tmp*(-len2(q));
+                }
+                """
+                num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
+                # NOTE(review): X2 is None here but is still listed in arg_names -- confirm weave accepts it.
+                weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
+            else:
+                code = """
+                int q,i,j;
+                double tmp;
+                for(q=0; q<input_dim; q++){
+                  tmp = 0;
+                  for(i=0; i<num_data; i++){
+                    for(j=0; j<num_inducing; j++){
+                      tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
+                    }
+                  }
+                  target(q+1) += var_len3(q)*tmp*(-len2(q));
+                }
+                """
+                num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
+                # [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
+                weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
+        else:
+            # The trailing factor -lengthscale2 presumably converts a gradient with respect to the
+            # lengthscale into one with respect to its inverse (d l / d(1/l) = -l**2) -- TODO confirm.
+            target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
+
+    def gradients_X(self, dL_dK, X, X2, target):
+        self._K_computations(X, X2)
+        if X2 is None:            
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
+        gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
+        target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
+
+    def dKdiag_dX(self, dL_dKdiag, X, target):
+        pass
+
+
+    #---------------------------------------#
+    #             PSI statistics            #
+    #---------------------------------------#
+
+    # def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
+    #     self._psi_computations(Z, mu, S)
+    #     denom_deriv = S[:, None, :] / (self.lengthscale ** 3 + self.lengthscale * S[:, None, :])
+    #     d_length = self._psi1[:, :, None] * (self.lengthscale * np.square(self._psi1_dist / (self.lengthscale2 + S[:, None, :])) + denom_deriv)
+    #     target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
+    #     dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
+    #     if not self.ARD:
+    #         target[1] += dpsi1_dlength.sum()*(-self.lengthscale2)
+    #     else:
+    #         target[1:] += dpsi1_dlength.sum(0).sum(0)*(-self.lengthscale2)
+    #     #target[1:] = target[1:]*(-self.lengthscale2)
+
+    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        tmp = 1 + S[:, None, :] * self.inv_lengthscale2
+        # d_inv_length_old = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / (self.lengthscale * self._psi1_denom) + self.inv_lengthscale) / self.inv_lengthscale2
+        d_length = -(self._psi1[:, :, None] * ((np.square(self._psi1_dist) * self.inv_lengthscale) / (tmp ** 2) + (S[:, None, :] * self.inv_lengthscale) / (tmp)))
+        # d_inv_length = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / self._psi1_denom + self.lengthscale)
+        target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
+        dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
+        if not self.ARD:
+            target[1] += dpsi1_dlength.sum() # *(-self.lengthscale2)
+        else:
+            target[1:] += dpsi1_dlength.sum(0).sum(0) # *(-self.lengthscale2)
+        # target[1:] = target[1:]*(-self.lengthscale2)
+
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        dpsi1_dZ = -self._psi1[:, :, None] * ((self.inv_lengthscale2 * self._psi1_dist) / self._psi1_denom)
+        target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
+
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
+        self._psi_computations(Z, mu, S)
+        tmp = (self._psi1[:, :, None] * self.inv_lengthscale2) / self._psi1_denom
+        target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
+        target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
+
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
+        """Accumulate the psi2 gradient w.r.t. the kernel parameters into target.
+
+        The per-parameter gradient has shape N,num_inducing,num_inducing,Ntheta
+        before it is reduced. target[0] receives the variance term; target[1]
+        (or target[1:] when self.ARD is set) receives the term that belongs to
+        self.inv_lengthscale. target is modified in place.
+        """
+        self._psi_computations(Z, mu, S)
+
+        # psi2 is quadratic in the variance
+        d_var = 2.*self._psi2 / self.variance
+        target[0] += np.sum(dL_dpsi2 * d_var)
+
+        numerator = (self._psi2_Zdist_sq * self._psi2_denom
+                     + self._psi2_mudist_sq
+                     + S[:, None, None, :] * self.inv_lengthscale2)
+        denominator = self.inv_lengthscale * self._psi2_denom
+        d_inv_length = -2.*self._psi2[:, :, :, None] * (numerator) / (denominator)
+
+        weighted = d_inv_length * dL_dpsi2[:, :, :, None]
+        if self.ARD:
+            # keep the trailing input dimension, reduce over N and both M axes
+            target[1:] += weighted.sum(0).sum(0).sum(0)
+        else:
+            target[1] += weighted.sum()
+
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
+        """Add to target the gradient of psi2 w.r.t. Z, weighted by dL_dpsi2.
+
+        The weighted gradient is reduced over its two leading axes and
+        accumulated into target in place.
+        """
+        self._psi_computations(Z, mu, S)
+        # contribution of the Z-Z distances: num_inducing, num_inducing, input_dim
+        z_part = self._psi2_Zdist * self.inv_lengthscale2
+        # contribution of the mu-Zhat distances: N, num_inducing, num_inducing, input_dim
+        mu_part = (self._psi2_mudist * self.inv_lengthscale2) / self._psi2_denom
+        grad = self._psi2[:, :, :, None] * (z_part[None] + mu_part)
+        weighted = dL_dpsi2[:, :, :, None] * grad
+        target += weighted.sum(0).sum(0)
+
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
+        """Add the psi2 gradients w.r.t. mu and S to target_mu and target_S.
+
+        Intermediate arrays are N,num_inducing,num_inducing,input_dim; both
+        inducing axes are summed out before accumulating in place.
+        """
+        self._psi_computations(Z, mu, S)
+        common = (self.inv_lengthscale2 * self._psi2[:, :, :, None]) / self._psi2_denom
+        weighted = dL_dpsi2[:, :, :, None] * common
+        mu_part = weighted * self._psi2_mudist
+        S_part = weighted * (2.*self._psi2_mudist_sq - 1)
+        target_mu += -2.*(mu_part).sum(1).sum(1)
+        target_S += (S_part).sum(1).sum(1)
+
+    #---------------------------------------#
+    #            Precomputations            #
+    #---------------------------------------#
+
+    def _K_computations(self, X, X2):
+        """Refresh the cached scaled squared distances and their exponential.
+
+        Nothing is recomputed when X, X2 and the current parameter vector all
+        compare equal to the values stored by the previous call. Otherwise
+        self._K_dist2 and self._K_dvar are rebuilt; X2=None means the
+        distances are taken between the rows of X itself.
+        """
+        unchanged = (np.array_equal(X, self._X)
+                     and np.array_equal(X2, self._X2)
+                     and np.array_equal(self._params , self._get_params()))
+        if unchanged:
+            return
+
+        # remember copies of what this result is valid for
+        self._X = X.copy()
+        self._params = self._get_params().copy()
+
+        scaled_X = X * self.inv_lengthscale
+        if X2 is None:
+            self._X2 = None
+            sq_norms = np.sum(np.square(scaled_X), 1)
+            self._K_dist2 = -2.*tdot(scaled_X) + (sq_norms[:, None] + sq_norms[None, :])
+        else:
+            self._X2 = X2.copy()
+            scaled_X2 = X2 * self.inv_lengthscale
+            row_norms = np.sum(np.square(scaled_X), 1)[:, None]
+            col_norms = np.sum(np.square(scaled_X2), 1)[None, :]
+            self._K_dist2 = -2.*np.dot(scaled_X, scaled_X2.T) + (row_norms + col_norms)
+        self._K_dvar = np.exp(-0.5 * self._K_dist2)
+
+    def _psi_computations(self, Z, mu, S):
+        """Recompute and cache the intermediate quantities behind psi1 and psi2.
+
+        Results are stored on self (the _psi1* and _psi2* attributes) and are
+        reused while Z, mu and S compare equal to the values kept from the
+        previous call. Shape comments use M for the rows of Z, N for the rows
+        of mu/S and Q for the trailing input dimension.
+
+        NOTE(review): the kernel parameters are not part of the checks below;
+        presumably self._Z/_mu/_S are reset elsewhere when the parameters
+        change -- confirm.
+        """
+        # here are the "statistics" for psi1 and psi2
+        if not np.array_equal(Z, self._Z):
+            # Z has changed, compute Z specific stuff
+            self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
+            self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
+            self._psi2_Zdist_sq = np.square(self._psi2_Zdist * self.inv_lengthscale) # M,M,Q
+
+        if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
+            # something's changed. recompute EVERYTHING
+
+            # psi1
+            self._psi1_denom = S[:, None, :] * self.inv_lengthscale2 + 1.
+            self._psi1_dist = Z[None, :, :] - mu[:, None, :]
+            self._psi1_dist_sq = (np.square(self._psi1_dist) * self.inv_lengthscale2) / self._psi1_denom
+            self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
+            self._psi1 = self.variance * np.exp(self._psi1_exponent)
+
+            # psi2
+            # _psi2_denom must be assigned before weave_psi2 is called: that
+            # method reads self._psi2_denom and self._psi2_Zdist_sq.
+            self._psi2_denom = 2.*S[:, None, None, :] * self.inv_lengthscale2 + 1. # N,M,M,Q
+            # the fourth return value of weave_psi2 is discarded; psi2 itself is
+            # computed with numpy a few lines below
+            self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
+            # self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
+            # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
+            # self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
+            self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
+
+            # store matrices for caching
+            # References are kept, not copies, so an in-place change to the
+            # caller's arrays would not be detected by the equality checks.
+            self._Z, self._mu, self._S = Z, mu, S
+
+    def weave_psi2(self, mu, Zhat):
+        """Fill the psi2 distance arrays and exponent with an inlined C loop.
+
+        Reads self._psi2_Zdist_sq and self._psi2_denom, so both must already
+        be set for the current inputs.
+
+        :param mu: array whose shape is unpacked as (N, input_dim)
+        :param Zhat: array indexed as Zhat(m, mm, q) in the C code; its first
+            dimension gives num_inducing
+        :returns: (mudist, mudist_sq, psi2_exponent, psi2). The first two have
+            shape (N, num_inducing, num_inducing, input_dim), the last two
+            (N, num_inducing, num_inducing). psi2 is allocated with np.empty
+            and is not written by the active C code, so its contents are
+            uninitialised.
+        """
+        N, input_dim = mu.shape
+        num_inducing = Zhat.shape[0]
+
+        # output buffers, filled in place by the C code
+        mudist = np.empty((N, num_inducing, num_inducing, input_dim))
+        mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
+        # zero-initialised because the C loop accumulates into it over q
+        psi2_exponent = np.zeros((N, num_inducing, num_inducing))
+        psi2 = np.empty((N, num_inducing, num_inducing))
+
+        psi2_Zdist_sq = self._psi2_Zdist_sq
+        # collapse the broadcast axes of the cached denominator so the C code
+        # can index it as (n, q); note this uses self.input_dim, not the local
+        # input_dim taken from mu.shape
+        _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
+        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
+        # only referenced by the commented-out psi2 lines inside the C source
+        variance_sq = float(np.square(self.variance))
+        # the C code indexes inv_lengthscale2(q), so the non-ARD value is
+        # expanded to one entry per input dimension
+        if self.ARD:
+            inv_lengthscale2 = self.inv_lengthscale2
+        else:
+            inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
+        # The loop visits only mm <= m and mirrors each value into the
+        # (mm, m) position as well.
+        code = """
+        double tmp;
+
+        #pragma omp parallel for private(tmp)
+        for (int n=0; n<N; n++){
+            for (int m=0; m<num_inducing; m++){
+               for (int mm=0; mm<(m+1); mm++){
+                   for (int q=0; q<input_dim; q++){
+                       //compute mudist
+                       tmp = mu(n,q) - Zhat(m,mm,q);
+                       mudist(n,m,mm,q) = tmp;
+                       mudist(n,mm,m,q) = tmp;
+
+                       //now mudist_sq
+                       tmp = tmp*tmp*inv_lengthscale2(q)/_psi2_denom(n,q);
+                       mudist_sq(n,m,mm,q) = tmp;
+                       mudist_sq(n,mm,m,q) = tmp;
+
+                       //now psi2_exponent
+                       tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
+                       psi2_exponent(n,mm,m) += tmp;
+                       if (m !=mm){
+                           psi2_exponent(n,m,mm) += tmp;
+                       }
+                   //psi2 would be computed like this, but np is faster
+                   //tmp = variance_sq*exp(psi2_exponent(n,m,mm));
+                   //psi2(n,m,mm) = tmp;
+                   //psi2(n,mm,m) = tmp;
+                   }
+                }
+            }
+        }
+
+        """
+
+        support_code = """
+        #include <omp.h>
+        #include <math.h>
+        """
+        # every name in arg_names is a local variable above; extra build
+        # options come from self.weave_options, which is defined outside this
+        # method
+        weave.inline(code, support_code=support_code, libraries=['gomp'],
+                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
+                     type_converters=weave.converters.blitz, **self.weave_options)
+
+        return mudist, mudist_sq, psi2_exponent, psi2
--- a/GPy/kern/_src/todo/spline.py
+++ b/GPy/kern/_src/todo/spline.py
@ -0,0 +1,61 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import Kernpart
+import numpy as np
+from ...core.parameterization import Param
+
+def theta(x):
+    """Heaviside step: 1.0 wherever x is non-negative, 0.0 elsewhere."""
+    non_negative = x >= 0.
+    return np.where(non_negative, 1., 0.)
+
+class Spline(Kernpart):
+    """
+    Spline kernel
+
+    With t taken from X and s from X2, K evaluates
+    variance * (t*s**2/2 - s**3/6 + (s-t)**3 * theta(s-t)/6).
+
+    :param input_dim: the number of input dimensions (fixed to 1 right now TODO)
+    :type input_dim: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    :param lengthscale: registered as a parameter, but not used by any of the
+        covariance computations in this class
+    :type lengthscale: float
+
+    """
+
+    def __init__(self,input_dim,variance=1.,lengthscale=1.):
+        self.input_dim = input_dim
+        assert self.input_dim==1
+        # NOTE(review): two Params are registered below while num_params says 1;
+        # confirm which one is intended before relying on num_params.
+        self.num_params = 1
+        self.name = 'spline'
+        self.variance = Param('variance', variance)
+        self.lengthscale = Param('lengthscale', lengthscale)
+        self.add_parameters(self.variance, self.lengthscale)
+
+    @staticmethod
+    def _unit_covariance(X, X2):
+        """Return the covariance between X and X2 for unit variance."""
+        t = X
+        s = X2.T
+        s_t = s-t # broadcasted subtraction
+        return 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
+
+    def K(self,X,X2,target):
+        """Add the covariance between X and X2 to target (in place).
+
+        X2=None is treated as X2=X.
+        """
+        if X2 is None:
+            X2 = X
+        assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
+        assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
+        target += self.variance*self._unit_covariance(X, X2)
+
+    def Kdiag(self,X,target):
+        """Add the diagonal of the covariance of X to target (in place)."""
+        target += self.variance*X.flatten()**3/3.
+
+    def _param_grad_helper(self,X,X2,target):
+        """Add the derivative of K w.r.t. the variance to target (in place).
+
+        K is linear in the variance, so this is the unit-variance covariance.
+        The quantities t, s and s_t are derived from X and X2 here; the
+        previous body referenced them without defining them and raised a
+        NameError on every call.
+        """
+        if X2 is None:
+            X2 = X
+        target += self._unit_covariance(X, X2)
+
+    def dKdiag_dtheta(self,X,target):
+        """Add the derivative of Kdiag w.r.t. the variance to target (in place)."""
+        target += X.flatten()**3/3.
+
+    def dKdiag_dX(self,X,target):
+        """Add the derivative of Kdiag w.r.t. X to target (in place)."""
+        target += self.variance*X**2
+
--- a/GPy/kern/_src/todo/symmetric.py
+++ b/GPy/kern/_src/todo/symmetric.py
@ -0,0 +1,81 @@
+# Copyright (c) 2012 James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from kernpart import Kernpart
+import numpy as np
+
+class Symmetric(Kernpart):
+    """
+    Symmetrical kernels
+
+    The covariance is the sum of the wrapped kernel evaluated on the four
+    combinations of original and transformed inputs:
+    k(X, X2) + k(AX, X2) + k(X, AX2) + k(AX, AX2), where AX = X.dot(transform).
+
+    :param k: the kernel to symmetrify
+    :type k: Kernpart
+    :param transform: the transform to use in symmetrification (allows symmetry on specified axes)
+    :type transform: A numpy array (input_dim x input_dim) specifying the transform;
+        defaults to minus the identity
+    :rtype: Kernpart
+
+    """
+    def __init__(self,k,transform=None):
+        if transform is None:
+            transform = np.eye(k.input_dim)*-1.
+        assert transform.shape == (k.input_dim, k.input_dim)
+        self.transform = transform
+        self.input_dim = k.input_dim
+        self.num_params = k.num_params
+        self.name = k.name + '_symm'
+        self.k = k
+        self.add_parameter(k)
+        #self._set_params(k._get_params())
+
+    def _transformed_inputs(self, X, X2):
+        """Return (X2, AX, AX2), substituting X for X2 when X2 is None."""
+        AX = np.dot(X,self.transform)
+        if X2 is None:
+            return X, AX, AX
+        return X2, AX, np.dot(X2, self.transform)
+
+    def K(self,X,X2,target):
+        """Compute the covariance matrix between X and X2."""
+        X2, AX, AX2 = self._transformed_inputs(X, X2)
+        self.k.K(X,X2,target)
+        self.k.K(AX,X2,target)
+        self.k.K(X,AX2,target)
+        self.k.K(AX,AX2,target)
+
+    def _param_grad_helper(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to the parameters."""
+        # The X2-is-None branch used to assign the misspelt name ZX2, leaving
+        # AX2 unbound; the shared helper always provides it.
+        X2, AX, AX2 = self._transformed_inputs(X, X2)
+        self.k._param_grad_helper(dL_dK,X,X2,target)
+        self.k._param_grad_helper(dL_dK,AX,X2,target)
+        self.k._param_grad_helper(dL_dK,X,AX2,target)
+        self.k._param_grad_helper(dL_dK,AX,AX2,target)
+
+
+    def gradients_X(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        # Same ZX2/AX2 misspelling as in _param_grad_helper, fixed the same way.
+        # NOTE(review): the terms evaluated at AX are added to target without
+        # any chain-rule factor for self.transform -- confirm this is intended.
+        X2, AX, AX2 = self._transformed_inputs(X, X2)
+        self.k.gradients_X(dL_dK, X, X2, target)
+        self.k.gradients_X(dL_dK, AX, X2, target)
+        self.k.gradients_X(dL_dK, X, AX2, target)
+        self.k.gradients_X(dL_dK, AX ,AX2, target)
+
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        # builds the full square matrix and keeps only its diagonal
+        foo = np.zeros((X.shape[0],X.shape[0]))
+        self.K(X,X,foo)
+        target += np.diag(foo)
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        """Derivative of the diagonal of the covariance w.r.t. X (not implemented)."""
+        raise NotImplementedError
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        """Derivative of the diagonal of the covariance w.r.t. the parameters (not implemented)."""
+        raise NotImplementedError