diff --git a/GPy/__init__.py b/GPy/__init__.py
new file mode 100644
index 00000000..718b763b
--- /dev/null
+++ b/GPy/__init__.py
@@ -0,0 +1,6 @@
+import kern
+import models
+import inference
+import util
+import examples
+from core import priors
diff --git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py
new file mode 100644
index 00000000..3e1d8ee5
--- /dev/null
+++ b/GPy/kern/Brownian.py
@@ -0,0 +1,56 @@
+from kernpart import kernpart
+import numpy as np
+
+def theta(x):
+    """Heavisdie step function"""
+    return np.where(x>=0.,1.,0.)
+
+class Brownian(kernpart):
+    """
+    Brownian Motion kernel.
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance:
+    :type variance: float
+    """
+    def __init__(self,D,variance=1.):
+        self.D = D
+        assert self.D==1, "Brownian motion in 1D only"
+        self.Nparam = 1.
+        self.name = 'Brownian'
+        self.set_param(np.array([variance]).flatten())
+
+    def get_param(self):
+        return self.variance
+
+    def set_param(self,x):
+        assert x.shape==(1,)
+        self.variance = x
+
+    def get_param_names(self):
+        return ['variance']
+
+    def K(self,X,X2,target):
+        target += self.variance*np.fmin(X,X2.T)
+
+    def Kdiag(self,X,target):
+        target += self.variance*X.flatten()
+
+    def dK_dtheta(self,X,X2,target):
+        target += np.fmin(X,X2.T)
+
+    def dKdiag_dtheta(self,X,target):
+        target += X.flatten()
+
+    def dK_dX(self,X,X2,target):
+        target += self.variance
+        target -= self.variance*theta(X-X2.T)
+        if X.shape==X2.shape:
+            if np.all(X==X2):
+                np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0])
+
+
+    def dKdiag_dX(self,X,target):
+        target += self.variance
+
diff --git a/GPy/kern/DelayedDecorator.py b/GPy/kern/DelayedDecorator.py
new file mode 100644
index 00000000..bcd53744
--- /dev/null
+++ b/GPy/kern/DelayedDecorator.py
@@ -0,0 +1,86 @@
+from new import instancemethod
+#this is here so that we can have nice syntatic sugar when taking gradients
+# See the transform_gradients decorator in kern
+
+class DelayedDecorator(object):
+    """Wrapper that delays decorating a function until it is invoked.
+
+    This class allows a decorator to be used with both ordinary functions and
+    methods of classes. It wraps the function passed to it with the decorator
+    passed to it, but with some special handling:
+
+      - If the wrapped function is an ordinary function, it will be decorated
+        the first time it is called.
+
+      - If the wrapped function is a method of a class, it will be decorated
+        separately the first time it is called on each instance of the class.
+        It will also be decorated separately the first time it is called as
+        an unbound method of the class itself (though this use case should
+        be rare).
+    """
+
+    def __init__(self, deco, func):
+        # The base decorated function (which may be modified, see below)
+        self._func = func
+        # The decorator that will be applied
+        self._deco = deco
+        # Variable to monitor calling as an ordinary function
+        self.__decofunc = None
+        # Variable to monitor calling as an unbound method
+        self.__clsfunc = None
+
+    def _decorated(self, cls=None, instance=None):
+        """Return the decorated function.
+
+        This method is for internal use only; it can be implemented by
+        subclasses to modify the actual decorated function before it is
+        returned. The ``cls`` and ``instance`` parameters are supplied so
+        this method can tell how it was invoked. If it is not overridden,
+        the base function stored when this class was instantiated will
+        be decorated by the decorator passed when this class was instantiated,
+        and then returned.
+
+        Note that factoring out this method, in addition to allowing
+        subclasses to modify the decorated function, ensures that the
+        right thing is done automatically when the decorated function
+        itself is a higher-order function (e.g., a generator function).
+        Since this method is called every time the decorated function
+        is accessed, a new instance of whatever it returns will be
+        created (e.g., a new generator will be realized), which is
+        exactly the expected semantics.
+        """
+        return self._deco(self._func)
+
+    def __call__(self, *args, **kwargs):
+        """Direct function call syntax support.
+
+        This makes an instance of this class work just like the underlying
+        decorated function when called directly as an ordinary function.
+        An internal reference to the decorated function is stored so that
+        future direct calls will get the stored function.
+        """
+        if not self.__decofunc:
+            self.__decofunc = self._decorated()
+        return self.__decofunc(*args, **kwargs)
+
+    def __get__(self, instance, cls):
+        """Descriptor protocol support.
+
+        This makes an instance of this class function correctly when it
+        is used to decorate a method on a user-defined class. If called
+        as a bound method, we store the decorated function in the instance
+        dictionary, so we will not be called again for that instance. If
+        called as an unbound method, we store a reference to the decorated
+        function internally and use it on future unbound method calls.
+        """
+        if instance:
+            deco = instancemethod(self._decorated(cls, instance), instance, cls)
+            # This prevents us from being called again for this instance
+            setattr(instance, self._func.__name__, deco)
+        elif cls:
+            if not self.__clsfunc:
+                self.__clsfunc = instancemethod(self._decorated(cls), None, cls)
+            deco = self.__clsfunc
+        else:
+            raise ValueError("Must supply instance or class to descriptor.")
+        return deco
diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py
new file mode 100644
index 00000000..da4e8a6d
--- /dev/null
+++ b/GPy/kern/Matern32.py
@@ -0,0 +1,107 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+from ..util.linalg import pdinv,mdot
+from scipy import integrate
+
+class Matern32(kernpart):
+    """
+    Matern 3/2 kernel:
+
+    .. math::
+
+       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where  } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance: the variance :math:`\sigma^2`
+    :type variance: float
+    :param lengthscale: the lengthscales :math:`\ell_i`
+    :type lengthscale: np.ndarray of size (D,)
+    :rtype: kernel object
+
+    """
+
+    def __init__(self,D,variance=1.,lengthscales=None):
+        self.D = D
+        if lengthscales is not None:
+            assert lengthscales.shape==(self.D,)
+        else:
+            lengthscales = np.ones(self.D)
+        self.Nparam = self.D + 1
+        self.name = 'Mat32'
+        self.set_param(np.hstack((variance,lengthscales)))
+
+    def get_param(self):
+        """return the value of the parameters."""
+        return np.hstack((self.variance,self.lengthscales))
+    def set_param(self,x):
+        """set the value of the parameters."""
+        assert x.size==(self.D+1)
+        self.variance = x[0]
+        self.lengthscales = x[1:]
+    def get_param_names(self):
+        """return parameter names."""
+        if self.D==1:
+            return ['variance','lengthscale']
+        else:
+            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
+
+    def K(self,X,X2,target):
+        """Compute the covariance matrix between X and X2."""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target)
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        np.add(target,self.variance,target)
+
+    def Gram_matrix(self,F,F1,F2,lower,upper):
+        """
+        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.
+
+        :param F: vector of functions
+        :type F: np.array
+        :param F1: vector of derivatives of F
+        :type F1: np.array
+        :param F2: vector of second derivatives of F
+        :type F2: np.array    
+        :param lower,upper: boundaries of the input domain
+        :type lower,upper: floats  
+        """
+        assert self.D == 1
+        def L(x,i):
+            return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x))
+        n = F.shape[0]
+        G = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
+        Flower = np.array([f(lower) for f in F])[:,None]
+        F1lower = np.array([f(lower) for f in F1])[:,None]
+        #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
+        #return(G)
+        return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
+
+    def dK_dtheta(self,X,X2,target):
+        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist)
+        invdist = 1./np.where(dist!=0.,dist,np.inf)
+        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
+        dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
+        np.add(target[:,:,0],dvar, target[:,:,0])
+        np.add(target[:,:,1:],dl, target[:,:,1:])
+    def dKdiag_dtheta(self,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
+        np.add(target[:,0],1.,target[:,0])
+    def dK_dX(self,X,X2,target):
+        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
+        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
+        target += -  np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
+    def dKdiag_dX(self,X,target):
+        pass
+
diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py
new file mode 100644
index 00000000..836fd793
--- /dev/null
+++ b/GPy/kern/Matern52.py
@@ -0,0 +1,112 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+from scipy import integrate
+
+class Matern52(kernpart):
+    """
+    Matern 5/2 kernel:
+
+    .. math::
+
+       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where  } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance: the variance :math:`\sigma^2`
+    :type variance: float
+    :param lengthscale: the lengthscales :math:`\ell_i`
+    :type lengthscale: np.ndarray of size (D,)
+    :rtype: kernel object
+
+    """
+    def __init__(self,D,variance=1.,lengthscales=None):
+        self.D = D
+        if lengthscales is not None:
+            assert lengthscales.shape==(self.D,)
+        else:
+            lengthscales = np.ones(self.D)
+        self.Nparam = self.D + 1
+        self.name = 'Mat52'
+        self.set_param(np.hstack((variance,lengthscales)))
+        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
+
+    def get_param(self):
+        """return the value of the parameters."""
+        return np.hstack((self.variance,self.lengthscales))
+    def set_param(self,x):
+        """set the value of the parameters."""
+        assert x.size==(self.D+1)
+        self.variance = x[0]
+        self.lengthscales = x[1:]
+        self.lengthscales2 = np.square(self.lengthscales)
+        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
+    def get_param_names(self):
+        """return parameter names."""
+        if self.D==1:
+            return ['variance','lengthscale']
+        else:
+            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
+    def K(self,X,X2,target):
+        """Compute the covariance matrix between X and X2."""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        np.add(target,self.variance,target)
+
+    def Gram_matrix(self,F,F1,F2,F3,lower,upper):
+        """
+        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.
+
+        :param F: vector of functions
+        :type F: np.array
+        :param F1: vector of derivatives of F
+        :type F1: np.array
+        :param F2: vector of second derivatives of F
+        :type F2: np.array 
+        :param F3: vector of third derivatives of F
+        :type F3: np.array    
+        :param lower,upper: boundaries of the input domain
+        :type lower,upper: floats  
+        """
+        assert self.D == 1
+        def L(x,i):
+            return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x))
+        n = F.shape[0]
+        G = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
+        G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5))
+        Flower = np.array([f(lower) for f in F])[:,None]
+        F1lower = np.array([f(lower) for f in F1])[:,None]
+        F2lower = np.array([f(lower) for f in F2])[:,None]
+        orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T)
+        orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
+        return(1./self.variance* (G_coef*G + orig + orig2))
+
+    def dK_dtheta(self,X,X2,target):
+        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        invdist = 1./np.where(dist!=0.,dist,np.inf)
+        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
+        dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
+        dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
+        np.add(target[:,:,0],dvar, target[:,:,0])
+        np.add(target[:,:,1:],dl, target[:,:,1:])
+    def dKdiag_dtheta(self,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
+        np.add(target[:,0],1.,target[:,0])
+    def dK_dX(self,X,X2,target):
+        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
+        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
+        target += -  np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
+    def dKdiag_dX(self,X,target):
+        pass
+
+
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
new file mode 100644
index 00000000..35f70a35
--- /dev/null
+++ b/GPy/kern/__init__.py
@@ -0,0 +1,2 @@
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD
+from kern import kern
diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py
new file mode 100644
index 00000000..55c5cff6
--- /dev/null
+++ b/GPy/kern/bias.py
@@ -0,0 +1,47 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+
+class bias(kernpart):
+    def __init__(self,D,variance=1.):
+        """
+        Arguments
+        ----------
+        D: int - the number of input dimensions
+        variance: float
+        """
+        self.D = D
+        self.Nparam = 1
+        self.name = 'bias'
+        self.set_param(np.array([variance]).flatten())
+
+    def get_param(self):
+        return self.variance
+
+    def set_param(self,x):
+        assert x.shape==(1,)
+        self.variance = x
+
+    def get_param_names(self):
+        return ['variance']
+
+    def K(self,X,X2,target):
+        if X2 is None: X2 = X
+        np.add(self.variance, target,target)
+
+    def Kdiag(self,X,target):
+        np.add(target,self.variance,target)
+
+    def dK_dtheta(self,X,X2,target):
+        """Return shape is NxMx(Ntheta)"""
+        if X2 is None: X2 = X
+        np.add(target[:,:,0],1., target[:,:,0])
+
+    def dKdiag_dtheta(self,X,target):
+        np.add(target[:,0],1.,target[:,0])
+
+    def dK_dX(self, X, X2, target):
+        pass
+
+    def dKdiag_dX(self,X,target):
+        pass
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
new file mode 100644
index 00000000..f3ffdba2
--- /dev/null
+++ b/GPy/kern/constructors.py
@@ -0,0 +1,168 @@
+import numpy as np
+from kern import kern
+
+from rbf import rbf as rbfpart
+from rbf_ARD import rbf_ARD as rbf_ARD_part
+from white import white as whitepart
+from linear import linear as linearpart
+from linear_ARD import linear_ARD as linear_ARD_part
+from exponential import exponential as exponentialpart
+from Matern32 import Matern32 as Matern32part
+from Matern52 import Matern52 as Matern52part
+from bias import bias as biaspart
+from finite_dimensional import finite_dimensional as finite_dimensionalpart
+from spline import spline as splinepart
+from Brownian import Brownian as Brownianpart
+
+#TODO these s=constructors are not as clean as we'd like. Tidy the code up
+#using meta-classes to make the objects construct properly wthout them.
+
+def rbf(D,variance=1., lengthscale=1.):
+    """
+    Construct an RBF kernel
+
+    :param D: dimensionality of the kernel, obligatory
+    :type D: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    :param lengthscale: the lengthscale of the kernel
+    :type lengthscale: float
+    """
+    part = rbfpart(D,variance,lengthscale)
+    return kern(D, [part])
+
+def rbf_ARD(D,variance=1., lengthscales=None):
+    """
+    Construct an RBF kernel with Automatic Relevance Determination (ARD)
+
+    :param D: dimensionality of the kernel, obligatory
+    :type D: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    :param lengthscales: the lengthscales of the kernel
+    :type lengthscales: None|np.ndarray
+    """
+    part = rbf_ARD_part(D,variance,lengthscales)
+    return kern(D, [part])
+
+def linear(D,lengthscales=None):
+    """
+     Construct a linear kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     lengthscales (np.ndarray)
+    """
+    part = linearpart(D,lengthscales)
+    return kern(D, [part])
+
+def linear_ARD(D,lengthscales=None):
+    """
+     Construct a linear ARD kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     lengthscales (np.ndarray)
+    """
+    part = linear_ARD_part(D,lengthscales)
+    return kern(D, [part])
+
+def white(D,variance=1.):
+    """
+     Construct a white kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     variance (float)
+    """
+    part = whitepart(D,variance)
+    return kern(D, [part])
+
+def exponential(D,variance=1., lengthscales=None):
+    """
+     Construct a exponential kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     variance (float)
+     lengthscales (np.ndarray)
+    """
+    part = exponentialpart(D,variance, lengthscales)
+    return kern(D, [part])
+
+def Matern32(D,variance=1., lengthscales=None):
+    """
+     Construct a Matern 3/2 kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     variance (float)
+     lengthscales (np.ndarray)
+    """
+    part = Matern32part(D,variance, lengthscales)
+    return kern(D, [part])
+
+def Matern52(D,variance=1., lengthscales=None):
+    """
+     Construct a Matern 5/2 kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     variance (float)
+     lengthscales (np.ndarray)
+    """
+    part = Matern52part(D,variance, lengthscales)
+    return kern(D, [part])
+
+def bias(D,variance=1.):
+    """
+     Construct a bias kernel.
+
+     Arguments
+     ---------
+     D (int), obligatory
+     variance (float)
+    """
+    part = biaspart(D,variance)
+    return kern(D, [part])
+
+def finite_dimensional(D,F,G,variances=1.,weights=None):
+    """
+    Construct a finite dimensional kernel.
+    D: int - the number of input dimensions
+    F: np.array of functions with shape (n,) - the n basis functions
+    G: np.array with shape (n,n) - the Gram matrix associated to F
+    variances : np.ndarray with shape (n,)
+    """
+    part = finite_dimensionalpart(D,F,G,variances,weights)
+    return kern(D, [part])
+
+def spline(D,variance=1.):
+    """
+    Construct a spline kernel.
+
+    :param D: Dimensionality of the kernel
+    :type D: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    """
+    part = splinepart(D,variance)
+    return kern(D, [part])
+
+def Brownian(D,variance=1.):
+    """
+    Construct a Brownian motion kernel.
+
+    :param D: Dimensionality of the kernel
+    :type D: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    """
+    part = Brownianpart(D,variance)
+    return kern(D, [part])
diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py
new file mode 100644
index 00000000..f1faf51f
--- /dev/null
+++ b/GPy/kern/exponential.py
@@ -0,0 +1,104 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+from scipy import integrate
+
+class exponential(kernpart):
+    """
+    Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
+
+    .. math::
+
+       k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where  } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance: the variance :math:`\sigma^2`
+    :type variance: float
+    :param lengthscale: the lengthscales :math:`\ell_i`
+    :type lengthscale: np.ndarray of size (D,)
+    :rtype: kernel object
+
+    """
+    def __init__(self,D,variance=1.,lengthscales=None):
+        self.D = D
+        if lengthscales is not None:
+            assert lengthscales.shape==(self.D,)
+        else:
+            lengthscales = np.ones(self.D)
+        self.Nparam = self.D + 1
+        self.name = 'exp'
+        self.set_param(np.hstack((variance,lengthscales)))
+
+    def get_param(self):
+        """return the value of the parameters."""
+        return np.hstack((self.variance,self.lengthscales))
+    def set_param(self,x):
+        """set the value of the parameters."""
+        assert x.size==(self.D+1)
+        self.variance = x[0]
+        self.lengthscales = x[1:]
+    def get_param_names(self):
+        """return parameter names."""
+        if self.D==1:
+            return ['variance','lengthscale']
+        else:
+            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
+    def K(self,X,X2,target):
+        """Compute the covariance matrix between X and X2."""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        np.add(self.variance*np.exp(-dist), target,target)
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        np.add(target,self.variance,target)
+
+    def Gram_matrix(self,F,F1,lower,upper):
+        """
+        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.
+
+        :param F: vector of functions
+        :type F: np.array
+        :param F1: vector of derivatives of F
+        :type F1: np.array  
+        :param lower,upper: boundaries of the input domain
+        :type lower,upper: floats  
+        """
+        assert self.D == 1
+        def L(x,i):
+            return(1./self.lengthscales*F[i](x) + F1[i](x))
+        n = F.shape[0]
+        G = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
+        Flower = np.array([f(lower) for f in F])[:,None]
+        return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))
+
+    def dK_dtheta(self,X,X2,target):
+        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        invdist = 1./np.where(dist!=0.,dist,np.inf)
+        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
+        dvar = np.exp(-dist)
+        dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,np.newaxis]
+        np.add(target[:,:,0],dvar, target[:,:,0])
+        np.add(target[:,:,1:],dl, target[:,:,1:])
+    def dKdiag_dtheta(self,X,target):
+        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
+        np.add(target[:,0],1.,target[:,0])
+    def dK_dX(self,X,X2,target):
+        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
+        if X2 is None: X2 = X
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
+        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
+        target += - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
+    def dKdiag_dX(self,X,target):
+        pass
+
+
+
+
+        
+
diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/finite_dimensional.py
new file mode 100644
index 00000000..bb9d0a90
--- /dev/null
+++ b/GPy/kern/finite_dimensional.py
@@ -0,0 +1,70 @@
+from kernpart import kernpart
+import numpy as np
+from ..util.linalg import pdinv,mdot
+
+class finite_dimensional(kernpart):
+    def __init__(self, D, F, G, variance=1., weights=None):
+        """
+        Argumnents
+        ----------
+        D: int - the number of input dimensions
+        F: np.array of functions with shape (n,) - the n basis functions
+        G: np.array with shape (n,n) - the Gram matrix associated to F
+        weights : np.ndarray with shape (n,)
+        """
+        self.D = D
+        self.F = F
+        self.G = G
+        self.G_1 ,tmp = pdinv(G)
+        self.n = F.shape[0]
+        if weights is not None:
+            assert weights.shape==(self.n,)
+        else:
+            weights = np.ones(self.n)
+        self.Nparam = self.n + 1
+        self.name = 'finite_dim'
+        self.set_param(np.hstack((variance,weights)))
+
+    def get_param(self):
+        return np.hstack((self.variance,self.weights))
+    def set_param(self,x):
+        assert x.size == (self.Nparam)
+        self.variance = x[0]
+        self.weights = x[1:]
+    def get_param_names(self):
+        if self.n==1:
+            return ['variance','weight']
+        else:
+            return ['variance']+['weight_%i'%i for i in range(self.weights.size)]
+
+    def K(self,X,X2,target):
+        if X2 is None: X2 = X
+        FX = np.column_stack([f(X) for f in self.F])
+        FX2 = np.column_stack([f(X2) for f in self.F])
+        product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
+        np.add(product,target,target)
+    def Kdiag(self,X,target):
+        product = np.diag(self.K(X,X2))
+        np.add(target,product,target)
+    def dK_dtheta(self,X,X2,target):
+        """Return shape is NxMx(Ntheta)"""
+        if X2 is None: X2 = X
+        FX = np.column_stack([f(X) for f in self.F])
+        FX2 = np.column_stack([f(X2) for f in self.F])
+        DER = np.zeros((self.n,self.n,self.n))
+        for i in range(self.n):
+            DER[i,i,i] = np.sqrt(self.weights[i])
+        dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
+        dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
+        np.add(target[:,:,0],np.transpose(dv,(0,2,1)), target[:,:,0])
+        np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])
+    def dKdiag_dtheta(self,X,target):
+        np.add(target[:,0],1.,target[:,0])
+
+
+
+
+
+
+
+
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
new file mode 100644
index 00000000..a15ee872
--- /dev/null
+++ b/GPy/kern/kern.py
@@ -0,0 +1,308 @@
+import numpy as np
+from ..core.parameterised import parameterised
+from DelayedDecorator import DelayedDecorator
+from functools import partial
+from kernpart import kernpart
+
+class transform_gradients_(object):
+    """
+    A decorator for gradient transformation. Accounts for constraining and tying.
+
+    NB: this uses the Delayed_decorator class so allow it to decorate bound methods
+    """
+    def __init__(self,f):
+        self.f = f
+    def __call__(self,*args,**kwargs):
+        kern = args[0]
+        x = kern.get_param()
+        g = self.f(*args,**kwargs)
+
+        #first roll the axes of gradients. This is because we always want to acces the first axis,
+        #else slicing becomes more difficult
+        g = np.rollaxis(g,-1)
+
+        ##TODO: the shape of the gradients is slightly problematic
+
+        if len(g.shape)==2:
+            x_reshape = (-1,1)
+        elif len(g.shape)==3:
+            x_reshape = (-1,1,1)
+        else:
+            raise NotImplementedError
+
+        #do the simple transforms first
+        np.multiply(g[kern.constrained_positive_indices], x[kern.constrained_positive_indices].reshape(x_reshape),g[kern.constrained_positive_indices])
+        np.multiply(g[kern.constrained_negative_indices], x[kern.constrained_negative_indices].reshape(x_reshape),g[kern.constrained_negative_indices])
+        [np.multiply(g[i], ((x[i]-l)*(h-x[i])/(h-l)).reshape(x_reshape), g[i]) for i,l,h in zip(kern.constrained_bounded_indices, kern.constrained_bounded_lowers, kern.constrained_bounded_uppers)]
+
+        #work out which gradients need to be added (tieing)
+        [np.sum(g[i],g[i[0]], axis=0) for i in kern.tied_indices]
+
+        #work out which gradients are simply cut out (due to fixing)
+        if len(kern.tied_indices) or len(kern.constrained_fixed_indices):
+            to_remove = np.hstack((kern.constrained_fixed_indices+[t[1:] for t in kern.tied_indices]))
+            g =  np.delete(g,to_remove,axis=0)
+
+        return np.swapaxes(np.rollaxis(g,0,-1),-1,-2) # undo the rolling of the axes
+
+transform_gradients = partial(DelayedDecorator,transform_gradients_)
+#transform_param = partial(DelayedDecorator,_transform_param) TODO
+
+
+
+
+class kern(parameterised):
+    def __init__(self,D,parts=[], input_slices=None):
+        """
+        This kernel does 'compound' structures.
+
+        The compund structure enables many features of GPy, including
+         - Hierarchical models
+         - Correleated output models
+         - multi-view learning
+
+        Hadamard product and outer-product kernels will require a new class TODO.
+
+        :param D: The dimensioality of the kernel's input space
+        :type D: int
+        :param parts: the 'parts' (PD functions) of the kernel
+        :type parts: list of kernpart objects
+        :param input_slices: the slices on the inputs which apply to each kernel
+        :type input_slices: list of slice objects, or list of bools
+
+        """
+        self.parts = parts
+        self.Nparts = len(parts)
+        self.Nparam = sum([p.Nparam for p in self.parts])
+
+        self.D = D
+
+        #deal with input_slices
+        if input_slices is None:
+            self.input_slices = [slice(None) for p in self.parts]
+        else:
+            assert len(input_slices)==len(self.parts)
+            self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
+
+        for p in self.parts:
+            assert isinstance(p,kernpart), "bad kernel part"
+
+
+        self.compute_param_slices()
+
+        parameterised.__init__(self)
+
+        #TODO: Xslices can be passed so that we can compute hierarchical and strutured kernels. these also need implementing for the psi statistics (straightforward)
+
+    def compute_param_slices(self):
+        """create a set of slices that can index the parameters of each part"""
+        self.param_slices = []
+        count = 0
+        for p in self.parts:
+            self.param_slices.append(slice(count,count+p.Nparam))
+            count += p.Nparam
+
+    def _process_slices(self,slices1=None,slices2=None):
+        """
+        Format the slices so that they can easily be used.
+        Both slices can be any of three things:
+         - If None, the new points covary through every kernel part (default)
+         - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
+         - If a list of booleans, specifying which kernel parts are active
+
+        if the second arg is False, return only slices1
+
+        returns actual lists of slice objects
+        """
+        if slices1 is None:
+            slices1 = [slice(None)]*self.Nparts
+        elif all([type(s_i) is bool for s_i in slices1]):
+            slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
+        else:
+            assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
+        if slices2 is None:
+            slices2 = [slice(None)]*self.Nparts
+        elif slices2 is False:
+            return slices1
+        elif all([type(s_i) is bool for s_i in slices2]):
+            slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
+        else:
+            assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
+        return slices1, slices2
+
+    def __add__(self,other):
+        assert self.D == other.D
+        newkern =  kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices)
+        #transfer constraints:
+        newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
+        newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
+        newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
+        newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
+        newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
+        newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
+        newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
+        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
+        return newkern
+    def add(self,other):
+        """
+        Add another kernel to this one. Both kernels are defined on the same _space_
+        :param other: the other kernel to be added
+        :type other: GPy.kern
+        """
+        return self + other
+
+    def add_orthogonal(self,other):
+        """
+        Add another kernel to this one. Both kernels are defined on separate spaces
+        :param other: the other kernel to be added
+        :type other: GPy.kern
+        """
+        #deal with input slices
+        D = self.D + other.D
+        self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
+        other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
+        other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices]
+
+        newkern =  kern(D,self.parts+other.parts, self_input_slices + other_input_slices)
+
+        #transfer constraints:
+        newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
+        newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
+        newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
+        newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
+        newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
+        newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
+        newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
+        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
+        return newkern
+
+
+    def get_param(self):
+        return np.hstack([p.get_param() for p in self.parts])
+
+    def set_param(self,x):
+        [p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)]
+
+    def get_param_names(self):
+        return sum([[k.name+'_'+str(i)+'_'+n for n in k.get_param_names()] for i,k in enumerate(self.parts)],[])
+
+    def K(self,X,X2=None,slices1=None,slices2=None):
+        assert X.shape[1]==self.D
+        slices1, slices2 = self._process_slices(slices1,slices2)
+        if X2 is None:
+            X2 = X
+        target = np.zeros((X.shape[0],X2.shape[0]))
+        [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        return target
+
+    #@transform_gradients
+    def dK_dtheta(self,X,X2=None,slices1=None,slices2=None):
+        """Return shape is NxMx(Ntheta)"""
+        assert X.shape[1]==self.D
+        slices1, slices2 = self._process_slices(slices1,slices2)
+        if X2 is None:
+            X2 = X
+        target = np.zeros((X.shape[0],X2.shape[0],self.Nparam))
+        [p.dK_dtheta(X[s1,i_s],X2[s2,i_s],target[s1,s2,ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
+        return target
+
+    def dK_dX(self,X,X2=None,slices1=None,slices2=None):
+        if X2 is None:
+            X2 = X
+        slices1, slices2 = self._process_slices(slices1,slices2)
+        target = np.zeros((X2.shape[0],X.shape[0],X.shape[1]))
+        [p.dK_dX(X[s1],X2[s2],target[s2,s1,:]) for p,ps,s1,s2 in zip(self.parts, self.param_slices,slices1,slices2)]
+        return target
+
+    def Kdiag(self,X,slices=None):
+        assert X.shape[1]==self.D
+        slices = self._process_slices(slices,False)
+        target = np.zeros(X.shape[0])
+        [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
+        return target
+
+    #@transform_gradients
+    def dKdiag_dtheta(self,X,slices=None):
+        assert X.shape[1]==self.D
+        slices = self._process_slices(slices,False)
+        target = np.zeros((X.shape[0],self.Nparam))
+        [p.dKdiag_dtheta(X[s,i_s],target[s,ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
+        return target
+
+    def dKdiag_dX(self, X, slices=None):
+        assert X.shape[1]==self.D
+        slices = self._process_slices(slices,False)
+        target = np.zeros((X.shape[0],X.shape[0],X.shape[1]))
+        [p.dKdiag_dX(X[s,i_s],target[s,:,:]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
+        return target
+
+    def psi0(self,Z,mu,S,slices_mu=None,slices_Z=None):
+        target = np.zeros(mu.shape[0])
+        [p.psi0(Z,mu,S,target) for p in self.parts]
+        return target
+
+    #@transform_gradients
+    def dpsi0_dtheta(self,Z,mu,S):
+        target = np.zeros((mu.shape[0],self.Nparam))
+        [p.dpsi0_dtheta(Z,mu,S,target[s]) for p,s in zip(self.parts, self.param_slices)]
+        return target
+
+    def dpsi0_dmuS(self,Z,mu,S):
+        target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
+        [p.dpsi0_dmuS(Z,mu,S,target_mu,target_S) for p in self.parts]
+        return target_mu,target_S
+
+    def psi1(self,Z,mu,S):
+        """Think N,M,Q """
+        target = np.zeros((mu.shape[0],Z.shape[0]))
+        [p.psi1(Z,mu,S,target=target) for p in self.parts]
+        return target
+
+    #@transform_gradients
+    def dpsi1_dtheta(self,Z,mu,S):
+        """N,M,(Ntheta)"""
+        target = np.zeros((mu.shape[0],Z.shape[0],self.Nparam))
+        [p.dpsi1_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)]
+        return target
+
+    def dpsi1_dZ(self,Z,mu,S):
+        """N,M,Q"""
+        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[1]))
+        [p.dpsi1_dZ(Z,mu,S,target) for p in self.parts]
+        return target
+
+    def dpsi1_dmuS(self,Z,mu,S):
+        """return shapes are N,M,Q"""
+        target_mu, target_S = np.zeros((2,mu.shape[0],Z.shape[0],Z.shape[1]))
+        [p.dpsi1_dmuS(Z,mu,S,target_mu=target_mu,target_S = target_S) for p in self.parts]
+        return target_mu, target_S
+
+    def psi2(self,Z,mu,S):
+        """
+        :Z: np.ndarray of inducing inputs (M x Q)
+        : mu, S: np.ndarrays of means and variacnes (each N x Q)
+        :returns psi2: np.ndarray (N,M,M,Q) """
+        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
+        [p.psi2(Z,mu,S,target=target) for p in self.parts]
+        return target
+
+    #@transform_gradients
+    def dpsi2_dtheta(self,Z,mu,S):
+        """Returns shape (N,M,M,Ntheta)"""
+        target = np.zeros((Z.shape[0],Z.shape[0],self.Nparam))
+        [p.dpsi2_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)]
+        return target
+
+    def dpsi2_dZ(self,Z,mu,S):
+        """N,M,M,Q"""
+        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0],Z.shape[1]))
+        [p.dpsi2_dZ(Z,mu,S,target) for p in self.parts]
+        return target
+
+    def dpsi2_dmuS(self,Z,mu,S):
+        """return shapes are N,M,M,Q"""
+        target_mu, target_S = np.zeros((2,mu.shape[0],Z.shape[0],Z.shape[0],Z.shape[1]))
+        [p.dpsi2_dmuS(Z,mu,S,target_mu=target_mu,target_S = target_S) for p in self.parts]
+
+        #TODO: there are some extra terms to compute here!
+        return target_mu, target_S
diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py
new file mode 100644
index 00000000..25eda4d5
--- /dev/null
+++ b/GPy/kern/kernpart.py
@@ -0,0 +1,53 @@
+class kernpart(object):
+    def __init__(self,D):
+        """
+        The base class for a kernpart: a positive definite function which forms part of a kernel
+
+        :param D: the number of input dimensions to the function
+        :type D: int
+        """
+        self.D = D
+        self.Nparam = 1.
+        self.name = 'white'
+        self.set_param(np.array([variance]).flatten())
+
+    def get_param(self):
+        raise NotImplementedError
+    def set_param(self,x):
+        raise NotImplementedError
+    def get_param_names(self):
+        raise NotImplementedError
+    def K(self,X,X2,target):
+        raise NotImplementedError
+    def Kdiag(self,X,target):
+        raise NotImplementedError
+    def dK_dtheta(self,X,X2,target):
+        raise NotImplementedError
+    def dKdiag_dtheta(self,X,target):
+        raise NotImplementedError
+    def psi0(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi0_dtheta(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+        raise NotImplementedError
+    def psi1(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi1_dtheta(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi1_dZ(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+        raise NotImplementedError
+    def psi2(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi2_dZ(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi2_dtheta(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+        raise NotImplementedError
+    def dK_dX(self,X,X2,target):
+        raise NotImplementedError
+
+
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
new file mode 100644
index 00000000..17215c75
--- /dev/null
+++ b/GPy/kern/linear.py
@@ -0,0 +1,103 @@
+from kernpart import kernpart
+import numpy as np
+class linear(kernpart):
+    """
+    Linear kernel
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance: variance
+    :type variance: None|float
+    """
+
+    def __init__(self, D, variance=None):
+        self.D = D
+        if variance is None:
+            variance = 1.0
+        self.Nparam = 1
+        self.name = 'linear'
+        self.set_param(variance)
+
+    def get_param(self):
+        return self.variance
+
+    def set_param(self,x):
+        self.variance = x
+
+    def get_param_names(self):
+        return ['variance']
+
+    def K(self,X,X2,target):
+        target += self.variance * np.dot(X, X2.T)
+
+    def Kdiag(self,X,target):
+        np.add(target,np.sum(self.variance*np.square(X),-1),target)
+
+    def dK_dtheta(self,X,X2,target):
+        """
+        Computes the derivatives wrt theta
+        Return shape is NxMx(Ntheta)
+
+        """
+
+        if X2 is None: X2 = X
+        product = np.dot(X, X2.T)[:,:, None]#X[:,None,:]*X2[None,:,:]
+        target += product
+
+    def dK_dX(self,X,X2,target):
+        if X2 is None: X2 = X
+        np.add(target,X2[:,None,:]*self.variance,target)
+
+    # def psi0(self,Z,mu,S,target):
+    #     expected = np.square(mu) + S
+    #     np.add(target,np.sum(self.variance*expected),target)
+
+    # def dpsi0_dtheta(self,Z,mu,S,target):
+    #     expected = np.square(mu) + S
+    #     return -2.*np.sum(expected,0)
+
+    # def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+    #     np.add(target_mu,2*mu*self.variances,target_mu)
+    #     np.add(target_S,self.variances,target_S)
+
+    # def dpsi0_dZ(self,Z,mu,S,target):
+    #     pass
+
+    # def psi1(self,Z,mu,S,target):
+    #     """the variance, it does nothing"""
+    #     self.K(mu,Z,target)
+
+    # def dpsi1_dtheta(self,Z,mu,S,target):
+    #     """the variance, it does nothing"""
+    #     self.dK_dtheta(mu,Z,target)
+
+    # def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+    #     """Do nothing for S, it does not affect psi1"""
+    #     np.add(target_mu,Z/self.variances2,target_mu)
+
+    # def dpsi1_dZ(self,Z,mu,S,target):
+    #     self.dK_dX(mu,Z,target)
+
+    # def psi2(self,Z,mu,S,target):
+    #     """Think N,M,M,Q """
+    #     mu2_S = np.square(mu)+SN,Q,
+    #     ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q
+    #     psi2 = ZZ*np.square(self.variances)*mu2_S
+    #     np.add(target, psi2.sum(-1),target) M,M
+
+    # def dpsi2_dtheta(self,Z,mu,S,target):
+    #     mu2_S = np.square(mu)+SN,Q,
+    #     ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q
+    #     target += 2.*ZZ*mu2_S*self.variances
+
+    # def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+    #     """Think N,M,M,Q """
+    #     mu2_S = np.sum(np.square(mu)+S,0)Q,
+    #     ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q
+    #     tmp = ZZ*np.square(self.variances) M,M,Q
+    #     np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) N,M,M,Q
+    #     np.add(target_S, tmp, target_S) N,M,M,Q
+
+    # def dpsi2_dZ(self,Z,mu,S,target):
+    #     mu2_S = np.sum(np.square(mu)+S,0)Q,
+    #     target += Z[:,None,:]*np.square(self.variances)*mu2_S
diff --git a/GPy/kern/linear_ARD.py b/GPy/kern/linear_ARD.py
new file mode 100644
index 00000000..3c7482d3
--- /dev/null
+++ b/GPy/kern/linear_ARD.py
@@ -0,0 +1,114 @@
+from kernpart import kernpart
+import numpy as np
+
+class linear_ARD(kernpart):
+    """
+    Linear ARD kernel
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variances: ARD variances
+    :type variances: None|np.ndarray
+    """
+
+    def __init__(self,D,variances=None):
+        self.D = D
+        if variances is not None:
+            assert variances.shape==(self.D,)
+        else:
+            variances = np.ones(self.D)
+        self.Nparam = self.D
+        self.name = 'linear'
+        self.set_param(variances)
+
+    def get_param(self):
+        return self.variances
+
+    def set_param(self,x):
+        assert x.size==(self.Nparam)
+        self.variances = x
+
+    def get_param_names(self):
+        if self.D==1:
+            return ['variance']
+        else:
+            return ['variance_%i'%i for i in range(self.variances.size)]
+
+    def K(self,X,X2,target):
+        XX = X*np.sqrt(self.variances)
+        XX2 = X2*np.sqrt(self.variances)
+        target += np.dot(XX, XX2.T)
+
+    def Kdiag(self,X,target):
+        np.add(target,np.sum(self.variances*np.square(X),-1),target)
+
+    def dK_dtheta(self,X,X2,target):
+        """
+        Computes the derivatives wrt theta
+        Return shape is NxMx(Ntheta)
+
+        """
+        
+        if X2 is None: X2 = X
+        product = X[:,None,:]*X2[None,:,:]
+        target += product
+
+    def dK_dX(self,X,X2,target):
+        if X2 is None: X2 = X
+        #product = X[:,None,:]*X2[None,:,:]
+        #scaled_product = product/self.variances2
+        np.add(target,X2[:,None,:]*self.variances,target)
+
+    def psi0(self,Z,mu,S,target):
+        expected = np.square(mu) + S
+        np.add(target,np.sum(self.variances*expected),target)
+
+    def dpsi0_dtheta(self,Z,mu,S,target):
+        expected = np.square(mu) + S
+        return -2.*np.sum(expected,0)
+
+    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+        np.add(target_mu,2*mu*self.variances,target_mu)
+        np.add(target_S,self.variances,target_S)
+
+    def dpsi0_dZ(self,Z,mu,S,target):
+        pass
+
+    def psi1(self,Z,mu,S,target):
+        """the variance, it does nothing"""
+        self.K(mu,Z,target)
+
+    def dpsi1_dtheta(self,Z,mu,S,target):
+        """the variance, it does nothing"""
+        self.dK_dtheta(mu,Z,target)
+
+    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+        """Do nothing for S, it does not affect psi1"""
+        np.add(target_mu,Z/self.variances2,target_mu)
+
+    def dpsi1_dZ(self,Z,mu,S,target):
+        self.dK_dX(mu,Z,target)
+
+    def psi2(self,Z,mu,S,target):
+        """Think N,M,M,Q """
+        mu2_S = np.square(mu)+S# N,Q,
+        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+        psi2 = ZZ*np.square(self.variances)*mu2_S
+        np.add(target, psi2.sum(-1),target) # M,M
+
+    def dpsi2_dtheta(self,Z,mu,S,target):
+        mu2_S = np.square(mu)+S# N,Q,
+        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+        target += 2.*ZZ*mu2_S*self.variances
+
+    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+        """Think N,M,M,Q """
+        mu2_S = np.sum(np.square(mu)+S,0)# Q,
+        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+        tmp = ZZ*np.square(self.variances) # M,M,Q
+        np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q
+        np.add(target_S, tmp, target_S) #N,M,M,Q
+
+    def dpsi2_dZ(self,Z,mu,S,target):
+        mu2_S = np.sum(np.square(mu)+S,0)# Q,
+        target += Z[:,None,:]*np.square(self.variances)*mu2_S
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
new file mode 100644
index 00000000..dd498d6f
--- /dev/null
+++ b/GPy/kern/rbf.py
@@ -0,0 +1,155 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+
+class rbf(kernpart): 
+    """
+    Radial Basis Function kernel, aka squared-exponential or Gaussian kernel.
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance: the variance of the kernel
+    :type variance: float
+    :param lengthscale: the lengthscale of the kernel
+    :type lengthscale: float
+
+    .. Note: for rbf with different lengthscales on each dimension, see rbf_ARD
+    """
+
+    def __init__(self,D,variance=1.,lengthscale=1.):
+        self.D = D
+        self.Nparam = 2
+        self.name = 'rbf'
+        self.set_param(np.hstack((variance,lengthscale)))
+
+        #initialize cache
+        self._Z, self._mu, self._S = np.empty(shape=(3,1))
+        self._X, self._X2, self._params = np.empty(shape=(3,1))
+
+    def get_param(self):
+        return np.hstack((self.variance,self.lengthscale))
+
+    def set_param(self,x):
+        self.variance, self.lengthscale = x
+        self.lengthscale2 = np.square(self.lengthscale)
+        #reset cached results
+        self._X, self._X2, self._params = np.empty(shape=(3,1))
+        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
+
+    def get_param_names(self):
+        return ['variance','lengthscale']
+
+    def K(self,X,X2,target):
+        self._K_computations(X,X2)
+        np.add(self.variance*self._K_dvar, target,target)
+
+    def Kdiag(self,X,target):
+        np.add(target,self.variance,target)
+
+    def dK_dtheta(self,X,X2,target):
+        """Return shape is NxMx(Ntheta)"""
+        self._K_computations(X,X2)
+        target[:,:,0] += self._K_dvar
+        target[:,:,1] += self._K_dvar*self.variance*self._K_dist2/self.lengthscale
+
+    def dKdiag_dtheta(self,X,target):
+        np.add(target[:,0],1.,target[:,0])
+
+    def dK_dX(self,X,X2,target):
+        self._K_computations(X,X2)
+        _K_dist = X[:,None,:]-X2[None,:,:]
+        target += np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
+
+    def dKdiag_dX(self,X,target):
+        pass
+
+    def _K_computations(self,X,X2):
+        if not (np.all(X==self._X) and np.all(X2==self._X2)):
+            self._X = X
+            self._X2 = X2
+            if X2 is None: X2 = X
+            XXT = np.dot(X,X2.T)
+            if X is X2:
+                self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
+            else:
+                self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2
+            self._K_exponent = -0.5*self._K_dist2
+            self._K_dvar = np.exp(-0.5*self._K_dist2)
+
+if __name__=='__main__':
+    #run some simple tests on the kernel (TODO:move these to unititest)
+    #TODO: these are broken in this new structure!
+    N = 10
+    M = 5
+    Q = 3
+
+    Z = np.random.randn(M,Q)
+    mu = np.random.randn(N,Q)
+    S = np.random.rand(N,Q)
+
+    var = 2.5
+    lengthscales = np.ones(Q)*0.7
+
+    k = rbf(Q,var,lengthscales)
+
+    from checkgrad import checkgrad
+
+    def k_theta_test(param,k):
+        k.set_param(param)
+        K = k.K(Z)
+        dK_dtheta = k.dK_dtheta(Z)
+        f = np.sum(K)
+        df = dK_dtheta.sum(0).sum(0)
+        return f,np.array(df)
+    print "dk_dtheta_test"
+    checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))
+
+
+    def psi1_mu_test(mu,k):
+        mu = mu.reshape(N,Q)
+        f = np.sum(k.psi1(Z,mu,S))
+        df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
+        return f,df.flatten()
+    print "psi1_mu_test"
+    checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))
+
+    def psi1_S_test(S,k):
+        S = S.reshape(N,Q)
+        f = np.sum(k.psi1(Z,mu,S))
+        df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
+        return f,df.flatten()
+    print "psi1_S_test"
+    checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))
+
+    def psi1_theta_test(theta,k):
+        k.set_param(theta)
+        f = np.sum(k.psi1(Z,mu,S))
+        df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
+        return f,df
+    print "psi1_theta_test"
+    checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))
+
+
+    def psi2_mu_test(mu,k):
+        mu = mu.reshape(N,Q)
+        f = np.sum(k.psi2(Z,mu,S))
+        df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
+        return f,df.flatten()
+    print "psi2_mu_test"
+    checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))
+
+    def psi2_S_test(S,k):
+        S = S.reshape(N,Q)
+        f = np.sum(k.psi2(Z,mu,S))
+        df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
+        return f,df.flatten()
+    print "psi2_S_test"
+    checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))
+
+    def psi2_theta_test(theta,k):
+        k.set_param(theta)
+        f = np.sum(k.psi2(Z,mu,S))
+        df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
+        return f,df
+    print "psi2_theta_test"
+    checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py
new file mode 100644
index 00000000..8e29b23a
--- /dev/null
+++ b/GPy/kern/rbf_ARD.py
@@ -0,0 +1,234 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+
+class rbf_ARD(kernpart):
+    def __init__(self,D,variance=1.,lengthscales=None):
+        """
+        Arguments
+        ----------
+        D: int - the number of input dimensions
+        variance: float
+        lengthscales : np.ndarray of shape (D,)
+        """
+        self.D = D
+        if lengthscales is not None:
+            assert lengthscales.shape==(self.D,)
+        else:
+            lengthscales = np.ones(self.D)
+        self.Nparam = self.D + 1
+        self.name = 'rbf_ARD'
+        self.set_param(np.hstack((variance,lengthscales)))
+
+        #initialize cache
+        self._Z, self._mu, self._S = np.empty(shape=(3,1))
+        self._X, self._X2, self._params = np.empty(shape=(3,1))
+
+    def get_param(self):
+        return np.hstack((self.variance,self.lengthscales))
+    def set_param(self,x):
+        assert x.size==(self.D+1)
+        self.variance = x[0]
+        self.lengthscales = x[1:]
+        self.lengthscales2 = np.square(self.lengthscales)
+        #reset cached results
+        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
+    def get_param_names(self):
+        if self.D==1:
+            return ['variance','lengthscale']
+        else:
+            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
+    def K(self,X,X2,target):
+        self._K_computations(X,X2)
+        np.add(self.variance*self._K_dvar, target,target)
+    def Kdiag(self,X,target):
+        np.add(target,self.variance,target)
+    def dK_dtheta(self,X,X2,target):
+        """Return shape is NxMx(Ntheta)"""
+        self._K_computations(X,X2)
+        dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales
+        np.add(target[:,:,0],self._K_dvar, target[:,:,0])
+        np.add(target[:,:,1:],dl, target[:,:,1:])
+    def dKdiag_dtheta(self,X,target):
+        np.add(target[:,0],1.,target[:,0])
+    def dK_dX(self,X,X2,target):
+        self._K_computations(X,X2)
+        dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2
+        np.add(target,-dZ.transpose(1,0,2),target)
+    def psi0(self,Z,mu,S,target):
+        np.add(target, self.variance, target)
+    def dpsi0_dtheta(self,Z,mu,S,target):
+        target[:,0] += 1.
+    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+        pass
+    def psi1(self,Z,mu,S,target):
+        """Think N,M,Q """
+        self._psi_computations(Z,mu,S)
+        np.add(target, self._psi1,target)
+
+    def dpsi1_dtheta(self,Z,mu,S,target):
+        """N,Q,(Ntheta)"""
+        self._psi_computations(Z,mu,S)
+        denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:])
+        d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv)
+        target[:,:,0] += self._psi1/self.variance
+        target[:,:,1:] += d_length
+    def dpsi1_dZ(self,Z,mu,S,target):
+        self._psi_computations(Z,mu,S)
+        np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
+
+    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+        """return shapes are N,M,Q"""
+        self._psi_computations(Z,mu,S)
+        tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
+        np.add(target_mu,tmp*self._psi1_dist,target_mu)
+        np.add(target_S, 0.5*tmp*(self._psi1_dist_sq-1), target_S)
+
+    def psi2(self,Z,mu,S,target):
+        """shape N,M,M"""
+        self._psi_computations(Z,mu,S)
+        np.add(target, self._psi2,target)
+
+    def dpsi2_dtheta(self,Z,mu,S,target):
+        """Shape N,M,M,Ntheta"""
+        self._psi_computations(Z,mu,S)
+        d_var = np.sum(2.*self._psi2/self.variance,0)
+        d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
+        d_length = d_length.sum(0)
+        target[:,:,0] += d_var
+        target[:,:,1:] += d_length
+
+    def dpsi2_dZ(self,Z,mu,S,target):
+        """Returns shape N,M,M,Q"""
+        self._psi_computations(Z,mu,S)
+        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
+        target += dZ
+
+    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+        """Think N,M,M,Q """
+        self._psi_computations(Z,mu,S)
+        tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
+        np.add(target_mu, -tmp*(2.*self._psi2_mudist),target_mu) #N,M,M,Q
+        np.add(target_S, tmp*(2.*self._psi2_mudist_sq-1), target_S) #N,M,M,Q
+
+    def _K_computations(self,X,X2):
+        if not (np.all(X==self._X) and np.all(X2==self._X2)):
+            self._X = X
+            self._X2 = X2
+            if X2 is None: X2 = X
+            self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
+            self._params = np.empty(shape=(1,0))#ensure the next section gets called
+        if not np.all(self._params == self.get_param()):
+            self._params == self.get_param()
+            self._K_dist2 = np.square(self._K_dist/self.lengthscales)
+            self._K_exponent = -0.5*self._K_dist2.sum(-1)
+            self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))
+
+    def _psi_computations(self,Z,mu,S):
+        #here are the "statistics" for psi1 and psi2
+        if not np.all(Z==self._Z):
+            #Z has changed, compute Z specific stuff
+            self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
+            self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
+            self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q
+            self._Z = Z
+
+        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
+            #something's changed. recompute EVERYTHING
+
+            #psi1
+            self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1.
+            self._psi1_dist = Z[None,:,:]-mu[:,None,:]
+            self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom
+            self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
+            self._psi1 = self.variance*np.exp(self._psi1_exponent)
+
+            #psi2
+            self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q
+            self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
+            self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom)
+            self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
+            self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
+
+            self._Z, self._mu, self._S = Z, mu,S
+
+
+if __name__=='__main__':
+    #run some simple tests on the kernel (TODO:move these to unititest)
+    #TODO: these are broken in this new structure!
+    N = 10
+    M = 5
+    Q = 3
+
+    Z = np.random.randn(M,Q)
+    mu = np.random.randn(N,Q)
+    S = np.random.rand(N,Q)
+
+    var = 2.5
+    lengthscales = np.ones(Q)*0.7
+
+    k = rbf(Q,var,lengthscales)
+
+    from checkgrad import checkgrad
+
+    def k_theta_test(param,k):
+        k.set_param(param)
+        K = k.K(Z)
+        dK_dtheta = k.dK_dtheta(Z)
+        f = np.sum(K)
+        df = dK_dtheta.sum(0).sum(0)
+        return f,np.array(df)
+    print "dk_dtheta_test"
+    checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))
+
+
+    def psi1_mu_test(mu,k):
+        mu = mu.reshape(N,Q)
+        f = np.sum(k.psi1(Z,mu,S))
+        df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
+        return f,df.flatten()
+    print "psi1_mu_test"
+    checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))
+
+    def psi1_S_test(S,k):
+        S = S.reshape(N,Q)
+        f = np.sum(k.psi1(Z,mu,S))
+        df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
+        return f,df.flatten()
+    print "psi1_S_test"
+    checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))
+
+    def psi1_theta_test(theta,k):
+        k.set_param(theta)
+        f = np.sum(k.psi1(Z,mu,S))
+        df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
+        return f,df
+    print "psi1_theta_test"
+    checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))
+
+
+    def psi2_mu_test(mu,k):
+        mu = mu.reshape(N,Q)
+        f = np.sum(k.psi2(Z,mu,S))
+        df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
+        return f,df.flatten()
+    print "psi2_mu_test"
+    checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))
+
+    def psi2_S_test(S,k):
+        S = S.reshape(N,Q)
+        f = np.sum(k.psi2(Z,mu,S))
+        df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
+        return f,df.flatten()
+    print "psi2_S_test"
+    checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))
+
+    def psi2_theta_test(theta,k):
+        k.set_param(theta)
+        f = np.sum(k.psi2(Z,mu,S))
+        df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
+        return f,df
+    print "psi2_theta_test"
+    checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
+
+
diff --git a/GPy/kern/spline.py b/GPy/kern/spline.py
new file mode 100644
index 00000000..dc68f340
--- /dev/null
+++ b/GPy/kern/spline.py
@@ -0,0 +1,54 @@
+from kernpart import kernpart
+import numpy as np
+import hashlib
+def theta(x):
+    """Heavisdie step function"""
+    return np.where(x>=0.,1.,0.)
+
+class spline(kernpart):
+    """
+    Spline kernel
+
+    :param D: the number of input dimensions (fixed to 1 right now TODO)
+    :type D: int
+    :param variance: the variance of the kernel
+    :type variance: float
+
+    """
+
+    def __init__(self,D,variance=1.,lengthscale=1.):
+        self.D = D
+        assert self.D==1
+        self.Nparam = 1
+        self.name = 'spline'
+        self.set_param(np.squeeze(variance))
+
+    def get_param(self):
+        return self.variance
+
+    def set_param(self,x):
+        self.variance = x
+
+    def get_param_names(self):
+        return ['variance']
+
+    def K(self,X,X2,target):
+        assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
+        assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
+        t = X
+        s = X2.T
+        s_t = s-t # broadcasted subtraction
+        target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)
+
+    def Kdiag(self,X,target):
+        target += self.variance*X.flatten()**3/3.
+
+    def dK_dtheta(self,X,X2,target):
+        target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
+
+    def dKdiag_dtheta(self,X,target):
+        target += X.flatten()**3/3.
+
+    def dKdiag_dX(self,X,target):
+        target += self.variance*X**2
+
diff --git a/GPy/kern/white.py b/GPy/kern/white.py
new file mode 100644
index 00000000..147c2fd1
--- /dev/null
+++ b/GPy/kern/white.py
@@ -0,0 +1,82 @@
+from kernpart import kernpart
+import numpy as np
+class white(kernpart):
+    """
+    White noise kernel.
+
+    :param D: the number of input dimensions
+    :type D: int
+    :param variance:
+    :type variance: float
+    """
+    def __init__(self,D,variance=1.):
+        self.D = D
+        self.Nparam = 1.
+        self.name = 'white'
+        self.set_param(np.array([variance]).flatten())
+
+    def get_param(self):
+        return self.variance
+
+    def set_param(self,x):
+        assert x.shape==(1,)
+        self.variance = x
+
+    def get_param_names(self):
+        return ['variance']
+
+    def K(self,X,X2,target):
+        if X.shape==X2.shape:
+            if np.all(X==X2):
+                np.add(target,np.eye(X.shape[0])*self.variance,target)
+
+    def Kdiag(self,X,target):
+        target += self.variance
+
+    def dK_dtheta(self,X,X2,target):
+        if X.shape==X2.shape:
+            if np.all(X==X2):
+                np.add(target[:,:,0],np.eye(X.shape[0]),target[:,:,0])
+
+    def dKdiag_dtheta(self,X,target):
+        np.add(target[:,0],1.,target[:,0])
+
+    def dK_dX(self,X,X2,target):
+        pass
+
+    def dKdiag_dX(self,X,target):
+        pass
+        
+    def psi0(self,Z,mu,S,target):
+        target += self.variance
+
+    def dpsi0_dtheta(self,Z,mu,S,target):
+        target += 1.
+
+    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+        pass
+
+    def psi1(self,Z,mu,S,target):
+        pass
+
+    def dpsi1_dtheta(self,Z,mu,S,target):
+        pass
+
+    def dpsi1_dZ(self,Z,mu,S,target):
+        pass
+
+    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+        pass
+
+    def psi2(self,Z,mu,S,target):
+        pass
+
+    def dpsi2_dZ(self,Z,mu,S,target):
+        pass
+
+    def dpsi2_dtheta(self,Z,mu,S,target):
+        pass
+
+    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+        pass
+