diff --git a/GPy/__init__.py b/GPy/__init__.py index 381d6232..c0772c27 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -7,5 +7,5 @@ import models import inference import util import examples -#import examples TODO: discuss! from core import priors +import likelihoods diff --git a/GPy/core/model.py b/GPy/core/model.py index 145a607f..a0628c42 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad import priors from ..util.linalg import jitchol from ..inference import optimization +from .. import likelihoods class model(parameterised): def __init__(self): @@ -82,7 +83,7 @@ class model(parameterised): def get(self,name, return_names=False): """ - Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. + Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. """ matches = self.grep_param_names(name) if len(matches): @@ -107,7 +108,7 @@ class model(parameterised): def get_gradient(self,name, return_names=False): """ - Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. + Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. """ matches = self.grep_param_names(name) if len(matches): @@ -303,54 +304,62 @@ class model(parameterised): return '\n'.join(s) - def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, return_ratio=False, *args): + def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3): """ Check the gradient of the model by comparing to a numerical estimate. - If the overall gradient fails, invividual components are tested. + If the verbose flag is passed, individual components are tested (and printed) + + :param verbose: If True, print a "full" checking of each parameter + :type verbose: bool + :param step: The size of the step around which to linearise the objective + :type step: float (default 1e-6) + :param tolerance: the tolerance allowed (see note) + :type tolerance: float (default 1e-3) + + Note:- + The gradient is considered correct if the ratio of the analytical + and numerical gradients is within ``tolerance`` of unity.
""" x = self._get_params_transformed().copy() - #choose a random direction to step in: - dx = step*np.sign(np.random.uniform(-1,1,x.size)) + if not verbose: + #just check the global ratio + dx = step*np.sign(np.random.uniform(-1,1,x.size)) - #evaulate around the point x - self._set_params_transformed(x+dx) - f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() - self._set_params_transformed(x-dx) - f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() - self._set_params_transformed(x) - gradient = self._log_likelihood_gradients_transformed() + #evaulate around the point x + self._set_params_transformed(x+dx) + f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() + self._set_params_transformed(x-dx) + f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() + self._set_params_transformed(x) + gradient = self._log_likelihood_gradients_transformed() - numerical_gradient = (f1-f2)/(2*dx) - global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) - if verbose: - print "Gradient ratio = ", global_ratio, '\n' - sys.stdout.flush() + numerical_gradient = (f1-f2)/(2*dx) + global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) - if (np.abs(1.-global_ratio) epsilon or not iteration: + print 'EM iteration %s' %iteration + self.update_likelihood_approximation() + self.optimize(**kwargs) + new_value = self.log_likelihood() + log_change = new_value - last_value + if log_change > epsilon: + self.log_likelihood_record.append(new_value) + self.gp_params_record.append(self._get_params()) + #self.ep_params_record.append((self.beta,self.Y,self.Z_ep)) + last_value = new_value else: - return False - - if return_ratio: - return global_ratio - else: - return True + convergence = False + #self.beta, self.Y, self.Z_ep = self.ep_params_record[-1] + self._set_params(self.gp_params_record[-1]) + print "Log-likelihood decrement: %s \nLast iteration discarded." %log_change + iteration += 1 diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index 6e5493ad..ab656f52 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -102,6 +102,11 @@ class parameterised(object): else: return expr + def Nparam_transformed(self): + ties = 0 + for ar in self.tied_indices: + ties += ar.size - 1 + return self.Nparam - len(self.constrained_fixed_indices) - ties def constrain_positive(self, which): """ @@ -149,8 +154,6 @@ class parameterised(object): - - def constrain_negative(self,which): """ Set negative constraints. 
diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py index 18a96a47..02092dbf 100644 --- a/GPy/examples/BGPLVM_demo.py +++ b/GPy/examples/BGPLVM_demo.py @@ -17,7 +17,7 @@ K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) -k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q) +k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) m.constrain_positive('(rbf|bias|noise|white|S)') # m.constrain_fixed('S', 1) diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index 989ed08a..592299d8 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -3,16 +3,15 @@ """ -Simple Gaussian Processes classification +Gaussian Processes classification """ import pylab as pb import numpy as np import GPy default_seed=10000 -###################################### -## 2 dimensional example -def crescent_data(model_type='Full', inducing=10, seed=default_seed): + +def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. :param model_type: type of model to fit ['Full', 'FITC', 'DTC']. @@ -21,20 +20,28 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): :param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). :type inducing: int """ + data = GPy.util.datasets.crescent_data(seed=seed) - likelihood = GPy.inference.likelihoods.probit(data['Y']) + + # Kernel object + kernel = GPy.kern.rbf(data['X'].shape[1]) + + # Likelihood object + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(data['Y'],distribution) + if model_type=='Full': - m = GPy.models.GP_EP(data['X'],likelihood) + m = GPy.models.GP(data['X'],likelihood,kernel) else: # create sparse GP EP model m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type) - m.approximate_likelihood() + m.update_likelihood_approximation() print(m) # optimize - m.em() + m.optimize() print(m) # plot @@ -42,54 +49,67 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): return m def oil(): - """Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.""" + """ + Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. 
+ """ data = GPy.util.datasets.oil() - likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) + # Kernel object + kernel = GPy.kern.rbf(12) - # create simple GP model - m = GPy.models.GP_EP(data['X'],likelihood) + # Likelihood object + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution) - # contrain all parameters to be positive + # Create GP model + m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) + + # Contrain all parameters to be positive m.constrain_positive('') m.tie_param('lengthscale') - m.approximate_likelihood() + m.update_likelihood_approximation() - # optimize + # Optimize m.optimize() - # plot - #m.plot() print(m) return m -def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed): - """Simple 1D classification example. - :param model_type: type of model to fit ['Full', 'FITC', 'DTC']. +def toy_linear_1d_classification(seed=default_seed): + """ + Simple 1D classification example :param seed : seed value for data generation (default is 4). :type seed: int - :param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). - :type inducing: int """ + data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) - likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) - assert model_type in ('Full','DTC','FITC') + Y = data['Y'][:, 0:1] + Y[Y == -1] = 0 - # create simple GP model - if model_type=='Full': - m = GPy.models.simple_GP_EP(data['X'],likelihood) - else: - # create sparse GP EP model - m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type) - + # Kernel object + kernel = GPy.kern.rbf(1) - m.constrain_positive('var') - m.constrain_positive('len') - m.tie_param('lengthscale') - m.approximate_likelihood() + # Likelihood object + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(Y,distribution) - # Optimize and plot - m.em(plot_all=False) # EM algorithm + # Model definition + m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) + + # Optimize + """ + EPEM runs a loop that consists of two steps: + 1) EP likelihood approximation: + m.update_likelihood_approximation() + 2) Parameters optimization: + m.optimize() + """ + m.EPEM() + + # Plot + pb.subplot(211) + m.plot_f() + pb.subplot(212) m.plot() - print(m) + return m diff --git a/GPy/examples/poisson.py b/GPy/examples/poisson.py new file mode 100644 index 00000000..ce68e921 --- /dev/null +++ b/GPy/examples/poisson.py @@ -0,0 +1,47 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +""" +Gaussian Processes + Expectation Propagation - Poisson Likelihood +""" +import pylab as pb +import numpy as np +import GPy + +default_seed=10000 + +def toy_1d(seed=default_seed): + """ + Simple 1D classification example + :param seed : seed value for data generation (default is 4). + :type seed: int + """ + + X = np.arange(0,100,5)[:,None] + F = np.round(np.sin(X/18.) 
+ .1*X) + np.arange(5,25)[:,None] + E = np.random.randint(-5,5,20)[:,None] + Y = F + E + + kernel = GPy.kern.rbf(1) + distribution = GPy.likelihoods.likelihood_functions.Poisson() + likelihood = GPy.likelihoods.EP(Y,distribution) + + m = GPy.models.GP(X,likelihood,kernel) + m.ensure_default_constraints() + + # Approximate likelihood + m.update_likelihood_approximation() + + # Optimize and plot + m.optimize() + #m.EPEM FIXME + print m + + # Plot + pb.subplot(211) + m.plot_f() #GP plot + pb.subplot(212) + m.plot() #Output plot + + return m diff --git a/GPy/examples/sparse_ep_fix.py b/GPy/examples/sparse_ep_fix.py new file mode 100644 index 00000000..defcb4eb --- /dev/null +++ b/GPy/examples/sparse_ep_fix.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +""" +Sparse Gaussian Processes regression with an RBF kernel +""" +import pylab as pb +import numpy as np +import GPy +np.random.seed(2) +pb.ion() +N = 500 +M = 5 + +pb.close('all') +###################################### +## 1 dimensional example + +# sample inputs and outputs +X = np.random.uniform(-3.,3.,(N,1)) +#Y = np.sin(X)+np.random.randn(N,1)*0.05 +F = np.sin(X)+np.random.randn(N,1)*0.05 +Y = np.ones([F.shape[0],1]) +Y[F<0] = -1 +likelihood = GPy.inference.likelihoods.probit(Y) + +# construct kernel +rbf = GPy.kern.rbf(1) +noise = GPy.kern.white(1) +kernel = rbf + noise + +# create simple GP model +#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood) + +# constrain all parameters to be positive +#m.constrain_fixed('prec',100.) +m = GPy.models.sparse_GP(X, Y, kernel, M=M) +m.ensure_default_constraints() +#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian): +# m.approximate_likelihood() +print m.checkgrad() +m.optimize('tnc', messages = 1) +m.plot(samples=3) +print m + +n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood) +n.ensure_default_constraints() +if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian): + n.approximate_likelihood() +print n.checkgrad() +pb.figure() +n.plot() + +""" +m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) +m.ensure_default_constraints() +print m.checkgrad() +""" diff --git a/GPy/inference/Expectation_Propagation.py b/GPy/inference/Expectation_Propagation.py deleted file mode 100644 index 05453f1d..00000000 --- a/GPy/inference/Expectation_Propagation.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import random -from scipy import stats, linalg -from .likelihoods import likelihood -from ..core import model -from ..util.linalg import pdinv,mdot,jitchol -from ..util.plot import gpplot -from .. import kern -class EP_base: - """ - Expectation Propagation. - - This is just the base class for expectation propagation. We'll extend it for full and sparse EP. - """ - def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]): - self.likelihood = likelihood - self.epsilon = epsilon - self.eta, self.delta = powerep - self.jitter = 1e-12 - - #Initial values - Likelihood approximation parameters: - #p(y|f) = t(f|tau_tilde,v_tilde) - self.restart_EP() - - def restart_EP(self): - """ - Set the EP approximation to initial state - """ - self.tau_tilde = np.zeros(self.N) - self.v_tilde = np.zeros(self.N) - self.mu = np.zeros(self.N) - -class Full(EP_base): - """ - :param likelihood: Output's likelihood (e.g. 
probit) - :type likelihood: GPy.inference.likelihood instance - :param K: prior covariance matrix - :type K: np.ndarray (N x N) - :param likelihood: Output's likelihood (e.g. probit) - :type likelihood: GPy.inference.likelihood instance - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - """ - def __init__(self,K,likelihood,*args,**kwargs): - assert K.shape[0] == K.shape[1] - self.K = K - self.N = self.K.shape[0] - EP_base.__init__(self,likelihood,*args,**kwargs) - def fit_EP(self,messages=False): - """ - The expectation-propagation algorithm. - For nomenclature see Rasmussen & Williams 2006 (pag. 52-60) - """ - #Prior distribution parameters: p(f|X) = N(f|0,K) - #self.K = self.kernel.K(self.X,self.X) - - #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) - self.mu=np.zeros(self.N) - self.Sigma=self.K.copy() - - """ - Initial values - Cavity distribution parameters: - q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - - self.tau_ = np.empty(self.N,dtype=np.float64) - self.v_ = np.empty(self.N,dtype=np.float64) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=np.float64) - self.Z_hat = np.empty(self.N,dtype=np.float64) - phi = np.empty(self.N,dtype=np.float64) - mu_hat = np.empty(self.N,dtype=np.float64) - sigma2_hat = np.empty(self.N,dtype=np.float64) - - #Approximation - epsilon_np1 = self.epsilon + 1. - epsilon_np2 = self.epsilon + 1. - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.N) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i] - self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i]) - self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau - self.v_tilde[i] = self.v_tilde[i] + Delta_v - #Posterior distribution parameters update - si=self.Sigma[:,i].reshape(self.N,1) - self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T) - self.mu = np.dot(self.Sigma,self.v_tilde) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K) - B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K - L = jitchol(B) - V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) - self.Sigma = self.K - np.dot(V.T,V) - self.mu = np.dot(self.Sigma,self.v_tilde) - epsilon_np1 = np.mean(self.tau_tilde-self.np1[-1]**2) - epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2) - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) - if messages: - print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1) - -class FITC(EP_base): - """ - :param likelihood: Output's likelihood (e.g. 
probit) - :type likelihood: GPy.inference.likelihood instance - :param Knn_diag: The diagonal elements of Knn is a 1D vector - :param Kmn: The 'cross' variance between inducing inputs and data - :param Kmm: the covariance matrix of the inducing inputs - :param likelihood: Output's likelihood (e.g. probit) - :type likelihood: GPy.inference.likelihood instance - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - """ - def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs): - self.Knn_diag = Knn_diag - self.Kmn = Kmn - self.Kmm = Kmm - self.M = self.Kmn.shape[0] - self.N = self.Kmn.shape[1] - assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations' - assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N' - EP_base.__init__(self,likelihood,*args,**kwargs) - - def fit_EP(self): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see Naish-Guzman and Holden, 2008. - """ - - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn - """ - self.Kmmi, self.Kmm_hld = pdinv(self.Kmm) - self.P0 = self.Kmn.T - self.KmnKnm = np.dot(self.P0.T, self.P0) - self.KmmiKmn = np.dot(self.Kmmi,self.P0.T) - self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2) - self.Diag0 = self.Knn_diag - self.Qnn_diag - self.R0 = jitchol(self.Kmmi).T - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*gamma - """ - self.w = np.zeros(self.N) - self.gamma = np.zeros(self.M) - self.mu = np.zeros(self.N) - self.P = self.P0.copy() - self.R = self.R0.copy() - self.Diag = self.Diag0.copy() - self.Sigma_diag = self.Knn_diag - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.N,dtype=np.float64) - self.v_ = np.empty(self.N,dtype=np.float64) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=np.float64) - self.Z_hat = np.empty(self.N,dtype=np.float64) - phi = np.empty(self.N,dtype=np.float64) - mu_hat = np.empty(self.N,dtype=np.float64) - sigma2_hat = np.empty(self.N,dtype=np.float64) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.arange(self.N) - random.shuffle(update_order) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i]) - self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau - self.v_tilde[i] = self.v_tilde[i] + Delta_v - #Posterior distribution parameters update - dtd1 = Delta_tau*self.Diag[i] + 1. 
- dii = self.Diag[i] - self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 - pi_ = self.P[i,:].reshape(1,self.M) - self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ - Rp_i = np.dot(self.R,pi_.T) - RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R)) - self.R = jitchol(RTR).T - self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 - self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1) - self.mu = self.w + np.dot(self.P,self.gamma) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde) - self.P = (self.Diag / self.Diag0)[:,None] * self.P0 - self.RPT0 = np.dot(self.R0,self.P0.T) - L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1) - self.w = self.Diag * self.v_tilde - self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde)) - self.mu = self.w + np.dot(self.P,self.gamma) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py deleted file mode 100644 index c9b36e10..00000000 --- a/GPy/inference/likelihoods.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats -import scipy as sp -import pylab as pb -from ..util.plot import gpplot - -class likelihood: - def __init__(self,Y,location=0,scale=1): - """ - Likelihood class for doing Expectation propagation - - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood used - """ - self.Y = Y - self.N = self.Y.shape[0] - self.location = location - self.scale = scale - - def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u): - """ - Plot the predictive distribution of the GP model for 1-dimensional inputs - - :param X_new: The points at which to make a prediction - :param Mean_new: mean values at X_new - :param Var_new: variance values at X_new - :param X_u: input (inducing) points used to train the model - :param Mean_u: mean values at X_u - :param Var_new: variance values at X_u - """ - assert X_new.shape[1] == 1, 'Number of dimensions must be 1' - gpplot(X_new,Mean_new,Var_new) - pb.errorbar(X_u,Mean_u,2*np.sqrt(Var_u),fmt='r+') - pb.plot(X_u,Mean_u,'ro') - - def plot2D(self,X,X_new,F_new,U=None): - """ - Predictive distribution of the fitted GP model for 2-dimensional inputs - - :param X_new: The points at which to make a prediction - :param Mean_new: mean values at X_new - :param Var_new: variance values at X_new - :param X_u: input points used to train the model - :param Mean_u: mean values at X_u - :param Var_new: variance values at X_u - """ - N,D = X_new.shape - assert D == 2, 'Number of dimensions must be 2' - n = np.sqrt(N) - x1min = X_new[:,0].min() - x1max = X_new[:,0].max() - x2min = X_new[:,1].min() - x2max = X_new[:,1].max() - pb.imshow(F_new.reshape(n,n),extent=(x1min,x1max,x2max,x2min),vmin=0,vmax=1) - pb.colorbar() - C1 = 
np.arange(self.N)[self.Y.flatten()==1] - C2 = np.arange(self.N)[self.Y.flatten()==-1] - [pb.plot(X[i,0],X[i,1],'ro') for i in C1] - [pb.plot(X[i,0],X[i,1],'bo') for i in C2] - pb.xlim(x1min,x1max) - pb.ylim(x2min,x2max) - if U is not None: - [pb.plot(a,b,'wo') for a,b in U] - -class probit(likelihood): - """ - Probit likelihood - Y is expected to take values in {-1,1} - ----- - $$ - L(x) = \\Phi (Y_i*f_i) - $$ - """ - def moments_match(self,i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - z = self.Y[i]*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = stats.norm.cdf(z) - phi = stats.norm.pdf(z) - mu_hat = v_i/tau_i + self.Y[i]*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) - return Z_hat, mu_hat, sigma2_hat - - def plot1Db(self,X,X_new,F_new,U=None): - assert X.shape[1] == 1, 'Number of dimensions must be 1' - gpplot(X_new,F_new,np.zeros(X_new.shape[0])) - pb.plot(X,(self.Y+1)/2,'kx',mew=1.5) - pb.ylim(-0.2,1.2) - if U is not None: - pb.plot(U,U*0+.5,'r|',mew=1.5,markersize=12) - - def predictive_mean(self,mu,variance): - return stats.norm.cdf(mu/np.sqrt(1+variance)) - - def _log_likelihood_gradients(): - raise NotImplementedError - -class poisson(likelihood): - """ - Poisson likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! - $$ - """ - def moments_match(self,i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - mu = v_i/tau_i - sigma = np.sqrt(1./tau_i) - def poisson_norm(f): - """ - Product of the likelihood and the cavity distribution - """ - pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma) - rate = np.exp( (f*self.scale)+self.location) - poisson = stats.poisson.pmf(float(self.Y[i]),rate) - return pdf_norm_f*poisson - - def log_pnm(f): - """ - Log of poisson_norm - """ - return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i]) - - """ - Golden Search and Simpson's Rule - -------------------------------- - Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input. - Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule - """ - #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case. 
- - #Golden search - golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit - golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit - golden_A = (golden_A - self.location)/self.scale - golden_B = (golden_B - self.location)/self.scale - opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm - - # Simpson's approximation - width = 3./np.log(max(self.Y[i],2)) - A = opt - width #Lower limit - B = opt + width #Upper limit - K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case - h = (B-A)/K # length of the intervals - grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) - x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier - zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x - first = zeroth*x - second = first*x - Z_hat = sum(zeroth)*h/3 # Zero-th moment - mu_hat = sum(first)*h/(3*Z_hat) # First moment - m2 = sum(second)*h/(3*Z_hat) # Second moment - sigma2_hat = m2 - mu_hat**2 # Second central moment - return float(Z_hat), float(mu_hat), float(sigma2_hat) - - def plot1Db(self,X,X_new,F_new,F2_new=None,U=None): - pb.subplot(212) - #gpplot(X_new,F_new,np.sqrt(F2_new)) - pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME - pb.plot(X,self.Y,'kx',mew=1.5) - if U is not None: - pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) - def predictive_mean(self,mu,variance): - return np.exp(mu*self.scale + self.location) - def predictive_variance(self,mu,variance): - return mu - def _log_likelihood_gradients(): - raise NotImplementedError - -class gaussian(likelihood): - """ - Gaussian likelihood - Y is expected to take values in (-inf,inf) - """ - def moments_match(self,i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - mu = v_i/tau_i - sigma = np.sqrt(1./tau_i) - s = 1. if self.Y[i] == 0 else 1./self.Y[i] - sigma2_hat = 1./(1./sigma**2 + 1./s**2) - mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2) - Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2)) - return Z_hat, mu_hat, sigma2_hat - - def plot1Db(self,X,X_new,F_new,U=None): - assert X.shape[1] == 1, 'Number of dimensions must be 1' - gpplot(X_new,F_new,np.zeros(X_new.shape[0])) - pb.plot(X,self.Y,'kx',mew=1.5) - if U is not None: - pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) - - def predictive_mean(self,mu,Sigma): - return mu - - def _log_likelihood_gradients(): - raise NotImplementedError diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index cfad17c9..9831ae40 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -14,14 +14,14 @@ class Matern32(kernpart): .. 
math:: - k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } :param D: the number of input dimensions :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :type ARD: Boolean :rtype: kernel object @@ -35,17 +35,19 @@ class Matern32(kernpart): self.Nparam = 2 self.name = 'Mat32' if lengthscale is not None: - assert lengthscale.shape == (1,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: self.Nparam = self.D + 1 - self.name = 'Mat32_ARD' + self.name = 'Mat32' if lengthscale is not None: - assert lengthscale.shape == (self.D,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == self.D, "bad number of lengthscales" else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" @@ -116,9 +118,9 @@ class Matern32(kernpart): :param F1: vector of derivatives of F :type F1: np.array :param F2: vector of second derivatives of F - :type F2: np.array + :type F2: np.array :param lower,upper: boundaries of the input domain - :type lower,upper: floats + :type lower,upper: floats """ assert self.D == 1 def L(x,i): @@ -133,4 +135,3 @@ class Matern32(kernpart): #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" #return(G) return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) - diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index cbe02c83..2994fc45 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -13,14 +13,14 @@ class Matern52(kernpart): .. math:: - k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } :param D: the number of input dimensions :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
:type ARD: Boolean :rtype: kernel object @@ -33,17 +33,19 @@ class Matern52(kernpart): self.Nparam = 2 self.name = 'Mat52' if lengthscale is not None: - assert lengthscale.shape == (1,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: self.Nparam = self.D + 1 - self.name = 'Mat52_ARD' + self.name = 'Mat52' if lengthscale is not None: - assert lengthscale.shape == (self.D,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == self.D, "bad number of lengthscales" else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 6c463a63..3c9cb192 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -13,14 +13,14 @@ class exponential(kernpart): .. math:: - k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } :param D: the number of input dimensions :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
:type ARD: Boolean :rtype: kernel object @@ -33,17 +33,19 @@ class exponential(kernpart): self.Nparam = 2 self.name = 'exp' if lengthscale is not None: - assert lengthscale.shape == (1,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: self.Nparam = self.D + 1 - self.name = 'exp_ARD' + self.name = 'exp' if lengthscale is not None: - assert lengthscale.shape == (self.D,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == self.D, "bad number of lengthscales" else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" @@ -87,7 +89,7 @@ class exponential(kernpart): dl = self.variance*dvar*dist2M.sum(-1)*invdist target[1] += np.sum(dl*partial) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,partial,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" #NB: derivative of diagonal elements wrt lengthscale is 0 target[0] += np.sum(partial) @@ -110,9 +112,9 @@ class exponential(kernpart): :param F: vector of functions :type F: np.array :param F1: vector of derivatives of F - :type F1: np.array + :type F1: np.array :param lower,upper: boundaries of the input domain - :type lower,upper: floats + :type lower,upper: floats """ assert self.D == 1 def L(x,i): @@ -124,8 +126,3 @@ class exponential(kernpart): G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] Flower = np.array([f(lower) for f in F])[:,None] return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) - - - - - diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 6bfd224f..aed3536f 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -3,6 +3,7 @@ import numpy as np +import pylab as pb from ..core.parameterised import parameterised from kernpart import kernpart import itertools @@ -155,7 +156,7 @@ class kern(parameterised): D = K1.D + K2.D - newkernparts = [product_orthogonal(k1,k2).parts[0] for k1, k2 in itertools.product(K1.parts,K2.parts)] + newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)] slices = [] for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices): @@ -235,6 +236,8 @@ class kern(parameterised): X2 = X target = np.zeros(self.Nparam) [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] + + #TODO: transform the gradients here! return target def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None): @@ -372,3 +375,59 @@ class kern(parameterised): #TODO: there are some extra terms to compute here! 
return target_mu, target_S + + def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None): + if which_functions=='all': + which_functions = [True]*self.Nparts + if self.D == 1: + if x is None: + x = np.zeros((1,1)) + else: + x = np.asarray(x) + assert x.size == 1, "The size of the fixed variable x is not 1" + x = x.reshape((1,1)) + + if plot_limits == None: + xmin, xmax = (x-5).flatten(), (x+5).flatten() + elif len(plot_limits) == 2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + + Xnew = np.linspace(xmin,xmax,resolution or 201)[:,None] + Kx = self.K(Xnew,x,slices2=which_functions) + pb.plot(Xnew,Kx) + pb.xlim(xmin,xmax) + pb.xlabel("x") + pb.ylabel("k(x,%0.1f)" %x) + + elif self.D == 2: + if x is None: + x = np.zeros((1,2)) + else: + x = np.asarray(x) + assert x.size == 2, "The size of the fixed variable x is not 2" + x = x.reshape((1,2)) + + if plot_limits == None: + xmin, xmax = (x-5).flatten(), (x+5).flatten() + elif len(plot_limits) == 2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + + resolution = resolution or 51 + xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] + xg = np.linspace(xmin[0],xmax[0],resolution) + yg = np.linspace(xmin[1],xmax[1],resolution) + Xnew = np.vstack((xx.flatten(),yy.flatten())).T + Kx = self.K(Xnew,x,slices2=which_functions) + Kx = Kx.reshape(resolution,resolution).T + pb.contour(xg,yg,Kx,vmin=Kx.min(),vmax=Kx.max(),cmap=pb.cm.jet) + pb.xlim(xmin[0],xmax[0]) + pb.ylim(xmin[1],xmax[1]) + pb.xlabel("x1") + pb.ylabel("x2") + pb.title("k(x1,x2 ; %0.1f,%0.1f)" %(x[0,0],x[0,1]) ) + else: + raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions" diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 7c8ba398..bc94b79d 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -15,8 +15,8 @@ class linear(kernpart): :param D: the number of input dimensions :type D: int :param variances: the vector of variances :math:`\sigma^2_i` - :type variances: np.ndarray of size (1,) or (D,) depending on ARD - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension. + :type variances: array or list of the appropriate size (or float if there is only one variance parameter) + :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. 
:type ARD: Boolean :rtype: kernel object """ @@ -28,21 +28,20 @@ self.Nparam = 1 self.name = 'linear' if variances is not None: - if isinstance(variances, float): - variances = np.array([variances]) - - assert variances.shape == (1,) + variances = np.asarray(variances) + assert variances.size == 1, "Only one variance needed for non-ARD kernel" else: variances = np.ones(1) self._Xcache, self._X2cache = np.empty(shape=(2,)) else: self.Nparam = self.D - self.name = 'linear_ARD' + self.name = 'linear' if variances is not None: - assert variances.shape == (self.D,) + variances = np.asarray(variances) + assert variances.size == self.D, "bad number of variances" else: variances = np.ones(self.D) - self._set_params(variances) + self._set_params(variances.flatten()) def _get_params(self): return self.variances diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 3143c244..5babfa4f 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -12,7 +12,7 @@ class rbf(kernpart): .. math:: - k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}} + k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}} where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. @@ -21,7 +21,7 @@ :param variance: the variance of the kernel :type variance: float :param lengthscale: the vector of lengthscale of the kernel - :type lengthscale: np.ndarray od size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :type ARD: Boolean :rtype: kernel object diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py new file mode 100644 index 00000000..efd887ae --- /dev/null +++ b/GPy/likelihoods/EP.py @@ -0,0 +1,311 @@ +import numpy as np +from scipy import stats, linalg +from ..util.linalg import pdinv,mdot,jitchol +from likelihood import likelihood + +class EP(likelihood): + def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]): + """ + Expectation Propagation + + Arguments + --------- + epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + likelihood_function : a likelihood function (see likelihood_functions.py) + """ + self.likelihood_function = likelihood_function + self.epsilon = epsilon + self.eta, self.delta = power_ep + self.data = data + self.N = self.data.size + self.is_heteroscedastic = True + self.Nparams = 0 + + #Initial values - Likelihood approximation parameters: + #p(y|f) = t(f|tau_tilde,v_tilde) + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + + #initial values for the GP variables + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N) + self.Z = 0 + self.YYT = None + + def predictive_values(self,mu,var): + return self.likelihood_function.predictive_values(mu,var) + + def _get_params(self): + return np.zeros(0) + def _get_param_names(self): + return [] + def _set_params(self,p): + pass # TODO: the EP likelihood might want to take some parameters... + def _gradients(self,partial): + return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... 
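+ + # How EP hands its result back to the GP: once the site parameters + # (tau_tilde, v_tilde) have converged, each datum is summarised by a + # pseudo-observation mu_tilde_i = v_tilde_i/tau_tilde_i with effective + # noise precision tau_tilde_i, so the GP can treat the approximated + # likelihood as heteroscedastic Gaussian noise; Z accumulates the EP + # normalization constant (Z_ep) used in the approximate marginal likelihood.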
+ + def _compute_GP_variables(self): + #Variables to be called from GP + mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model + sigma_sum = 1./self.tau_ + 1./self.tau_tilde + mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 + self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep + + self.Y = mu_tilde[:,None] + self.YYT = np.dot(self.Y,self.Y.T) + self.precision = self.tau_tilde + self.covariance_matrix = np.diag(1./self.precision) + + def fit_full(self,K): + """ + The expectation-propagation algorithm. + For nomenclature see Rasmussen & Williams 2006. + """ + #Prior distribution parameters: p(f|X) = N(f|0,K) + + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) + mu = np.zeros(self.N) + Sigma = K.copy() + + """ + Initial values - Cavity distribution parameters: + q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = self.epsilon + 1. + epsilon_np2 = self.epsilon + 1. + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau + self.v_tilde[i] = self.v_tilde[i] + Delta_v + #Posterior distribution parameters update + si=Sigma[:,i].reshape(self.N,1) + Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T) + mu = np.dot(Sigma,self.v_tilde) + self.iterations += 1 + #Sigma recomputation with Cholesky decomposition + Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K + B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K + L = jitchol(B) + V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) + Sigma = K - np.dot(V.T,V) + mu = np.dot(Sigma,self.v_tilde) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() + + def fit_DTC(self, Knn_diag, Kmn, Kmm): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see ... 2013. + """ + + #TODO: this doesn't work with uncertain inputs! 
+ + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = Qnn = Knm*Kmmi*Kmn + """ + Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) + KmnKnm = np.dot(Kmn, Kmn.T) + KmmiKmn = np.dot(Kmmi,Kmn) + Qnn_diag = np.sum(Kmn*KmmiKmn,-2) + LLT0 = Kmm.copy() + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*gamma + """ + mu = np.zeros(self.N) + LLT = Kmm.copy() + Sigma_diag = Qnn_diag.copy() + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + tau_ = np.empty(self.N,dtype=float) + v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + np1 = [tau_tilde.copy()] + np2 = [v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i] + v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i] + #Marginal moments + Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i]) + #Site parameters update + Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + tau_tilde[i] = tau_tilde[i] + Delta_tau + v_tilde[i] = v_tilde[i] + Delta_v + #Posterior distribution parameters update + LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau + L = jitchol(LLT) + V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) + Sigma_diag = np.sum(V*V,-2) + si = np.sum(V.T*V[:,i],-1) + mu = mu + (Delta_v-Delta_tau*mu[i])*si + self.iterations += 1 + #Sigma recomputation with Cholesky decompositon + LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T) + L = jitchol(LLT) + V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) + V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0) + Sigma_diag = np.sum(V*V,-2) + Knmv_tilde = np.dot(Kmn,v_tilde) + mu = np.dot(V2.T,Knmv_tilde) + epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N + epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N + np1.append(tau_tilde.copy()) + np2.append(v_tilde.copy()) + + self._compute_GP_variables() + + def fit_FITC(self, Knn_diag, Kmn): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see Naish-Guzman and Holden, 2008. 
+ """ + + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn + """ + Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm) + P0 = Kmn.T + KmnKnm = np.dot(P0.T, P0) + KmmiKmn = np.dot(Kmmi,P0.T) + Qnn_diag = np.sum(P0.T*KmmiKmn,-2) + Diag0 = Knn_diag - Qnn_diag + R0 = jitchol(Kmmi).T + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*gamma + """ + self.w = np.zeros(self.N) + self.gamma = np.zeros(self.M) + mu = np.zeros(self.N) + P = P0.copy() + R = R0.copy() + Diag = Diag0.copy() + Sigma_diag = Knn_diag + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau + self.v_tilde[i] = self.v_tilde[i] + Delta_v + #Posterior distribution parameters update + dtd1 = Delta_tau*Diag[i] + 1. 
+ dii = Diag[i] + Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 + pi_ = P[i,:].reshape(1,self.M) + P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ + Rp_i = np.dot(R,pi_.T) + RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) + R = jitchol(RTR).T + self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 + self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + mu = self.w + np.dot(P,self.gamma) + self.iterations += 1 + #Sigma recomputation with Cholesky decomposition + Diag = Diag0/(1.+ Diag0 * self.tau_tilde) + P = (Diag / Diag0)[:,None] * P0 + RPT0 = np.dot(R0,P0.T) + L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T)) + R,info = linalg.flapack.dtrtrs(L,R0,lower=1) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + self.w = Diag * self.v_tilde + self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) + mu = self.w + np.dot(P,self.gamma) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py new file mode 100644 index 00000000..a34b3e6c --- /dev/null +++ b/GPy/likelihoods/Gaussian.py @@ -0,0 +1,56 @@ +import numpy as np +from likelihood import likelihood + +class Gaussian(likelihood): + def __init__(self,data,variance=1.,normalize=False): + self.is_heteroscedastic = False + self.Nparams = 1 + self.data = data + self.N,D = data.shape + self.Z = 0. # a correction factor which accounts for the approximation made + + #normalisation + if normalize: + self._mean = data.mean(0)[None,:] + self._std = data.std(0)[None,:] + self.Y = (self.data - self._mean)/self._std + else: + self._mean = np.zeros((1,D)) + self._std = np.ones((1,D)) + self.Y = self.data + + #TODO: make this work efficiently (only compute YYT if D>>N) + self.YYT = np.dot(self.Y,self.Y.T) + self.trYYT = np.trace(self.YYT) + self._set_params(np.asarray(variance)) + + + def _get_params(self): + return np.asarray(self._variance) + + def _get_param_names(self): + return ["noise variance"] + + def _set_params(self,x): + self._variance = float(x) + self.covariance_matrix = np.eye(self.N)*self._variance + self.precision = 1./self._variance + + def predictive_values(self,mu,var): + """ + Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval + """ + mean = mu*self._std + self._mean + true_var = (var + self._variance)*self._std**2 + _5pc = mean - 2.*np.sqrt(true_var) + _95pc = mean + 2.*np.sqrt(true_var) + return mean, _5pc, _95pc + + def fit_full(self): + """ + No approximations needed + """ + pass + + def _gradients(self,partial): + return np.sum(partial) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py new file mode 100644 index 00000000..83413255 --- /dev/null +++ b/GPy/likelihoods/__init__.py @@ -0,0 +1,4 @@ +from EP import EP +from Gaussian import Gaussian +# TODO: from Laplace import Laplace +import likelihood_functions as functions diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py new file mode 100644 index 00000000..6ec57c07 --- /dev/null +++ b/GPy/likelihoods/likelihood.py @@ -0,0 +1,35 @@ +import numpy as np + +class likelihood: + """ + The atom for a likelihood class + + This object interfaces the GP and 
the data. The most basic likelihood + (Gaussian) inherits directly from this, as does the EP algorithm + + Some things must be defined for this to work properly: + self.Y : the effective Gaussian target of the GP + self.N, self.D : Y.shape + self.covariance_matrix : the effective (noise) covariance of the GP targets + self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP) + self.is_heteroscedastic : enables significant computational savings in GP + self.precision : a scalar or vector representation of the effective target precision + self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N + """ + def __init__(self,data): + raise ValueError, "this class is not to be instantiated" + + def _get_params(self): + raise NotImplementedError + + def _get_param_names(self): + raise NotImplementedError + + def _set_params(self,x): + raise NotImplementedError + + def fit(self): + raise NotImplementedError + + def _gradients(self,partial): + raise NotImplementedError diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py new file mode 100644 index 00000000..23881899 --- /dev/null +++ b/GPy/likelihoods/likelihood_functions.py @@ -0,0 +1,134 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +import scipy as sp +import pylab as pb +from ..util.plot import gpplot + +class likelihood_function: + """ + Likelihood class for doing Expectation propagation + + :param Y: observed output (Nx1 numpy.ndarray) + ..Note:: Y values allowed depend on the likelihood_function used + """ + def __init__(self,location=0,scale=1): + self.location = location + self.scale = scale + +class probit(likelihood_function): + """ + Probit likelihood + Y is expected to take values in {-1,1} + ----- + $$ + L(x) = \\Phi (Y_i*f_i) + $$ + """ + + def moments_match(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param data_i: ith observation + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + # TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1" + if data_i == 0: data_i = -1 #NOTE Binary classification works better with classes {-1,1}, 1D-plotting works better with classes {0,1}. + z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) + Z_hat = stats.norm.cdf(z) + phi = stats.norm.pdf(z) + mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) + sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + return Z_hat, mu_hat, sigma2_hat + + def predictive_values(self,mu,var): + """ + Compute mean, and confidence interval (percentiles 2.5 and 97.5) of the prediction + """ + mu = mu.flatten() + var = var.flatten() + mean = stats.norm.cdf(mu/np.sqrt(1+var)) + p_025 = np.zeros(mu.shape) + p_975 = np.ones(mu.shape) + return mean, p_025, p_975 + +class Poisson(likelihood_function): + """ + Poisson likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i! 
+class Poisson(likelihood_function):
+    """
+    Poisson likelihood
+    Y is expected to take values in {0,1,2,...}
+    -----
+    $$
+    L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
+    $$
+    """
+    def moments_match(self,data_i,tau_i,v_i):
+        """
+        Moments match of the marginal approximation in EP algorithm
+
+        :param data_i: ith observation (float)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: precision-scaled mean (mean/variance) of the cavity distribution (float)
+        """
+        mu = v_i/tau_i
+        sigma = np.sqrt(1./tau_i)
+        def poisson_norm(f):
+            """
+            Product of the likelihood and the cavity distribution
+            """
+            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
+            rate = np.exp( (f*self.scale)+self.location)
+            poisson = stats.poisson.pmf(float(data_i),rate)
+            return pdf_norm_f*poisson
+
+        def log_pnm(f):
+            """
+            Negative log of poisson_norm (up to an additive constant), used for minimisation
+            """
+            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
+
+        """
+        Golden Search and Simpson's Rule
+        --------------------------------
+        Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
+        Golden Search is used to find the mode of poisson_norm, and the grid for Simpson's Rule is defined around that mode.
+        """
+        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
+
+        #Golden search
+        golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
+        golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
+        golden_A = (golden_A - self.location)/self.scale
+        golden_B = (golden_B - self.location)/self.scale
+        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
+
+        # Simpson's approximation
+        width = 3./np.log(max(data_i,2))
+        A = opt - width #Lower limit
+        B = opt + width #Upper limit
+        K = 10*int(np.log(max(data_i,150))) #Number of points in the grid; we DON'T want K to be the same number for every case
+        h = (B-A)/K # length of the intervals
+        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
+        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
+        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
+        first = zeroth*x
+        second = first*x
+        Z_hat = sum(zeroth)*h/3 # Zero-th moment
+        mu_hat = sum(first)*h/(3*Z_hat) # First moment
+        m2 = sum(second)*h/(3*Z_hat) # Second moment
+        sigma2_hat = m2 - mu_hat**2 # Second central moment
+        return float(Z_hat), float(mu_hat), float(sigma2_hat)
+
+    def predictive_values(self,mu,var):
+        """
+        Compute the mean and a confidence interval (2.5 and 97.5 percentiles) of the prediction
+        """
+        mean = np.exp(mu*self.scale + self.location)
+        # evaluate each percentile separately so the shapes broadcast with `mean`
+        p_025 = stats.poisson.ppf(.025,mean)
+        p_975 = stats.poisson.ppf(.975,mean)
+        return mean,p_025,p_975
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
index db147944..16115025 100644
--- a/GPy/models/BGPLVM.py
+++ b/GPy/models/BGPLVM.py
@@ -5,10 +5,12 @@ import numpy as np
 import pylab as pb
 import sys, pdb
 from GPLVM import GPLVM
-from sparse_GP_regression import sparse_GP_regression
+from sparse_GP import sparse_GP
 from GPy.util.linalg import pdinv
+from ..likelihoods import Gaussian
+from .. import kern
 
-class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
     Bayesian Gaussian Process Latent Variable Model
 
@@ -20,15 +22,23 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
     :type init: 'PCA'|'random'
     """
-    def __init__(self, Y, Q, init='PCA', **kwargs):
+    def __init__(self, Y, Q, init='PCA', M=10, Z=None, **kwargs):
         X = self.initialise_latent(init, Q, Y)
-        S = np.ones_like(X) * 1e-2#
-        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+
+        if Z is None:
+            Z = np.random.permutation(X.copy())[:M]
+        else:
+            assert Z.shape[1]==X.shape[1]
+
+        kernel = kern.rbf(Q) + kern.white(Q)
+
+        S = np.ones_like(X) * 1e-2 # initial variational variance of X
+        sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=S, **kwargs)
 
     def _get_param_names(self):
         X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
         S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-        return (X_names + S_names + sparse_GP_regression._get_param_names(self))
+        return (X_names + S_names + sparse_GP._get_param_names(self))
 
     def _get_params(self):
         """
@@ -36,17 +46,17 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         The resulting 1-D array has this structure:
 
         ===============================================================
-        | mu | S | Z | beta | theta |
+        | mu | S | Z | theta | beta |
         ===============================================================
         """
-        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self)))
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP._get_params(self)))
 
     def _set_params(self,x):
         N, Q = self.N, self.Q
         self.X = x[:self.X.size].reshape(N,Q).copy()
         self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP_regression._set_params(self, x[(2*N*Q):])
+        sparse_GP._set_params(self, x[(2*N*Q):])
 
     def dL_dmuS(self):
         dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
@@ -58,5 +68,5 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
 
     def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
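+
+# Hedged usage sketch (comment only; hypothetical 12-dimensional data,
+# Q=2 latent dimensions, M=10 inducing points):
+#
+#   Y = np.random.randn(100, 12)
+#   m = Bayesian_GPLVM(Y, Q=2, M=10)
+#   m.optimize()
+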
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
new file mode 100644
index 00000000..c4c37e44
--- /dev/null
+++ b/GPy/models/GP.py
@@ -0,0 +1,274 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+import pylab as pb
+from .. import kern
+from ..core import model
+from ..util.linalg import pdinv,mdot
+from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
+from ..likelihoods import EP
+
+class GP(model):
+    """
+    Gaussian Process model for regression and EP
+
+    :param X: input observations
+    :param kernel: a GPy kernel, defaults to rbf+white
+    :param likelihood: a GPy likelihood
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
+    :rtype: model object
+
+    .. Note:: Y normalization and the EP settings (convergence criterion
+       epsilon, power-EP parameters [$\eta$,$\delta$]) are handled by the
+       likelihood object, not by this class.
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
+
+    """
+    #FIXME normalize vs normalise
+    def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
+
+        # parse arguments
+        self.Xslices = Xslices
+        self.X = X
+        assert len(self.X.shape)==2
+        self.N, self.Q = self.X.shape
+        assert isinstance(kernel, kern.kern)
+        self.kern = kernel
+
+        #here's some simple normalisation for the inputs
+        if normalize_X:
+            self._Xmean = X.mean(0)[None,:]
+            self._Xstd = X.std(0)[None,:]
+            self.X = (X.copy() - self._Xmean) / self._Xstd
+            if hasattr(self,'Z'):
+                self.Z = (self.Z - self._Xmean) / self._Xstd
+        else:
+            self._Xmean = np.zeros((1,self.X.shape[1]))
+            self._Xstd = np.ones((1,self.X.shape[1]))
+
+        self.likelihood = likelihood
+        #assert self.X.shape[0] == self.likelihood.Y.shape[0]
+        #self.N, self.D = self.likelihood.Y.shape
+        assert self.X.shape[0] == self.likelihood.data.shape[0]
+        self.N, self.D = self.likelihood.data.shape
+
+        model.__init__(self)
+
+    def _set_params(self,p):
+        self.kern._set_params_transformed(p[:self.kern.Nparam])
+        #self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
+        self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
+
+        self.K = self.kern.K(self.X,slices1=self.Xslices)
+        self.K += self.likelihood.covariance_matrix
+
+        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
+
+        #the gradient of the likelihood wrt the covariance matrix
+        if self.likelihood.YYT is None:
+            alpha = np.dot(self.Ki,self.likelihood.Y)
+            self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
+        else:
+            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
+            self.dL_dK = 0.5*(tmp - self.D*self.Ki)
+
+    def _get_params(self):
+        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
+
+    def _get_param_names(self):
+        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
+
+    def update_likelihood_approximation(self):
+        """
+        Approximates a non-Gaussian likelihood using Expectation Propagation
+
+        For a Gaussian likelihood no iteration is required and this reduces
+        to a no-op (direct approximations: TODO)
+        """
+        self.likelihood.fit_full(self.kern.K(self.X))
+        self._set_params(self._get_params()) # update the GP
+
+    def _model_fit_term(self):
+        """
+        Computes the model fit using YYT if it's available
+        """
+        if self.likelihood.YYT is None:
+            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
+        else:
+            return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
+
+    def log_likelihood(self):
+        """
+        The log marginal likelihood of the GP.
+
+        For an EP model, this can be written as the log likelihood of a
+        regression model for a new variable Y* = v_tilde/tau_tilde, with a
+        covariance matrix K* = K + diag(1./tau_tilde), plus a normalization
+        term.
+        """
+        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
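+
+    # Hedged note: with a Gaussian likelihood the expression above reduces to
+    # the usual log marginal (up to an additive constant), since in that case
+    # likelihood.Z == 0 and likelihood.covariance_matrix == variance * I:
+    #   log p(Y) ~ -0.5*D*log|K + variance*I| - 0.5*tr[(K + variance*I)^{-1} Y Y^T]
+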
+    def _log_likelihood_gradients(self):
+        """
+        The gradient of all parameters.
+
+        For the kernel parameters, use the chain rule via dL_dK
+
+        For the likelihood parameters, pass in the diagonal of dL_dK as the
+        partial derivative (the noise only enters K on its diagonal)
+        """
+        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+
+    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
+        """
+        Internal helper function for making predictions; does not account
+        for normalisation or the likelihood
+        """
+        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
+        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
+        KiKx = np.dot(self.Ki,Kx)
+        if full_cov:
+            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
+            var = Kxx - np.dot(KiKx.T,Kx) #NOTE this won't work for plotting
+        else:
+            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
+            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
+            var = var[:,None]
+        return mu, var
+
+    def predict(self,Xnew, slices=None, full_cov=False):
+        """
+        Predict the function(s) at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.Q
+        :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
+        :type slices: (None, list of slice objects, list of ints)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.D
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
+
+        .. Note:: "slices" specifies how the points Xnew co-vary with the training points.
+
+          - If None, the new points covary through every kernel part (default)
+          - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
+          - If a list of booleans, specifying which kernel parts are active
+
+        If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
+        This is to allow for different normalisations of the output dimensions.
+
+        """
+        #normalise X values
+        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
+        mu, var = self._raw_predict(Xnew, slices, full_cov)
+
+        #now push through likelihood TODO
+        mean, _025pm, _975pm = self.likelihood.predictive_values(mu, var)
+
+        return mean, var, _025pm, _975pm
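+
+    # Hedged usage sketch via the GP_regression wrapper defined later in this
+    # patch (comment only; toy 1-D data):
+    #
+    #   X = np.random.rand(20, 1); Y = np.sin(X) + 0.05*np.random.randn(20, 1)
+    #   m = GPy.models.GP_regression(X, Y)
+    #   m.optimize()
+    #   mean, var, lo95, up95 = m.predict(np.linspace(0, 1, 100)[:, None])
+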
+    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
+        """
+        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+
+        :param samples: the number of a posteriori samples to plot
+        :param which_data: which of the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+        :param which_functions: which of the kernel functions to plot (additively)
+        :type which_functions: list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+
+        Plot the posterior of the GP.
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimensions, a contour-plot shows the mean predicted function
+          - In higher dimensions, this is not yet implemented (TODO)
+
+        Can plot only part of the data and part of the posterior functions using which_data and which_functions
+        """
+        if which_functions=='all':
+            which_functions = [True]*self.kern.Nparts
+        if which_data=='all':
+            which_data = slice(None)
+
+        if self.X.shape[1] == 1:
+            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+            if samples == 0:
+                m,v = self._raw_predict(Xnew, slices=which_functions)
+                gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
+                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
+            else:
+                m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
+                Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
+                gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
+                for i in range(samples):
+                    pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
+                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
+            pb.xlim(xmin,xmax)
+            # v is the diagonal variance when samples==0, the full covariance otherwise
+            if samples == 0:
+                lower, upper = m - 2*np.sqrt(v), m + 2*np.sqrt(v)
+            else:
+                lower, upper = m - 2*np.sqrt(np.diag(v)[:,None]), m + 2*np.sqrt(np.diag(v)[:,None])
+            ymin,ymax = min(np.append(self.likelihood.Y,lower)), max(np.append(self.likelihood.Y,upper))
+            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
+            pb.ylim(ymin,ymax)
+            if hasattr(self,'Z'):
+                pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
+
+        elif self.X.shape[1] == 2:
+            resolution = resolution or 50
+            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
+            m,v = self._raw_predict(Xnew, slices=which_functions)
+            m = m.reshape(resolution,resolution).T
+            pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
+            #NOTE plot the normalised data here, consistent with the normalised posterior
+            pb.scatter(self.X[:,0],self.X[:,1],40,self.likelihood.Y,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
+            pb.xlim(xmin[0],xmax[0])
+            pb.ylim(xmin[1],xmax[1])
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+        """
+        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
+        """
+        # TODO include samples
+        if which_functions=='all':
+            which_functions = [True]*self.kern.Nparts
+        if which_data=='all':
+            which_data = slice(None)
+
+        if self.X.shape[1] == 1:
+
+            Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
+
+            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
+            gpplot(Xnew,m, lower, upper)
+            pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
+            ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
+            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
+            pb.xlim(xmin,xmax)
+            pb.ylim(ymin,ymax)
+            if hasattr(self,'Z'):
+                Zu = self.Z*self._Xstd + self._Xmean
+                pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
+
+        elif self.X.shape[1]==2: #FIXME
+            resolution = resolution or 50
+            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
+            x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
+            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
+            m = m.reshape(resolution,resolution).T
+            pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
+            Yf = self.likelihood.Y.flatten()
+            pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
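+            # clamp the axes to the frame computed by x_frame2D above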
+ pb.xlim(xmin[0],xmax[0]) + pb.ylim(xmin[1],xmax[1]) + else: + raise NotImplementedError, "Cannot define a frame with more than two input dimensions" diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index a8f6a5b1..73762433 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -8,9 +8,10 @@ import sys, pdb from .. import kern from ..core import model from ..util.linalg import pdinv, PCA -from GP_regression import GP_regression +from GP import GP +from ..likelihoods import Gaussian -class GPLVM(GP_regression): +class GPLVM(GP): """ Gaussian Process Latent Variable Model @@ -22,10 +23,13 @@ class GPLVM(GP_regression): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, init='PCA', X = None, **kwargs): + def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs): if X is None: X = self.initialise_latent(init, Q, Y) - GP_regression.__init__(self, X, Y, **kwargs) + if kernel is None: + kernel = kern.rbf(Q) + kern.bias(Q) + likelihood = Gaussian(Y) + GP.__init__(self, X, likelihood, kernel, **kwargs) def initialise_latent(self, init, Q, Y): if init == 'PCA': @@ -34,23 +38,19 @@ class GPLVM(GP_regression): return np.random.randn(Y.shape[0], Q) def _get_param_names(self): - return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) - + self.kern._get_param_names_transformed()) + return sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) + GP._get_param_names(self) def _get_params(self): - return np.hstack((self.X.flatten(), self.kern._get_params_transformed())) + return np.hstack((self.X.flatten(), GP._get_params(self))) def _set_params(self,x): self.X = x[:self.X.size].reshape(self.N,self.Q).copy() - GP_regression._set_params(self, x[self.X.size:]) + GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): - dL_dK = self.dL_dK() + dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) - dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X) - dL_dX = 2*self.kern.dK_dX(dL_dK,self.X) - - return np.hstack((dL_dX.flatten(),dL_dtheta)) + return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) def plot(self): assert self.Y.shape[1]==2 diff --git a/GPy/models/GP_EP.py b/GPy/models/GP_EP.py deleted file mode 100644 index 51d69d0a..00000000 --- a/GPy/models/GP_EP.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -from scipy import stats, linalg -from .. import kern -from ..inference.Expectation_Propagation import Full -from ..inference.likelihoods import likelihood,probit#,poisson,gaussian -from ..core import model -from ..util.linalg import pdinv,jitchol -from ..util.plot import gpplot - -class GP_EP(model): - def __init__(self,X,likelihood,kernel=None,epsilon_ep=1e-3,epsion_em=.1,powerep=[1.,1.]): - """ - Simple Gaussian Process with Non-Gaussian likelihood - - Arguments - --------- - :param X: input observations (NxD numpy.darray) - :param likelihood: a GPy likelihood (likelihood class) - :param kernel: a GPy kernel (kern class) - :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 (float) - :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list) - :rtype: GPy model class. 
- """ - if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1]) - - assert isinstance(kernel,kern.kern), 'kernel is not a kern instance' - self.likelihood = likelihood - self.Y = self.likelihood.Y - self.kernel = kernel - self.X = X - self.N, self.D = self.X.shape - self.eta,self.delta = powerep - self.epsilon_ep = epsilon_ep - self.jitter = 1e-12 - self.K = self.kernel.K(self.X) - model.__init__(self) - - def _set_params(self,p): - self.kernel._set_params_transformed(p) - - def _get_params(self): - return self.kernel._get_params_transformed() - - def _get_param_names(self): - return self.kernel._get_param_names_transformed() - - def approximate_likelihood(self): - self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta]) - self.ep_approx.fit_EP() - - def posterior_param(self): - self.K = self.kernel.K(self.X) - self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K - B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)[None,:]*self.Sroot_tilde_K - #self.L = np.linalg.cholesky(B) - self.L = jitchol(B) - V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1) - self.Sigma = self.K - np.dot(V.T,V) - self.mu = np.dot(self.Sigma,self.ep_approx.v_tilde) - - def log_likelihood(self): - """ - Returns - ------- - The EP approximation to the log-marginal likelihood - """ - self.posterior_param() - mu_ = self.ep_approx.v_/self.ep_approx.tau_ - L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L))) - L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:])) - L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde) - L3 = sum(np.log(self.ep_approx.Z_hat)) - return L1 + L2A + L2B + L3 - - def _log_likelihood_gradients(self): - dK_dp = self.kernel.dK_dtheta(self.X) - self.dK_dp = dK_dp - aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1) - b = self.ep_approx.v_tilde - np.sqrt(self.ep_approx.tau_tilde)*linalg.flapack.dtrtrs(self.L.T,aux1)[0] - U,info_u = linalg.flapack.dtrtrs(self.L,np.diag(np.sqrt(self.ep_approx.tau_tilde)),lower=1) - dL_dK = 0.5*(np.outer(b,b)-np.dot(U.T,U)) - self.dL_dK = dL_dK - return np.array([np.sum(dK_dpi*dL_dK) for dK_dpi in dK_dp.T]) - - def predict(self,X): - #TODO: check output dimensions - self.posterior_param() - K_x = self.kernel.K(self.X,X) - Kxx = self.kernel.K(X) - aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1) - aux2,info = linalg.flapack.dtrtrs(self.L.T, aux1,lower=0) - zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2 - f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta) - v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1) - variance = Kxx - np.dot(v.T,v) - vdiag = np.diag(variance) - y=self.likelihood.predictive_mean(f,vdiag) - return f,vdiag,y - - def plot(self): - """ - Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals. 
- """ - if self.X.shape[1]==1: - pb.figure() - xmin,xmax = self.X.min(),self.X.max() - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - Xnew = np.linspace(xmin,xmax,100)[:,None] - mu_f, var_f, mu_phi = self.predict(Xnew) - pb.subplot(211) - self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma)) - pb.subplot(212) - self.likelihood.plot1Db(self.X,Xnew,mu_phi) - elif self.X.shape[1]==2: - pb.figure() - x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0) - x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0) - x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min) - x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x1max-x1min) - axis1 = np.linspace(x1min,x1max,50) - axis2 = np.linspace(x2min,x2max,50) - XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)] - Xnew = np.c_[XX1.flatten(),XX2.flatten()] - f,v,p = self.predict(Xnew) - self.likelihood.plot2D(self.X,Xnew,p) - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" - - def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense - """ - Fits sparse_EP and optimizes the hyperparametes iteratively until convergence is achieved. - """ - self.epsilon_em = epsilon - log_likelihood_change = self.epsilon_em + 1. - self.parameters_path = [self.kernel._get_params()] - self.approximate_likelihood() - self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]] - self.log_likelihood_path = [self.log_likelihood()] - iteration = 0 - while log_likelihood_change > self.epsilon_em: - print 'EM iteration', iteration - self.optimize(max_f_eval = max_f_eval) - log_likelihood_new = self.log_likelihood() - log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1] - if log_likelihood_change < 0: - print 'log_likelihood decrement' - self.kernel._set_params_transformed(self.parameters_path[-1]) - self.kernM._set_params_transformed(self.parameters_path[-1]) - else: - self.approximate_likelihood() - self.log_likelihood_path.append(self.log_likelihood()) - self.parameters_path.append(self.kernel._get_params()) - self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde]) - iteration += 1 diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index 72a24307..5f9f9f3e 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -1,18 +1,18 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Copyright (c) 2012, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import pylab as pb +from GP import GP +from .. import likelihoods from .. 
import kern -from ..core import model -from ..util.linalg import pdinv,mdot -from ..util.plot import gpplot, Tango -class GP_regression(model): +class GP_regression(GP): """ Gaussian Process model for regression + This is a thin wrapper around the GP class, with a set of sensible defalts + :param X: input observations :param Y: observed values :param kernel: a GPy kernel, defaults to rbf+white @@ -29,199 +29,8 @@ class GP_regression(model): def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1]) + kernel = kern.rbf(X.shape[1]) - # parse arguments - self.Xslices = Xslices - assert isinstance(kernel, kern.kern) - self.kern = kernel - self.X = X - self.Y = Y - assert len(self.X.shape)==2 - assert len(self.Y.shape)==2 - assert self.X.shape[0] == self.Y.shape[0] - self.N, self.D = self.Y.shape - self.N, self.Q = self.X.shape + likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) - #here's some simple normalisation - if normalize_X: - self._Xmean = X.mean(0)[None,:] - self._Xstd = X.std(0)[None,:] - self.X = (X.copy() - self._Xmean) / self._Xstd - if hasattr(self,'Z'): - self.Z = (self.Z - self._Xmean) / self._Xstd - else: - self._Xmean = np.zeros((1,self.X.shape[1])) - self._Xstd = np.ones((1,self.X.shape[1])) - - if normalize_Y: - self._Ymean = Y.mean(0)[None,:] - self._Ystd = Y.std(0)[None,:] - self.Y = (Y.copy()- self._Ymean) / self._Ystd - else: - self._Ymean = np.zeros((1,self.Y.shape[1])) - self._Ystd = np.ones((1,self.Y.shape[1])) - - if self.D > self.N: - # then it's more efficient to store YYT - self.YYT = np.dot(self.Y, self.Y.T) - else: - self.YYT = None - - model.__init__(self) - - def _set_params(self,p): - self.kern._set_params_transformed(p) - self.K = self.kern.K(self.X,slices1=self.Xslices) - self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) - - def _get_params(self): - return self.kern._get_params_transformed() - - def _get_param_names(self): - return self.kern._get_param_names_transformed() - - def _model_fit_term(self): - """ - Computes the model fit using YYT if it's available - """ - if self.YYT is None: - return -0.5*np.sum(np.square(np.dot(self.Li,self.Y))) - else: - return -0.5*np.sum(np.multiply(self.Ki, self.YYT)) - - def log_likelihood(self): - complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet - return complexity_term + self._model_fit_term() - - def dL_dK(self): - if self.YYT is None: - alpha = np.dot(self.Ki,self.Y) - dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki) - else: - dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki) - - return dL_dK - - def _log_likelihood_gradients(self): - return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X) - - def predict(self,Xnew, slices=None, full_cov=False): - """ - - Predict the function(s) at the new point(s) Xnew. - - Arguments - --------- - :param Xnew: The points at which to make a prediction - :type Xnew: np.ndarray, Nnew x self.Q - :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below) - :type slices: (None, list of slice objects, list of ints) - :param full_cov: whether to return the folll covariance matrix, or just the diagonal - :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.D - :rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D) - - .. Note:: "slices" specifies how the the points X_new co-vary wich the training points. 
- - - If None, the new points covary throigh every kernel part (default) - - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part - - If a list of booleans, specifying which kernel parts are active - - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. - This is to allow for different normalisations of the output dimensions. - - - """ - - #normalise X values - Xnew = (Xnew.copy() - self._Xmean) / self._Xstd - mu, var = self._raw_predict(Xnew, slices, full_cov) - - #un-normalise - mu = mu*self._Ystd + self._Ymean - if full_cov: - if self.D==1: - var *= np.square(self._Ystd) - else: - var = var[:,:,None] * np.square(self._Ystd) - else: - if self.D==1: - var *= np.square(np.squeeze(self._Ystd)) - else: - var = var[:,None] * np.square(self._Ystd) - - return mu,var - - def _raw_predict(self,_Xnew,slices, full_cov=False): - """Internal helper function for making predictions, does not account for normalisation""" - Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices) - mu = np.dot(np.dot(Kx.T,self.Ki),self.Y) - KiKx = np.dot(self.Ki,Kx) - if full_cov: - Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices) - var = Kxx - np.dot(KiKx.T,Kx) - else: - Kxx = self.kern.Kdiag(_Xnew, slices=slices) - var = Kxx - np.sum(np.multiply(KiKx,Kx),0) - return mu, var - - def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None): - """ - :param samples: the number of a posteriori samples to plot - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :param which_functions: which of the kernel functions to plot (additively) - :type which_functions: list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - - Plot the posterior of the GP. - - In one dimension, the function is plotted with a shaded region identifying two standard deviations. - - In two dimsensions, a contour-plot shows the mean predicted function - - In higher dimensions, we've no implemented this yet !TODO! 
- - Can plot only part of the data and part of the posterior functions using which_data and which_functions - """ - if which_functions=='all': - which_functions = [True]*self.kern.Nparts - if which_data=='all': - which_data = slice(None) - - X = self.X[which_data,:] - Y = self.Y[which_data,:] - - Xorig = X*self._Xstd + self._Xmean - Yorig = Y*self._Ystd + self._Ymean - if plot_limits is None: - xmin,xmax = Xorig.min(0),Xorig.max(0) - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - elif len(plot_limits)==2: - xmin, xmax = plot_limits - else: - raise ValueError, "Bad limits for plotting" - - - if self.X.shape[1]==1: - Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] - m,v = self.predict(Xnew,slices=which_functions) - gpplot(Xnew,m,v) - if samples: - s = np.random.multivariate_normal(m.flatten(),v,samples) - pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8) - pb.plot(Xorig,Yorig,'kx',mew=1.5) - pb.xlim(xmin,xmax) - - elif self.X.shape[1]==2: - resolution = 50 or resolution - xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] - Xtest = np.vstack((xx.flatten(),yy.flatten())).T - zz,vv = self.predict(Xtest,slices=which_functions) - zz = zz.reshape(resolution,resolution) - pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet) - pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max()) - pb.xlim(xmin[0],xmax[0]) - pb.ylim(xmin[1],xmax[1]) - - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" + GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 9d5b1d00..8e2c5d84 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -2,12 +2,12 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) +from GP import GP from GP_regression import GP_regression +from sparse_GP import sparse_GP from sparse_GP_regression import sparse_GP_regression from GPLVM import GPLVM from warped_GP import warpedGP -from GP_EP import GP_EP -from generalized_FITC import generalized_FITC from sparse_GPLVM import sparse_GPLVM -from uncollapsed_sparse_GP import uncollapsed_sparse_GP +#from uncollapsed_sparse_GP import uncollapsed_sparse_GP from BGPLVM import Bayesian_GPLVM diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py deleted file mode 100644 index a5ed8d0a..00000000 --- a/GPy/models/generalized_FITC.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -from scipy import stats, linalg -from .. import kern -from ..core import model -from ..util.linalg import pdinv,mdot -from ..util.plot import gpplot -from ..inference.Expectation_Propagation import FITC -from ..inference.likelihoods import likelihood,probit - -class generalized_FITC(model): - def __init__(self,X,likelihood,kernel=None,inducing=10,epsilon_ep=1e-3,powerep=[1.,1.]): - """ - Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. - - :param X: input observations - :param likelihood: Output's likelihood (likelihood class) - :param kernel: a GPy kernel - :param inducing: Either an array specifying the inducing points location or a scalar defining their number. 
- :param epsilon_ep: EP convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - """ - assert isinstance(kernel,kern.kern) - self.likelihood = likelihood - self.Y = self.likelihood.Y - self.kernel = kernel - self.X = X - self.N, self.D = self.X.shape - assert self.Y.shape[0] == self.N - if type(inducing) == int: - self.M = inducing - self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1) - elif type(inducing) == np.ndarray: - self.Z = inducing - self.M = self.Z.shape[0] - self.eta,self.delta = powerep - self.epsilon_ep = epsilon_ep - self.jitter = 1e-12 - model.__init__(self) - - def _set_params(self,p): - self.kernel._set_params_transformed(p[0:-self.Z.size]) - self.Z = p[-self.Z.size:].reshape(self.M,self.D) - - def _get_params(self): - return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()]) - - def _get_param_names(self): - return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)] - - def approximate_likelihood(self): - self.Kmm = self.kernel.K(self.Z) - self.Knm = self.kernel.K(self.X,self.Z) - self.Knn_diag = self.kernel.Kdiag(self.X) - self.ep_approx = FITC(self.Kmm,self.likelihood,self.Knm.T,self.Knn_diag,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta]) - self.ep_approx.fit_EP() - - def posterior_param(self): - self.Knn_diag = self.kernel.Kdiag(self.X) - self.Kmm = self.kernel.K(self.Z) - self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm) - self.Knm = self.kernel.K(self.X,self.Z) - self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T) - self.Qnn = np.dot(self.Knm,self.KmmiKmn) - self.Diag0 = self.Knn_diag - np.diag(self.Qnn) - self.R0 = np.linalg.cholesky(self.Kmmi).T - - self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0) - self.KmnTaut = self.Knm.T*self.Taut[None,:] - self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm) - self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm) - self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde - self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut) - self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet - - self.Diag = self.Diag0/(1.+ self.Diag0 * self.ep_approx.tau_tilde) - self.P = (self.Diag / self.Diag0)[:,None] * self.Knm - self.RPT0 = np.dot(self.R0,self.Knm.T) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(self.L,self.R0,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) - self.w = self.Diag * self.ep_approx.v_tilde - self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.ep_approx.v_tilde)) - self.mu = self.w + np.dot(self.P,self.gamma) - self.mu_tilde = (self.ep_approx.v_tilde/self.ep_approx.tau_tilde)[:,None] - - def log_likelihood(self): - self.posterior_param() - self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T) - A = -self.hld - B = -.5*np.sum(self.Qi*self.YYT) - C = sum(np.log(self.ep_approx.Z_hat)) - D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_)) - E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde)) - 
return A + B + C + D + E - - def _log_likelihood_gradients(self): - dKmm_dtheta = self.kernel.dK_dtheta(self.Z) - dKnn_dtheta = self.kernel.dK_dtheta(self.X) - dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X) - dKmm_dZ = -self.kernel.dK_dX(self.Z) - dKnm_dZ = -self.kernel.dK_dX(self.X,self.Z) - tmp = [np.dot(dKmn_dtheta_i,self.KmmiKmn) for dKmn_dtheta_i in dKmn_dtheta.T] - dQnn_dtheta = [tmp_i + tmp_i.T - np.dot(np.dot(self.KmmiKmn.T,dKmm_dtheta_i),self.KmmiKmn) for tmp_i,dKmm_dtheta_i in zip(tmp,dKmm_dtheta.T)] - dDiag0_dtheta = [np.diag(dKnn_dtheta_i) - np.diag(dQnn_dtheta_i) for dKnn_dtheta_i,dQnn_dtheta_i in zip(dKnn_dtheta.T,dQnn_dtheta)] - dQ_dtheta = [np.diag(dDiag0_dtheta_i) + dQnn_dtheta_i for dDiag0_dtheta_i,dQnn_dtheta_i in zip(dDiag0_dtheta,dQnn_dtheta)] - dW_dtheta = [dKmm_dtheta_i + 2*np.dot(self.KmnTaut,dKmn_dtheta_i) - np.dot(self.KmnTaut*dDiag0_dtheta_i,self.KmnTaut.T) for dKmm_dtheta_i,dDiag0_dtheta_i,dKmn_dtheta_i in zip(dKmm_dtheta.T,dDiag0_dtheta,dKmn_dtheta.T)] - - QiY = np.dot(self.Qi, self.mu_tilde) - QiYYQi = np.outer(QiY,QiY) - WiKmnTaut = np.dot(self.Woodbury_inv,self.KmnTaut) - K_Y = np.dot(self.KmmiKmn,QiY) - # gradient - theta - Atheta = [-0.5*np.dot(self.Taut,dDiag0_dtheta_i) + 0.5*np.sum(self.Kmmi*dKmm_dtheta_i) - 0.5*np.sum(self.Woodbury_inv*dW_dtheta_i) for dDiag0_dtheta_i,dKmm_dtheta_i,dW_dtheta_i in zip(dDiag0_dtheta,dKmm_dtheta.T,dW_dtheta)] - Btheta = np.array([0.5*np.sum(QiYYQi*dQ_dtheta_i) for dQ_dtheta_i in dQ_dtheta]) - dL_dtheta = Atheta + Btheta - # gradient - Z - # Az - dQnn_dZ_diag_a2 = (np.array([d[:,:,None]*self.KmmiKmn[:,:,None] for d in dKnm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0) - dQnn_dZ_diag_b2 = (np.array([(self.KmmiKmn*np.sum(d[:,:,None]*self.KmmiKmn,-2))[:,:,None] for d in dKmm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0) - dQnn_dZ_diag = dQnn_dZ_diag_a2 - dQnn_dZ_diag_b2 - d_hld_Diag1_dZ = -np.sum(np.dot(self.KmmiKmn*self.Taut,self.KmmiKmn.T)[:,:,None]*dKmm_dZ,-2) + np.sum((self.KmmiKmn*self.Taut)[:,:,None]*dKnm_dZ,-2) - d_hld_Kmm_dZ = np.sum(self.Kmmi[:,:,None]*dKmm_dZ,-2) - d_hld_W_dZ1 = np.sum(WiKmnTaut[:,:,None]*dKnm_dZ,-2) - d_hld_W_dZ3 = np.sum(self.Woodbury_inv[:,:,None]*dKmm_dZ,-2) - d_hld_W_dZ2 = np.array([np.sum(np.sum(WiKmnTaut.T*d[:,:,None]*self.KmnTaut.T,-2),-1) for d in dQnn_dZ_diag.transpose(2,0,1)]).T - Az = d_hld_Diag1_dZ + d_hld_Kmm_dZ - d_hld_W_dZ1 - d_hld_W_dZ2 - d_hld_W_dZ3 - # Bz - Bz2 = np.sum(np.dot(K_Y,QiY.T)[:,:,None]*dKnm_dZ,-2) - Bz3 = - np.sum(np.dot(K_Y,K_Y.T)[:,:,None]*dKmm_dZ,-2) - Bz1 = -np.array([np.sum((QiY**2)*d[:,:,None],-2) for d in dQnn_dZ_diag.transpose(2,0,1)]).reshape(self.D,self.M).T - Bz = Bz1 + Bz2 + Bz3 - dL_dZ = (Az + Bz).flatten() - return np.hstack([dL_dtheta, dL_dZ]) - - def predict(self,X): - """ - Make a prediction for the vsGP model - - Arguments - --------- - X : Input prediction data - Nx1 numpy array (floats) - """ - #TODO: check output dimensions - K_x = self.kernel.K(self.Z,X) - Kxx = self.kernel.K(X) - #K_x = self.kernM.cross.K(X) - # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) - - # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T - # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T - # = I - V.T * V - U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) - V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.M) - np.dot(V.T,V) - mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) - #self.C = C - #self.RPT0 = np.dot(self.R0,self.Knm.T) 
P0.T - #self.mu_u = mu_u - #self.U = U - # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) - mu_H = np.dot(mu_u,self.mu) - self.mu_H = mu_H - Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) - # q(f_star|y) = N(f_star|mu_star,sigma2_star) - KR0T = np.dot(K_x.T,self.R0.T) - mu_star = np.dot(KR0T,mu_H) - sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) - vdiag = np.diag(sigma2_star) - # q(y_star|y) = non-gaussian posterior probability of class membership - p = self.likelihood.predictive_mean(mu_star,vdiag) - return mu_star,vdiag,p - - def plot(self): - """ - Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals. - """ - if self.X.shape[1]==1: - pb.figure() - xmin,xmax = np.r_[self.X,self.Z].min(),np.r_[self.X,self.Z].max() - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - Xnew = np.linspace(xmin,xmax,100)[:,None] - mu_f, var_f, mu_phi = self.predict(Xnew) - self.mu_inducing,self.var_diag_inducing,self.phi_inducing = self.predict(self.Z) - pb.subplot(211) - self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.Z,Mean_u=self.mu_inducing,Var_u=self.var_diag_inducing) - pb.subplot(212) - self.likelihood.plot1Db(self.X,Xnew,mu_phi,self.Z) - elif self.X.shape[1]==2: - pb.figure() - x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0) - x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0) - x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min) - x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x1max-x1min) - axis1 = np.linspace(x1min,x1max,50) - axis2 = np.linspace(x2min,x2max,50) - XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)] - Xnew = np.c_[XX1.flatten(),XX2.flatten()] - f,v,p = self.predict(Xnew) - self.likelihood.plot2D(self.X,Xnew,p,self.Z) - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" - - def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense - """ - Fits sparse_EP and optimizes the hyperparametes iteratively until convergence is achieved. - """ - self.epsilon_em = epsilon - log_likelihood_change = self.epsilon_em + 1. 
- self.parameters_path = [self.kernel._get_params()] - self.approximate_likelihood() - self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]] - self.inducing_inputs_path = [self.Z] - self.log_likelihood_path = [self.log_likelihood()] - iteration = 0 - while log_likelihood_change > self.epsilon_em: - print 'EM iteration', iteration - self.optimize(max_f_eval = max_f_eval) - log_likelihood_new = self.log_likelihood() - log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1] - if log_likelihood_change < 0: - print 'log_likelihood decrement' - self.kernel._set_params_transformed(self.parameters_path[-1]) - self.kernM = self.kernel.copy() - slef.kernM.expand_X(self.iducing_inputs_path[-1]) - self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em) - - else: - self.approximate_likelihood() - self.log_likelihood_path.append(self.log_likelihood()) - self.parameters_path.append(self.kernel._get_params()) - self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde]) - self.inducing_inputs_path.append(self.Z) - iteration += 1 diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py new file mode 100644 index 00000000..3239d462 --- /dev/null +++ b/GPy/models/sparse_GP.py @@ -0,0 +1,217 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import pylab as pb +from ..util.linalg import mdot, jitchol, chol_inv, pdinv +from ..util.plot import gpplot +from .. import kern +from GP import GP + +#Still TODO: +# make use of slices properly (kernel can now do this) +# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array) + +class sparse_GP(GP): + """ + Variational sparse GP model + + :param X: inputs + :type X: np.ndarray (N x Q) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP) + :param kernel : the kernel/covariance function. See link kernels + :type kernel: a GPy kernel + :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) + :type X_uncertainty: np.ndarray (N x Q) | None + :param Z: inducing inputs (optional, see note) + :type Z: np.ndarray (M x Q) | None + :param Zslices: slices for the inducing inputs (see slicing TODO: link) + :param M : Number of inducing points (optional, default 10. 
Ignored if Z is not None)
+    :type M: int
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales); Y is normalized by the likelihood
+    :type normalize_X: bool
+    """
+
+    def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
+        self.scale_factor = 1.0 # a scaling factor to help keep the algorithm stable
+
+        self.Z = Z
+        self.Zslices = Zslices
+        self.Xslices = Xslices
+        self.M = Z.shape[0]
+        self.likelihood = likelihood
+
+        if X_uncertainty is None:
+            self.has_uncertain_inputs=False
+        else:
+            assert X_uncertainty.shape==X.shape
+            self.has_uncertain_inputs=True
+            self.X_uncertainty = X_uncertainty
+
+        GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
+
+        #normalise X uncertainty also
+        if self.has_uncertain_inputs:
+            self.X_uncertainty /= np.square(self._Xstd)
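+
+    # Hedged construction sketch (comment only; assumes GPy.likelihoods and
+    # GPy.kern are importable under these names):
+    #
+    #   Z = np.random.permutation(X.copy())[:10]
+    #   m = sparse_GP(X, likelihoods.Gaussian(Y), kern.rbf(X.shape[1]), Z=Z)
+    #   m.optimize()
+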
+    def _computations(self):
+        # TODO find routine to multiply triangular matrices
+        #TODO: slices for psi statistics (easy enough)
+
+        sf = self.scale_factor
+        sf2 = sf**2
+
+        # kernel computations, using BGPLVM notation
+        self.Kmm = self.kern.K(self.Z)
+        if self.has_uncertain_inputs:
+            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
+            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
+            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
+            if self.likelihood.is_heteroscedastic:
+                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
+                #TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
+            else:
+                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
+        else:
+            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
+            self.psi1 = self.kern.K(self.Z,self.X)
+            if self.likelihood.is_heteroscedastic:
+                tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf)
+            else:
+                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
+            self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+            self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability
+
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
+
+        self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
+        self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
+        self.B = np.eye(self.M)/sf2 + self.A
+
+        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
+
+        self.psi1V = np.dot(self.psi1, self.V)
+        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
+        self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
+        self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
+
+        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic case
+        self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
+        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        if self.likelihood.is_heteroscedastic:
+            self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+        else:
+            self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi[None,:,:] # dB
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C[None,:,:] # dC
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E[None,:,:] # dD
+
+        # Compute dL_dKmm
+        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
+        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
+
+        #the partial derivative vector for the likelihood
+        if self.likelihood.Nparams ==0:
+            #save computation here.
+            self.partial_for_likelihood = None
+        elif self.likelihood.is_heteroscedastic:
+            raise NotImplementedError, "heteroscedastic derivatives not implemented"
+            #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
+            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
+            #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
+            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
+        else:
+            #likelihood is not heteroscedastic
+            beta = self.likelihood.precision
+            dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
+            dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
+            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
+            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
+            self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
+
+    def _set_params(self, p):
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
+        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
+        self._computations()
+
+    def _get_params(self):
+        return np.hstack([self.Z.flatten(),GP._get_params(self)])
+
+    def _get_param_names(self):
+        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)
+
+    def log_likelihood(self):
+        """ Compute the (lower bound on the) log marginal likelihood """
+        sf2 = self.scale_factor**2
+        if self.likelihood.is_heteroscedastic:
+            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
+        else:
+            A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
+        B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
+        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
+        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
+        return A+B+C+D
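+
+    # Hedged note: for a homoscedastic Gaussian likelihood, A+B+C+D above
+    # assemble a Titsias-style collapsed bound,
+    #   log N(Y | 0, Qnn + beta^{-1}*I) - 0.5*beta*D*tr(Knn - Qnn),
+    # with Qnn = Knm Kmm^{-1} Kmn; the scale_factor contributions cancel
+    # analytically and are only kept for numerical stability.
+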
+    def _log_likelihood_gradients(self):
+        return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
+
+    def dL_dtheta(self):
+        """
+        Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
+        """
+        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
+        if self.has_uncertain_inputs:
+            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
+            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
+        else:
+            #re-cast computations in psi2 back to psi1:
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
+            dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
+            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
+
+        return dL_dtheta
+
+    def dL_dZ(self):
+        """
+        The derivative of the bound wrt the inducing inputs Z
+        """
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) #factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
+        if self.has_uncertain_inputs:
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
+        else:
+            #re-cast computations in psi2 back to psi1:
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
+            dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
+        return dL_dZ
+
+    def _raw_predict(self, Xnew, slices, full_cov=False):
+        """Internal helper function for making predictions, does not account for normalisation"""
+
+        Kx = self.kern.K(self.Z, Xnew)
+        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
+        if full_cov:
+            Kxx = self.kern.K(Xnew)
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
+        else:
+            Kxx = self.kern.Kdiag(Xnew)
+            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
+
+        return mu,var[:,None]
+
+    def plot(self, *args, **kwargs):
+        """
+        Plot the fitted model: just call the GP plot function and then add inducing inputs
+        """
+        GP.plot(self,*args,**kwargs)
+        if self.Q==1:
+            if self.has_uncertain_inputs:
+                pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
+        if self.Q==2:
+            pb.plot(self.Z[:,0],self.Z[:,1],'wo')
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 07ce4d97..178c8023 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -1,205 +1,44 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
+
 import numpy as np
-import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
-from ..util.plot import gpplot
+from sparse_GP import sparse_GP
+from .. import likelihoods
 from .. import kern
-from ..inference.likelihoods import likelihood
-from GP_regression import GP_regression
 
-#Still TODO:
-# make use of slices properly (kernel can now do this)
-# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
-
-class sparse_GP_regression(GP_regression):
+class sparse_GP_regression(sparse_GP):
     """
-    Variational sparse GP model (Regression)
+    Variational sparse Gaussian Process model for regression
+
+    This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
+
+    :param X: input observations
+    :param Y: observed values
+    :param kernel: a GPy kernel, defaults to rbf+white
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
+    :rtype: model object
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
 
-    :param X: inputs
-    :type X: np.ndarray (N x Q)
-    :param Y: observed data
-    :type Y: np.ndarray of observations (N x D)
-    :param kernel : the kernel/covariance function. See link kernels
-    :type kernel: a GPy kernel
-    :param Z: inducing inputs (optional, see note)
-    :type Z: np.ndarray (M x Q) | None
-    :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
-    :type X_uncertainty: np.ndarray (N x Q) | None
-    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
-    :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
-    :type M: int
-    :param beta: noise precision. 
TODO> ignore beta if doing EP - :type beta: float - :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) - :type normalize_(X|Y): bool """ - def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): - self.scale_factor = 1000.0 - self.beta = beta + def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10): + #kern defaults to rbf + if kernel is None: + kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) + + #Z defaults to a subset of the data if Z is None: - self.Z = np.random.permutation(X.copy())[:M] - self.M = M + Z = np.random.permutation(X.copy())[:M] else: assert Z.shape[1]==X.shape[1] - self.Z = Z - self.M = Z.shape[0] - if X_uncertainty is None: - self.has_uncertain_inputs=False - else: - assert X_uncertainty.shape==X.shape - self.has_uncertain_inputs=True - self.X_uncertainty = X_uncertainty - GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y) - self.trYYT = np.sum(np.square(self.Y)) + #likelihood defaults to Gaussian + likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) - #normalise X uncertainty also - if self.has_uncertain_inputs: - self.X_uncertainty /= np.square(self._Xstd) - - def _computations(self): - # TODO find routine to multiply triangular matrices - #TODO: slices for psi statistics (easy enough) - - # kernel computations, using BGPLVM notation - self.Kmm = self.kern.K(self.Z) - if self.has_uncertain_inputs: - self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum() - self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T - self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) - self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0) - else: - self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum() - self.psi1 = self.kern.K(self.Z,self.X) - #self.psi2 = np.dot(self.psi1,self.psi1.T) - #self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] - tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta)) - self.psi2_beta_scaled = np.dot(tmp,tmp.T) - - sf = self.scale_factor - sf2 = sf**2 - - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3) - - self.V = (self.beta/self.scale_factor)*self.Y - self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T) - self.B = np.eye(self.M)/sf2 + self.A - - self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) - - self.psi1V = np.dot(self.psi1, self.V) - self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) - self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) - - # Compute dL_dpsi - self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) - self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T - self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB - self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC - self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD - - # Compute dL_dKmm - self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB - self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC - self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD - - - def _set_params(self, p): - self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) - self.beta = p[self.M*self.Q] - 
self.kern._set_params(p[self.Z.size + 1:]) - self._computations() - - def _get_params(self): - return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()]) - - def _get_param_names(self): - return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed() - - - def log_likelihood(self): - """ Compute the (lower bound on the) log marginal likelihood """ - sf2 = self.scale_factor**2 - A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT - B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2) - C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) - D = +0.5*np.sum(self.psi1VVpsi1 * self.C) - return A+B+C+D - - def _log_likelihood_gradients(self): - return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()]) - - def dL_dbeta(self): - """ - Compute the gradient of the log likelihood wrt beta. - """ - #TODO: suport heteroscedatic noise - sf2 = self.scale_factor**2 - dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT - dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2) - dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta - dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta - - return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta) - - def dL_dtheta(self): - """ - Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel - """ - dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) - if self.has_uncertain_inputs: - dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty) - dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) - dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape - else: - #re-cast computations in psi2 back to psi1: - dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) - dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X) - dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) - - return dL_dtheta - - def dL_dZ(self): - """ - The derivative of the bound wrt the inducing inputs Z - """ - dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ - if self.has_uncertain_inputs: - dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty) - dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes' - else: - #re-cast computations in psi2 back to psi1: - dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) - dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) - return dL_dZ - - def _raw_predict(self, Xnew, slices, full_cov=False): - """Internal helper function for making predictions, does not account for normalisation""" - - Kx = self.kern.K(self.Z, Xnew) - mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) - - if full_cov: - Kxx = self.kern.K(Xnew) - var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case. - else: - Kxx = self.kern.Kdiag(Xnew) - var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case. 
- - return mu,var - - def plot(self, *args, **kwargs): - """ - Plot the fitted model: just call the GP_regression plot function and then add inducing inputs - """ - GP_regression.plot(self,*args,**kwargs) - if self.Q==1: - pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12) - if self.has_uncertain_inputs: - pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten())) - if self.Q==2: - pb.plot(self.Z[:,0],self.Z[:,1],'wo') + sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
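To give a feel for the slimmed-down wrapper above, a usage sketch on toy data; treat it as illustrative only (the optimize call and the GPy.models export are assumed from the surrounding library, not shown in this hunk):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (50, 1))
Y = np.sin(X) + 0.05 * np.random.randn(50, 1)

# kernel, likelihood and inducing inputs all take the defaults set above:
# rbf + white, a Gaussian likelihood on Y, and M random rows of X as Z
m = GPy.models.sparse_GP_regression(X, Y, M=10)
m.ensure_default_constraints()
m.optimize()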
diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py index 3bb72e60..0fccfc71 100644 --- a/GPy/models/uncollapsed_sparse_GP.py +++ b/GPy/models/uncollapsed_sparse_GP.py @@ -6,7 +6,7 @@ import pylab as pb from ..util.linalg import mdot, jitchol, chol_inv, pdinv from ..util.plot import gpplot from .. import kern -from ..inference.likelihoods import likelihood +from ..likelihoods import likelihood from sparse_GP_regression import sparse_GP_regression class uncollapsed_sparse_GP(sparse_GP_regression): @@ -136,8 +136,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression): #dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean) dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0] - #dL_dSim = - #dL_dmhSi = + #dL_dSim = + #dL_dmhSi = return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index a302b25f..61fb15bb 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -154,17 +154,17 @@ class GradientTests(unittest.TestCase): m.constrain_positive('(linear|bias|white)') self.assertTrue(m.checkgrad()) - def test_GP_EP(self): - return # Disabled TODO + def test_GP_EP_probit(self): N = 20 - X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] - k = GPy.kern.rbf(1) + GPy.kern.white(1) - Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] - likelihood = GPy.inference.likelihoods.probit(Y) - m = GPy.models.GP_EP(X,likelihood,k) - m.constrain_positive('(var|len)') - m.approximate_likelihood() - self.assertTrue(m.checkgrad()) + X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] + Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None] + kernel = GPy.kern.rbf(1) + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(Y, distribution) + m = GPy.models.GP(X, likelihood, kernel) + m.ensure_default_constraints() + m.update_likelihood_approximation() + self.assertTrue(m.checkgrad()) @unittest.skip("FITC will be broken for a while") def test_generalized_FITC(self): diff --git a/GPy/util/plot.py b/GPy/util/plot.py index 8c06633e..8e71764d 100644 --- a/GPy/util/plot.py +++ b/GPy/util/plot.py @@ -6,30 +6,26 @@ import Tango import pylab as pb import numpy as np -def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): +def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): if axes is None: axes = pb.gca() mu = mu.flatten() x = x.flatten() + lower = lower.flatten() + upper = upper.flatten() #here's the mean axes.plot(x,mu,color=edgecol,linewidth=2) - #ensure variance is a vector - if len(var.shape)>1: - err = 2*np.sqrt(np.diag(var)) - else: - err = 2*np.sqrt(var) - - #here's the 2*std box + #here's the box kwargs['linewidth']=0.5 if not 'alpha' in kwargs.keys(): kwargs['alpha'] = 0.3 - axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs) + axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs) #this is the edge: - axes.plot(x,mu+err,color=edgecol,linewidth=0.2) - axes.plot(x,mu-err,color=edgecol,linewidth=0.2) + axes.plot(x,upper,color=edgecol,linewidth=0.2) + axes.plot(x,lower,color=edgecol,linewidth=0.2) def removeRightTicks(ax=None): ax = ax or pb.gca() @@ -74,4 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None): else: removeUpperTicks() +def x_frame1D(X,plot_limits=None,resolution=None): + """ + Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits + """ + assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs" + if plot_limits is None: + xmin,xmax = X.min(0),X.max(0) + xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) + elif len(plot_limits)==2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] + return Xnew, xmin, xmax + +def x_frame2D(X,plot_limits=None,resolution=None): + """ + Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits + """ + assert X.shape[1] ==2, "x_frame2D is defined for two-dimensional inputs" + if plot_limits is None: + xmin,xmax = X.min(0),X.max(0) + xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) + elif len(plot_limits)==2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + + resolution = resolution or 50 + xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] + Xnew = np.vstack((xx.flatten(),yy.flatten())).T + return Xnew, xx, yy, xmin, xmax diff --git a/doc/Figures/kern-def.png b/doc/Figures/kern-def.png new file mode 100644 index 00000000..bad43b09 Binary files /dev/null and b/doc/Figures/kern-def.png differ diff --git a/doc/Figures/tuto_GP_regression_m1.png b/doc/Figures/tuto_GP_regression_m1.png index c78d8a04..e1a11fb1 100644 Binary files a/doc/Figures/tuto_GP_regression_m1.png and b/doc/Figures/tuto_GP_regression_m1.png differ diff --git a/doc/Figures/tuto_GP_regression_m2.png b/doc/Figures/tuto_GP_regression_m2.png index b976a69c..7e54e919 100644 Binary files a/doc/Figures/tuto_GP_regression_m2.png and b/doc/Figures/tuto_GP_regression_m2.png differ diff --git a/doc/Figures/tuto_GP_regression_m3.png b/doc/Figures/tuto_GP_regression_m3.png index a675a463..5b2b227c 100644 Binary files a/doc/Figures/tuto_GP_regression_m3.png and b/doc/Figures/tuto_GP_regression_m3.png differ diff --git a/doc/Figures/tuto_kern_overview_add_orth.png b/doc/Figures/tuto_kern_overview_add_orth.png new file mode 100644 index 00000000..0d4f1c4e Binary files /dev/null and b/doc/Figures/tuto_kern_overview_add_orth.png differ diff --git a/doc/Figures/tuto_kern_overview_allkern.png b/doc/Figures/tuto_kern_overview_allkern.png new file mode 100644 index 00000000..f3406b07 Binary files /dev/null and b/doc/Figures/tuto_kern_overview_allkern.png differ diff --git a/doc/Figures/tuto_kern_overview_basicdef.png b/doc/Figures/tuto_kern_overview_basicdef.png new file mode 100644 index 00000000..bad43b09 Binary files /dev/null and b/doc/Figures/tuto_kern_overview_basicdef.png differ diff --git a/doc/Figures/tuto_kern_overview_mANOVA.png b/doc/Figures/tuto_kern_overview_mANOVA.png new file mode 100644 index 00000000..db49e3bd Binary files /dev/null and b/doc/Figures/tuto_kern_overview_mANOVA.png differ
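Note that gpplot now takes explicit lower/upper envelopes rather than a variance, so the 2*np.sqrt(var) arithmetic the old version did internally moves to the caller. A sketch of the intended call pattern, paired with the new x_frame1D helper (the mean and variance here are stand-ins for real predictions):

import numpy as np
from GPy.util.plot import gpplot, x_frame1D

X = np.random.rand(30, 1)
Xnew, xmin, xmax = x_frame1D(X)      # padded 200-point grid over the data range
mu = np.sin(Xnew).flatten()          # stand-in predictive mean
var = 0.01 * np.ones_like(mu)        # stand-in predictive variance
# build the ~95% envelope that gpplot used to derive from var itself
gpplot(Xnew, mu, mu - 2 * np.sqrt(var), mu + 2 * np.sqrt(var))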
diff --git a/doc/Figures/tuto_kern_overview_mANOVAdec.png b/doc/Figures/tuto_kern_overview_mANOVAdec.png new file mode 100644 index 00000000..ef154263 Binary files /dev/null and b/doc/Figures/tuto_kern_overview_mANOVAdec.png differ diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst index 244e3012..59ffd43d 100644 --- a/doc/GPy.examples.rst +++ b/doc/GPy.examples.rst @@ -33,6 +33,14 @@ examples Package :undoc-members: :show-inheritance: +:mod:`poisson` Module +--------------------- + +.. automodule:: GPy.examples.poisson + :members: + :undoc-members: + :show-inheritance: + :mod:`regression` Module ------------------------ @@ -57,6 +65,14 @@ examples Package :undoc-members: :show-inheritance: +:mod:`sparse_ep_fix` Module +--------------------------- + +.. automodule:: GPy.examples.sparse_ep_fix + :members: + :undoc-members: + :show-inheritance: + :mod:`uncertain_input_GP_regression_demo` Module ------------------------------------------------ diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst index 6f4ab691..357e70c7 100644 --- a/doc/GPy.inference.rst +++ b/doc/GPy.inference.rst @@ -1,22 +1,6 @@ inference Package ================= -:mod:`Expectation_Propagation` Module ------------------------------------- - -.. automodule:: GPy.inference.Expectation_Propagation - :members: - :undoc-members: - :show-inheritance: - -:mod:`likelihoods` Module ------------------------- - -.. automodule:: GPy.inference.likelihoods - :members: - :undoc-members: - :show-inheritance: - :mod:`optimization` Module -------------------------- diff --git a/doc/GPy.likelihoods.rst b/doc/GPy.likelihoods.rst new file mode 100644 index 00000000..34672d11 --- /dev/null +++ b/doc/GPy.likelihoods.rst @@ -0,0 +1,43 @@ +likelihoods Package +=================== + +:mod:`likelihoods` Package +-------------------------- + +.. automodule:: GPy.likelihoods + :members: + :undoc-members: + :show-inheritance: + +:mod:`EP` Module +---------------- + +.. automodule:: GPy.likelihoods.EP + :members: + :undoc-members: + :show-inheritance: + +:mod:`Gaussian` Module +---------------------- + +.. automodule:: GPy.likelihoods.Gaussian + :members: + :undoc-members: + :show-inheritance: + +:mod:`likelihood` Module ------------------------ + +.. automodule:: GPy.likelihoods.likelihood + :members: + :undoc-members: + :show-inheritance: + +:mod:`likelihood_functions` Module +---------------------------------- + +.. automodule:: GPy.likelihoods.likelihood_functions + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst index b0a7a298..8837ac4e 100644 --- a/doc/GPy.models.rst +++ b/doc/GPy.models.rst @@ -17,18 +17,18 @@ models Package :undoc-members: :show-inheritance: -:mod:`GPLVM` Module ------------------- +:mod:`GP` Module +---------------- -.. automodule:: GPy.models.GPLVM +.. automodule:: GPy.models.GP :members: :undoc-members: :show-inheritance: -:mod:`GP_EP` Module ------------------- -.. automodule:: GPy.models.GP_EP +:mod:`GPLVM` Module ------------------- +.. automodule:: GPy.models.GPLVM :members: :undoc-members: :show-inheritance: @@ -41,10 +41,10 @@ models Package :undoc-members: :show-inheritance: -:mod:`generalized_FITC` Module ------------------------------- +:mod:`sparse_GP` Module +----------------------- -.. 
automodule:: GPy.models.sparse_GP :members: :undoc-members: :show-inheritance: diff --git a/doc/GPy.rst b/doc/GPy.rst index d3c1e843..3fd4bcfd 100644 --- a/doc/GPy.rst +++ b/doc/GPy.rst @@ -18,6 +18,7 @@ Subpackages GPy.examples GPy.inference GPy.kern + GPy.likelihoods GPy.models GPy.util diff --git a/doc/Makefile b/doc/Makefile index faa4ed65..95018f47 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -41,6 +41,7 @@ help: clean: -rm -rf $(BUILDDIR)/* + html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo diff --git a/doc/conf.py b/doc/conf.py index 2a02e18d..8a05f386 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,101 +11,49 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys +import os -#Mocking uninstalled modules: https://read-the-docs.readthedocs.org/en/latest/faq.html - -#class Mock(object): - #__all__ = [] - #def __init__(self, *args, **kwargs): - #for key, value in kwargs.iteritems(): - #setattr(self, key, value) - - #def __call__(self, *args, **kwargs): - #return Mock() - - #__add__ = __mul__ = __getitem__ = __setitem__ = \ -#__delitem__ = __sub__ = __floordiv__ = __mod__ = __divmod__ = \ -#__pow__ = __lshift__ = __rshift__ = __and__ = __xor__ = __or__ = \ -#__rmul__ = __rsub__ = __rfloordiv__ = __rmod__ = __rdivmod__ = \ -#__rpow__ = __rlshift__ = __rrshift__ = __rand__ = __rxor__ = __ror__ = \ -#__imul__ = __isub__ = __ifloordiv__ = __imod__ = __idivmod__ = \ -#__ipow__ = __ilshift__ = __irshift__ = __iand__ = __ixor__ = __ior__ = \ -#__neg__ = __pos__ = __abs__ = __invert__ = __call__ - - #def __getattr__(self, name): - #if name in ('__file__', '__path__'): - #return '/dev/null' - #if name == 'sqrt': - #return math.sqrt - #elif name[0] != '_' and name[0] == name[0].upper(): - #return type(name, (), {}) - #else: - #return Mock(**vars(self)) - - #def __lt__(self, *args, **kwargs): - #return True - - #__nonzero__ = __le__ = __eq__ = __ne__ = __gt__ = __ge__ = __contains__ = \ -#__lt__ - - - #def __repr__(self): - ## Use _mock_repr to fake the __repr__ call - #res = getattr(self, "_mock_repr") - #return res if isinstance(res, str) else "Mock" - - #def __hash__(self): - #return 1 - - #__len__ = __int__ = __long__ = __index__ = __hash__ - - #def __oct__(self): - #return '01' - - #def __hex__(self): - #return '0x1' - - #def __float__(self): - #return 0.1 - - #def __complex__(self): - #return 1j - - -#MOCK_MODULES = [ - #'pylab', 'scipy', 'matplotlib', 'matplotlib.pyplot', 'pyfits', - #'scipy.constants.constants', 'matplotlib.cm', - #'matplotlib.image', 'matplotlib.colors', 'sunpy.cm', - #'pandas', 'pandas.io', 'pandas.io.parsers', - #'suds', 'matplotlib.ticker', 'matplotlib.colorbar', - #'matplotlib.dates', 'scipy.optimize', 'scipy.ndimage', - #'matplotlib.figure', 'scipy.ndimage.interpolation', 'bs4'] -#for mod_name in MOCK_MODULES: - #sys.modules[mod_name] = Mock() - - -#sys.modules['numpy'] = Mock(pi=math.pi, G=6.67364e-11, - #ndarray=type('ndarray', (), {}), - #dtype=lambda _: Mock(_mock_repr='np.dtype(\'float32\')')) -#sys.modules['scipy.constants'] = Mock(pi=math.pi, G=6.67364e-11) +print "python exec:", sys.executable +print "sys.path:", sys.path +try: + import numpy + print "numpy: %s, %s" % (numpy.__version__, numpy.__file__) +except ImportError: + print "no numpy" +try: + import matplotlib + print "matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__) +except ImportError: + print "no matplotlib" +try: + import ipython + print "ipython: %s, %s" % 
(ipython.__version__, ipython.__file__) +except ImportError: + print "no ipython" +try: + import sphinx + print "sphinx: %s, %s" % (sphinx.__version__, sphinx.__file__) +except ImportError: + print "no sphinx" +print "sys.path:", sys.path # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('..')) +#sys.path.insert(0, os.path.abspath('../GPy')) + +#print "sys.path.after:", sys.path -print "Adding path" # If your extensions are in another directory, add it here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. -#sys.path.append(os.path.abspath('./sphinxext')) +sys.path.append(os.path.abspath('sphinxext')) # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) #sys.path.insert(0, os.path.abspath('./sphinxext')) # -- General configuration ----------------------------------------------------- @@ -116,23 +64,24 @@ print "Adding path" # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. print "Importing extensions" - -extensions = [#'ipython_directive', - 'sphinx.ext.autodoc', 'sphinx.ext.viewcode' - #'matplotlib.sphinxext.mathmpl', - #'matplotlib.sphinxext.only_directives', - #'matplotlib.sphinxext.plot_directive', - #'ipython_directive' - ] - #'sphinx.ext.doctest', - #'ipython_console_highlighting', - #'inheritance_diagram', - #'numpydoc'] + +extensions = ['sphinx.ext.autodoc', + #'sphinx.ext.doctest' + 'sphinx.ext.viewcode', + 'sphinx.ext.pngmath', + 'ipython_directive', + 'ipython_console_highlighting' + #'matplotlib.sphinxext.plot_directive' + ] +plot_formats = [('png', 80), ('pdf', 50)] + print "finished importing" ############################################################################## ## ## Mock out imports with C dependencies because ReadTheDocs can't build them. +############################################################################# + class Mock(object): def __init__(self, *args, **kwargs): pass @@ -151,23 +100,38 @@ class Mock(object): else: return Mock() -#sys.path.append("../GPy") #import mock print "Mocking" -MOCK_MODULES = ['pylab', 'matplotlib', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser']#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ] +MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib'] +#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ] for mod_name in MOCK_MODULES: sys.modules[mod_name] = Mock() # ----------------------- READTHEDOCS ------------------ on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = True if on_rtd: - sys.path.append("../GPy") - os.system("pwd") - os.system("sphinx-apidoc -f -o . 
../GPy") - #os.system("cd ..") - #os.system("cd ./docs") + sys.path.append(os.path.abspath('../GPy')) + + import subprocess + + proc = subprocess.Popen("pwd", stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + print "program output:", out + proc = subprocess.Popen("ls ../", stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + print "program output:", out + proc = subprocess.Popen("sphinx-apidoc -f -o . ../GPy", stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + print "program output:", out + #proc = subprocess.Popen("whereis numpy", stdout=subprocess.PIPE, shell=True) + #(out, err) = proc.communicate() + #print "program output:", out + #proc = subprocess.Popen("whereis matplotlib", stdout=subprocess.PIPE, shell=True) + #(out, err) = proc.communicate() + #print "program output:", out print "Compiled files" @@ -317,21 +281,21 @@ htmlhelp_basename = 'GPydoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'GPy.tex', u'GPy Documentation', - u'Author', 'manual'), + ('index', 'GPy.tex', u'GPy Documentation', + u'Author', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -374,9 +338,9 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'GPy', u'GPy Documentation', - u'Author', 'GPy', 'One line description of project.', - 'Miscellaneous'), + ('index', 'GPy', u'GPy Documentation', + u'Author', 'GPy', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. @@ -431,17 +395,4 @@ epub_copyright = u'2013, Author' # Allow duplicate toc entries. #epub_tocdup = True -############################################################################# -# -# Include constructors in all the docs -# Got this method from: -# http://stackoverflow.com/questions/5599254/how-to-use-sphinxs-autodoc-to-document-a-classs-init-self-method -#def skip(app, what, name, obj, skip, options): - #if name == "__init__": - #return False - #return skip - -#def setup(app): - #app.connect("autodoc-skip-member", skip) - - +autodoc_member_order = "source" diff --git a/doc/doc-requirements.txt b/doc/doc-requirements.txt new file mode 100644 index 00000000..0b5ac59b --- /dev/null +++ b/doc/doc-requirements.txt @@ -0,0 +1,3 @@ +ipython +numpy +scipy diff --git a/doc/index.rst b/doc/index.rst index 28690e99..b62ff6a7 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -8,8 +8,8 @@ Welcome to GPy's documentation! For a quick start, you can have a look at one of the tutorials: * `Basic Gaussian process regression `_ +* `A kernel overview `_ * Advanced GP regression (Forthcoming) -* Kernel manipulation (Forthcoming) * Writting kernels (Forthcoming) You may also be interested by some examples in the GPy/examples folder. 
@@ -28,4 +28,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/mock.py b/doc/mock.py deleted file mode 100644 index cc4aa653..00000000 --- a/doc/mock.py +++ /dev/null @@ -1,2366 +0,0 @@ -# mock.py -# Test tools for mocking and patching. -# Copyright (C) 2007-2012 Michael Foord & the mock team -# E-mail: fuzzyman AT voidspace DOT org DOT uk - -# mock 1.0.1 -# http://www.voidspace.org.uk/python/mock/ - -# Released subject to the BSD License -# Please see http://www.voidspace.org.uk/python/license.shtml - -__all__ = ( - 'Mock', - 'MagicMock', - 'patch', - 'sentinel', - 'DEFAULT', - 'ANY', - 'call', - 'create_autospec', - 'FILTER_DIR', - 'NonCallableMock', - 'NonCallableMagicMock', - 'mock_open', - 'PropertyMock', -) - - -__version__ = '1.0.1' - - -import pprint -import sys - -try: - import inspect -except ImportError: - # for alternative platforms that - # may not have inspect - inspect = None - -try: - from functools import wraps as original_wraps -except ImportError: - # Python 2.4 compatibility - def wraps(original): - def inner(f): - f.__name__ = original.__name__ - f.__doc__ = original.__doc__ - f.__module__ = original.__module__ - wrapped = getattr(original, '__wrapped__', original) - f.__wrapped__ = wrapped - return f - return inner -else: - if sys.version_info[:2] >= (3, 2): - wraps = original_wraps - else: - def wraps(func): - def inner(f): - f = original_wraps(func)(f) - wrapped = getattr(func, '__wrapped__', func) - f.__wrapped__ = wrapped - return f - return inner - -try: - unicode -except NameError: - # Python 3 - basestring = unicode = str - -try: - long -except NameError: - # Python 3 - long = int - -try: - BaseException -except NameError: - # Python 2.4 compatibility - BaseException = Exception - -try: - next -except NameError: - def next(obj): - return obj.next() - - -BaseExceptions = (BaseException,) -if 'java' in sys.platform: - # jython - import java - BaseExceptions = (BaseException, java.lang.Throwable) - -try: - _isidentifier = str.isidentifier -except AttributeError: - # Python 2.X - import keyword - import re - regex = re.compile(r'^[a-z_][a-z0-9_]*$', re.I) - def _isidentifier(string): - if string in keyword.kwlist: - return False - return regex.match(string) - - -inPy3k = sys.version_info[0] == 3 - -# Needed to work around Python 3 bug where use of "super" interferes with -# defining __class__ as a descriptor -_super = super - -self = 'im_self' -builtin = '__builtin__' -if inPy3k: - self = '__self__' - builtin = 'builtins' - -FILTER_DIR = True - - -def _is_instance_mock(obj): - # can't use isinstance on Mock objects because they override __class__ - # The base class for all mocks is NonCallableMock - return issubclass(type(obj), NonCallableMock) - - -def _is_exception(obj): - return ( - isinstance(obj, BaseExceptions) or - isinstance(obj, ClassTypes) and issubclass(obj, BaseExceptions) - ) - - -class _slotted(object): - __slots__ = ['a'] - - -DescriptorTypes = ( - type(_slotted.a), - property, -) - - -def _getsignature(func, skipfirst, instance=False): - if inspect is None: - raise ImportError('inspect module not available') - - if isinstance(func, ClassTypes) and not instance: - try: - func = func.__init__ - except AttributeError: - return - skipfirst = True - elif not isinstance(func, FunctionTypes): - # for classes where instance is True we end up here too - try: - func = func.__call__ - except AttributeError: - return - - if inPy3k: - try: - argspec = inspect.getfullargspec(func) - except TypeError: - # C function / 
method, possibly inherited object().__init__ - return - regargs, varargs, varkw, defaults, kwonly, kwonlydef, ann = argspec - else: - try: - regargs, varargs, varkwargs, defaults = inspect.getargspec(func) - except TypeError: - # C function / method, possibly inherited object().__init__ - return - - # instance methods and classmethods need to lose the self argument - if getattr(func, self, None) is not None: - regargs = regargs[1:] - if skipfirst: - # this condition and the above one are never both True - why? - regargs = regargs[1:] - - if inPy3k: - signature = inspect.formatargspec( - regargs, varargs, varkw, defaults, - kwonly, kwonlydef, ann, formatvalue=lambda value: "") - else: - signature = inspect.formatargspec( - regargs, varargs, varkwargs, defaults, - formatvalue=lambda value: "") - return signature[1:-1], func - - -def _check_signature(func, mock, skipfirst, instance=False): - if not _callable(func): - return - - result = _getsignature(func, skipfirst, instance) - if result is None: - return - signature, func = result - - # can't use self because "self" is common as an argument name - # unfortunately even not in the first place - src = "lambda _mock_self, %s: None" % signature - checksig = eval(src, {}) - _copy_func_details(func, checksig) - type(mock)._mock_check_sig = checksig - - -def _copy_func_details(func, funcopy): - funcopy.__name__ = func.__name__ - funcopy.__doc__ = func.__doc__ - #funcopy.__dict__.update(func.__dict__) - funcopy.__module__ = func.__module__ - if not inPy3k: - funcopy.func_defaults = func.func_defaults - return - funcopy.__defaults__ = func.__defaults__ - funcopy.__kwdefaults__ = func.__kwdefaults__ - - -def _callable(obj): - if isinstance(obj, ClassTypes): - return True - if getattr(obj, '__call__', None) is not None: - return True - return False - - -def _is_list(obj): - # checks for list or tuples - # XXXX badly named! - return type(obj) in (list, tuple) - - -def _instance_callable(obj): - """Given an object, return True if the object is callable. - For classes, return True if instances would be callable.""" - if not isinstance(obj, ClassTypes): - # already an instance - return getattr(obj, '__call__', None) is not None - - klass = obj - # uses __bases__ instead of __mro__ so that we work with old style classes - if klass.__dict__.get('__call__') is not None: - return True - - for base in klass.__bases__: - if _instance_callable(base): - return True - return False - - -def _set_signature(mock, original, instance=False): - # creates a function with signature (*args, **kwargs) that delegates to a - # mock. It still does signature checking by calling a lambda with the same - # signature as the original. - if not _callable(original): - return - - skipfirst = isinstance(original, ClassTypes) - result = _getsignature(original, skipfirst, instance) - if result is None: - # was a C function (e.g. 
object().__init__ ) that can't be mocked - return - - signature, func = result - - src = "lambda %s: None" % signature - checksig = eval(src, {}) - _copy_func_details(func, checksig) - - name = original.__name__ - if not _isidentifier(name): - name = 'funcopy' - context = {'_checksig_': checksig, 'mock': mock} - src = """def %s(*args, **kwargs): - _checksig_(*args, **kwargs) - return mock(*args, **kwargs)""" % name - exec (src, context) - funcopy = context[name] - _setup_func(funcopy, mock) - return funcopy - - -def _setup_func(funcopy, mock): - funcopy.mock = mock - - # can't use isinstance with mocks - if not _is_instance_mock(mock): - return - - def assert_called_with(*args, **kwargs): - return mock.assert_called_with(*args, **kwargs) - def assert_called_once_with(*args, **kwargs): - return mock.assert_called_once_with(*args, **kwargs) - def assert_has_calls(*args, **kwargs): - return mock.assert_has_calls(*args, **kwargs) - def assert_any_call(*args, **kwargs): - return mock.assert_any_call(*args, **kwargs) - def reset_mock(): - funcopy.method_calls = _CallList() - funcopy.mock_calls = _CallList() - mock.reset_mock() - ret = funcopy.return_value - if _is_instance_mock(ret) and not ret is mock: - ret.reset_mock() - - funcopy.called = False - funcopy.call_count = 0 - funcopy.call_args = None - funcopy.call_args_list = _CallList() - funcopy.method_calls = _CallList() - funcopy.mock_calls = _CallList() - - funcopy.return_value = mock.return_value - funcopy.side_effect = mock.side_effect - funcopy._mock_children = mock._mock_children - - funcopy.assert_called_with = assert_called_with - funcopy.assert_called_once_with = assert_called_once_with - funcopy.assert_has_calls = assert_has_calls - funcopy.assert_any_call = assert_any_call - funcopy.reset_mock = reset_mock - - mock._mock_delegate = funcopy - - -def _is_magic(name): - return '__%s__' % name[2:-2] == name - - -class _SentinelObject(object): - "A unique, named, sentinel object." 
- def __init__(self, name): - self.name = name - - def __repr__(self): - return 'sentinel.%s' % self.name - - -class _Sentinel(object): - """Access attributes to return a named object, usable as a sentinel.""" - def __init__(self): - self._sentinels = {} - - def __getattr__(self, name): - if name == '__bases__': - # Without this help(mock) raises an exception - raise AttributeError - return self._sentinels.setdefault(name, _SentinelObject(name)) - - -sentinel = _Sentinel() - -DEFAULT = sentinel.DEFAULT -_missing = sentinel.MISSING -_deleted = sentinel.DELETED - - -class OldStyleClass: - pass -ClassType = type(OldStyleClass) - - -def _copy(value): - if type(value) in (dict, list, tuple, set): - return type(value)(value) - return value - - -ClassTypes = (type,) -if not inPy3k: - ClassTypes = (type, ClassType) - -_allowed_names = set( - [ - 'return_value', '_mock_return_value', 'side_effect', - '_mock_side_effect', '_mock_parent', '_mock_new_parent', - '_mock_name', '_mock_new_name' - ] -) - - -def _delegating_property(name): - _allowed_names.add(name) - _the_name = '_mock_' + name - def _get(self, name=name, _the_name=_the_name): - sig = self._mock_delegate - if sig is None: - return getattr(self, _the_name) - return getattr(sig, name) - def _set(self, value, name=name, _the_name=_the_name): - sig = self._mock_delegate - if sig is None: - self.__dict__[_the_name] = value - else: - setattr(sig, name, value) - - return property(_get, _set) - - - -class _CallList(list): - - def __contains__(self, value): - if not isinstance(value, list): - return list.__contains__(self, value) - len_value = len(value) - len_self = len(self) - if len_value > len_self: - return False - - for i in range(0, len_self - len_value + 1): - sub_list = self[i:i+len_value] - if sub_list == value: - return True - return False - - def __repr__(self): - return pprint.pformat(list(self)) - - -def _check_and_set_parent(parent, value, name, new_name): - if not _is_instance_mock(value): - return False - if ((value._mock_name or value._mock_new_name) or - (value._mock_parent is not None) or - (value._mock_new_parent is not None)): - return False - - _parent = parent - while _parent is not None: - # setting a mock (value) as a child or return value of itself - # should not modify the mock - if _parent is value: - return False - _parent = _parent._mock_new_parent - - if new_name: - value._mock_new_parent = parent - value._mock_new_name = new_name - if name: - value._mock_parent = parent - value._mock_name = name - return True - - - -class Base(object): - _mock_return_value = DEFAULT - _mock_side_effect = None - def __init__(self, *args, **kwargs): - pass - - - -class NonCallableMock(Base): - """A non-callable version of `Mock`""" - - def __new__(cls, *args, **kw): - # every instance has its own class - # so we can create magic methods on the - # class without stomping on other mocks - new = type(cls.__name__, (cls,), {'__doc__': cls.__doc__}) - instance = object.__new__(new) - return instance - - - def __init__( - self, spec=None, wraps=None, name=None, spec_set=None, - parent=None, _spec_state=None, _new_name='', _new_parent=None, - **kwargs - ): - if _new_parent is None: - _new_parent = parent - - __dict__ = self.__dict__ - __dict__['_mock_parent'] = parent - __dict__['_mock_name'] = name - __dict__['_mock_new_name'] = _new_name - __dict__['_mock_new_parent'] = _new_parent - - if spec_set is not None: - spec = spec_set - spec_set = True - - self._mock_add_spec(spec, spec_set) - - __dict__['_mock_children'] = {} - 
__dict__['_mock_wraps'] = wraps - __dict__['_mock_delegate'] = None - - __dict__['_mock_called'] = False - __dict__['_mock_call_args'] = None - __dict__['_mock_call_count'] = 0 - __dict__['_mock_call_args_list'] = _CallList() - __dict__['_mock_mock_calls'] = _CallList() - - __dict__['method_calls'] = _CallList() - - if kwargs: - self.configure_mock(**kwargs) - - _super(NonCallableMock, self).__init__( - spec, wraps, name, spec_set, parent, - _spec_state - ) - - - def attach_mock(self, mock, attribute): - """ - Attach a mock as an attribute of this one, replacing its name and - parent. Calls to the attached mock will be recorded in the - `method_calls` and `mock_calls` attributes of this one.""" - mock._mock_parent = None - mock._mock_new_parent = None - mock._mock_name = '' - mock._mock_new_name = None - - setattr(self, attribute, mock) - - - def mock_add_spec(self, spec, spec_set=False): - """Add a spec to a mock. `spec` can either be an object or a - list of strings. Only attributes on the `spec` can be fetched as - attributes from the mock. - - If `spec_set` is True then only attributes on the spec can be set.""" - self._mock_add_spec(spec, spec_set) - - - def _mock_add_spec(self, spec, spec_set): - _spec_class = None - - if spec is not None and not _is_list(spec): - if isinstance(spec, ClassTypes): - _spec_class = spec - else: - _spec_class = _get_class(spec) - - spec = dir(spec) - - __dict__ = self.__dict__ - __dict__['_spec_class'] = _spec_class - __dict__['_spec_set'] = spec_set - __dict__['_mock_methods'] = spec - - - def __get_return_value(self): - ret = self._mock_return_value - if self._mock_delegate is not None: - ret = self._mock_delegate.return_value - - if ret is DEFAULT: - ret = self._get_child_mock( - _new_parent=self, _new_name='()' - ) - self.return_value = ret - return ret - - - def __set_return_value(self, value): - if self._mock_delegate is not None: - self._mock_delegate.return_value = value - else: - self._mock_return_value = value - _check_and_set_parent(self, value, None, '()') - - __return_value_doc = "The value to be returned when the mock is called." - return_value = property(__get_return_value, __set_return_value, - __return_value_doc) - - - @property - def __class__(self): - if self._spec_class is None: - return type(self) - return self._spec_class - - called = _delegating_property('called') - call_count = _delegating_property('call_count') - call_args = _delegating_property('call_args') - call_args_list = _delegating_property('call_args_list') - mock_calls = _delegating_property('mock_calls') - - - def __get_side_effect(self): - sig = self._mock_delegate - if sig is None: - return self._mock_side_effect - return sig.side_effect - - def __set_side_effect(self, value): - value = _try_iter(value) - sig = self._mock_delegate - if sig is None: - self._mock_side_effect = value - else: - sig.side_effect = value - - side_effect = property(__get_side_effect, __set_side_effect) - - - def reset_mock(self): - "Restore the mock object to its initial state." - self.called = False - self.call_args = None - self.call_count = 0 - self.mock_calls = _CallList() - self.call_args_list = _CallList() - self.method_calls = _CallList() - - for child in self._mock_children.values(): - if isinstance(child, _SpecState): - continue - child.reset_mock() - - ret = self._mock_return_value - if _is_instance_mock(ret) and ret is not self: - ret.reset_mock() - - - def configure_mock(self, **kwargs): - """Set attributes on the mock through keyword arguments. 
- - Attributes plus return values and side effects can be set on child - mocks using standard dot notation and unpacking a dictionary in the - method call: - - >>> attrs = {'method.return_value': 3, 'other.side_effect': KeyError} - >>> mock.configure_mock(**attrs)""" - for arg, val in sorted(kwargs.items(), - # we sort on the number of dots so that - # attributes are set before we set attributes on - # attributes - key=lambda entry: entry[0].count('.')): - args = arg.split('.') - final = args.pop() - obj = self - for entry in args: - obj = getattr(obj, entry) - setattr(obj, final, val) - - - def __getattr__(self, name): - if name == '_mock_methods': - raise AttributeError(name) - elif self._mock_methods is not None: - if name not in self._mock_methods or name in _all_magics: - raise AttributeError("Mock object has no attribute %r" % name) - elif _is_magic(name): - raise AttributeError(name) - - result = self._mock_children.get(name) - if result is _deleted: - raise AttributeError(name) - elif result is None: - wraps = None - if self._mock_wraps is not None: - # XXXX should we get the attribute without triggering code - # execution? - wraps = getattr(self._mock_wraps, name) - - result = self._get_child_mock( - parent=self, name=name, wraps=wraps, _new_name=name, - _new_parent=self - ) - self._mock_children[name] = result - - elif isinstance(result, _SpecState): - result = create_autospec( - result.spec, result.spec_set, result.instance, - result.parent, result.name - ) - self._mock_children[name] = result - - return result - - - def __repr__(self): - _name_list = [self._mock_new_name] - _parent = self._mock_new_parent - last = self - - dot = '.' - if _name_list == ['()']: - dot = '' - seen = set() - while _parent is not None: - last = _parent - - _name_list.append(_parent._mock_new_name + dot) - dot = '.' - if _parent._mock_new_name == '()': - dot = '' - - _parent = _parent._mock_new_parent - - # use ids here so as not to call __hash__ on the mocks - if id(_parent) in seen: - break - seen.add(id(_parent)) - - _name_list = list(reversed(_name_list)) - _first = last._mock_name or 'mock' - if len(_name_list) > 1: - if _name_list[1] not in ('()', '().'): - _first += '.' - _name_list[0] = _first - name = ''.join(_name_list) - - name_string = '' - if name not in ('mock', 'mock.'): - name_string = ' name=%r' % name - - spec_string = '' - if self._spec_class is not None: - spec_string = ' spec=%r' - if self._spec_set: - spec_string = ' spec_set=%r' - spec_string = spec_string % self._spec_class.__name__ - return "<%s%s%s id='%s'>" % ( - type(self).__name__, - name_string, - spec_string, - id(self) - ) - - - def __dir__(self): - """Filter the output of `dir(mock)` to only useful members. - XXXX - """ - extras = self._mock_methods or [] - from_type = dir(type(self)) - from_dict = list(self.__dict__) - - if FILTER_DIR: - from_type = [e for e in from_type if not e.startswith('_')] - from_dict = [e for e in from_dict if not e.startswith('_') or - _is_magic(e)] - return sorted(set(extras + from_type + from_dict + - list(self._mock_children))) - - - def __setattr__(self, name, value): - if name in _allowed_names: - # property setters go through here - return object.__setattr__(self, name, value) - elif (self._spec_set and self._mock_methods is not None and - name not in self._mock_methods and - name not in self.__dict__): - raise AttributeError("Mock object has no attribute '%s'" % name) - elif name in _unsupported_magics: - msg = 'Attempting to set unsupported magic method %r.' 
% name - raise AttributeError(msg) - elif name in _all_magics: - if self._mock_methods is not None and name not in self._mock_methods: - raise AttributeError("Mock object has no attribute '%s'" % name) - - if not _is_instance_mock(value): - setattr(type(self), name, _get_method(name, value)) - original = value - value = lambda *args, **kw: original(self, *args, **kw) - else: - # only set _new_name and not name so that mock_calls is tracked - # but not method calls - _check_and_set_parent(self, value, None, name) - setattr(type(self), name, value) - self._mock_children[name] = value - elif name == '__class__': - self._spec_class = value - return - else: - if _check_and_set_parent(self, value, name, name): - self._mock_children[name] = value - return object.__setattr__(self, name, value) - - - def __delattr__(self, name): - if name in _all_magics and name in type(self).__dict__: - delattr(type(self), name) - if name not in self.__dict__: - # for magic methods that are still MagicProxy objects and - # not set on the instance itself - return - - if name in self.__dict__: - object.__delattr__(self, name) - - obj = self._mock_children.get(name, _missing) - if obj is _deleted: - raise AttributeError(name) - if obj is not _missing: - del self._mock_children[name] - self._mock_children[name] = _deleted - - - - def _format_mock_call_signature(self, args, kwargs): - name = self._mock_name or 'mock' - return _format_call_signature(name, args, kwargs) - - - def _format_mock_failure_message(self, args, kwargs): - message = 'Expected call: %s\nActual call: %s' - expected_string = self._format_mock_call_signature(args, kwargs) - call_args = self.call_args - if len(call_args) == 3: - call_args = call_args[1:] - actual_string = self._format_mock_call_signature(*call_args) - return message % (expected_string, actual_string) - - - def assert_called_with(_mock_self, *args, **kwargs): - """assert that the mock was called with the specified arguments. - - Raises an AssertionError if the args and keyword args passed in are - different to the last call to the mock.""" - self = _mock_self - if self.call_args is None: - expected = self._format_mock_call_signature(args, kwargs) - raise AssertionError('Expected call: %s\nNot called' % (expected,)) - - if self.call_args != (args, kwargs): - msg = self._format_mock_failure_message(args, kwargs) - raise AssertionError(msg) - - - def assert_called_once_with(_mock_self, *args, **kwargs): - """assert that the mock was called exactly once and with the specified - arguments.""" - self = _mock_self - if not self.call_count == 1: - msg = ("Expected to be called once. Called %s times." % - self.call_count) - raise AssertionError(msg) - return self.assert_called_with(*args, **kwargs) - - - def assert_has_calls(self, calls, any_order=False): - """assert the mock has been called with the specified calls. - The `mock_calls` list is checked for the calls. - - If `any_order` is False (the default) then the calls must be - sequential. There can be extra calls before or after the - specified calls. 
- - If `any_order` is True then the calls can be in any order, but - they must all appear in `mock_calls`.""" - if not any_order: - if calls not in self.mock_calls: - raise AssertionError( - 'Calls not found.\nExpected: %r\n' - 'Actual: %r' % (calls, self.mock_calls) - ) - return - - all_calls = list(self.mock_calls) - - not_found = [] - for kall in calls: - try: - all_calls.remove(kall) - except ValueError: - not_found.append(kall) - if not_found: - raise AssertionError( - '%r not all found in call list' % (tuple(not_found),) - ) - - - def assert_any_call(self, *args, **kwargs): - """assert the mock has been called with the specified arguments. - - The assert passes if the mock has *ever* been called, unlike - `assert_called_with` and `assert_called_once_with` that only pass if - the call is the most recent one.""" - kall = call(*args, **kwargs) - if kall not in self.call_args_list: - expected_string = self._format_mock_call_signature(args, kwargs) - raise AssertionError( - '%s call not found' % expected_string - ) - - - def _get_child_mock(self, **kw): - """Create the child mocks for attributes and return value. - By default child mocks will be the same type as the parent. - Subclasses of Mock may want to override this to customize the way - child mocks are made. - - For non-callable mocks the callable variant will be used (rather than - any custom subclass).""" - _type = type(self) - if not issubclass(_type, CallableMixin): - if issubclass(_type, NonCallableMagicMock): - klass = MagicMock - elif issubclass(_type, NonCallableMock) : - klass = Mock - else: - klass = _type.__mro__[1] - return klass(**kw) - - - -def _try_iter(obj): - if obj is None: - return obj - if _is_exception(obj): - return obj - if _callable(obj): - return obj - try: - return iter(obj) - except TypeError: - # XXXX backwards compatibility - # but this will blow up on first call - so maybe we should fail early? - return obj - - - -class CallableMixin(Base): - - def __init__(self, spec=None, side_effect=None, return_value=DEFAULT, - wraps=None, name=None, spec_set=None, parent=None, - _spec_state=None, _new_name='', _new_parent=None, **kwargs): - self.__dict__['_mock_return_value'] = return_value - - _super(CallableMixin, self).__init__( - spec, wraps, name, spec_set, parent, - _spec_state, _new_name, _new_parent, **kwargs - ) - - self.side_effect = side_effect - - - def _mock_check_sig(self, *args, **kwargs): - # stub method that can be replaced with one with a specific signature - pass - - - def __call__(_mock_self, *args, **kwargs): - # can't use self in-case a function / method we are mocking uses self - # in the signature - _mock_self._mock_check_sig(*args, **kwargs) - return _mock_self._mock_call(*args, **kwargs) - - - def _mock_call(_mock_self, *args, **kwargs): - self = _mock_self - self.called = True - self.call_count += 1 - self.call_args = _Call((args, kwargs), two=True) - self.call_args_list.append(_Call((args, kwargs), two=True)) - - _new_name = self._mock_new_name - _new_parent = self._mock_new_parent - self.mock_calls.append(_Call(('', args, kwargs))) - - seen = set() - skip_next_dot = _new_name == '()' - do_method_calls = self._mock_parent is not None - name = self._mock_name - while _new_parent is not None: - this_mock_call = _Call((_new_name, args, kwargs)) - if _new_parent._mock_new_name: - dot = '.' 
- if skip_next_dot: - dot = '' - - skip_next_dot = False - if _new_parent._mock_new_name == '()': - skip_next_dot = True - - _new_name = _new_parent._mock_new_name + dot + _new_name - - if do_method_calls: - if _new_name == name: - this_method_call = this_mock_call - else: - this_method_call = _Call((name, args, kwargs)) - _new_parent.method_calls.append(this_method_call) - - do_method_calls = _new_parent._mock_parent is not None - if do_method_calls: - name = _new_parent._mock_name + '.' + name - - _new_parent.mock_calls.append(this_mock_call) - _new_parent = _new_parent._mock_new_parent - - # use ids here so as not to call __hash__ on the mocks - _new_parent_id = id(_new_parent) - if _new_parent_id in seen: - break - seen.add(_new_parent_id) - - ret_val = DEFAULT - effect = self.side_effect - if effect is not None: - if _is_exception(effect): - raise effect - - if not _callable(effect): - result = next(effect) - if _is_exception(result): - raise result - return result - - ret_val = effect(*args, **kwargs) - if ret_val is DEFAULT: - ret_val = self.return_value - - if (self._mock_wraps is not None and - self._mock_return_value is DEFAULT): - return self._mock_wraps(*args, **kwargs) - if ret_val is DEFAULT: - ret_val = self.return_value - return ret_val - - - -class Mock(CallableMixin, NonCallableMock): - """ - Create a new `Mock` object. `Mock` takes several optional arguments - that specify the behaviour of the Mock object: - - * `spec`: This can be either a list of strings or an existing object (a - class or instance) that acts as the specification for the mock object. If - you pass in an object then a list of strings is formed by calling dir on - the object (excluding unsupported magic attributes and methods). Accessing - any attribute not in this list will raise an `AttributeError`. - - If `spec` is an object (rather than a list of strings) then - `mock.__class__` returns the class of the spec object. This allows mocks - to pass `isinstance` tests. - - * `spec_set`: A stricter variant of `spec`. If used, attempting to *set* - or get an attribute on the mock that isn't on the object passed as - `spec_set` will raise an `AttributeError`. - - * `side_effect`: A function to be called whenever the Mock is called. See - the `side_effect` attribute. Useful for raising exceptions or - dynamically changing return values. The function is called with the same - arguments as the mock, and unless it returns `DEFAULT`, the return - value of this function is used as the return value. - - Alternatively `side_effect` can be an exception class or instance. In - this case the exception will be raised when the mock is called. - - If `side_effect` is an iterable then each call to the mock will return - the next value from the iterable. If any of the members of the iterable - are exceptions they will be raised instead of returned. - - * `return_value`: The value returned when the mock is called. By default - this is a new Mock (created on first access). See the - `return_value` attribute. - - * `wraps`: Item for the mock object to wrap. If `wraps` is not None then - calling the Mock will pass the call through to the wrapped object - (returning the real result). Attribute access on the mock will return a - Mock object that wraps the corresponding attribute of the wrapped object - (so attempting to access an attribute that doesn't exist will raise an - `AttributeError`). 
- - If the mock has an explicit `return_value` set then calls are not passed - to the wrapped object and the `return_value` is returned instead. - - * `name`: If the mock has a name then it will be used in the repr of the - mock. This can be useful for debugging. The name is propagated to child - mocks. - - Mocks can also be called with arbitrary keyword arguments. These will be - used to set attributes on the mock after it is created. - """ - - - -def _dot_lookup(thing, comp, import_path): - try: - return getattr(thing, comp) - except AttributeError: - __import__(import_path) - return getattr(thing, comp) - - -def _importer(target): - components = target.split('.') - import_path = components.pop(0) - thing = __import__(import_path) - - for comp in components: - import_path += ".%s" % comp - thing = _dot_lookup(thing, comp, import_path) - return thing - - -def _is_started(patcher): - # XXXX horrible - return hasattr(patcher, 'is_local') - - -class _patch(object): - - attribute_name = None - _active_patches = set() - - def __init__( - self, getter, attribute, new, spec, create, - spec_set, autospec, new_callable, kwargs - ): - if new_callable is not None: - if new is not DEFAULT: - raise ValueError( - "Cannot use 'new' and 'new_callable' together" - ) - if autospec is not None: - raise ValueError( - "Cannot use 'autospec' and 'new_callable' together" - ) - - self.getter = getter - self.attribute = attribute - self.new = new - self.new_callable = new_callable - self.spec = spec - self.create = create - self.has_local = False - self.spec_set = spec_set - self.autospec = autospec - self.kwargs = kwargs - self.additional_patchers = [] - - - def copy(self): - patcher = _patch( - self.getter, self.attribute, self.new, self.spec, - self.create, self.spec_set, - self.autospec, self.new_callable, self.kwargs - ) - patcher.attribute_name = self.attribute_name - patcher.additional_patchers = [ - p.copy() for p in self.additional_patchers - ] - return patcher - - - def __call__(self, func): - if isinstance(func, ClassTypes): - return self.decorate_class(func) - return self.decorate_callable(func) - - - def decorate_class(self, klass): - for attr in dir(klass): - if not attr.startswith(patch.TEST_PREFIX): - continue - - attr_value = getattr(klass, attr) - if not hasattr(attr_value, "__call__"): - continue - - patcher = self.copy() - setattr(klass, attr, patcher(attr_value)) - return klass - - - def decorate_callable(self, func): - if hasattr(func, 'patchings'): - func.patchings.append(self) - return func - - @wraps(func) - def patched(*args, **keywargs): - # don't use a with here (backwards compatability with Python 2.4) - extra_args = [] - entered_patchers = [] - - # can't use try...except...finally because of Python 2.4 - # compatibility - exc_info = tuple() - try: - try: - for patching in patched.patchings: - arg = patching.__enter__() - entered_patchers.append(patching) - if patching.attribute_name is not None: - keywargs.update(arg) - elif patching.new is DEFAULT: - extra_args.append(arg) - - args += tuple(extra_args) - return func(*args, **keywargs) - except: - if (patching not in entered_patchers and - _is_started(patching)): - # the patcher may have been started, but an exception - # raised whilst entering one of its additional_patchers - entered_patchers.append(patching) - # Pass the exception to __exit__ - exc_info = sys.exc_info() - # re-raise the exception - raise - finally: - for patching in reversed(entered_patchers): - patching.__exit__(*exc_info) - - patched.patchings = [self] - if 
hasattr(func, 'func_code'): - # not in Python 3 - patched.compat_co_firstlineno = getattr( - func, "compat_co_firstlineno", - func.func_code.co_firstlineno - ) - return patched - - - def get_original(self): - target = self.getter() - name = self.attribute - - original = DEFAULT - local = False - - try: - original = target.__dict__[name] - except (AttributeError, KeyError): - original = getattr(target, name, DEFAULT) - else: - local = True - - if not self.create and original is DEFAULT: - raise AttributeError( - "%s does not have the attribute %r" % (target, name) - ) - return original, local - - - def __enter__(self): - """Perform the patch.""" - new, spec, spec_set = self.new, self.spec, self.spec_set - autospec, kwargs = self.autospec, self.kwargs - new_callable = self.new_callable - self.target = self.getter() - - # normalise False to None - if spec is False: - spec = None - if spec_set is False: - spec_set = None - if autospec is False: - autospec = None - - if spec is not None and autospec is not None: - raise TypeError("Can't specify spec and autospec") - if ((spec is not None or autospec is not None) and - spec_set not in (True, None)): - raise TypeError("Can't provide explicit spec_set *and* spec or autospec") - - original, local = self.get_original() - - if new is DEFAULT and autospec is None: - inherit = False - if spec is True: - # set spec to the object we are replacing - spec = original - if spec_set is True: - spec_set = original - spec = None - elif spec is not None: - if spec_set is True: - spec_set = spec - spec = None - elif spec_set is True: - spec_set = original - - if spec is not None or spec_set is not None: - if original is DEFAULT: - raise TypeError("Can't use 'spec' with create=True") - if isinstance(original, ClassTypes): - # If we're patching out a class and there is a spec - inherit = True - - Klass = MagicMock - _kwargs = {} - if new_callable is not None: - Klass = new_callable - elif spec is not None or spec_set is not None: - this_spec = spec - if spec_set is not None: - this_spec = spec_set - if _is_list(this_spec): - not_callable = '__call__' not in this_spec - else: - not_callable = not _callable(this_spec) - if not_callable: - Klass = NonCallableMagicMock - - if spec is not None: - _kwargs['spec'] = spec - if spec_set is not None: - _kwargs['spec_set'] = spec_set - - # add a name to mocks - if (isinstance(Klass, type) and - issubclass(Klass, NonCallableMock) and self.attribute): - _kwargs['name'] = self.attribute - - _kwargs.update(kwargs) - new = Klass(**_kwargs) - - if inherit and _is_instance_mock(new): - # we can only tell if the instance should be callable if the - # spec is not a list - this_spec = spec - if spec_set is not None: - this_spec = spec_set - if (not _is_list(this_spec) and not - _instance_callable(this_spec)): - Klass = NonCallableMagicMock - - _kwargs.pop('name') - new.return_value = Klass(_new_parent=new, _new_name='()', - **_kwargs) - elif autospec is not None: - # spec is ignored, new *must* be default, spec_set is treated - # as a boolean. Should we check spec is not None and that spec_set - # is a bool? - if new is not DEFAULT: - raise TypeError( - "autospec creates the mock for you. Can't specify " - "autospec and new." 
- ) - if original is DEFAULT: - raise TypeError("Can't use 'autospec' with create=True") - spec_set = bool(spec_set) - if autospec is True: - autospec = original - - new = create_autospec(autospec, spec_set=spec_set, - _name=self.attribute, **kwargs) - elif kwargs: - # can't set keyword args when we aren't creating the mock - # XXXX If new is a Mock we could call new.configure_mock(**kwargs) - raise TypeError("Can't pass kwargs to a mock we aren't creating") - - new_attr = new - - self.temp_original = original - self.is_local = local - setattr(self.target, self.attribute, new_attr) - if self.attribute_name is not None: - extra_args = {} - if self.new is DEFAULT: - extra_args[self.attribute_name] = new - for patching in self.additional_patchers: - arg = patching.__enter__() - if patching.new is DEFAULT: - extra_args.update(arg) - return extra_args - - return new - - - def __exit__(self, *exc_info): - """Undo the patch.""" - if not _is_started(self): - raise RuntimeError('stop called on unstarted patcher') - - if self.is_local and self.temp_original is not DEFAULT: - setattr(self.target, self.attribute, self.temp_original) - else: - delattr(self.target, self.attribute) - if not self.create and not hasattr(self.target, self.attribute): - # needed for proxy objects like django settings - setattr(self.target, self.attribute, self.temp_original) - - del self.temp_original - del self.is_local - del self.target - for patcher in reversed(self.additional_patchers): - if _is_started(patcher): - patcher.__exit__(*exc_info) - - - def start(self): - """Activate a patch, returning any created mock.""" - result = self.__enter__() - self._active_patches.add(self) - return result - - - def stop(self): - """Stop an active patch.""" - self._active_patches.discard(self) - return self.__exit__() - - - -def _get_target(target): - try: - target, attribute = target.rsplit('.', 1) - except (TypeError, ValueError): - raise TypeError("Need a valid target to patch. You supplied: %r" % - (target,)) - getter = lambda: _importer(target) - return getter, attribute - - -def _patch_object( - target, attribute, new=DEFAULT, spec=None, - create=False, spec_set=None, autospec=None, - new_callable=None, **kwargs - ): - """ - patch.object(target, attribute, new=DEFAULT, spec=None, create=False, - spec_set=None, autospec=None, new_callable=None, **kwargs) - - patch the named member (`attribute`) on an object (`target`) with a mock - object. - - `patch.object` can be used as a decorator, class decorator or a context - manager. Arguments `new`, `spec`, `create`, `spec_set`, - `autospec` and `new_callable` have the same meaning as for `patch`. Like - `patch`, `patch.object` takes arbitrary keyword arguments for configuring - the mock object it creates. - - When used as a class decorator `patch.object` honours `patch.TEST_PREFIX` - for choosing which methods to wrap. - """ - getter = lambda: target - return _patch( - getter, attribute, new, spec, create, - spec_set, autospec, new_callable, kwargs - ) - - -def _patch_multiple(target, spec=None, create=False, spec_set=None, - autospec=None, new_callable=None, **kwargs): - """Perform multiple patches in a single call. It takes the object to be - patched (either as an object or a string to fetch the object by importing) - and keyword arguments for the patches:: - - with patch.multiple(settings, FIRST_PATCH='one', SECOND_PATCH='two'): - ... - - Use `DEFAULT` as the value if you want `patch.multiple` to create - mocks for you. 
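A hedged sketch of the `DEFAULT` form described above (`package.module` and the attribute names `first`/`second` are placeholders for real importable targets):

    from mock import patch, DEFAULT

    with patch.multiple('package.module', first=DEFAULT, second=DEFAULT) as values:
        # `values` maps each patched attribute name to its created mock
        values['first'].return_value = 1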
In this case the created mocks are passed into a decorated - function by keyword, and a dictionary is returned when `patch.multiple` is - used as a context manager. - - `patch.multiple` can be used as a decorator, class decorator or a context - manager. The arguments `spec`, `spec_set`, `create`, - `autospec` and `new_callable` have the same meaning as for `patch`. These - arguments will be applied to *all* patches done by `patch.multiple`. - - When used as a class decorator `patch.multiple` honours `patch.TEST_PREFIX` - for choosing which methods to wrap. - """ - if type(target) in (unicode, str): - getter = lambda: _importer(target) - else: - getter = lambda: target - - if not kwargs: - raise ValueError( - 'Must supply at least one keyword argument with patch.multiple' - ) - # need to wrap in a list for python 3, where items is a view - items = list(kwargs.items()) - attribute, new = items[0] - patcher = _patch( - getter, attribute, new, spec, create, spec_set, - autospec, new_callable, {} - ) - patcher.attribute_name = attribute - for attribute, new in items[1:]: - this_patcher = _patch( - getter, attribute, new, spec, create, spec_set, - autospec, new_callable, {} - ) - this_patcher.attribute_name = attribute - patcher.additional_patchers.append(this_patcher) - return patcher - - -def patch( - target, new=DEFAULT, spec=None, create=False, - spec_set=None, autospec=None, new_callable=None, **kwargs - ): - """ - `patch` acts as a function decorator, class decorator or a context - manager. Inside the body of the function or with statement, the `target` - is patched with a `new` object. When the function/with statement exits - the patch is undone. - - If `new` is omitted, then the target is replaced with a - `MagicMock`. If `patch` is used as a decorator and `new` is - omitted, the created mock is passed in as an extra argument to the - decorated function. If `patch` is used as a context manager the created - mock is returned by the context manager. - - `target` should be a string in the form `'package.module.ClassName'`. The - `target` is imported and the specified object replaced with the `new` - object, so the `target` must be importable from the environment you are - calling `patch` from. The target is imported when the decorated function - is executed, not at decoration time. - - The `spec` and `spec_set` keyword arguments are passed to the `MagicMock` - if patch is creating one for you. - - In addition you can pass `spec=True` or `spec_set=True`, which causes - patch to pass in the object being mocked as the spec/spec_set object. - - `new_callable` allows you to specify a different class, or callable object, - that will be called to create the `new` object. By default `MagicMock` is - used. - - A more powerful form of `spec` is `autospec`. If you set `autospec=True` - then the mock with be created with a spec from the object being replaced. - All attributes of the mock will also have the spec of the corresponding - attribute of the object being replaced. Methods and functions being - mocked will have their arguments checked and will raise a `TypeError` if - they are called with the wrong signature. For mocks replacing a class, - their return value (the 'instance') will have the same spec as the class. - - Instead of `autospec=True` you can pass `autospec=some_object` to use an - arbitrary object as the spec instead of the one being replaced. - - By default `patch` will fail to replace attributes that don't exist. 
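For instance, a small sketch of both usage styles (the target string is the placeholder form used above, not a real module):

    from mock import patch

    @patch('package.module.ClassName')
    def test_method(MockClass):
        # the mock replacing the class is passed in as an extra argument
        MockClass.return_value.method.return_value = 'result'

    with patch('package.module.ClassName') as MockClass:
        # inside the block the target is replaced; on exit it is restored
        MockClass.return_value.method.return_value = 'result'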
If - you pass in `create=True`, and the attribute doesn't exist, patch will - create the attribute for you when the patched function is called, and - delete it again afterwards. This is useful for writing tests against - attributes that your production code creates at runtime. It is off by by - default because it can be dangerous. With it switched on you can write - passing tests against APIs that don't actually exist! - - Patch can be used as a `TestCase` class decorator. It works by - decorating each test method in the class. This reduces the boilerplate - code when your test methods share a common patchings set. `patch` finds - tests by looking for method names that start with `patch.TEST_PREFIX`. - By default this is `test`, which matches the way `unittest` finds tests. - You can specify an alternative prefix by setting `patch.TEST_PREFIX`. - - Patch can be used as a context manager, with the with statement. Here the - patching applies to the indented block after the with statement. If you - use "as" then the patched object will be bound to the name after the - "as"; very useful if `patch` is creating a mock object for you. - - `patch` takes arbitrary keyword arguments. These will be passed to - the `Mock` (or `new_callable`) on construction. - - `patch.dict(...)`, `patch.multiple(...)` and `patch.object(...)` are - available for alternate use-cases. - """ - getter, attribute = _get_target(target) - return _patch( - getter, attribute, new, spec, create, - spec_set, autospec, new_callable, kwargs - ) - - -class _patch_dict(object): - """ - Patch a dictionary, or dictionary like object, and restore the dictionary - to its original state after the test. - - `in_dict` can be a dictionary or a mapping like container. If it is a - mapping then it must at least support getting, setting and deleting items - plus iterating over keys. - - `in_dict` can also be a string specifying the name of the dictionary, which - will then be fetched by importing it. - - `values` can be a dictionary of values to set in the dictionary. `values` - can also be an iterable of `(key, value)` pairs. - - If `clear` is True then the dictionary will be cleared before the new - values are set. - - `patch.dict` can also be called with arbitrary keyword arguments to set - values in the dictionary:: - - with patch.dict('sys.modules', mymodule=Mock(), other_module=Mock()): - ... - - `patch.dict` can be used as a context manager, decorator or class - decorator. When used as a class decorator `patch.dict` honours - `patch.TEST_PREFIX` for choosing which methods to wrap. - """ - - def __init__(self, in_dict, values=(), clear=False, **kwargs): - if isinstance(in_dict, basestring): - in_dict = _importer(in_dict) - self.in_dict = in_dict - # support any argument supported by dict(...) 
constructor - self.values = dict(values) - self.values.update(kwargs) - self.clear = clear - self._original = None - - - def __call__(self, f): - if isinstance(f, ClassTypes): - return self.decorate_class(f) - @wraps(f) - def _inner(*args, **kw): - self._patch_dict() - try: - return f(*args, **kw) - finally: - self._unpatch_dict() - - return _inner - - - def decorate_class(self, klass): - for attr in dir(klass): - attr_value = getattr(klass, attr) - if (attr.startswith(patch.TEST_PREFIX) and - hasattr(attr_value, "__call__")): - decorator = _patch_dict(self.in_dict, self.values, self.clear) - decorated = decorator(attr_value) - setattr(klass, attr, decorated) - return klass - - - def __enter__(self): - """Patch the dict.""" - self._patch_dict() - - - def _patch_dict(self): - values = self.values - in_dict = self.in_dict - clear = self.clear - - try: - original = in_dict.copy() - except AttributeError: - # dict like object with no copy method - # must support iteration over keys - original = {} - for key in in_dict: - original[key] = in_dict[key] - self._original = original - - if clear: - _clear_dict(in_dict) - - try: - in_dict.update(values) - except AttributeError: - # dict like object with no update method - for key in values: - in_dict[key] = values[key] - - - def _unpatch_dict(self): - in_dict = self.in_dict - original = self._original - - _clear_dict(in_dict) - - try: - in_dict.update(original) - except AttributeError: - for key in original: - in_dict[key] = original[key] - - - def __exit__(self, *args): - """Unpatch the dict.""" - self._unpatch_dict() - return False - - start = __enter__ - stop = __exit__ - - -def _clear_dict(in_dict): - try: - in_dict.clear() - except AttributeError: - keys = list(in_dict) - for key in keys: - del in_dict[key] - - -def _patch_stopall(): - """Stop all active patches.""" - for patch in list(_patch._active_patches): - patch.stop() - - -patch.object = _patch_object -patch.dict = _patch_dict -patch.multiple = _patch_multiple -patch.stopall = _patch_stopall -patch.TEST_PREFIX = 'test' - -magic_methods = ( - "lt le gt ge eq ne " - "getitem setitem delitem " - "len contains iter " - "hash str sizeof " - "enter exit " - "divmod neg pos abs invert " - "complex int float index " - "trunc floor ceil " -) - -numerics = "add sub mul div floordiv mod lshift rshift and xor or pow " -inplace = ' '.join('i%s' % n for n in numerics.split()) -right = ' '.join('r%s' % n for n in numerics.split()) -extra = '' -if inPy3k: - extra = 'bool next ' -else: - extra = 'unicode long nonzero oct hex truediv rtruediv ' - -# not including __prepare__, __instancecheck__, __subclasscheck__ -# (as they are metaclass methods) -# __del__ is not supported at all as it causes problems if it exists - -_non_defaults = set('__%s__' % method for method in [ - 'cmp', 'getslice', 'setslice', 'coerce', 'subclasses', - 'format', 'get', 'set', 'delete', 'reversed', - 'missing', 'reduce', 'reduce_ex', 'getinitargs', - 'getnewargs', 'getstate', 'setstate', 'getformat', - 'setformat', 'repr', 'dir' -]) - - -def _get_method(name, func): - "Turns a callable object (like a mock) into a real function" - def method(self, *args, **kw): - return func(self, *args, **kw) - method.__name__ = name - return method - - -_magics = set( - '__%s__' % method for method in - ' '.join([magic_methods, numerics, inplace, right, extra]).split() -) - -_all_magics = _magics | _non_defaults - -_unsupported_magics = set([ - '__getattr__', '__setattr__', - '__init__', '__new__', '__prepare__' - '__instancecheck__', 
'__subclasscheck__', - '__del__' -]) - -_calculate_return_value = { - '__hash__': lambda self: object.__hash__(self), - '__str__': lambda self: object.__str__(self), - '__sizeof__': lambda self: object.__sizeof__(self), - '__unicode__': lambda self: unicode(object.__str__(self)), -} - -_return_values = { - '__lt__': NotImplemented, - '__gt__': NotImplemented, - '__le__': NotImplemented, - '__ge__': NotImplemented, - '__int__': 1, - '__contains__': False, - '__len__': 0, - '__exit__': False, - '__complex__': 1j, - '__float__': 1.0, - '__bool__': True, - '__nonzero__': True, - '__oct__': '1', - '__hex__': '0x1', - '__long__': long(1), - '__index__': 1, -} - - -def _get_eq(self): - def __eq__(other): - ret_val = self.__eq__._mock_return_value - if ret_val is not DEFAULT: - return ret_val - return self is other - return __eq__ - -def _get_ne(self): - def __ne__(other): - if self.__ne__._mock_return_value is not DEFAULT: - return DEFAULT - return self is not other - return __ne__ - -def _get_iter(self): - def __iter__(): - ret_val = self.__iter__._mock_return_value - if ret_val is DEFAULT: - return iter([]) - # if ret_val was already an iterator, then calling iter on it should - # return the iterator unchanged - return iter(ret_val) - return __iter__ - -_side_effect_methods = { - '__eq__': _get_eq, - '__ne__': _get_ne, - '__iter__': _get_iter, -} - - - -def _set_return_value(mock, method, name): - fixed = _return_values.get(name, DEFAULT) - if fixed is not DEFAULT: - method.return_value = fixed - return - - return_calulator = _calculate_return_value.get(name) - if return_calulator is not None: - try: - return_value = return_calulator(mock) - except AttributeError: - # XXXX why do we return AttributeError here? - # set it as a side_effect instead? - return_value = AttributeError(name) - method.return_value = return_value - return - - side_effector = _side_effect_methods.get(name) - if side_effector is not None: - method.side_effect = side_effector(mock) - - - -class MagicMixin(object): - def __init__(self, *args, **kw): - _super(MagicMixin, self).__init__(*args, **kw) - self._mock_set_magics() - - - def _mock_set_magics(self): - these_magics = _magics - - if self._mock_methods is not None: - these_magics = _magics.intersection(self._mock_methods) - - remove_magics = set() - remove_magics = _magics - these_magics - - for entry in remove_magics: - if entry in type(self).__dict__: - # remove unneeded magic methods - delattr(self, entry) - - # don't overwrite existing attributes if called a second time - these_magics = these_magics - set(type(self).__dict__) - - _type = type(self) - for entry in these_magics: - setattr(_type, entry, MagicProxy(entry, self)) - - - -class NonCallableMagicMock(MagicMixin, NonCallableMock): - """A version of `MagicMock` that isn't callable.""" - def mock_add_spec(self, spec, spec_set=False): - """Add a spec to a mock. `spec` can either be an object or a - list of strings. Only attributes on the `spec` can be fetched as - attributes from the mock. - - If `spec_set` is True then only attributes on the spec can be set.""" - self._mock_add_spec(spec, spec_set) - self._mock_set_magics() - - - -class MagicMock(MagicMixin, Mock): - """ - MagicMock is a subclass of Mock with default implementations - of most of the magic methods. You can use MagicMock without having to - configure the magic methods yourself. - - If you use the `spec` or `spec_set` arguments then *only* magic - methods that exist in the spec will be created. 
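For example, a brief sketch of the preconfigured magic methods (values illustrative):

    from mock import MagicMock

    m = MagicMock()
    m.__len__.return_value = 3        # magic methods exist and can be configured
    assert len(m) == 3
    assert list(iter(m)) == []        # __iter__ defaults to an empty iterator
    m.__str__.return_value = 'frobbed'
    assert str(m) == 'frobbed'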
-
-    Attributes and the return value of a `MagicMock` will also be `MagicMocks`.
-    """
-    def mock_add_spec(self, spec, spec_set=False):
-        """Add a spec to a mock. `spec` can either be an object or a
-        list of strings. Only attributes on the `spec` can be fetched as
-        attributes from the mock.
-
-        If `spec_set` is True then only attributes on the spec can be set."""
-        self._mock_add_spec(spec, spec_set)
-        self._mock_set_magics()
-
-
-
-class MagicProxy(object):
-    def __init__(self, name, parent):
-        self.name = name
-        self.parent = parent
-
-    def __call__(self, *args, **kwargs):
-        m = self.create_mock()
-        return m(*args, **kwargs)
-
-    def create_mock(self):
-        entry = self.name
-        parent = self.parent
-        m = parent._get_child_mock(name=entry, _new_name=entry,
-                                   _new_parent=parent)
-        setattr(parent, entry, m)
-        _set_return_value(parent, m, entry)
-        return m
-
-    def __get__(self, obj, _type=None):
-        return self.create_mock()
-
-
-
-class _ANY(object):
-    "A helper object that compares equal to everything."
-
-    def __eq__(self, other):
-        return True
-
-    def __ne__(self, other):
-        return False
-
-    def __repr__(self):
-        return '<ANY>'
-
-ANY = _ANY()
-
-
-
-def _format_call_signature(name, args, kwargs):
-    message = '%s(%%s)' % name
-    formatted_args = ''
-    args_string = ', '.join([repr(arg) for arg in args])
-    kwargs_string = ', '.join([
-        '%s=%r' % (key, value) for key, value in kwargs.items()
-    ])
-    if args_string:
-        formatted_args = args_string
-    if kwargs_string:
-        if formatted_args:
-            formatted_args += ', '
-        formatted_args += kwargs_string
-
-    return message % formatted_args
-
-
-
-class _Call(tuple):
-    """
-    A tuple for holding the results of a call to a mock, either in the form
-    `(args, kwargs)` or `(name, args, kwargs)`.
-
-    If args or kwargs are empty then a call tuple will compare equal to
-    a tuple without those values. This makes comparisons less verbose::
-
-        _Call(('name', (), {})) == ('name',)
-        _Call(('name', (1,), {})) == ('name', (1,))
-        _Call(((), {'a': 'b'})) == ({'a': 'b'},)
-
-    The `_Call` object provides a useful shortcut for comparing with call::
-
-        _Call(((1, 2), {'a': 3})) == call(1, 2, a=3)
-        _Call(('foo', (1, 2), {'a': 3})) == call.foo(1, 2, a=3)
-
-    If the _Call has no name then it will match any name.
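In practice this matching is used through the `call` helper defined further down, e.g. (a hedged sketch):

    from mock import MagicMock, call

    m = MagicMock()
    m.method(1, 2, a=3)
    assert m.method.call_args == call(1, 2, a=3)
    assert m.mock_calls == [call.method(1, 2, a=3)]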
- """ - def __new__(cls, value=(), name=None, parent=None, two=False, - from_kall=True): - name = '' - args = () - kwargs = {} - _len = len(value) - if _len == 3: - name, args, kwargs = value - elif _len == 2: - first, second = value - if isinstance(first, basestring): - name = first - if isinstance(second, tuple): - args = second - else: - kwargs = second - else: - args, kwargs = first, second - elif _len == 1: - value, = value - if isinstance(value, basestring): - name = value - elif isinstance(value, tuple): - args = value - else: - kwargs = value - - if two: - return tuple.__new__(cls, (args, kwargs)) - - return tuple.__new__(cls, (name, args, kwargs)) - - - def __init__(self, value=(), name=None, parent=None, two=False, - from_kall=True): - self.name = name - self.parent = parent - self.from_kall = from_kall - - - def __eq__(self, other): - if other is ANY: - return True - try: - len_other = len(other) - except TypeError: - return False - - self_name = '' - if len(self) == 2: - self_args, self_kwargs = self - else: - self_name, self_args, self_kwargs = self - - other_name = '' - if len_other == 0: - other_args, other_kwargs = (), {} - elif len_other == 3: - other_name, other_args, other_kwargs = other - elif len_other == 1: - value, = other - if isinstance(value, tuple): - other_args = value - other_kwargs = {} - elif isinstance(value, basestring): - other_name = value - other_args, other_kwargs = (), {} - else: - other_args = () - other_kwargs = value - else: - # len 2 - # could be (name, args) or (name, kwargs) or (args, kwargs) - first, second = other - if isinstance(first, basestring): - other_name = first - if isinstance(second, tuple): - other_args, other_kwargs = second, {} - else: - other_args, other_kwargs = (), second - else: - other_args, other_kwargs = first, second - - if self_name and other_name != self_name: - return False - - # this order is important for ANY to work! - return (other_args, other_kwargs) == (self_args, self_kwargs) - - - def __ne__(self, other): - return not self.__eq__(other) - - - def __call__(self, *args, **kwargs): - if self.name is None: - return _Call(('', args, kwargs), name='()') - - name = self.name + '()' - return _Call((self.name, args, kwargs), name=name, parent=self) - - - def __getattr__(self, attr): - if self.name is None: - return _Call(name=attr, from_kall=False) - name = '%s.%s' % (self.name, attr) - return _Call(name=name, parent=self, from_kall=False) - - - def __repr__(self): - if not self.from_kall: - name = self.name or 'call' - if name.startswith('()'): - name = 'call%s' % name - return name - - if len(self) == 2: - name = 'call' - args, kwargs = self - else: - name, args, kwargs = self - if not name: - name = 'call' - elif not name.startswith('()'): - name = 'call.%s' % name - else: - name = 'call%s' % name - return _format_call_signature(name, args, kwargs) - - - def call_list(self): - """For a call object that represents multiple calls, `call_list` - returns a list of all the intermediate calls as well as the - final call.""" - vals = [] - thing = self - while thing is not None: - if thing.from_kall: - vals.append(thing) - thing = thing.parent - return _CallList(reversed(vals)) - - -call = _Call(from_kall=False) - - - -def create_autospec(spec, spec_set=False, instance=False, _parent=None, - _name=None, **kwargs): - """Create a mock object using another object as a spec. Attributes on the - mock will use the corresponding attribute on the `spec` object as their - spec. 
- - Functions or methods being mocked will have their arguments checked - to check that they are called with the correct signature. - - If `spec_set` is True then attempting to set attributes that don't exist - on the spec object will raise an `AttributeError`. - - If a class is used as a spec then the return value of the mock (the - instance of the class) will have the same spec. You can use a class as the - spec for an instance object by passing `instance=True`. The returned mock - will only be callable if instances of the mock are callable. - - `create_autospec` also takes arbitrary keyword arguments that are passed to - the constructor of the created mock.""" - if _is_list(spec): - # can't pass a list instance to the mock constructor as it will be - # interpreted as a list of strings - spec = type(spec) - - is_type = isinstance(spec, ClassTypes) - - _kwargs = {'spec': spec} - if spec_set: - _kwargs = {'spec_set': spec} - elif spec is None: - # None we mock with a normal mock without a spec - _kwargs = {} - - _kwargs.update(kwargs) - - Klass = MagicMock - if type(spec) in DescriptorTypes: - # descriptors don't have a spec - # because we don't know what type they return - _kwargs = {} - elif not _callable(spec): - Klass = NonCallableMagicMock - elif is_type and instance and not _instance_callable(spec): - Klass = NonCallableMagicMock - - _new_name = _name - if _parent is None: - # for a top level object no _new_name should be set - _new_name = '' - - mock = Klass(parent=_parent, _new_parent=_parent, _new_name=_new_name, - name=_name, **_kwargs) - - if isinstance(spec, FunctionTypes): - # should only happen at the top level because we don't - # recurse for functions - mock = _set_signature(mock, spec) - else: - _check_signature(spec, mock, is_type, instance) - - if _parent is not None and not instance: - _parent._mock_children[_name] = mock - - if is_type and not instance and 'return_value' not in kwargs: - mock.return_value = create_autospec(spec, spec_set, instance=True, - _name='()', _parent=mock) - - for entry in dir(spec): - if _is_magic(entry): - # MagicMock already does the useful magic methods for us - continue - - if isinstance(spec, FunctionTypes) and entry in FunctionAttributes: - # allow a mock to actually be a function - continue - - # XXXX do we need a better way of getting attributes without - # triggering code execution (?) Probably not - we need the actual - # object to mock it so we would rather trigger a property than mock - # the property descriptor. Likewise we want to mock out dynamically - # provided attributes. - # XXXX what about attributes that raise exceptions other than - # AttributeError on being fetched? 
- # we could be resilient against it, or catch and propagate the - # exception when the attribute is fetched from the mock - try: - original = getattr(spec, entry) - except AttributeError: - continue - - kwargs = {'spec': original} - if spec_set: - kwargs = {'spec_set': original} - - if not isinstance(original, FunctionTypes): - new = _SpecState(original, spec_set, mock, entry, instance) - mock._mock_children[entry] = new - else: - parent = mock - if isinstance(spec, FunctionTypes): - parent = mock.mock - - new = MagicMock(parent=parent, name=entry, _new_name=entry, - _new_parent=parent, **kwargs) - mock._mock_children[entry] = new - skipfirst = _must_skip(spec, entry, is_type) - _check_signature(original, new, skipfirst=skipfirst) - - # so functions created with _set_signature become instance attributes, - # *plus* their underlying mock exists in _mock_children of the parent - # mock. Adding to _mock_children may be unnecessary where we are also - # setting as an instance attribute? - if isinstance(new, FunctionTypes): - setattr(mock, entry, new) - - return mock - - -def _must_skip(spec, entry, is_type): - if not isinstance(spec, ClassTypes): - if entry in getattr(spec, '__dict__', {}): - # instance attribute - shouldn't skip - return False - spec = spec.__class__ - if not hasattr(spec, '__mro__'): - # old style class: can't have descriptors anyway - return is_type - - for klass in spec.__mro__: - result = klass.__dict__.get(entry, DEFAULT) - if result is DEFAULT: - continue - if isinstance(result, (staticmethod, classmethod)): - return False - return is_type - - # shouldn't get here unless function is a dynamically provided attribute - # XXXX untested behaviour - return is_type - - -def _get_class(obj): - try: - return obj.__class__ - except AttributeError: - # in Python 2, _sre.SRE_Pattern objects have no __class__ - return type(obj) - - -class _SpecState(object): - - def __init__(self, spec, spec_set=False, parent=None, - name=None, ids=None, instance=False): - self.spec = spec - self.ids = ids - self.spec_set = spec_set - self.parent = parent - self.instance = instance - self.name = name - - -FunctionTypes = ( - # python function - type(create_autospec), - # instance method - type(ANY.__eq__), - # unbound method - type(_ANY.__eq__), -) - -FunctionAttributes = set([ - 'func_closure', - 'func_code', - 'func_defaults', - 'func_dict', - 'func_doc', - 'func_globals', - 'func_name', -]) - - -file_spec = None - - -def mock_open(mock=None, read_data=''): - """ - A helper function to create a mock to replace the use of `open`. It works - for `open` called directly or used as a context manager. - - The `mock` argument is the mock object to configure. If `None` (the - default) then a `MagicMock` will be created for you, with the API limited - to methods or attributes available on standard file handles. - - `read_data` is a string for the `read` method of the file handle to return. - This is an empty string by default. - """ - global file_spec - if file_spec is None: - # set on first use - if inPy3k: - import _io - file_spec = list(set(dir(_io.TextIOWrapper)).union(set(dir(_io.BytesIO)))) - else: - file_spec = file - - if mock is None: - mock = MagicMock(name='open', spec=open) - - handle = MagicMock(spec=file_spec) - handle.write.return_value = None - handle.__enter__.return_value = handle - handle.read.return_value = read_data - - mock.return_value = handle - return mock - - -class PropertyMock(Mock): - """ - A mock intended to be used as a property, or other descriptor, on a class. 
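For example, a hedged Python 2 sketch of `mock_open` (patching the builtin `open` here is one choice; patching `open` in the module under test is usually preferable):

    from mock import mock_open, patch

    m = mock_open(read_data='some text')
    with patch('__builtin__.open', m, create=True):
        with open('any_file') as h:
            assert h.read() == 'some text'
    m.assert_called_once_with('any_file')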
- `PropertyMock` provides `__get__` and `__set__` methods so you can specify - a return value when it is fetched. - - Fetching a `PropertyMock` instance from an object calls the mock, with - no args. Setting it calls the mock with the value being set. - """ - def _get_child_mock(self, **kwargs): - return MagicMock(**kwargs) - - def __get__(self, obj, obj_type): - return self() - def __set__(self, obj, val): - self(val) - diff --git a/doc/sphinxext/__init__.py b/doc/sphinxext/__init__.py deleted file mode 100644 index 2caf15b1..00000000 --- a/doc/sphinxext/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from __future__ import print_function - diff --git a/doc/sphinxext/apigen.py b/doc/sphinxext/apigen.py deleted file mode 100644 index 12374096..00000000 --- a/doc/sphinxext/apigen.py +++ /dev/null @@ -1,427 +0,0 @@ -"""Attempt to generate templates for module reference with Sphinx - -XXX - we exclude extension modules - -To include extension modules, first identify them as valid in the -``_uri2path`` method, then handle them in the ``_parse_module`` script. - -We get functions and classes by parsing the text of .py files. -Alternatively we could import the modules for discovery, and we'd have -to do that for extension modules. This would involve changing the -``_parse_module`` method to work via import and introspection, and -might involve changing ``discover_modules`` (which determines which -files are modules, and therefore which module URIs will be passed to -``_parse_module``). - -NOTE: this is a modified version of a script originally shipped with the -PyMVPA project, which we've adapted for NIPY use. PyMVPA is an MIT-licensed -project.""" - -# Stdlib imports -import os -import re - -# Functions and classes -class ApiDocWriter(object): - ''' Class for automatic detection and parsing of API docs - to Sphinx-parsable reST format''' - - # only separating first two levels - rst_section_levels = ['*', '=', '-', '~', '^'] - - def __init__(self, - package_name, - rst_extension='.rst', - package_skip_patterns=None, - module_skip_patterns=None, - ): - ''' Initialize package for parsing - - Parameters - ---------- - package_name : string - Name of the top-level package. *package_name* must be the - name of an importable package - rst_extension : string, optional - Extension for reST files, default '.rst' - package_skip_patterns : None or sequence of {strings, regexps} - Sequence of strings giving URIs of packages to be excluded - Operates on the package path, starting at (including) the - first dot in the package path, after *package_name* - so, - if *package_name* is ``sphinx``, then ``sphinx.util`` will - result in ``.util`` being passed for earching by these - regexps. If is None, gives default. Default is: - ['\.tests$'] - module_skip_patterns : None or sequence - Sequence of strings giving URIs of modules to be excluded - Operates on the module name including preceding URI path, - back to the first dot after *package_name*. For example - ``sphinx.util.console`` results in the string to search of - ``.util.console`` - If is None, gives default. 
Default is: - ['\.setup$', '\._'] - ''' - if package_skip_patterns is None: - package_skip_patterns = ['\\.tests$'] - if module_skip_patterns is None: - module_skip_patterns = ['\\.setup$', '\\._'] - self.package_name = package_name - self.rst_extension = rst_extension - self.package_skip_patterns = package_skip_patterns - self.module_skip_patterns = module_skip_patterns - - def get_package_name(self): - return self._package_name - - def set_package_name(self, package_name): - ''' Set package_name - - >>> docwriter = ApiDocWriter('sphinx') - >>> import sphinx - >>> docwriter.root_path == sphinx.__path__[0] - True - >>> docwriter.package_name = 'docutils' - >>> import docutils - >>> docwriter.root_path == docutils.__path__[0] - True - ''' - # It's also possible to imagine caching the module parsing here - self._package_name = package_name - self.root_module = __import__(package_name) - self.root_path = self.root_module.__path__[0] - self.written_modules = None - - package_name = property(get_package_name, set_package_name, None, - 'get/set package_name') - - def _get_object_name(self, line): - ''' Get second token in line - >>> docwriter = ApiDocWriter('sphinx') - >>> docwriter._get_object_name(" def func(): ") - 'func' - >>> docwriter._get_object_name(" class Klass(object): ") - 'Klass' - >>> docwriter._get_object_name(" class Klass: ") - 'Klass' - ''' - name = line.split()[1].split('(')[0].strip() - # in case we have classes which are not derived from object - # ie. old style classes - return name.rstrip(':') - - def _uri2path(self, uri): - ''' Convert uri to absolute filepath - - Parameters - ---------- - uri : string - URI of python module to return path for - - Returns - ------- - path : None or string - Returns None if there is no valid path for this URI - Otherwise returns absolute file system path for URI - - Examples - -------- - >>> docwriter = ApiDocWriter('sphinx') - >>> import sphinx - >>> modpath = sphinx.__path__[0] - >>> res = docwriter._uri2path('sphinx.builder') - >>> res == os.path.join(modpath, 'builder.py') - True - >>> res = docwriter._uri2path('sphinx') - >>> res == os.path.join(modpath, '__init__.py') - True - >>> docwriter._uri2path('sphinx.does_not_exist') - - ''' - if uri == self.package_name: - return os.path.join(self.root_path, '__init__.py') - path = uri.replace('.', os.path.sep) - path = path.replace(self.package_name + os.path.sep, '') - path = os.path.join(self.root_path, path) - # XXX maybe check for extensions as well? - if os.path.exists(path + '.py'): # file - path += '.py' - elif os.path.exists(os.path.join(path, '__init__.py')): - path = os.path.join(path, '__init__.py') - else: - return None - return path - - def _path2uri(self, dirpath): - ''' Convert directory path to uri ''' - relpath = dirpath.replace(self.root_path, self.package_name) - if relpath.startswith(os.path.sep): - relpath = relpath[1:] - return relpath.replace(os.path.sep, '.') - - def _parse_module(self, uri): - ''' Parse module defined in *uri* ''' - filename = self._uri2path(uri) - if filename is None: - # nothing that we could handle here. 
- return ([],[]) - f = open(filename, 'rt') - functions, classes = self._parse_lines(f) - f.close() - return functions, classes - - def _parse_lines(self, linesource): - ''' Parse lines of text for functions and classes ''' - functions = [] - classes = [] - for line in linesource: - if line.startswith('def ') and line.count('('): - # exclude private stuff - name = self._get_object_name(line) - if not name.startswith('_'): - functions.append(name) - elif line.startswith('class '): - # exclude private stuff - name = self._get_object_name(line) - if not name.startswith('_'): - classes.append(name) - else: - pass - functions.sort() - classes.sort() - return functions, classes - - def generate_api_doc(self, uri): - '''Make autodoc documentation template string for a module - - Parameters - ---------- - uri : string - python location of module - e.g 'sphinx.builder' - - Returns - ------- - S : string - Contents of API doc - ''' - # get the names of all classes and functions - functions, classes = self._parse_module(uri) - if not len(functions) and not len(classes): - print 'WARNING: Empty -',uri # dbg - return '' - - # Make a shorter version of the uri that omits the package name for - # titles - uri_short = re.sub(r'^%s\.' % self.package_name,'',uri) - - ad = '.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n' - - chap_title = uri_short - ad += (chap_title+'\n'+ self.rst_section_levels[1] * len(chap_title) - + '\n\n') - - # Set the chapter title to read 'module' for all modules except for the - # main packages - if '.' in uri: - title = 'Module: :mod:`' + uri_short + '`' - else: - title = ':mod:`' + uri_short + '`' - ad += title + '\n' + self.rst_section_levels[2] * len(title) - - if len(classes): - ad += '\nInheritance diagram for ``%s``:\n\n' % uri - ad += '.. inheritance-diagram:: %s \n' % uri - ad += ' :parts: 3\n' - - ad += '\n.. automodule:: ' + uri + '\n' - ad += '\n.. currentmodule:: ' + uri + '\n' - multi_class = len(classes) > 1 - multi_fx = len(functions) > 1 - if multi_class: - ad += '\n' + 'Classes' + '\n' + \ - self.rst_section_levels[2] * 7 + '\n' - elif len(classes) and multi_fx: - ad += '\n' + 'Class' + '\n' + \ - self.rst_section_levels[2] * 5 + '\n' - for c in classes: - ad += '\n:class:`' + c + '`\n' \ - + self.rst_section_levels[multi_class + 2 ] * \ - (len(c)+9) + '\n\n' - ad += '\n.. autoclass:: ' + c + '\n' - # must NOT exclude from index to keep cross-refs working - ad += ' :members:\n' \ - ' :undoc-members:\n' \ - ' :show-inheritance:\n' \ - ' :inherited-members:\n' \ - '\n' \ - ' .. automethod:: __init__\n' - if multi_fx: - ad += '\n' + 'Functions' + '\n' + \ - self.rst_section_levels[2] * 9 + '\n\n' - elif len(functions) and multi_class: - ad += '\n' + 'Function' + '\n' + \ - self.rst_section_levels[2] * 8 + '\n\n' - for f in functions: - # must NOT exclude from index to keep cross-refs working - ad += '\n.. autofunction:: ' + uri + '.' 
+ f + '\n\n' - return ad - - def _survives_exclude(self, matchstr, match_type): - ''' Returns True if *matchstr* does not match patterns - - ``self.package_name`` removed from front of string if present - - Examples - -------- - >>> dw = ApiDocWriter('sphinx') - >>> dw._survives_exclude('sphinx.okpkg', 'package') - True - >>> dw.package_skip_patterns.append('^\\.badpkg$') - >>> dw._survives_exclude('sphinx.badpkg', 'package') - False - >>> dw._survives_exclude('sphinx.badpkg', 'module') - True - >>> dw._survives_exclude('sphinx.badmod', 'module') - True - >>> dw.module_skip_patterns.append('^\\.badmod$') - >>> dw._survives_exclude('sphinx.badmod', 'module') - False - ''' - if match_type == 'module': - patterns = self.module_skip_patterns - elif match_type == 'package': - patterns = self.package_skip_patterns - else: - raise ValueError('Cannot interpret match type "%s"' - % match_type) - # Match to URI without package name - L = len(self.package_name) - if matchstr[:L] == self.package_name: - matchstr = matchstr[L:] - for pat in patterns: - try: - pat.search - except AttributeError: - pat = re.compile(pat) - if pat.search(matchstr): - return False - return True - - def discover_modules(self): - ''' Return module sequence discovered from ``self.package_name`` - - - Parameters - ---------- - None - - Returns - ------- - mods : sequence - Sequence of module names within ``self.package_name`` - - Examples - -------- - >>> dw = ApiDocWriter('sphinx') - >>> mods = dw.discover_modules() - >>> 'sphinx.util' in mods - True - >>> dw.package_skip_patterns.append('\.util$') - >>> 'sphinx.util' in dw.discover_modules() - False - >>> - ''' - modules = [self.package_name] - # raw directory parsing - for dirpath, dirnames, filenames in os.walk(self.root_path): - # Check directory names for packages - root_uri = self._path2uri(os.path.join(self.root_path, - dirpath)) - for dirname in dirnames[:]: # copy list - we modify inplace - package_uri = '.'.join((root_uri, dirname)) - if (self._uri2path(package_uri) and - self._survives_exclude(package_uri, 'package')): - modules.append(package_uri) - else: - dirnames.remove(dirname) - # Check filenames for modules - for filename in filenames: - module_name = filename[:-3] - module_uri = '.'.join((root_uri, module_name)) - if (self._uri2path(module_uri) and - self._survives_exclude(module_uri, 'module')): - modules.append(module_uri) - return sorted(modules) - - def write_modules_api(self, modules,outdir): - # write the list - written_modules = [] - for m in modules: - api_str = self.generate_api_doc(m) - if not api_str: - continue - # write out to file - outfile = os.path.join(outdir, - m + self.rst_extension) - fileobj = open(outfile, 'wt') - fileobj.write(api_str) - fileobj.close() - written_modules.append(m) - self.written_modules = written_modules - - def write_api_docs(self, outdir): - """Generate API reST files. 
-
-        Parameters
-        ----------
-        outdir : string
-            Directory name in which to store files
-            We create automatic filenames for each module
-
-        Returns
-        -------
-        None
-
-        Notes
-        -----
-        Sets self.written_modules to list of written modules
-        """
-        if not os.path.exists(outdir):
-            os.mkdir(outdir)
-        # compose list of modules
-        modules = self.discover_modules()
-        self.write_modules_api(modules,outdir)
-
-    def write_index(self, outdir, froot='gen', relative_to=None):
-        """Make a reST API index file from written files
-
-        Parameters
-        ----------
-        path : string
-            Filename to write index to
-        outdir : string
-            Directory to which to write generated index file
-        froot : string, optional
-            root (filename without extension) of filename to write to
-            Defaults to 'gen'. We add ``self.rst_extension``.
-        relative_to : string
-            path to which written filenames are relative. This
-            component of the written file path will be removed from
-            outdir, in the generated index. Default is None, meaning,
-            leave path as it is.
-        """
-        if self.written_modules is None:
-            raise ValueError('No modules written')
-        # Get full filename path
-        path = os.path.join(outdir, froot+self.rst_extension)
-        # Path written into index is relative to rootpath
-        if relative_to is not None:
-            relpath = outdir.replace(relative_to + os.path.sep, '')
-        else:
-            relpath = outdir
-        idx = open(path,'wt')
-        w = idx.write
-        w('.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n')
-        w('.. toctree::\n\n')
-        for f in self.written_modules:
-            w('    %s\n' % os.path.join(relpath,f))
-        idx.close()
diff --git a/doc/sphinxext/docscrape.py b/doc/sphinxext/docscrape.py
deleted file mode 100644
index f374b3dd..00000000
--- a/doc/sphinxext/docscrape.py
+++ /dev/null
@@ -1,497 +0,0 @@
-"""Extract reference documentation from the NumPy source tree.
-
-"""
-
-import inspect
-import textwrap
-import re
-import pydoc
-from StringIO import StringIO
-from warnings import warn
-
-class Reader(object):
-    """A line-based string reader.
-
-    """
-    def __init__(self, data):
-        """
-        Parameters
-        ----------
-        data : str
-           String with lines separated by '\n'.
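A tiny sketch of the reader's contract (illustrative input, assuming docscrape is importable):

    r = Reader("one\ntwo\n\nthree")
    assert r.read() == 'one'                       # lines are consumed one at a time
    assert r.read_to_next_empty_line() == ['two']  # stops before the blank line
    assert r.peek() == ''                          # now positioned on the blank line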
- - """ - if isinstance(data,list): - self._str = data - else: - self._str = data.split('\n') # store string as list of lines - - self.reset() - - def __getitem__(self, n): - return self._str[n] - - def reset(self): - self._l = 0 # current line nr - - def read(self): - if not self.eof(): - out = self[self._l] - self._l += 1 - return out - else: - return '' - - def seek_next_non_empty_line(self): - for l in self[self._l:]: - if l.strip(): - break - else: - self._l += 1 - - def eof(self): - return self._l >= len(self._str) - - def read_to_condition(self, condition_func): - start = self._l - for line in self[start:]: - if condition_func(line): - return self[start:self._l] - self._l += 1 - if self.eof(): - return self[start:self._l+1] - return [] - - def read_to_next_empty_line(self): - self.seek_next_non_empty_line() - def is_empty(line): - return not line.strip() - return self.read_to_condition(is_empty) - - def read_to_next_unindented_line(self): - def is_unindented(line): - return (line.strip() and (len(line.lstrip()) == len(line))) - return self.read_to_condition(is_unindented) - - def peek(self,n=0): - if self._l + n < len(self._str): - return self[self._l + n] - else: - return '' - - def is_empty(self): - return not ''.join(self._str).strip() - - -class NumpyDocString(object): - def __init__(self,docstring): - docstring = textwrap.dedent(docstring).split('\n') - - self._doc = Reader(docstring) - self._parsed_data = { - 'Signature': '', - 'Summary': [''], - 'Extended Summary': [], - 'Parameters': [], - 'Returns': [], - 'Raises': [], - 'Warns': [], - 'Other Parameters': [], - 'Attributes': [], - 'Methods': [], - 'See Also': [], - 'Notes': [], - 'Warnings': [], - 'References': '', - 'Examples': '', - 'index': {} - } - - self._parse() - - def __getitem__(self,key): - return self._parsed_data[key] - - def __setitem__(self,key,val): - if not self._parsed_data.has_key(key): - warn("Unknown section %s" % key) - else: - self._parsed_data[key] = val - - def _is_at_section(self): - self._doc.seek_next_non_empty_line() - - if self._doc.eof(): - return False - - l1 = self._doc.peek().strip() # e.g. Parameters - - if l1.startswith('.. 
index::'):
-            return True
-
-        l2 = self._doc.peek(1).strip() # ---------- or ==========
-        return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
-
-    def _strip(self,doc):
-        i = 0
-        j = 0
-        for i,line in enumerate(doc):
-            if line.strip(): break
-
-        for j,line in enumerate(doc[::-1]):
-            if line.strip(): break
-
-        return doc[i:len(doc)-j]
-
-    def _read_to_next_section(self):
-        section = self._doc.read_to_next_empty_line()
-
-        while not self._is_at_section() and not self._doc.eof():
-            if not self._doc.peek(-1).strip(): # previous line was empty
-                section += ['']
-
-            section += self._doc.read_to_next_empty_line()
-
-        return section
-
-    def _read_sections(self):
-        while not self._doc.eof():
-            data = self._read_to_next_section()
-            name = data[0].strip()
-
-            if name.startswith('..'): # index section
-                yield name, data[1:]
-            elif len(data) < 2:
-                yield StopIteration
-            else:
-                yield name, self._strip(data[2:])
-
-    def _parse_param_list(self,content):
-        r = Reader(content)
-        params = []
-        while not r.eof():
-            header = r.read().strip()
-            if ' : ' in header:
-                arg_name, arg_type = header.split(' : ')[:2]
-            else:
-                arg_name, arg_type = header, ''
-
-            desc = r.read_to_next_unindented_line()
-            desc = dedent_lines(desc)
-
-            params.append((arg_name,arg_type,desc))
-
-        return params
-
-
-    _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
-                           r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)
-    def _parse_see_also(self, content):
-        """
-        func_name : Descriptive text
-            continued text
-        another_func_name : Descriptive text
-        func_name1, func_name2, :meth:`func_name`, func_name3
-
-        """
-        items = []
-
-        def parse_item_name(text):
-            """Match ':role:`name`' or 'name'"""
-            m = self._name_rgx.match(text)
-            if m:
-                g = m.groups()
-                if g[1] is None:
-                    return g[3], None
-                else:
-                    return g[2], g[1]
-            raise ValueError("%s is not a item name" % text)
-
-        def push_item(name, rest):
-            if not name:
-                return
-            name, role = parse_item_name(name)
-            items.append((name, list(rest), role))
-            del rest[:]
-
-        current_func = None
-        rest = []
-
-        for line in content:
-            if not line.strip(): continue
-
-            m = self._name_rgx.match(line)
-            if m and line[m.end():].strip().startswith(':'):
-                push_item(current_func, rest)
-                current_func, line = line[:m.end()], line[m.end():]
-                rest = [line.split(':', 1)[1].strip()]
-                if not rest[0]:
-                    rest = []
-            elif not line.startswith(' '):
-                push_item(current_func, rest)
-                current_func = None
-                if ',' in line:
-                    for func in line.split(','):
-                        push_item(func, [])
-                elif line.strip():
-                    current_func = line
-            elif current_func is not None:
-                rest.append(line.strip())
-        push_item(current_func, rest)
-        return items
-
-    def _parse_index(self, section, content):
-        """
-        .. 
index: default - :refguide: something, else, and more - - """ - def strip_each_in(lst): - return [s.strip() for s in lst] - - out = {} - section = section.split('::') - if len(section) > 1: - out['default'] = strip_each_in(section[1].split(','))[0] - for line in content: - line = line.split(':') - if len(line) > 2: - out[line[1]] = strip_each_in(line[2].split(',')) - return out - - def _parse_summary(self): - """Grab signature (if given) and summary""" - if self._is_at_section(): - return - - summary = self._doc.read_to_next_empty_line() - summary_str = " ".join([s.strip() for s in summary]).strip() - if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str): - self['Signature'] = summary_str - if not self._is_at_section(): - self['Summary'] = self._doc.read_to_next_empty_line() - else: - self['Summary'] = summary - - if not self._is_at_section(): - self['Extended Summary'] = self._read_to_next_section() - - def _parse(self): - self._doc.reset() - self._parse_summary() - - for (section,content) in self._read_sections(): - if not section.startswith('..'): - section = ' '.join([s.capitalize() for s in section.split(' ')]) - if section in ('Parameters', 'Attributes', 'Methods', - 'Returns', 'Raises', 'Warns'): - self[section] = self._parse_param_list(content) - elif section.startswith('.. index::'): - self['index'] = self._parse_index(section, content) - elif section == 'See Also': - self['See Also'] = self._parse_see_also(content) - else: - self[section] = content - - # string conversion routines - - def _str_header(self, name, symbol='-'): - return [name, len(name)*symbol] - - def _str_indent(self, doc, indent=4): - out = [] - for line in doc: - out += [' '*indent + line] - return out - - def _str_signature(self): - if self['Signature']: - return [self['Signature'].replace('*','\*')] + [''] - else: - return [''] - - def _str_summary(self): - if self['Summary']: - return self['Summary'] + [''] - else: - return [] - - def _str_extended_summary(self): - if self['Extended Summary']: - return self['Extended Summary'] + [''] - else: - return [] - - def _str_param_list(self, name): - out = [] - if self[name]: - out += self._str_header(name) - for param,param_type,desc in self[name]: - out += ['%s : %s' % (param, param_type)] - out += self._str_indent(desc) - out += [''] - return out - - def _str_section(self, name): - out = [] - if self[name]: - out += self._str_header(name) - out += self[name] - out += [''] - return out - - def _str_see_also(self, func_role): - if not self['See Also']: return [] - out = [] - out += self._str_header("See Also") - last_had_desc = True - for func, desc, role in self['See Also']: - if role: - link = ':%s:`%s`' % (role, func) - elif func_role: - link = ':%s:`%s`' % (func_role, func) - else: - link = "`%s`_" % func - if desc or last_had_desc: - out += [''] - out += [link] - else: - out[-1] += ", %s" % link - if desc: - out += self._str_indent([' '.join(desc)]) - last_had_desc = True - else: - last_had_desc = False - out += [''] - return out - - def _str_index(self): - idx = self['index'] - out = [] - out += ['.. 
index:: %s' % idx.get('default','')] - for section, references in idx.iteritems(): - if section == 'default': - continue - out += [' :%s: %s' % (section, ', '.join(references))] - return out - - def __str__(self, func_role=''): - out = [] - out += self._str_signature() - out += self._str_summary() - out += self._str_extended_summary() - for param_list in ('Parameters','Returns','Raises'): - out += self._str_param_list(param_list) - out += self._str_section('Warnings') - out += self._str_see_also(func_role) - for s in ('Notes','References','Examples'): - out += self._str_section(s) - out += self._str_index() - return '\n'.join(out) - - -def indent(str,indent=4): - indent_str = ' '*indent - if str is None: - return indent_str - lines = str.split('\n') - return '\n'.join(indent_str + l for l in lines) - -def dedent_lines(lines): - """Deindent a list of lines maximally""" - return textwrap.dedent("\n".join(lines)).split("\n") - -def header(text, style='-'): - return text + '\n' + style*len(text) + '\n' - - -class FunctionDoc(NumpyDocString): - def __init__(self, func, role='func', doc=None): - self._f = func - self._role = role # e.g. "func" or "meth" - if doc is None: - doc = inspect.getdoc(func) or '' - try: - NumpyDocString.__init__(self, doc) - except ValueError, e: - print '*'*78 - print "ERROR: '%s' while parsing `%s`" % (e, self._f) - print '*'*78 - #print "Docstring follows:" - #print doclines - #print '='*78 - - if not self['Signature']: - func, func_name = self.get_func() - try: - # try to read signature - argspec = inspect.getargspec(func) - argspec = inspect.formatargspec(*argspec) - argspec = argspec.replace('*','\*') - signature = '%s%s' % (func_name, argspec) - except TypeError, e: - signature = '%s()' % func_name - self['Signature'] = signature - - def get_func(self): - func_name = getattr(self._f, '__name__', self.__class__.__name__) - if inspect.isclass(self._f): - func = getattr(self._f, '__call__', self._f.__init__) - else: - func = self._f - return func, func_name - - def __str__(self): - out = '' - - func, func_name = self.get_func() - signature = self['Signature'].replace('*', '\*') - - roles = {'func': 'function', - 'meth': 'method'} - - if self._role: - if not roles.has_key(self._role): - print "Warning: invalid role %s" % self._role - out += '.. %s:: %s\n \n\n' % (roles.get(self._role,''), - func_name) - - out += super(FunctionDoc, self).__str__(func_role=self._role) - return out - - -class ClassDoc(NumpyDocString): - def __init__(self,cls,modulename='',func_doc=FunctionDoc,doc=None): - if not inspect.isclass(cls): - raise ValueError("Initialise using a class. Got %r" % cls) - self._cls = cls - - if modulename and not modulename.endswith('.'): - modulename += '.' - self._mod = modulename - self._name = cls.__name__ - self._func_doc = func_doc - - if doc is None: - doc = pydoc.getdoc(cls) - - NumpyDocString.__init__(self, doc) - - @property - def methods(self): - return [name for name,func in inspect.getmembers(self._cls) - if not name.startswith('_') and callable(func)] - - def __str__(self): - out = '' - out += super(ClassDoc, self).__str__() - out += "\n\n" - - #for m in self.methods: - # print "Parsing `%s`" % m - # out += str(self._func_doc(getattr(self._cls,m), 'meth')) + '\n\n' - # out += '.. 
index::\n single: %s; %s\n\n' % (self._name, m) - - return out - - diff --git a/doc/sphinxext/docscrape_sphinx.py b/doc/sphinxext/docscrape_sphinx.py deleted file mode 100644 index 77ed271b..00000000 --- a/doc/sphinxext/docscrape_sphinx.py +++ /dev/null @@ -1,136 +0,0 @@ -import re, inspect, textwrap, pydoc -from docscrape import NumpyDocString, FunctionDoc, ClassDoc - -class SphinxDocString(NumpyDocString): - # string conversion routines - def _str_header(self, name, symbol='`'): - return ['.. rubric:: ' + name, ''] - - def _str_field_list(self, name): - return [':' + name + ':'] - - def _str_indent(self, doc, indent=4): - out = [] - for line in doc: - out += [' '*indent + line] - return out - - def _str_signature(self): - return [''] - if self['Signature']: - return ['``%s``' % self['Signature']] + [''] - else: - return [''] - - def _str_summary(self): - return self['Summary'] + [''] - - def _str_extended_summary(self): - return self['Extended Summary'] + [''] - - def _str_param_list(self, name): - out = [] - if self[name]: - out += self._str_field_list(name) - out += [''] - for param,param_type,desc in self[name]: - out += self._str_indent(['**%s** : %s' % (param.strip(), - param_type)]) - out += [''] - out += self._str_indent(desc,8) - out += [''] - return out - - def _str_section(self, name): - out = [] - if self[name]: - out += self._str_header(name) - out += [''] - content = textwrap.dedent("\n".join(self[name])).split("\n") - out += content - out += [''] - return out - - def _str_see_also(self, func_role): - out = [] - if self['See Also']: - see_also = super(SphinxDocString, self)._str_see_also(func_role) - out = ['.. seealso::', ''] - out += self._str_indent(see_also[2:]) - return out - - def _str_warnings(self): - out = [] - if self['Warnings']: - out = ['.. warning::', ''] - out += self._str_indent(self['Warnings']) - return out - - def _str_index(self): - idx = self['index'] - out = [] - if len(idx) == 0: - return out - - out += ['.. 
index:: %s' % idx.get('default','')] - for section, references in idx.iteritems(): - if section == 'default': - continue - elif section == 'refguide': - out += [' single: %s' % (', '.join(references))] - else: - out += [' %s: %s' % (section, ','.join(references))] - return out - - def _str_references(self): - out = [] - if self['References']: - out += self._str_header('References') - if isinstance(self['References'], str): - self['References'] = [self['References']] - out.extend(self['References']) - out += [''] - return out - - def __str__(self, indent=0, func_role="obj"): - out = [] - out += self._str_signature() - out += self._str_index() + [''] - out += self._str_summary() - out += self._str_extended_summary() - for param_list in ('Parameters', 'Attributes', 'Methods', - 'Returns','Raises'): - out += self._str_param_list(param_list) - out += self._str_warnings() - out += self._str_see_also(func_role) - out += self._str_section('Notes') - out += self._str_references() - out += self._str_section('Examples') - out = self._str_indent(out,indent) - return '\n'.join(out) - -class SphinxFunctionDoc(SphinxDocString, FunctionDoc): - pass - -class SphinxClassDoc(SphinxDocString, ClassDoc): - pass - -def get_doc_object(obj, what=None, doc=None): - if what is None: - if inspect.isclass(obj): - what = 'class' - elif inspect.ismodule(obj): - what = 'module' - elif callable(obj): - what = 'function' - else: - what = 'object' - if what == 'class': - return SphinxClassDoc(obj, '', func_doc=SphinxFunctionDoc, doc=doc) - elif what in ('function', 'method'): - return SphinxFunctionDoc(obj, '', doc=doc) - else: - if doc is None: - doc = pydoc.getdoc(obj) - return SphinxDocString(doc) - diff --git a/doc/sphinxext/inheritance_diagram.py b/doc/sphinxext/inheritance_diagram.py deleted file mode 100644 index 407fc13f..00000000 --- a/doc/sphinxext/inheritance_diagram.py +++ /dev/null @@ -1,407 +0,0 @@ -""" -Defines a docutils directive for inserting inheritance diagrams. - -Provide the directive with one or more classes or modules (separated -by whitespace). For modules, all of the classes in that module will -be used. - -Example:: - - Given the following classes: - - class A: pass - class B(A): pass - class C(A): pass - class D(B, C): pass - class E(B): pass - - .. inheritance-diagram: D E - - Produces a graph like the following: - - A - / \ - B C - / \ / - E D - -The graph is inserted as a PNG+image map into HTML and a PDF in -LaTeX. -""" - -import inspect -import os -import re -import subprocess -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -from docutils.nodes import Body, Element -from docutils.parsers.rst import directives -from sphinx.roles import xfileref_role - -def my_import(name): - """Module importer - taken from the python documentation. - - This function allows importing names with dots in them.""" - - mod = __import__(name) - components = name.split('.') - for comp in components[1:]: - mod = getattr(mod, comp) - return mod - -class DotException(Exception): - pass - -class InheritanceGraph(object): - """ - Given a list of classes, determines the set of classes that - they inherit from all the way to the root "object", and then - is able to generate a graphviz dot graph from them. - """ - def __init__(self, class_names, show_builtins=False): - """ - *class_names* is a list of child classes to show bases from. - - If *show_builtins* is True, then Python builtins will be shown - in the graph. 
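        Raises ValueError if none of the given names resolves to at least one class.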
- """ - self.class_names = class_names - self.classes = self._import_classes(class_names) - self.all_classes = self._all_classes(self.classes) - if len(self.all_classes) == 0: - raise ValueError("No classes found for inheritance diagram") - self.show_builtins = show_builtins - - py_sig_re = re.compile(r'''^([\w.]*\.)? # class names - (\w+) \s* $ # optionally arguments - ''', re.VERBOSE) - - def _import_class_or_module(self, name): - """ - Import a class using its fully-qualified *name*. - """ - try: - path, base = self.py_sig_re.match(name).groups() - except: - raise ValueError( - "Invalid class or module '%s' specified for inheritance diagram" % name) - fullname = (path or '') + base - path = (path and path.rstrip('.')) - if not path: - path = base - try: - module = __import__(path, None, None, []) - # We must do an import of the fully qualified name. Otherwise if a - # subpackage 'a.b' is requested where 'import a' does NOT provide - # 'a.b' automatically, then 'a.b' will not be found below. This - # second call will force the equivalent of 'import a.b' to happen - # after the top-level import above. - my_import(fullname) - - except ImportError: - raise ValueError( - "Could not import class or module '%s' specified for inheritance diagram" % name) - - try: - todoc = module - for comp in fullname.split('.')[1:]: - todoc = getattr(todoc, comp) - except AttributeError: - raise ValueError( - "Could not find class or module '%s' specified for inheritance diagram" % name) - - # If a class, just return it - if inspect.isclass(todoc): - return [todoc] - elif inspect.ismodule(todoc): - classes = [] - for cls in todoc.__dict__.values(): - if inspect.isclass(cls) and cls.__module__ == todoc.__name__: - classes.append(cls) - return classes - raise ValueError( - "'%s' does not resolve to a class or module" % name) - - def _import_classes(self, class_names): - """ - Import a list of classes. - """ - classes = [] - for name in class_names: - classes.extend(self._import_class_or_module(name)) - return classes - - def _all_classes(self, classes): - """ - Return a list of all classes that are ancestors of *classes*. - """ - all_classes = {} - - def recurse(cls): - all_classes[cls] = None - for c in cls.__bases__: - if c not in all_classes: - recurse(c) - - for cls in classes: - recurse(cls) - - return all_classes.keys() - - def class_name(self, cls, parts=0): - """ - Given a class object, return a fully-qualified name. This - works for things I've tested in matplotlib so far, but may not - be completely general. - """ - module = cls.__module__ - if module == '__builtin__': - fullname = cls.__name__ - else: - fullname = "%s.%s" % (module, cls.__name__) - if parts == 0: - return fullname - name_parts = fullname.split('.') - return '.'.join(name_parts[-parts:]) - - def get_all_class_names(self): - """ - Get all of the class names involved in the graph. 
- """ - return [self.class_name(x) for x in self.all_classes] - - # These are the default options for graphviz - default_graph_options = { - "rankdir": "LR", - "size": '"8.0, 12.0"' - } - default_node_options = { - "shape": "box", - "fontsize": 10, - "height": 0.25, - "fontname": "Vera Sans, DejaVu Sans, Liberation Sans, Arial, Helvetica, sans", - "style": '"setlinewidth(0.5)"' - } - default_edge_options = { - "arrowsize": 0.5, - "style": '"setlinewidth(0.5)"' - } - - def _format_node_options(self, options): - return ','.join(["%s=%s" % x for x in options.items()]) - def _format_graph_options(self, options): - return ''.join(["%s=%s;\n" % x for x in options.items()]) - - def generate_dot(self, fd, name, parts=0, urls={}, - graph_options={}, node_options={}, - edge_options={}): - """ - Generate a graphviz dot graph from the classes that - were passed in to __init__. - - *fd* is a Python file-like object to write to. - - *name* is the name of the graph - - *urls* is a dictionary mapping class names to http urls - - *graph_options*, *node_options*, *edge_options* are - dictionaries containing key/value pairs to pass on as graphviz - properties. - """ - g_options = self.default_graph_options.copy() - g_options.update(graph_options) - n_options = self.default_node_options.copy() - n_options.update(node_options) - e_options = self.default_edge_options.copy() - e_options.update(edge_options) - - fd.write('digraph %s {\n' % name) - fd.write(self._format_graph_options(g_options)) - - for cls in self.all_classes: - if not self.show_builtins and cls in __builtins__.values(): - continue - - name = self.class_name(cls, parts) - - # Write the node - this_node_options = n_options.copy() - url = urls.get(self.class_name(cls)) - if url is not None: - this_node_options['URL'] = '"%s"' % url - fd.write(' "%s" [%s];\n' % - (name, self._format_node_options(this_node_options))) - - # Write the edges - for base in cls.__bases__: - if not self.show_builtins and base in __builtins__.values(): - continue - - base_name = self.class_name(base, parts) - fd.write(' "%s" -> "%s" [%s];\n' % - (base_name, name, - self._format_node_options(e_options))) - fd.write('}\n') - - def run_dot(self, args, name, parts=0, urls={}, - graph_options={}, node_options={}, edge_options={}): - """ - Run graphviz 'dot' over this graph, returning whatever 'dot' - writes to stdout. - - *args* will be passed along as commandline arguments. - - *name* is the name of the graph - - *urls* is a dictionary mapping class names to http urls - - Raises DotException for any of the many os and - installation-related errors that may occur. - """ - try: - dot = subprocess.Popen(['dot'] + list(args), - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - close_fds=True) - except OSError: - raise DotException("Could not execute 'dot'. Are you sure you have 'graphviz' installed?") - except ValueError: - raise DotException("'dot' called with invalid arguments") - except: - raise DotException("Unexpected error calling 'dot'") - - self.generate_dot(dot.stdin, name, parts, urls, graph_options, - node_options, edge_options) - dot.stdin.close() - result = dot.stdout.read() - returncode = dot.wait() - if returncode != 0: - raise DotException("'dot' returned the errorcode %d" % returncode) - return result - -class inheritance_diagram(Body, Element): - """ - A docutils node to use as a placeholder for the inheritance - diagram. 
- """ - pass - -def inheritance_diagram_directive(name, arguments, options, content, lineno, - content_offset, block_text, state, - state_machine): - """ - Run when the inheritance_diagram directive is first encountered. - """ - node = inheritance_diagram() - - class_names = arguments - - # Create a graph starting with the list of classes - graph = InheritanceGraph(class_names) - - # Create xref nodes for each target of the graph's image map and - # add them to the doc tree so that Sphinx can resolve the - # references to real URLs later. These nodes will eventually be - # removed from the doctree after we're done with them. - for name in graph.get_all_class_names(): - refnodes, x = xfileref_role( - 'class', ':class:`%s`' % name, name, 0, state) - node.extend(refnodes) - # Store the graph object so we can use it to generate the - # dot file later - node['graph'] = graph - # Store the original content for use as a hash - node['parts'] = options.get('parts', 0) - node['content'] = " ".join(class_names) - return [node] - -def get_graph_hash(node): - return md5(node['content'] + str(node['parts'])).hexdigest()[-10:] - -def html_output_graph(self, node): - """ - Output the graph for HTML. This will insert a PNG with clickable - image map. - """ - graph = node['graph'] - parts = node['parts'] - - graph_hash = get_graph_hash(node) - name = "inheritance%s" % graph_hash - path = '_images' - dest_path = os.path.join(setup.app.builder.outdir, path) - if not os.path.exists(dest_path): - os.makedirs(dest_path) - png_path = os.path.join(dest_path, name + ".png") - path = setup.app.builder.imgpath - - # Create a mapping from fully-qualified class names to URLs. - urls = {} - for child in node: - if child.get('refuri') is not None: - urls[child['reftitle']] = child.get('refuri') - elif child.get('refid') is not None: - urls[child['reftitle']] = '#' + child.get('refid') - - # These arguments to dot will save a PNG file to disk and write - # an HTML image map to stdout. - image_map = graph.run_dot(['-Tpng', '-o%s' % png_path, '-Tcmapx'], - name, parts, urls) - return ('%s' % - (path, name, name, image_map)) - -def latex_output_graph(self, node): - """ - Output the graph for LaTeX. This will insert a PDF. - """ - graph = node['graph'] - parts = node['parts'] - - graph_hash = get_graph_hash(node) - name = "inheritance%s" % graph_hash - dest_path = os.path.abspath(os.path.join(setup.app.builder.outdir, '_images')) - if not os.path.exists(dest_path): - os.makedirs(dest_path) - pdf_path = os.path.abspath(os.path.join(dest_path, name + ".pdf")) - - graph.run_dot(['-Tpdf', '-o%s' % pdf_path], - name, parts, graph_options={'size': '"6.0,6.0"'}) - return '\n\\includegraphics{%s}\n\n' % pdf_path - -def visit_inheritance_diagram(inner_func): - """ - This is just a wrapper around html/latex_output_graph to make it - easier to handle errors and insert warnings. 
- """ - def visitor(self, node): - try: - content = inner_func(self, node) - except DotException, e: - # Insert the exception as a warning in the document - warning = self.document.reporter.warning(str(e), line=node.line) - warning.parent = node - node.children = [warning] - else: - source = self.document.attributes['source'] - self.body.append(content) - node.children = [] - return visitor - -def do_nothing(self, node): - pass - -def setup(app): - setup.app = app - setup.confdir = app.confdir - - app.add_node( - inheritance_diagram, - latex=(visit_inheritance_diagram(latex_output_graph), do_nothing), - html=(visit_inheritance_diagram(html_output_graph), do_nothing)) - app.add_directive( - 'inheritance-diagram', inheritance_diagram_directive, - False, (1, 100, 0), parts = directives.nonnegative_int) diff --git a/doc/sphinxext/ipython_console_highlighting.py b/doc/sphinxext/ipython_console_highlighting.py index c9bf1c15..f5cced41 100644 --- a/doc/sphinxext/ipython_console_highlighting.py +++ b/doc/sphinxext/ipython_console_highlighting.py @@ -4,7 +4,6 @@ XXX - See what improvements can be made based on the new (as of Sept 2009) 'pycon' lexer for the python console. At the very least it will give better highlighted tracebacks. """ -from __future__ import print_function #----------------------------------------------------------------------------- # Needed modules @@ -113,3 +112,4 @@ def setup(app): #----------------------------------------------------------------------------- # Register the extension as a valid pygments lexer highlighting.lexers['ipython'] = IPythonConsoleLexer() + diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py index 79cd2aed..2c2696c1 100644 --- a/doc/sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_directive.py @@ -71,14 +71,17 @@ except ImportError: from md5 import md5 # Third-party -import matplotlib +try: + import matplotlib + matplotlib.use('Agg') +except ImportError: + print "Couldn't find matplotlib" + import sphinx from docutils.parsers.rst import directives from docutils import nodes from sphinx.util.compat import Directive -matplotlib.use('Agg') - # Our own from IPython import Config, InteractiveShell from IPython.core.profiledir import ProfileDir @@ -828,3 +831,5 @@ if __name__=='__main__': os.mkdir('_static') test() print 'All OK? 
Check figures in _static/' + + diff --git a/doc/sphinxext/mathmpl.py b/doc/sphinxext/mathmpl.py deleted file mode 100644 index 0c126a66..00000000 --- a/doc/sphinxext/mathmpl.py +++ /dev/null @@ -1,120 +0,0 @@ -from __future__ import print_function -import os -import sys -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -from docutils import nodes -from docutils.parsers.rst import directives -import warnings - -from matplotlib import rcParams -from matplotlib.mathtext import MathTextParser -rcParams['mathtext.fontset'] = 'cm' -mathtext_parser = MathTextParser("Bitmap") - -# Define LaTeX math node: -class latex_math(nodes.General, nodes.Element): - pass - -def fontset_choice(arg): - return directives.choice(arg, ['cm', 'stix', 'stixsans']) - -options_spec = {'fontset': fontset_choice} - -def math_role(role, rawtext, text, lineno, inliner, - options={}, content=[]): - i = rawtext.find('`') - latex = rawtext[i+1:-1] - node = latex_math(rawtext) - node['latex'] = latex - node['fontset'] = options.get('fontset', 'cm') - return [node], [] -math_role.options = options_spec - -def math_directive(name, arguments, options, content, lineno, - content_offset, block_text, state, state_machine): - latex = ''.join(content) - node = latex_math(block_text) - node['latex'] = latex - node['fontset'] = options.get('fontset', 'cm') - return [node] - -# This uses mathtext to render the expression -def latex2png(latex, filename, fontset='cm'): - latex = "$%s$" % latex - orig_fontset = rcParams['mathtext.fontset'] - rcParams['mathtext.fontset'] = fontset - if os.path.exists(filename): - depth = mathtext_parser.get_depth(latex, dpi=100) - else: - try: - depth = mathtext_parser.to_png(filename, latex, dpi=100) - except: - warnings.warn("Could not render math expression %s" % latex, - Warning) - depth = 0 - rcParams['mathtext.fontset'] = orig_fontset - sys.stdout.write("#") - sys.stdout.flush() - return depth - -# LaTeX to HTML translation stuff: -def latex2html(node, source): - inline = isinstance(node.parent, nodes.TextElement) - latex = node['latex'] - name = 'math-%s' % md5(latex).hexdigest()[-10:] - - destdir = os.path.join(setup.app.builder.outdir, '_images', 'mathmpl') - if not os.path.exists(destdir): - os.makedirs(destdir) - dest = os.path.join(destdir, '%s.png' % name) - path = os.path.join(setup.app.builder.imgpath, 'mathmpl') - - depth = latex2png(latex, dest, node['fontset']) - - if inline: - cls = '' - else: - cls = 'class="center" ' - if inline and depth != 0: - style = 'style="position: relative; bottom: -%dpx"' % (depth + 1) - else: - style = '' - - return '' % (path, name, cls, style) - -def setup(app): - setup.app = app - - app.add_node(latex_math) - app.add_role('math', math_role) - - # Add visit/depart methods to HTML-Translator: - def visit_latex_math_html(self, node): - source = self.document.attributes['source'] - self.body.append(latex2html(node, source)) - def depart_latex_math_html(self, node): - pass - - # Add visit/depart methods to LaTeX-Translator: - def visit_latex_math_latex(self, node): - inline = isinstance(node.parent, nodes.TextElement) - if inline: - self.body.append('$%s$' % node['latex']) - else: - self.body.extend(['\\begin{equation}', - node['latex'], - '\\end{equation}']) - def depart_latex_math_latex(self, node): - pass - - app.add_node(latex_math, html=(visit_latex_math_html, - depart_latex_math_html)) - app.add_node(latex_math, latex=(visit_latex_math_latex, - depart_latex_math_latex)) - app.add_role('math', math_role) - 
app.add_directive('math', math_directive, - True, (0, 0, 0), **options_spec) diff --git a/doc/sphinxext/numpydoc.py b/doc/sphinxext/numpydoc.py deleted file mode 100644 index ff6c44c5..00000000 --- a/doc/sphinxext/numpydoc.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -======== -numpydoc -======== - -Sphinx extension that handles docstrings in the Numpy standard format. [1] - -It will: - -- Convert Parameters etc. sections to field lists. -- Convert See Also section to a See also entry. -- Renumber references. -- Extract the signature from the docstring, if it can't be determined otherwise. - -.. [1] http://projects.scipy.org/scipy/numpy/wiki/CodingStyleGuidelines#docstring-standard - -""" - -import os, re, pydoc -from docscrape_sphinx import get_doc_object, SphinxDocString -import inspect - -def mangle_docstrings(app, what, name, obj, options, lines, - reference_offset=[0]): - if what == 'module': - # Strip top title - title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', - re.I|re.S) - lines[:] = title_re.sub('', "\n".join(lines)).split("\n") - else: - doc = get_doc_object(obj, what, "\n".join(lines)) - lines[:] = str(doc).split("\n") - - if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ - obj.__name__: - if hasattr(obj, '__module__'): - v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__)) - else: - v = dict(full_name=obj.__name__) - lines += ['', '.. htmlonly::', ''] - lines += [' %s' % x for x in - (app.config.numpydoc_edit_link % v).split("\n")] - - # replace reference numbers so that there are no duplicates - references = [] - for l in lines: - l = l.strip() - if l.startswith('.. ['): - try: - references.append(int(l[len('.. ['):l.index(']')])) - except ValueError: - print "WARNING: invalid reference in %s docstring" % name - - # Start renaming from the biggest number, otherwise we may - # overwrite references. - references.sort() - if references: - for i, line in enumerate(lines): - for r in references: - new_r = reference_offset[0] + r - lines[i] = lines[i].replace('[%d]_' % r, - '[%d]_' % new_r) - lines[i] = lines[i].replace('.. [%d]' % r, - '.. 
[%d]' % new_r) - - reference_offset[0] += len(references) - -def mangle_signature(app, what, name, obj, options, sig, retann): - # Do not try to inspect classes that don't define `__init__` - if (inspect.isclass(obj) and - 'initializes x; see ' in pydoc.getdoc(obj.__init__)): - return '', '' - - if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return - if not hasattr(obj, '__doc__'): return - - doc = SphinxDocString(pydoc.getdoc(obj)) - if doc['Signature']: - sig = re.sub("^[^(]*", "", doc['Signature']) - return sig, '' - -def initialize(app): - try: - app.connect('autodoc-process-signature', mangle_signature) - except: - monkeypatch_sphinx_ext_autodoc() - -def setup(app, get_doc_object_=get_doc_object): - global get_doc_object - get_doc_object = get_doc_object_ - - app.connect('autodoc-process-docstring', mangle_docstrings) - app.connect('builder-inited', initialize) - app.add_config_value('numpydoc_edit_link', None, True) - -#------------------------------------------------------------------------------ -# Monkeypatch sphinx.ext.autodoc to accept argspecless autodocs (Sphinx < 0.5) -#------------------------------------------------------------------------------ - -def monkeypatch_sphinx_ext_autodoc(): - global _original_format_signature - import sphinx.ext.autodoc - - if sphinx.ext.autodoc.format_signature is our_format_signature: - return - - print "[numpydoc] Monkeypatching sphinx.ext.autodoc ..." - _original_format_signature = sphinx.ext.autodoc.format_signature - sphinx.ext.autodoc.format_signature = our_format_signature - -def our_format_signature(what, obj): - r = mangle_signature(None, what, None, obj, None, None, None) - if r is not None: - return r[0] - else: - return _original_format_signature(what, obj) diff --git a/doc/sphinxext/only_directives.py b/doc/sphinxext/only_directives.py deleted file mode 100644 index 9d8d0bb0..00000000 --- a/doc/sphinxext/only_directives.py +++ /dev/null @@ -1,64 +0,0 @@ -# -# A pair of directives for inserting content that will only appear in -# either html or latex. -# - -from __future__ import print_function -from docutils.nodes import Body, Element -from docutils.parsers.rst import directives - -class only_base(Body, Element): - def dont_traverse(self, *args, **kwargs): - return [] - -class html_only(only_base): - pass - -class latex_only(only_base): - pass - -def run(content, node_class, state, content_offset): - text = '\n'.join(content) - node = node_class(text) - state.nested_parse(content, content_offset, node) - return [node] - -def html_only_directive(name, arguments, options, content, lineno, - content_offset, block_text, state, state_machine): - return run(content, html_only, state, content_offset) - -def latex_only_directive(name, arguments, options, content, lineno, - content_offset, block_text, state, state_machine): - return run(content, latex_only, state, content_offset) - -def builder_inited(app): - if app.builder.name == 'html': - latex_only.traverse = only_base.dont_traverse - else: - html_only.traverse = only_base.dont_traverse - -def setup(app): - app.add_directive('htmlonly', html_only_directive, True, (0, 0, 0)) - app.add_directive('latexonly', latex_only_directive, True, (0, 0, 0)) - app.add_node(html_only) - app.add_node(latex_only) - - # This will *really* never see the light of day As it turns out, - # this results in "broken" image nodes since they never get - # processed, so best not to do this. 
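    #
    # For reference, the two directives registered above are used from reST
    # source as block directives; a minimal sketch, assuming the usual
    # docutils directive syntax (illustrative only, not taken from this
    # repository's docs):
    #
    #   .. htmlonly::
    #
    #      This paragraph is rendered only by the HTML builder.
    #
    #   .. latexonly::
    #
    #      This paragraph is rendered only by the LaTeX builder.
    #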
- # app.connect('builder-inited', builder_inited) - - # Add visit/depart methods to HTML-Translator: - def visit_perform(self, node): - pass - def depart_perform(self, node): - pass - def visit_ignore(self, node): - node.children = [] - def depart_ignore(self, node): - node.children = [] - - app.add_node(html_only, html=(visit_perform, depart_perform)) - app.add_node(html_only, latex=(visit_ignore, depart_ignore)) - app.add_node(latex_only, latex=(visit_perform, depart_perform)) - app.add_node(latex_only, html=(visit_ignore, depart_ignore)) diff --git a/doc/sphinxext/plot_directive.py b/doc/sphinxext/plot_directive.py deleted file mode 100644 index ac96d5fa..00000000 --- a/doc/sphinxext/plot_directive.py +++ /dev/null @@ -1,819 +0,0 @@ -""" -A directive for including a matplotlib plot in a Sphinx document. - -By default, in HTML output, `plot` will include a .png file with a -link to a high-res .png and .pdf. In LaTeX output, it will include a -.pdf. - -The source code for the plot may be included in one of three ways: - - 1. **A path to a source file** as the argument to the directive:: - - .. plot:: path/to/plot.py - - When a path to a source file is given, the content of the - directive may optionally contain a caption for the plot:: - - .. plot:: path/to/plot.py - - This is the caption for the plot - - Additionally, one my specify the name of a function to call (with - no arguments) immediately after importing the module:: - - .. plot:: path/to/plot.py plot_function1 - - 2. Included as **inline content** to the directive:: - - .. plot:: - - import matplotlib.pyplot as plt - import matplotlib.image as mpimg - import numpy as np - img = mpimg.imread('_static/stinkbug.png') - imgplot = plt.imshow(img) - - 3. Using **doctest** syntax:: - - .. plot:: - A plotting example: - >>> import matplotlib.pyplot as plt - >>> plt.plot([1,2,3], [4,5,6]) - -Options -------- - -The ``plot`` directive supports the following options: - - format : {'python', 'doctest'} - Specify the format of the input - - include-source : bool - Whether to display the source code. The default can be changed - using the `plot_include_source` variable in conf.py - - encoding : str - If this source file is in a non-UTF8 or non-ASCII encoding, - the encoding must be specified using the `:encoding:` option. - The encoding will not be inferred using the ``-*- coding -*-`` - metacomment. - - context : bool - If provided, the code will be run in the context of all - previous plot directives for which the `:context:` option was - specified. This only applies to inline code plot directives, - not those run from files. - - nofigs : bool - If specified, the code block will be run, but no figures will - be inserted. This is usually useful with the ``:context:`` - option. - -Additionally, this directive supports all of the options of the -`image` directive, except for `target` (since plot will add its own -target). These include `alt`, `height`, `width`, `scale`, `align` and -`class`. - -Configuration options ---------------------- - -The plot directive has the following configuration options: - - plot_include_source - Default value for the include-source option - - plot_pre_code - Code that should be executed before each plot. - - plot_basedir - Base directory, to which ``plot::`` file names are relative - to. (If None or empty, file names are relative to the - directoly where the file containing the directive is.) - - plot_formats - File formats to generate. List of tuples or strings:: - - [(suffix, dpi), suffix, ...] 
- - that determine the file format and the DPI. For entries whose - DPI was omitted, sensible defaults are chosen. - - plot_html_show_formats - Whether to show links to the files in HTML. - - plot_rcparams - A dictionary containing any non-standard rcParams that should - be applied before each plot. - - plot_apply_rcparams - By default, rcParams are applied when `context` option is not used in - a plot directive. This configuration option overrides this behaviour - and applies rcParams before each plot. - - plot_working_directory - By default, the working directory will be changed to the directory of - the example, so the code can get at its data files, if any. Also its - path will be added to `sys.path` so it can import any helper modules - sitting beside it. This configuration option can be used to specify - a central directory (also added to `sys.path`) where data files and - helper modules for all code are located. - - plot_template - Provide a customized template for preparing resturctured text. - - -""" -from __future__ import print_function - -import sys, os, glob, shutil, imp, warnings, cStringIO, re, textwrap, \ - traceback, exceptions - -from docutils.parsers.rst import directives -from docutils import nodes -from docutils.parsers.rst.directives.images import Image -align = Image.align -import sphinx - -sphinx_version = sphinx.__version__.split(".") -# The split is necessary for sphinx beta versions where the string is -# '6b1' -sphinx_version = tuple([int(re.split('[a-z]', x)[0]) - for x in sphinx_version[:2]]) - -try: - # Sphinx depends on either Jinja or Jinja2 - import jinja2 - def format_template(template, **kw): - return jinja2.Template(template).render(**kw) -except ImportError: - import jinja - def format_template(template, **kw): - return jinja.from_string(template, **kw) - -import matplotlib -import matplotlib.cbook as cbook -matplotlib.use('Agg') -import matplotlib.pyplot as plt -from matplotlib import _pylab_helpers - -__version__ = 2 - -#------------------------------------------------------------------------------ -# Relative pathnames -#------------------------------------------------------------------------------ - -# os.path.relpath is new in Python 2.6 -try: - from os.path import relpath -except ImportError: - # Copied from Python 2.7 - if 'posix' in sys.builtin_module_names: - def relpath(path, start=os.path.curdir): - """Return a relative version of a path""" - from os.path import sep, curdir, join, abspath, commonprefix, \ - pardir - - if not path: - raise ValueError("no path specified") - - start_list = abspath(start).split(sep) - path_list = abspath(path).split(sep) - - # Work out how much of the filepath is shared by start and path. 
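            # e.g. start='/a/b/c' and path='/a/b/d/e' share ['', 'a', 'b'],
            # so i == 3, rel_list == ['..', 'd', 'e'] and the result is '../d/e'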
- i = len(commonprefix([start_list, path_list])) - - rel_list = [pardir] * (len(start_list)-i) + path_list[i:] - if not rel_list: - return curdir - return join(*rel_list) - elif 'nt' in sys.builtin_module_names: - def relpath(path, start=os.path.curdir): - """Return a relative version of a path""" - from os.path import sep, curdir, join, abspath, commonprefix, \ - pardir, splitunc - - if not path: - raise ValueError("no path specified") - start_list = abspath(start).split(sep) - path_list = abspath(path).split(sep) - if start_list[0].lower() != path_list[0].lower(): - unc_path, rest = splitunc(path) - unc_start, rest = splitunc(start) - if bool(unc_path) ^ bool(unc_start): - raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)" - % (path, start)) - else: - raise ValueError("path is on drive %s, start on drive %s" - % (path_list[0], start_list[0])) - # Work out how much of the filepath is shared by start and path. - for i in range(min(len(start_list), len(path_list))): - if start_list[i].lower() != path_list[i].lower(): - break - else: - i += 1 - - rel_list = [pardir] * (len(start_list)-i) + path_list[i:] - if not rel_list: - return curdir - return join(*rel_list) - else: - raise RuntimeError("Unsupported platform (no relpath available!)") - -#------------------------------------------------------------------------------ -# Registration hook -#------------------------------------------------------------------------------ - -def plot_directive(name, arguments, options, content, lineno, - content_offset, block_text, state, state_machine): - return run(arguments, content, options, state_machine, state, lineno) -plot_directive.__doc__ = __doc__ - -def _option_boolean(arg): - if not arg or not arg.strip(): - # no argument given, assume used as a flag - return True - elif arg.strip().lower() in ('no', '0', 'false'): - return False - elif arg.strip().lower() in ('yes', '1', 'true'): - return True - else: - raise ValueError('"%s" unknown boolean' % arg) - -def _option_format(arg): - return directives.choice(arg, ('python', 'doctest')) - -def _option_align(arg): - return directives.choice(arg, ("top", "middle", "bottom", "left", "center", - "right")) - -def mark_plot_labels(app, document): - """ - To make plots referenceable, we need to move the reference from - the "htmlonly" (or "latexonly") node to the actual figure node - itself. 
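    Connected to the 'doctree-read' event in setup() below.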
- """ - for name, explicit in document.nametypes.iteritems(): - if not explicit: - continue - labelid = document.nameids[name] - if labelid is None: - continue - node = document.ids[labelid] - if node.tagname in ('html_only', 'latex_only'): - for n in node: - if n.tagname == 'figure': - sectname = name - for c in n: - if c.tagname == 'caption': - sectname = c.astext() - break - - node['ids'].remove(labelid) - node['names'].remove(name) - n['ids'].append(labelid) - n['names'].append(name) - document.settings.env.labels[name] = \ - document.settings.env.docname, labelid, sectname - break - -def setup(app): - setup.app = app - setup.config = app.config - setup.confdir = app.confdir - - options = {'alt': directives.unchanged, - 'height': directives.length_or_unitless, - 'width': directives.length_or_percentage_or_unitless, - 'scale': directives.nonnegative_int, - 'align': _option_align, - 'class': directives.class_option, - 'include-source': _option_boolean, - 'format': _option_format, - 'context': directives.flag, - 'nofigs': directives.flag, - 'encoding': directives.encoding - } - - app.add_directive('plot', plot_directive, True, (0, 2, False), **options) - app.add_config_value('plot_pre_code', None, True) - app.add_config_value('plot_include_source', False, True) - app.add_config_value('plot_formats', ['png', 'hires.png', 'pdf'], True) - app.add_config_value('plot_basedir', None, True) - app.add_config_value('plot_html_show_formats', True, True) - app.add_config_value('plot_rcparams', {}, True) - app.add_config_value('plot_apply_rcparams', False, True) - app.add_config_value('plot_working_directory', None, True) - app.add_config_value('plot_template', None, True) - - app.connect('doctree-read', mark_plot_labels) - -#------------------------------------------------------------------------------ -# Doctest handling -#------------------------------------------------------------------------------ - -def contains_doctest(text): - try: - # check if it's valid Python as-is - compile(text, '', 'exec') - return False - except SyntaxError: - pass - r = re.compile(r'^\s*>>>', re.M) - m = r.search(text) - return bool(m) - -def unescape_doctest(text): - """ - Extract code from a piece of text, which contains either Python code - or doctests. - - """ - if not contains_doctest(text): - return text - - code = "" - for line in text.split("\n"): - m = re.match(r'^\s*(>>>|\.\.\.) 
(.*)$', line) - if m: - code += m.group(2) + "\n" - elif line.strip(): - code += "# " + line.strip() + "\n" - else: - code += "\n" - return code - -def split_code_at_show(text): - """ - Split code at plt.show() - - """ - - parts = [] - is_doctest = contains_doctest(text) - - part = [] - for line in text.split("\n"): - if (not is_doctest and line.strip() == 'plt.show()') or \ - (is_doctest and line.strip() == '>>> plt.show()'): - part.append(line) - parts.append("\n".join(part)) - part = [] - else: - part.append(line) - if "\n".join(part).strip(): - parts.append("\n".join(part)) - return parts - -#------------------------------------------------------------------------------ -# Template -#------------------------------------------------------------------------------ - - -TEMPLATE = """ -{{ source_code }} - -{{ only_html }} - - {% if source_link or (html_show_formats and not multi_image) %} - ( - {%- if source_link -%} - `Source code <{{ source_link }}>`__ - {%- endif -%} - {%- if html_show_formats and not multi_image -%} - {%- for img in images -%} - {%- for fmt in img.formats -%} - {%- if source_link or not loop.first -%}, {% endif -%} - `{{ fmt }} <{{ dest_dir }}/{{ img.basename }}.{{ fmt }}>`__ - {%- endfor -%} - {%- endfor -%} - {%- endif -%} - ) - {% endif %} - - {% for img in images %} - .. figure:: {{ build_dir }}/{{ img.basename }}.png - {%- for option in options %} - {{ option }} - {% endfor %} - - {% if html_show_formats and multi_image -%} - ( - {%- for fmt in img.formats -%} - {%- if not loop.first -%}, {% endif -%} - `{{ fmt }} <{{ dest_dir }}/{{ img.basename }}.{{ fmt }}>`__ - {%- endfor -%} - ) - {%- endif -%} - - {{ caption }} - {% endfor %} - -{{ only_latex }} - - {% for img in images %} - .. image:: {{ build_dir }}/{{ img.basename }}.pdf - {% endfor %} - -{{ only_texinfo }} - - {% for img in images %} - .. image:: {{ build_dir }}/{{ img.basename }}.png - {%- for option in options %} - {{ option }} - {% endfor %} - - {% endfor %} - -""" - -exception_template = """ -.. htmlonly:: - - [`source code <%(linkdir)s/%(basename)s.py>`__] - -Exception occurred rendering plot. - -""" - -# the context of the plot for all directives specified with the -# :context: option -plot_context = dict() - -class ImageFile(object): - def __init__(self, basename, dirname): - self.basename = basename - self.dirname = dirname - self.formats = [] - - def filename(self, format): - return os.path.join(self.dirname, "%s.%s" % (self.basename, format)) - - def filenames(self): - return [self.filename(fmt) for fmt in self.formats] - -def out_of_date(original, derived): - """ - Returns True if derivative is out-of-date wrt original, - both of which are full file paths. - """ - return (not os.path.exists(derived) or - (os.path.exists(original) and - os.stat(derived).st_mtime < os.stat(original).st_mtime)) - -class PlotError(RuntimeError): - pass - -def run_code(code, code_path, ns=None, function_name=None): - """ - Import a Python module from a path, and run the function given by - name, if function_name is not None. - """ - - # Change the working directory to the directory of the example, so - # it can get at its data files, if any. Add its path to sys.path - # so it can import any helper modules sitting beside it. 
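    # Everything mutated below (working directory, sys.path, sys.stdout and
    # sys.argv) is saved first and restored in the finally clause, so a
    # failing plot cannot leak interpreter state into the rest of the build.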
- - pwd = os.getcwd() - old_sys_path = list(sys.path) - if setup.config.plot_working_directory is not None: - try: - os.chdir(setup.config.plot_working_directory) - except OSError as err: - raise OSError(str(err) + '\n`plot_working_directory` option in' - 'Sphinx configuration file must be a valid ' - 'directory path') - except TypeError as err: - raise TypeError(str(err) + '\n`plot_working_directory` option in ' - 'Sphinx configuration file must be a string or ' - 'None') - sys.path.insert(0, setup.config.plot_working_directory) - elif code_path is not None: - dirname = os.path.abspath(os.path.dirname(code_path)) - os.chdir(dirname) - sys.path.insert(0, dirname) - - # Redirect stdout - stdout = sys.stdout - sys.stdout = cStringIO.StringIO() - - # Reset sys.argv - old_sys_argv = sys.argv - sys.argv = [code_path] - - try: - try: - code = unescape_doctest(code) - if ns is None: - ns = {} - if not ns: - if setup.config.plot_pre_code is None: - exec "import numpy as np\nfrom matplotlib import pyplot as plt\n" in ns - else: - exec setup.config.plot_pre_code in ns - if "__main__" in code: - exec "__name__ = '__main__'" in ns - exec code in ns - if function_name is not None: - exec function_name + "()" in ns - except (Exception, SystemExit), err: - raise PlotError(traceback.format_exc()) - finally: - os.chdir(pwd) - sys.argv = old_sys_argv - sys.path[:] = old_sys_path - sys.stdout = stdout - return ns - -def clear_state(plot_rcparams): - plt.close('all') - matplotlib.rc_file_defaults() - matplotlib.rcParams.update(plot_rcparams) - -def render_figures(code, code_path, output_dir, output_base, context, - function_name, config): - """ - Run a pyplot script and save the low and high res PNGs and a PDF - in outdir. - - Save the images under *output_dir* with file names derived from - *output_base* - """ - # -- Parse format list - default_dpi = {'png': 80, 'hires.png': 200, 'pdf': 200} - formats = [] - plot_formats = config.plot_formats - if isinstance(plot_formats, (str, unicode)): - plot_formats = eval(plot_formats) - for fmt in plot_formats: - if isinstance(fmt, str): - formats.append((fmt, default_dpi.get(fmt, 80))) - elif type(fmt) in (tuple, list) and len(fmt)==2: - formats.append((str(fmt[0]), int(fmt[1]))) - else: - raise PlotError('invalid image format "%r" in plot_formats' % fmt) - - # -- Try to determine if all images already exist - - code_pieces = split_code_at_show(code) - - # Look for single-figure output files first - # Look for single-figure output files first - all_exists = True - img = ImageFile(output_base, output_dir) - for format, dpi in formats: - if out_of_date(code_path, img.filename(format)): - all_exists = False - break - img.formats.append(format) - - if all_exists: - return [(code, [img])] - - # Then look for multi-figure output files - results = [] - all_exists = True - for i, code_piece in enumerate(code_pieces): - images = [] - for j in xrange(1000): - if len(code_pieces) > 1: - img = ImageFile('%s_%02d_%02d' % (output_base, i, j), output_dir) - else: - img = ImageFile('%s_%02d' % (output_base, j), output_dir) - for format, dpi in formats: - if out_of_date(code_path, img.filename(format)): - all_exists = False - break - img.formats.append(format) - - # assume that if we have one, we have them all - if not all_exists: - all_exists = (j > 0) - break - images.append(img) - if not all_exists: - break - results.append((code_piece, images)) - - if all_exists: - return results - - # We didn't find the files, so build them - - results = [] - if context: - ns = plot_context - 
else: - ns = {} - - for i, code_piece in enumerate(code_pieces): - if not context or config.plot_apply_rcparams: - clear_state(config.plot_rcparams) - run_code(code_piece, code_path, ns, function_name) - - images = [] - fig_managers = _pylab_helpers.Gcf.get_all_fig_managers() - for j, figman in enumerate(fig_managers): - if len(fig_managers) == 1 and len(code_pieces) == 1: - img = ImageFile(output_base, output_dir) - elif len(code_pieces) == 1: - img = ImageFile("%s_%02d" % (output_base, j), output_dir) - else: - img = ImageFile("%s_%02d_%02d" % (output_base, i, j), - output_dir) - images.append(img) - for format, dpi in formats: - try: - figman.canvas.figure.savefig(img.filename(format), dpi=dpi) - except Exception,err: - raise PlotError(traceback.format_exc()) - img.formats.append(format) - - results.append((code_piece, images)) - - if not context or config.plot_apply_rcparams: - clear_state(config.plot_rcparams) - - return results - -def run(arguments, content, options, state_machine, state, lineno): - # The user may provide a filename *or* Python code content, but not both - if arguments and content: - raise RuntimeError("plot:: directive can't have both args and content") - - document = state_machine.document - config = document.settings.env.config - nofigs = options.has_key('nofigs') - - options.setdefault('include-source', config.plot_include_source) - context = options.has_key('context') - - rst_file = document.attributes['source'] - rst_dir = os.path.dirname(rst_file) - - if len(arguments): - if not config.plot_basedir: - source_file_name = os.path.join(setup.app.builder.srcdir, - directives.uri(arguments[0])) - else: - source_file_name = os.path.join(setup.confdir, config.plot_basedir, - directives.uri(arguments[0])) - - # If there is content, it will be passed as a caption. - caption = '\n'.join(content) - - # If the optional function name is provided, use it - if len(arguments) == 2: - function_name = arguments[1] - else: - function_name = None - - with open(source_file_name, 'r') as fd: - code = fd.read() - output_base = os.path.basename(source_file_name) - else: - source_file_name = rst_file - code = textwrap.dedent("\n".join(map(str, content))) - counter = document.attributes.get('_plot_counter', 0) + 1 - document.attributes['_plot_counter'] = counter - base, ext = os.path.splitext(os.path.basename(source_file_name)) - output_base = '%s-%d.py' % (base, counter) - function_name = None - caption = '' - - base, source_ext = os.path.splitext(output_base) - if source_ext in ('.py', '.rst', '.txt'): - output_base = base - else: - source_ext = '' - - # ensure that LaTeX includegraphics doesn't choke in foo.bar.pdf filenames - output_base = output_base.replace('.', '-') - - # is it in doctest format? - is_doctest = contains_doctest(code) - if options.has_key('format'): - if options['format'] == 'python': - is_doctest = False - else: - is_doctest = True - - # determine output directory name fragment - source_rel_name = relpath(source_file_name, setup.confdir) - source_rel_dir = os.path.dirname(source_rel_name) - while source_rel_dir.startswith(os.path.sep): - source_rel_dir = source_rel_dir[1:] - - # build_dir: where to place output files (temporarily) - build_dir = os.path.join(os.path.dirname(setup.app.doctreedir), - 'plot_directive', - source_rel_dir) - # get rid of .. in paths, also changes pathsep - # see note in Python docs for warning about symbolic links on Windows. 
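    # e.g. os.path.normpath('doc/_build/../plot_directive/gallery') yields
    # 'doc/plot_directive/gallery' (with backslashes on Windows)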
- # need to compare source and dest paths at end - build_dir = os.path.normpath(build_dir) - - if not os.path.exists(build_dir): - os.makedirs(build_dir) - - # output_dir: final location in the builder's directory - dest_dir = os.path.abspath(os.path.join(setup.app.builder.outdir, - source_rel_dir)) - if not os.path.exists(dest_dir): - os.makedirs(dest_dir) # no problem here for me, but just use built-ins - - # how to link to files from the RST file - dest_dir_link = os.path.join(relpath(setup.confdir, rst_dir), - source_rel_dir).replace(os.path.sep, '/') - build_dir_link = relpath(build_dir, rst_dir).replace(os.path.sep, '/') - source_link = dest_dir_link + '/' + output_base + source_ext - - # make figures - try: - results = render_figures(code, source_file_name, build_dir, output_base, - context, function_name, config) - errors = [] - except PlotError, err: - reporter = state.memo.reporter - sm = reporter.system_message( - 2, "Exception occurred in plotting %s\n from %s:\n%s" % (output_base, - source_file_name, err), - line=lineno) - results = [(code, [])] - errors = [sm] - - # Properly indent the caption - caption = '\n'.join(' ' + line.strip() - for line in caption.split('\n')) - - # generate output restructuredtext - total_lines = [] - for j, (code_piece, images) in enumerate(results): - if options['include-source']: - if is_doctest: - lines = [''] - lines += [row.rstrip() for row in code_piece.split('\n')] - else: - lines = ['.. code-block:: python', ''] - lines += [' %s' % row.rstrip() - for row in code_piece.split('\n')] - source_code = "\n".join(lines) - else: - source_code = "" - - if nofigs: - images = [] - - opts = [':%s: %s' % (key, val) for key, val in options.items() - if key in ('alt', 'height', 'width', 'scale', 'align', 'class')] - - only_html = ".. only:: html" - only_latex = ".. only:: latex" - only_texinfo = ".. 
only:: texinfo" - - if j == 0: - src_link = source_link - else: - src_link = None - - result = format_template( - config.plot_template or TEMPLATE, - dest_dir=dest_dir_link, - build_dir=build_dir_link, - source_link=src_link, - multi_image=len(images) > 1, - only_html=only_html, - only_latex=only_latex, - only_texinfo=only_texinfo, - options=opts, - images=images, - source_code=source_code, - html_show_formats=config.plot_html_show_formats, - caption=caption) - - total_lines.extend(result.split("\n")) - total_lines.extend("\n") - - if total_lines: - state_machine.insert_input(total_lines, source=source_file_name) - - # copy image files to builder's output directory, if necessary - if not os.path.exists(dest_dir): - cbook.mkdirs(dest_dir) - - for code_piece, images in results: - for img in images: - for fn in img.filenames(): - destimg = os.path.join(dest_dir, os.path.basename(fn)) - if fn != destimg: - shutil.copyfile(fn, destimg) - - # copy script (if necessary) - target_name = os.path.join(dest_dir, output_base + source_ext) - with open(target_name, 'w') as f: - if source_file_name == rst_file: - code_escaped = unescape_doctest(code) - else: - code_escaped = code - f.write(code_escaped) - - return errors diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst index 7d1a43df..92b25bc0 100644 --- a/doc/tuto_GP_regression.rst +++ b/doc/tuto_GP_regression.rst @@ -1,4 +1,3 @@ - ************************************* Gaussian process regression tutorial ************************************* @@ -12,7 +11,7 @@ We first import the libraries we will need: :: import numpy as np import GPy -1 dimensional model +1-dimensional model =================== For this toy example, we assume we have the following inputs and outputs:: @@ -22,13 +21,11 @@ For this toy example, we assume we have the following inputs and outputs:: Note that the observations Y include some noise. -The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential) plus some white noise:: +The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential):: - Gaussian = GPy.kern.rbf(D=1) - noise = GPy.kern.white(D=1) - kernel = Gaussian + noise + kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.) -The parameter ``D`` stands for the dimension of the input space. Note that many other kernels are implemented such as: +The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Note that many other kernels are implemented such as: * linear (``GPy.kern.linear``) * exponential kernel (``GPy.kern.exponential``) @@ -41,19 +38,19 @@ The inputs required for building the model are the observations and the kernel:: m = GPy.models.GP_regression(X,Y,kernel) -The functions ``print`` and ``plot`` give an insight of the model we have just build. The code:: +By default, some observation noise is added to the modle. The functions ``print`` and ``plot`` give an insight of the model we have just build. The code:: print m m.plot() gives the following output: :: - Marginal log-likelihood: -2.281e+01 + Marginal log-likelihood: -4.479e+00 Name | Value | Constraints | Ties | Prior ----------------------------------------------------------------- rbf_variance | 1.0000 | | | rbf_lengthscale | 1.0000 | | | - white_variance | 1.0000 | | | + noise variance | 1.0000 | | | .. 
figure:: Figures/tuto_GP_regression_m1.png :align: center @@ -75,24 +72,24 @@ but it is also possible to set a range on to constrain one parameter to be fixed m.unconstrain('') # Required to remove the previous constrains m.constrain_positive('rbf_variance') m.constrain_bounded('lengthscale',1.,10. ) - m.constrain_fixed('white',0.0025) + m.constrain_fixed('noise',0.0025) Once the constrains have been imposed, the model can be optimized:: m.optimize() -If we want to perform some restarts to try to improve the result of the optimization, we can use the optimize_restart function:: +If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restart`` function:: m.optimize_restarts(Nrestarts = 10) Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model resulting model:: - Marginal log-likelihood: 2.001e+01 + Marginal log-likelihood: 3.603e+01 Name | Value | Constraints | Ties | Prior ----------------------------------------------------------------- - rbf_variance | 0.8033 | (+ve) | | - rbf_lengthscale | 1.8033 | (1.0, 10.0) | | - white_variance | 0.0025 | Fixed | | + rbf_variance | 0.8151 | (+ve) | | + rbf_lengthscale | 1.8037 | (1.0, 10.0) | | + noise variance | 0.0025 | Fixed | | .. figure:: Figures/tuto_GP_regression_m2.png :align: center @@ -101,7 +98,7 @@ Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting mo GP regression model after optimization of the parameters. -2 dimensional example +2-dimensional example ===================== Here is a 2 dimensional example:: @@ -131,15 +128,16 @@ Here is a 2 dimensional example:: m.plot() print(m) -The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). The output of the last 2 lines is:: +The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). The output of the last two lines is:: - Marginal log-likelihood: 2.893e+01 - Name | Value | Constraints | Ties | Prior - ------------------------------------------------------------------------- - Mat52_ARD_variance | 0.4094 | (+ve) | | - Mat52_ARD_lengthscale_0 | 2.1060 | (+ve) | | - Mat52_ARD_lengthscale_1 | 2.0546 | (+ve) | | - white_variance | 0.0012 | (+ve) | | + Marginal log-likelihood: 6.682e+01 + Name | Value | Constraints | Ties | Prior + --------------------------------------------------------------------- + Mat52_variance | 0.3860 | (+ve) | | + Mat52_lengthscale_0 | 2.0578 | (+ve) | | + Mat52_lengthscale_1 | 1.8542 | (+ve) | | + white_variance | 0.0023 | (+ve) | | + noise variance | 0.0000 | (+ve) | | .. figure:: Figures/tuto_GP_regression_m3.png :align: center diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst new file mode 100644 index 00000000..80e2bee2 --- /dev/null +++ b/doc/tuto_kernel_overview.rst @@ -0,0 +1,177 @@ + +**************************** +tutorial : A kernel overview +**************************** + +First we import the libraries we will need :: + + import pylab as pb + import numpy as np + import GPy + pb.ion() + +For most kernels, the dimension is the only mandatory parameter to define a kernel object. However, it is also possible to specify the values of the parameters. 
For example, the three following commands are valid for defining a squared exponential kernel (ie rbf or Gaussian) :: + + ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.) + ker2 = GPy.kern.rbf(D=1, variance = 1.5, lengthscale=2.) + ker3 = GPy.kern.rbf(1, .5, .5) + +A `plot` and a `print` functions are implemented to represent kernel objects :: + + print ker1 + + ker1.plot() + ker2.plot() + ker3.plot() + +.. figure:: Figures/tuto_kern_overview_basicdef.png + :align: center + :height: 350px + +Implemented kernels +=================== + +Many kernels are already implemented in GPy. Here is a summary of most of them: + +.. figure:: Figures/tuto_kern_overview_allkern.png + :align: center + :height: 800px + +On the other hand, it is possible to use the `sympy` package to build new kernels. This will be the subject of another tutorial. + +Operations to combine kernel +============================ + +In ``GPy``, kernel objects can be combined with the usual ``+`` and ``*`` operators. :: + + k1 = GPy.kern.rbf(1,variance=1., lengthscale=2) + k2 = GPy.kern.Matern32(1,variance=1., lengthscale=2) + + ker_add = k1 + k2 + print ker_add + + ker_prod = k1 * k2 + print ker_prod + +Note that by default, the operator ``+`` adds kernels defined on the same input space whereas ``*`` assumes that the kernels are defined on different input spaces. Here for example ``ker_add.D`` will return ``1`` whereas ``ker_prod.D`` will return ``2``. + +In order to add kernels defined on the different input spaces, the required command is:: + + ker_add_orth = k1.add_orthogonal(k2) + +.. figure:: Figures/tuto_kern_overview_add_orth.png + :align: center + :height: 350px + + Output of ``ker_add_orth.plot(plot_limits=[[-10,-10],[10,10]])``. + +Example : Building an ANOVA kernel +================================== + +In two dimensions ANOVA kernels have the following form: + +.. math:: + + k_{ANOVA}(x,y) = \prod_{i=1}^2 (1 + k_i(x_i,y_i)) = 1 + k_1(x_1,y_1) + k_2(x_2,y_2) + k_1(x_1,y_1) \times k_2(x_2,y_2). + +Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel for :math:`k_i`. As seen previously, we can define this kernel as follows :: + + k_cst = GPy.kern.bias(1,variance=1.) + k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3) + Kanova = (k_cst + k_mat) * (k_cst + k_mat) + print Kanova + +Printing the resulting kernel outputs the following :: + + Name | Value | Constraints | Ties + --------------------------------------------------------------------------- + biasbias_variance | 1.0000 | | + biasMat52_variance | 1.0000 | | + biasMat52_Mat52_lengthscale | 3.0000 | | (1) + Mat52bias_variance | 1.0000 | | + Mat52bias_Mat52_lengthscale | 3.0000 | | (0) + Mat52Mat52_variance | 1.0000 | | + Mat52Mat52_Mat52_lengthscale | 3.0000 | | (0) + Mat52Mat52_Mat52_lengthscale | 3.0000 | | (1) + +Note the ties between the lengthscales of ``Kanova`` to keep the number of lengthscales equal to 2. On the other hand, there are four variance terms in the new parameterization: one for each term of the right hand part of the above equation. We can illustrate the use of this kernel on a toy example:: + + # sample inputs and outputs + X = np.random.uniform(-3.,3.,(40,2)) + Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:]) + + # Create GP regression model + m = GPy.models.GP_regression(X,Y,Kanova) + m.plot() + + +.. 
+
+.. figure:: Figures/tuto_kern_overview_add_orth.png
+    :align: center
+    :height: 350px
+
+    Output of ``ker_add_orth.plot(plot_limits=[[-10,-10],[10,10]])``.
+
+Example: Building an ANOVA kernel
+=================================
+
+In two dimensions, ANOVA kernels have the following form:
+
+.. math::
+
+    k_{ANOVA}(x,y) = \prod_{i=1}^2 (1 + k_i(x_i,y_i)) = 1 + k_1(x_1,y_1) + k_2(x_2,y_2) + k_1(x_1,y_1) \times k_2(x_2,y_2).
+
+Let us assume that we want to define an ANOVA kernel with a Matern 5/2 kernel for :math:`k_i`. As seen previously, we can define this kernel as follows::
+
+    k_cst = GPy.kern.bias(1,variance=1.)
+    k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
+    Kanova = (k_cst + k_mat) * (k_cst + k_mat)
+    print Kanova
+
+Printing the resulting kernel outputs the following::
+
+               Name               |  Value   |  Constraints  |  Ties
+    ---------------------------------------------------------------------------
+    biasbias_variance             |  1.0000  |               |
+    biasMat52_variance            |  1.0000  |               |
+    biasMat52_Mat52_lengthscale   |  3.0000  |               |  (1)
+    Mat52bias_variance            |  1.0000  |               |
+    Mat52bias_Mat52_lengthscale   |  3.0000  |               |  (0)
+    Mat52Mat52_variance           |  1.0000  |               |
+    Mat52Mat52_Mat52_lengthscale  |  3.0000  |               |  (0)
+    Mat52Mat52_Mat52_lengthscale  |  3.0000  |               |  (1)
+
+Note the ties between the lengthscales of ``Kanova``, which keep the number of distinct lengthscales equal to 2. On the other hand, there are four variance terms in the new parameterization: one for each term on the right-hand side of the above equation.
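+
+Before using ``Kanova`` in a model, we can check the expansion above numerically. This is only a quick sanity-check sketch: it assumes that ``K(X)`` returns the covariance matrix of a kernel on the inputs ``X``, and that the parameters of ``Kanova`` still have their initial values, so that both factors coincide with ``k_mat``::
+
+    Xr = np.random.uniform(-3.,3.,(5,2))    # a few random 2-dimensional inputs
+    K_full = Kanova.K(Xr)                   # left-hand side of the equation
+    K_sum = 1. + k_mat.K(Xr[:,:1]) + k_mat.K(Xr[:,1:]) + k_mat.K(Xr[:,:1]) * k_mat.K(Xr[:,1:])
+    print np.max(np.abs(K_full - K_sum))    # expected to be numerically zero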
+
+We can illustrate the use of this kernel on a toy example::
+
+    # sample inputs and outputs
+    X = np.random.uniform(-3.,3.,(40,2))
+    Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
+
+    # Create GP regression model
+    m = GPy.models.GP_regression(X,Y,Kanova)
+    m.plot()
+
+.. figure:: Figures/tuto_kern_overview_mANOVA.png
+    :align: center
+    :height: 350px
+
+As :math:`k_{ANOVA}` corresponds to the sum of 4 kernels, the best predictor can be split into a sum of 4 functions:
+
+.. math::
+
+    bp(x) & = k(x)^t K^{-1} Y \\
+          & = (1 + k_1(x_1) + k_2(x_2) + k_1(x_1)k_2(x_2))^t K^{-1} Y \\
+          & = 1^t K^{-1} Y + k_1(x_1)^t K^{-1} Y + k_2(x_2)^t K^{-1} Y + (k_1(x_1)k_2(x_2))^t K^{-1} Y
+
+The submodels can be represented with the ``which_functions`` option of ``plot``::
+
+    pb.figure(figsize=(20,5))
+    pb.subplots_adjust(wspace=0.5)
+    pb.subplot(1,5,1)
+    m.plot()                                            # full model
+    pb.subplot(1,5,2)                                   # empty panel, used only to display the "=" sign
+    pb.ylabel("= ",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,3)
+    m.plot(which_functions=[False,True,False,False])
+    pb.ylabel("cst +",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,4)
+    m.plot(which_functions=[False,False,True,False])
+    pb.ylabel("+ ",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,5)
+    pb.ylabel("+ ",rotation='horizontal',fontsize='30')
+    m.plot(which_functions=[False,False,False,True])
+
+.. figure:: Figures/tuto_kern_overview_mANOVAdec.png
+    :align: center
+    :height: 200px
+
+.. import pylab as pb
+    import numpy as np
+    import GPy
+    pb.ion()
+
+    ker1 = GPy.kern.rbf(D=1)    # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+    ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
+    ker3 = GPy.kern.rbf(1, .5, .25)
+
+    ker1.plot()
+    ker2.plot()
+    ker3.plot()
+    #pb.savefig("Figures/tuto_kern_overview_basicdef.png")
+
+    kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
+    kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
+
+    pb.figure(figsize=(16,12))
+    pb.subplots_adjust(wspace=.5, hspace=.5)
+    for i, kern in enumerate(kernels):
+        pb.subplot(3,4,i+1)
+        kern.plot(x=7.5,plot_limits=[0.00001,15.])
+        pb.title(kernel_names[i]+ '\n')
+    #pb.axes([.1,.1,.8,.7])
+    #pb.figtext(.5,.9,'Foo Bar', fontsize=18, ha='center')
+    #pb.figtext(.5,.85,'Lorem ipsum dolor sit amet, consectetur adipiscing elit',fontsize=10,ha='center')
+
+    # actual plot for the noise
+    i = 11
+    X = np.linspace(0.,15.,201)
+    WN = 0*X
+    WN[100] = 1.
+    pb.subplot(3,4,i+1)
+    pb.plot(X,WN,'b')
diff --git a/grid_parameters.py b/grid_parameters.py
deleted file mode 100644
index 64d82755..00000000
--- a/grid_parameters.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-import pylab as pb
-pb.ion()
-import sys
-import GPy
-
-pb.close('all')
-
-N = 200
-M = 15
-resolution=5
-
-X = np.linspace(0,12,N)[:,None]
-Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
-Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
-#k = GPy.kern.rbf(1)
-k = GPy.kern.Matern32(1) + GPy.kern.white(1)
-
-models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
-models[0].scale_factor = 1.
-models[1].scale_factor = 10.
-models[2].scale_factor = 100.
-models[3].scale_factor = 1000.
-         #GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
-         #GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
-         #GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
-
-[m.constrain_fixed('white',0.1) for m in models]
-
-#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
-xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
-
-lls = []
-cgs = []
-grads = []
-count = 0
-for l,v in zip(xx.flatten(),yy.flatten()):
-    count += 1
-    print count, 'of', resolution**2
-    sys.stdout.flush()
-
-    [m.set('lengthscale',l) for m in models]
-    [m.set('_variance',10.**v) for m in models]
-    lls.append([m.log_likelihood() for m in models])
-    grads.append([m.log_likelihood_gradients() for m in models])
-    cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
-
-lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
-cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
-
-for ll,cg in zip(lls,cgs):
-    pb.figure()
-    pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
-    pb.colorbar()
-    try:
-        pb.contour(xx,yy,np.exp(ll),colors='k')
-    except:
-        pass
-    pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
-    pb.colorbar()
-
diff --git a/setup.py b/setup.py
index ebe31175..40c89ccb 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ import os
 
 from numpy.distutils.core import Extension, setup
 
-from sphinx.setup_command import BuildDoc
+#from sphinx.setup_command import BuildDoc
 
 # Version number
 version = '0.1.3'
@@ -19,16 +19,19 @@ setup(name = 'GPy',
       license = "BSD 3-clause",
       keywords = "machine-learning gaussian-processes kernels",
       url = "http://ml.sheffield.ac.uk/GPy/",
-      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples'],
+      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
       package_dir={'GPy': 'GPy'},
       package_data = {'GPy': ['GPy/examples']},
       py_modules = ['GPy.__init__'],
       long_description=read('README.md'),
       #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
       #          sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
-      install_requires=['mock', 'sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
-      setup_requires=['sphinx'],
-      cmdclass = {'build_sphinx': BuildDoc},
+      install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
+      extras_require = {
+        'docs':['Sphinx', 'ipython'],
+      },
+      #setup_requires=['sphinx'],
+      #cmdclass = {'build_sphinx': BuildDoc},
       classifiers=[
      "Development Status :: 1 - Alpha",
      "Topic :: Machine Learning",