Nicolo Fusi 2013-02-12 18:04:07 +00:00
commit 8761f72c59
66 changed files with 1888 additions and 6647 deletions

View file

@ -7,5 +7,5 @@ import models
import inference import inference
import util import util
import examples import examples
#import examples TODO: discuss!
from core import priors from core import priors
import likelihoods

View file

@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad
import priors import priors
from ..util.linalg import jitchol from ..util.linalg import jitchol
from ..inference import optimization from ..inference import optimization
from .. import likelihoods
class model(parameterised): class model(parameterised):
def __init__(self): def __init__(self):
@ -82,7 +83,7 @@ class model(parameterised):
def get(self,name, return_names=False): def get(self,name, return_names=False):
""" """
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
""" """
matches = self.grep_param_names(name) matches = self.grep_param_names(name)
if len(matches): if len(matches):
@ -107,7 +108,7 @@ class model(parameterised):
def get_gradient(self,name, return_names=False): def get_gradient(self,name, return_names=False):
""" """
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
""" """
matches = self.grep_param_names(name) matches = self.grep_param_names(name)
if len(matches): if len(matches):
@ -303,54 +304,62 @@ class model(parameterised):
return '\n'.join(s) return '\n'.join(s)
def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, return_ratio=False, *args): def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3):
""" """
Check the gradient of the model by comparing to a numerical estimate. Check the gradient of the model by comparing to a numerical estimate.
If the overall gradient fails, individual components are tested. If the verbose flag is passed, individual components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter
:type verbose: bool
:param step: The size of the step around which to linearise the objective
:type step: float (default 1e-6)
:param tolerance: the tolerance allowed (see note)
:type tolerance: float (default 1e-3)
Note:-
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
""" """
x = self._get_params_transformed().copy() x = self._get_params_transformed().copy()
#choose a random direction to step in: if not verbose:
dx = step*np.sign(np.random.uniform(-1,1,x.size)) #just check the global ratio
dx = step*np.sign(np.random.uniform(-1,1,x.size))
#evaluate around the point x #evaluate around the point x
self._set_params_transformed(x+dx) self._set_params_transformed(x+dx)
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
self._set_params_transformed(x-dx) self._set_params_transformed(x-dx)
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
self._set_params_transformed(x) self._set_params_transformed(x)
gradient = self._log_likelihood_gradients_transformed() gradient = self._log_likelihood_gradients_transformed()
numerical_gradient = (f1-f2)/(2*dx) numerical_gradient = (f1-f2)/(2*dx)
global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
if verbose:
print "Gradient ratio = ", global_ratio, '\n'
sys.stdout.flush()
if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio): if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
if verbose: return True
print 'Gradcheck passed' else:
return False
else: else:
if verbose: #check the gradient of each parameter individually, and do some pretty printing
print "Global check failed. Testing individual gradients\n" try:
names = self._get_param_names_transformed()
except NotImplementedError:
names = ['Variable %i'%i for i in range(len(x))]
try: # Prepare for pretty-printing
names = self._get_param_names_transformed() header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
except NotImplementedError: max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
names = ['Variable %i'%i for i in range(len(x))] float_len = 10
cols = [max_names]
# Prepare for pretty-printing cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical'] cols = np.array(cols) + 5
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])]) header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
float_len = 10 header_string = map(lambda x: '|'.join(x), [header_string])
cols = [max_names] separator = '-'*len(header_string[0])
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))]) print '\n'.join([header_string[0], separator])
cols = np.array(cols) + 5
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-'*len(header_string[0])
print '\n'.join([header_string[0], separator])
for i in range(len(x)): for i in range(len(x)):
xx = x.copy() xx = x.copy()
@ -368,27 +377,52 @@ class model(parameterised):
ratio = (f1-f2)/(2*step*gradient) ratio = (f1-f2)/(2*step*gradient)
difference = np.abs((f1-f2)/2/step - gradient) difference = np.abs((f1-f2)/2/step - gradient)
if verbose: if (np.abs(ratio-1)<tolerance):
if (np.abs(ratio-1)<tolerance): formatted_name = "\033[92m {0} \033[0m".format(names[i])
formatted_name = "\033[92m {0} \033[0m".format(names[i]) else:
else: formatted_name = "\033[91m {0} \033[0m".format(names[i])
formatted_name = "\033[91m {0} \033[0m".format(names[i]) r = '%.6f' % float(ratio)
r = '%.6f' % float(ratio) d = '%.6f' % float(difference)
d = '%.6f' % float(difference) g = '%.6f' % gradient
g = '%.6f' % gradient ng = '%.6f' % float(numerical_gradient)
ng = '%.6f' % float(numerical_gradient) grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4]) print grad_string
print grad_string
if verbose: def EPEM(self,epsilon=.1,**kwargs):
print '' """
TODO: Should this not be in the GP class?
Expectation maximization for Expectation Propagation.
if return_ratio: kwargs are passed to the optimize function. They can be:
return global_ratio
:param epsilon: convergence criterion
:param max_f_eval: maximum number of function evaluations
:param messages: whether to display messages during optimisation
:param optimizer: which optimizer to use (defaults to the model's preferred optimizer)
:type optimizer: string TODO: valid strings?
"""
assert isinstance(self.likelihood,likelihoods.EP), "EM is not available for Gaussian likelihoods"
log_change = epsilon + 1.
self.log_likelihood_record = []
self.gp_params_record = []
self.ep_params_record = []
iteration = 0
last_value = -np.inf
while log_change > epsilon or not iteration:
print 'EM iteration %s' %iteration
self.update_likelihood_approximation()
self.optimize(**kwargs)
new_value = self.log_likelihood()
log_change = new_value - last_value
if log_change > epsilon:
self.log_likelihood_record.append(new_value)
self.gp_params_record.append(self._get_params())
#self.ep_params_record.append((self.beta,self.Y,self.Z_ep))
last_value = new_value
else: else:
return False convergence = False
#self.beta, self.Y, self.Z_ep = self.ep_params_record[-1]
if return_ratio: self._set_params(self.gp_params_record[-1])
return global_ratio print "Log-likelihood decrement: %s \nLast iteration discarded." %log_change
else: iteration += 1
return True
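The ratio test above is simple enough to sketch outside the model class. A minimal standalone version, with hypothetical placeholders `f` and `grad` standing in for the objective and its analytical gradient (not part of this commit):

import numpy as np

def checkgrad(f, grad, x, step=1e-6, tolerance=1e-3):
    # Step in a random direction and compare the central-difference estimate
    # of the directional derivative to the analytical one; accept if their
    # ratio is within `tolerance` of unity, as in model.checkgrad above.
    dx = step*np.sign(np.random.uniform(-1, 1, x.size))
    numerical = (f(x+dx) - f(x-dx))/2.
    analytical = np.dot(dx, grad(x))
    ratio = numerical/analytical
    return np.abs(1. - ratio) < tolerance

# e.g. a quadratic, whose analytical gradient is exact:
print checkgrad(lambda x: .5*np.dot(x, x), lambda x: x, np.random.randn(5))  # True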

View file

@ -102,6 +102,11 @@ class parameterised(object):
else: else:
return expr return expr
def Nparam_transformed(self):
ties = 0
for ar in self.tied_indices:
ties += ar.size - 1
return self.Nparam - len(self.constrained_fixed_indices) - ties
def constrain_positive(self, which): def constrain_positive(self, which):
""" """
@ -149,8 +154,6 @@ class parameterised(object):
def constrain_negative(self,which): def constrain_negative(self,which):
""" """
Set negative constraints. Set negative constraints.

View file

@ -17,7 +17,7 @@ K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,D).T
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q) k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)') m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1) # m.constrain_fixed('S', 1)

View file

@ -3,16 +3,15 @@
""" """
Simple Gaussian Processes classification Gaussian Processes classification
""" """
import pylab as pb import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
default_seed=10000 default_seed=10000
######################################
## 2 dimensional example def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
def crescent_data(model_type='Full', inducing=10, seed=default_seed):
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@ -21,20 +20,28 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int :type inducing: int
""" """
data = GPy.util.datasets.crescent_data(seed=seed) data = GPy.util.datasets.crescent_data(seed=seed)
likelihood = GPy.inference.likelihoods.probit(data['Y'])
# Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1])
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
if model_type=='Full': if model_type=='Full':
m = GPy.models.GP_EP(data['X'],likelihood) m = GPy.models.GP(data['X'],likelihood,kernel)
else: else:
# create sparse GP EP model # create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type) m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.approximate_likelihood() m.update_likelihood_approximation()
print(m) print(m)
# optimize # optimize
m.em() m.optimize()
print(m) print(m)
# plot # plot
@ -42,54 +49,67 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
return m return m
def oil(): def oil():
"""Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.""" """
Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) # Kernel object
kernel = GPy.kern.rbf(12)
# create simple GP model # Likelihood object
m = GPy.models.GP_EP(data['X'],likelihood) distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)
# constrain all parameters to be positive # Create GP model
m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
# Constrain all parameters to be positive
m.constrain_positive('') m.constrain_positive('')
m.tie_param('lengthscale') m.tie_param('lengthscale')
m.approximate_likelihood() m.update_likelihood_approximation()
# optimize # Optimize
m.optimize() m.optimize()
# plot
#m.plot()
print(m) print(m)
return m return m
def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed): def toy_linear_1d_classification(seed=default_seed):
"""Simple 1D classification example. """
:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. Simple 1D classification example
:param seed : seed value for data generation (default is 10000). :param seed : seed value for data generation (default is 10000).
:type seed: int :type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) Y = data['Y'][:, 0:1]
assert model_type in ('Full','DTC','FITC') Y[Y == -1] = 0
# create simple GP model # Kernel object
if model_type=='Full': kernel = GPy.kern.rbf(1)
m = GPy.models.simple_GP_EP(data['X'],likelihood)
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.constrain_positive('var') # Likelihood object
m.constrain_positive('len') distribution = GPy.likelihoods.likelihood_functions.probit()
m.tie_param('lengthscale') likelihood = GPy.likelihoods.EP(Y,distribution)
m.approximate_likelihood()
# Optimize and plot # Model definition
m.em(plot_all=False) # EM algorithm m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
# Optimize
"""
EPEM runs a loop that consists of two steps:
1) EP likelihood approximation:
m.update_likelihood_approximation()
2) Parameters optimization:
m.optimize()
"""
m.EPEM()
# Plot
pb.subplot(211)
m.plot_f()
pb.subplot(212)
m.plot() m.plot()
print(m) print(m)
return m return m
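Both demos follow the same pattern introduced by this commit: build a likelihood function, wrap it in an EP likelihood, hand both to a GP, then alternate the EP approximation and hyperparameter optimization. A condensed sketch of that workflow (module paths as in the diff; `X` and `Y` are placeholder data, labels coded {0,1} as in the 1D demo):

import numpy as np
import GPy

X = np.random.randn(50, 1)
Y = (np.random.randn(50, 1) > 0)*1.        # placeholder binary labels

kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(X, likelihood, kernel)
m.ensure_default_constraints()

m.update_likelihood_approximation()        # E-step: refit the EP site parameters
m.optimize()                               # M-step: optimize the hyperparameters
# m.EPEM() alternates the two steps until the log-likelihood stops improving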

GPy/examples/poisson.py (new file, 47 lines)
View file

@ -0,0 +1,47 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Gaussian Processes + Expectation Propagation - Poisson Likelihood
"""
import pylab as pb
import numpy as np
import GPy
default_seed=10000
def toy_1d(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 10000).
:type seed: int
"""
X = np.arange(0,100,5)[:,None]
F = np.round(np.sin(X/18.) + .1*X) + np.arange(5,25)[:,None]
E = np.random.randint(-5,5,20)[:,None]
Y = F + E
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.Poisson()
likelihood = GPy.likelihoods.EP(Y,distribution)
m = GPy.models.GP(X,likelihood,kernel)
m.ensure_default_constraints()
# Approximate likelihood
m.update_likelihood_approximation()
# Optimize and plot
m.optimize()
#m.EPEM FIXME
print m
# Plot
pb.subplot(211)
m.plot_f() #GP plot
pb.subplot(212)
m.plot() #Output plot
return m

View file

@ -0,0 +1,60 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
pb.ion()
N = 500
M = 5
pb.close('all')
######################################
## 1 dimensional example
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
#Y = np.sin(X)+np.random.randn(N,1)*0.05
F = np.sin(X)+np.random.randn(N,1)*0.05
Y = np.ones([F.shape[0],1])
Y[F<0] = -1
likelihood = GPy.inference.likelihoods.probit(Y)
# construct kernel
rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
# contrain all parameters to be positive
#m.constrain_fixed('prec',100.)
m = GPy.models.sparse_GP(X, Y, kernel, M=M)
m.ensure_default_constraints()
#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
# m.approximate_likelihood()
print m.checkgrad()
m.optimize('tnc', messages = 1)
m.plot(samples=3)
print m
n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
n.ensure_default_constraints()
if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian):
n.approximate_likelihood()
print n.checkgrad()
pb.figure()
n.plot()
"""
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.ensure_default_constraints()
print m.checkgrad()
"""

View file

@ -1,240 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import random
from scipy import stats, linalg
from .likelihoods import likelihood
from ..core import model
from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot
from .. import kern
class EP_base:
"""
Expectation Propagation.
This is just the base class for expectation propagation. We'll extend it for full and sparse EP.
"""
def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]):
self.likelihood = likelihood
self.epsilon = epsilon
self.eta, self.delta = powerep
self.jitter = 1e-12
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.restart_EP()
def restart_EP(self):
"""
Set the EP approximation to initial state
"""
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.mu = np.zeros(self.N)
class Full(EP_base):
"""
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param K: prior covariance matrix
:type K: np.ndarray (N x N)
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
def __init__(self,K,likelihood,*args,**kwargs):
assert K.shape[0] == K.shape[1]
self.K = K
self.N = self.K.shape[0]
EP_base.__init__(self,likelihood,*args,**kwargs)
def fit_EP(self,messages=False):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006 (pp. 52-60)
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
#self.K = self.kernel.K(self.X,self.X)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu=np.zeros(self.N)
self.Sigma=self.K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=np.float64)
self.v_ = np.empty(self.N,dtype=np.float64)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=np.float64)
self.Z_hat = np.empty(self.N,dtype=np.float64)
phi = np.empty(self.N,dtype=np.float64)
mu_hat = np.empty(self.N,dtype=np.float64)
sigma2_hat = np.empty(self.N,dtype=np.float64)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K)
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde)
epsilon_np1 = np.mean((self.tau_tilde-self.np1[-1])**2)
epsilon_np2 = np.mean((self.v_tilde-self.np2[-1])**2)
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
if messages:
print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1)
class FITC(EP_base):
"""
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param Knn_diag: The diagonal elements of Knn is a 1D vector
:param Kmn: The 'cross' variance between inducing inputs and data
:param Kmm: the covariance matrix of the inducing inputs
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs):
self.Knn_diag = Knn_diag
self.Kmn = Kmn
self.Kmm = Kmm
self.M = self.Kmn.shape[0]
self.N = self.Kmn.shape[1]
assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations'
assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N'
EP_base.__init__(self,likelihood,*args,**kwargs)
def fit_EP(self):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
"""
Prior approximation parameters:
q(f|X) = int N(f|Kfu*Kuu_inv*u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
self.P0 = self.Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag
self.R0 = jitchol(self.Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N)
self.P = self.P0.copy()
self.R = self.R0.copy()
self.Diag = self.Diag0.copy()
self.Sigma_diag = self.Knn_diag
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=np.float64)
self.v_ = np.empty(self.N,dtype=np.float64)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=np.float64)
self.Z_hat = np.empty(self.N,dtype=np.float64)
phi = np.empty(self.N,dtype=np.float64)
mu_hat = np.empty(self.N,dtype=np.float64)
sigma2_hat = np.empty(self.N,dtype=np.float64)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.arange(self.N)
random.shuffle(update_order)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1.
dii = self.Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
self.R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0
self.RPT0 = np.dot(self.R0,self.P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.w = self.Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())

View file

@ -1,219 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
class likelihood:
def __init__(self,Y,location=0,scale=1):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood used
"""
self.Y = Y
self.N = self.Y.shape[0]
self.location = location
self.scale = scale
def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u):
"""
Plot the predictive distribution of the GP model for 1-dimensional inputs
:param X_new: The points at which to make a prediction
:param Mean_new: mean values at X_new
:param Var_new: variance values at X_new
:param X_u: input (inducing) points used to train the model
:param Mean_u: mean values at X_u
:param Var_new: variance values at X_u
"""
assert X_new.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,Mean_new,Var_new)
pb.errorbar(X_u,Mean_u,2*np.sqrt(Var_u),fmt='r+')
pb.plot(X_u,Mean_u,'ro')
def plot2D(self,X,X_new,F_new,U=None):
"""
Predictive distribution of the fitted GP model for 2-dimensional inputs
:param X_new: The points at which to make a prediction
:param Mean_new: mean values at X_new
:param Var_new: variance values at X_new
:param X_u: input points used to train the model
:param Mean_u: mean values at X_u
:param Var_new: variance values at X_u
"""
N,D = X_new.shape
assert D == 2, 'Number of dimensions must be 2'
n = np.sqrt(N)
x1min = X_new[:,0].min()
x1max = X_new[:,0].max()
x2min = X_new[:,1].min()
x2max = X_new[:,1].max()
pb.imshow(F_new.reshape(n,n),extent=(x1min,x1max,x2max,x2min),vmin=0,vmax=1)
pb.colorbar()
C1 = np.arange(self.N)[self.Y.flatten()==1]
C2 = np.arange(self.N)[self.Y.flatten()==-1]
[pb.plot(X[i,0],X[i,1],'ro') for i in C1]
[pb.plot(X[i,0],X[i,1],'bo') for i in C2]
pb.xlim(x1min,x1max)
pb.ylim(x2min,x2max)
if U is not None:
[pb.plot(a,b,'wo') for a,b in U]
class probit(likelihood):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
z = self.Y[i]*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
mu_hat = v_i/tau_i + self.Y[i]*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat
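These are the closed-form probit moments (Rasmussen & Williams 2006, section 3.6): with $z_i = y_i v_{-i}/\sqrt{\tau_{-i}^2 + \tau_{-i}}$,

$$
\hat Z_i = \Phi(z_i), \qquad \hat\mu_i = \frac{v_{-i}}{\tau_{-i}} + \frac{y_i\,\phi(z_i)}{\hat Z_i\sqrt{\tau_{-i}^2 + \tau_{-i}}}, \qquad \hat\sigma^2_i = \frac{1}{\tau_{-i}} - \frac{\phi(z_i)}{(\tau_{-i}^2 + \tau_{-i})\hat Z_i}\left(z_i + \frac{\phi(z_i)}{\hat Z_i}\right)
$$

where $\phi$ and $\Phi$ are the standard normal pdf and cdf.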
def plot1Db(self,X,X_new,F_new,U=None):
assert X.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
pb.plot(X,(self.Y+1)/2,'kx',mew=1.5)
pb.ylim(-0.2,1.2)
if U is not None:
pb.plot(U,U*0+.5,'r|',mew=1.5,markersize=12)
def predictive_mean(self,mu,variance):
return stats.norm.cdf(mu/np.sqrt(1+variance))
def _log_likelihood_gradients():
raise NotImplementedError
class poisson(likelihood):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
$$
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(self.Y[i]),rate)
return pdf_norm_f*poisson
def log_pnm(f):
"""
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i])
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
Golden Search is used to find the mode of the poisson_norm distribution, and the grid for Simpson's Rule is defined around that mode.
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit
golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(self.Y[i],2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
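The moment computation is just numerical integration of the likelihood-times-cavity product. A stripped-down standalone sketch of the Simpson step (mode search and grid placement are as in the code above):

import numpy as np
from scipy import stats

def simpson_moments(integrand, A, B, K):
    # Zeroth/first/second moments of `integrand` on [A,B] with K (even) intervals
    h = (B - A)/K
    x = np.linspace(A, B, K + 1)
    w = np.ones(K + 1)
    w[1:-1:2], w[2:-1:2] = 4., 2.            # Simpson weights 1,4,2,...,2,4,1
    y = integrand(x)*w
    Z_hat = np.sum(y)*h/3                    # zero-th moment (normalizer)
    mu_hat = np.sum(y*x)*h/(3*Z_hat)         # first moment
    m2 = np.sum(y*x**2)*h/(3*Z_hat)          # second moment
    return Z_hat, mu_hat, m2 - mu_hat**2     # (Z_hat, mu_hat, sigma2_hat)

# sanity check: moments of a standard normal, effectively on its full support
print simpson_moments(stats.norm.pdf, -6., 6., 100)   # ~ (1.0, 0.0, 1.0)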
def plot1Db(self,X,X_new,F_new,F2_new=None,U=None):
pb.subplot(212)
#gpplot(X_new,F_new,np.sqrt(F2_new))
pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME
pb.plot(X,self.Y,'kx',mew=1.5)
if U is not None:
pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
def predictive_mean(self,mu,variance):
return np.exp(mu*self.scale + self.location)
def predictive_variance(self,mu,variance):
return mu
def _log_likelihood_gradients():
raise NotImplementedError
class gaussian(likelihood):
"""
Gaussian likelihood
Y is expected to take values in (-inf,inf)
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
s = 1. if self.Y[i] == 0 else 1./self.Y[i]
sigma2_hat = 1./(1./sigma**2 + 1./s**2)
mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
return Z_hat, mu_hat, sigma2_hat
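The Gaussian case is a product of two Gaussians and therefore exact: with cavity mean $\mu_{-i} = v_{-i}/\tau_{-i}$, cavity variance $\sigma^2_{-i} = 1/\tau_{-i}$ and noise scale $s$,

$$
\hat\sigma^2_i = \left(\sigma_{-i}^{-2} + s^{-2}\right)^{-1}, \qquad \hat\mu_i = \hat\sigma^2_i\left(\frac{\mu_{-i}}{\sigma^2_{-i}} + \frac{y_i}{s^2}\right), \qquad \hat Z_i = \mathcal{N}\left(y_i \mid \mu_{-i},\, \sigma^2_{-i} + s^2\right)
$$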
def plot1Db(self,X,X_new,F_new,U=None):
assert X.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
pb.plot(X,self.Y,'kx',mew=1.5)
if U is not None:
pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
def predictive_mean(self,mu,Sigma):
return mu
def _log_likelihood_gradients():
raise NotImplementedError

View file

@ -14,14 +14,14 @@ class Matern32(kernpart):
.. math:: .. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param D: the number of input dimensions :param D: the number of input dimensions
:type D: int :type D: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i` :param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
@ -35,17 +35,19 @@ class Matern32(kernpart):
self.Nparam = 2 self.Nparam = 2
self.name = 'Mat32' self.name = 'Mat32'
if lengthscale is not None: if lengthscale is not None:
assert lengthscale.shape == (1,) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.D + 1 self.Nparam = self.D + 1
self.name = 'Mat32_ARD' self.name = 'Mat32'
if lengthscale is not None: if lengthscale is not None:
assert lengthscale.shape == (self.D,) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.D, "bad number of lengthscales"
else: else:
lengthscale = np.ones(self.D) lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale))) self._set_params(np.hstack((variance,lengthscale.flatten())))
def _get_params(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
@ -116,9 +118,9 @@ class Matern32(kernpart):
:param F1: vector of derivatives of F :param F1: vector of derivatives of F
:type F1: np.array :type F1: np.array
:param F2: vector of second derivatives of F :param F2: vector of second derivatives of F
:type F2: np.array :type F2: np.array
:param lower,upper: boundaries of the input domain :param lower,upper: boundaries of the input domain
:type lower,upper: floats :type lower,upper: floats
""" """
assert self.D == 1 assert self.D == 1
def L(x,i): def L(x,i):
@ -133,4 +135,3 @@ class Matern32(kernpart):
#print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
#return(G) #return(G)
return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))

View file

@ -13,14 +13,14 @@ class Matern52(kernpart):
.. math:: .. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param D: the number of input dimensions :param D: the number of input dimensions
:type D: int :type D: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i` :param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
@ -33,17 +33,19 @@ class Matern52(kernpart):
self.Nparam = 2 self.Nparam = 2
self.name = 'Mat52' self.name = 'Mat52'
if lengthscale is not None: if lengthscale is not None:
assert lengthscale.shape == (1,) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.D + 1 self.Nparam = self.D + 1
self.name = 'Mat52_ARD' self.name = 'Mat52'
if lengthscale is not None: if lengthscale is not None:
assert lengthscale.shape == (self.D,) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.D, "bad number of lengthscales"
else: else:
lengthscale = np.ones(self.D) lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale))) self._set_params(np.hstack((variance,lengthscale.flatten())))
def _get_params(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""

View file

@ -13,14 +13,14 @@ class exponential(kernpart):
.. math:: .. math::
k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param D: the number of input dimensions :param D: the number of input dimensions
:type D: int :type D: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i` :param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
@ -33,17 +33,19 @@ class exponential(kernpart):
self.Nparam = 2 self.Nparam = 2
self.name = 'exp' self.name = 'exp'
if lengthscale is not None: if lengthscale is not None:
assert lengthscale.shape == (1,) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.D + 1 self.Nparam = self.D + 1
self.name = 'exp_ARD' self.name = 'exp'
if lengthscale is not None: if lengthscale is not None:
assert lengthscale.shape == (self.D,) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.D, "bad number of lengthscales"
else: else:
lengthscale = np.ones(self.D) lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale))) self._set_params(np.hstack((variance,lengthscale.flatten())))
def _get_params(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
@ -87,7 +89,7 @@ class exponential(kernpart):
dl = self.variance*dvar*dist2M.sum(-1)*invdist dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial) target[1] += np.sum(dl*partial)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,partial,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
#NB: derivative of diagonal elements wrt lengthscale is 0 #NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial) target[0] += np.sum(partial)
@ -110,9 +112,9 @@ class exponential(kernpart):
:param F: vector of functions :param F: vector of functions
:type F: np.array :type F: np.array
:param F1: vector of derivatives of F :param F1: vector of derivatives of F
:type F1: np.array :type F1: np.array
:param lower,upper: boundaries of the input domain :param lower,upper: boundaries of the input domain
:type lower,upper: floats :type lower,upper: floats
""" """
assert self.D == 1 assert self.D == 1
def L(x,i): def L(x,i):
@ -124,8 +126,3 @@ class exponential(kernpart):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
Flower = np.array([f(lower) for f in F])[:,None] Flower = np.array([f(lower) for f in F])[:,None]
return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))

View file

@ -3,6 +3,7 @@
import numpy as np import numpy as np
import pylab as pb
from ..core.parameterised import parameterised from ..core.parameterised import parameterised
from kernpart import kernpart from kernpart import kernpart
import itertools import itertools
@ -155,7 +156,7 @@ class kern(parameterised):
D = K1.D + K2.D D = K1.D + K2.D
newkernparts = [product_orthogonal(k1,k2).parts[0] for k1, k2 in itertools.product(K1.parts,K2.parts)] newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
slices = [] slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices): for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
@ -235,6 +236,8 @@ class kern(parameterised):
X2 = X X2 = X
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
#TODO: transform the gradients here!
return target return target
def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None): def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None):
@ -372,3 +375,59 @@ class kern(parameterised):
#TODO: there are some extra terms to compute here! #TODO: there are some extra terms to compute here!
return target_mu, target_S return target_mu, target_S
def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None):
if which_functions=='all':
which_functions = [True]*self.Nparts
if self.D == 1:
if x is None:
x = np.zeros((1,1))
else:
x = np.asarray(x)
assert x.size == 1, "The size of the fixed variable x is not 1"
x = x.reshape((1,1))
if plot_limits == None:
xmin, xmax = (x-5).flatten(), (x+5).flatten()
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin,xmax,resolution or 201)[:,None]
Kx = self.K(Xnew,x,slices2=which_functions)
pb.plot(Xnew,Kx)
pb.xlim(xmin,xmax)
pb.xlabel("x")
pb.ylabel("k(x,%0.1f)" %x)
elif self.D == 2:
if x is None:
x = np.zeros((1,2))
else:
x = np.asarray(x)
assert x.size == 2, "The size of the fixed variable x is not 2"
x = x.reshape((1,2))
if plot_limits == None:
xmin, xmax = (x-5).flatten(), (x+5).flatten()
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
resolution = resolution or 51
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
xg = np.linspace(xmin[0],xmax[0],resolution)
yg = np.linspace(xmin[1],xmax[1],resolution)
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
Kx = self.K(Xnew,x,slices2=which_functions)
Kx = Kx.reshape(resolution,resolution).T
pb.contour(xg,yg,Kx,vmin=Kx.min(),vmax=Kx.max(),cmap=pb.cm.jet)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
pb.xlabel("x1")
pb.ylabel("x2")
pb.title("k(x1,x2 ; %0.1f,%0.1f)" %(x[0,0],x[0,1]) )
else:
raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"

View file

@ -15,8 +15,8 @@ class linear(kernpart):
:param D: the number of input dimensions :param D: the number of input dimensions
:type D: int :type D: int
:param variances: the vector of variances :math:`\sigma^2_i` :param variances: the vector of variances :math:`\sigma^2_i`
:type variances: np.ndarray of size (1,) or (D,) depending on ARD :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
""" """
@ -28,21 +28,20 @@ class linear(kernpart):
self.Nparam = 1 self.Nparam = 1
self.name = 'linear' self.name = 'linear'
if variances is not None: if variances is not None:
if isinstance(variances, float): variances = np.asarray(variances)
variances = np.array([variances]) assert variances.size == 1, "Only one variance needed for non-ARD kernel"
assert variances.shape == (1,)
else: else:
variances = np.ones(1) variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,)) self._Xcache, self._X2cache = np.empty(shape=(2,))
else: else:
self.Nparam = self.D self.Nparam = self.D
self.name = 'linear_ARD' self.name = 'linear'
if variances is not None: if variances is not None:
assert variances.shape == (self.D,) variances = np.asarray(variances)
assert variances.size == self.D, "bad number of variances"
else: else:
variances = np.ones(self.D) variances = np.ones(self.D)
self._set_params(variances) self._set_params(variances.flatten())
def _get_params(self): def _get_params(self):
return self.variances return self.variances

View file

@ -12,7 +12,7 @@ class rbf(kernpart):
.. math:: .. math::
k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}} k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
@ -21,7 +21,7 @@ class rbf(kernpart):
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the vector of lengthscale of the kernel :param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object

GPy/likelihoods/EP.py (new file, 311 lines)
View file

@ -0,0 +1,311 @@
import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol
from likelihood import likelihood
class EP(likelihood):
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
likelihood_function : a likelihood function (see likelihood_functions.py)
"""
self.likelihood_function = likelihood_function
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data
self.N = self.data.size
self.is_heteroscedastic = True
self.Nparams = 0
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)
self.Z = 0
self.YYT = None
def predictive_values(self,mu,var):
return self.likelihood_function.predictive_values(mu,var)
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass # TODO: the EP likelihood might want to take some parameters...
def _gradients(self,partial):
return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.precision = self.tau_tilde
self.covariance_matrix = np.diag(1./self.precision)
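The normalization constant assembled here is the EP approximation to the log marginal, used by the GP as a correction term; with site means $\tilde\mu_i = \tilde v_i/\tilde\tau_i$ it reads

$$
Z_{EP} = \sum_i \log \hat Z_i + \frac{1}{2}\sum_i \log\left(\tau_{-i}^{-1} + \tilde\tau_i^{-1}\right) + \frac{1}{2}\sum_i \frac{(\mu_{-i} - \tilde\mu_i)^2}{\tau_{-i}^{-1} + \tilde\tau_i^{-1}}
$$

matching `self.Z` in `_compute_GP_variables` above.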
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=Sigma[:,i].reshape(self.N,1)
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_DTC(self, Knn_diag, Kmn, Kmm):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int N(f|Kfu*Kuu_inv*u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
KmnKnm = np.dot(Kmn, Kmn.T)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
mu = np.zeros(self.N)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
tau_ = np.empty(self.N,dtype=float)
v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
tau_tilde = np.zeros(self.N)
v_tilde = np.zeros(self.N)
np1 = [tau_tilde.copy()]
np2 = [v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] = tau_tilde[i] + Delta_tau
v_tilde[i] = v_tilde[i] + Delta_v
#Posterior distribution parameters update
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu = mu + (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N
np1.append(tau_tilde.copy())
np2.append(v_tilde.copy())
self.tau_tilde, self.v_tilde = tau_tilde, v_tilde
return self._compute_GP_variables()
def fit_FITC(self, Knn_diag, Kmn, Kmm):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
"""
Prior approximation parameters:
q(f|X) = int N(f|Kfu*Kuu_inv*u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Initial values - Site parameters (zero-initialised)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()

View file

@@ -0,0 +1,56 @@
import numpy as np
from likelihood import likelihood
class Gaussian(likelihood):
def __init__(self,data,variance=1.,normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.data = data
self.N, self.D = data.shape
self.Z = 0. # a correction factor which accounts for the approximation made
#normalisation
if normalize:
self._mean = data.mean(0)[None,:]
self._std = data.std(0)[None,:]
self.Y = (self.data - self._mean)/self._std
else:
self._mean = np.zeros((1,self.D))
self._std = np.ones((1,self.D))
self.Y = self.data
#TODO: make this work efficiently (only compute YYT if D>>N)
self.YYT = np.dot(self.Y,self.Y.T)
self.trYYT = np.trace(self.YYT)
self._set_params(np.asarray(variance))
def _get_params(self):
return np.asarray(self._variance)
def _get_param_names(self):
return ["noise variance"]
def _set_params(self,x):
self._variance = float(x)
self.covariance_matrix = np.eye(self.N)*self._variance
self.precision = 1./self._variance
def predictive_values(self,mu,var):
"""
Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval
"""
mean = mu*self._std + self._mean
true_var = (var + self._variance)*self._std**2
_5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var)
return mean, _5pc, _95pc
def fit_full(self, K):
"""
No approximations needed
"""
pass
def _gradients(self,partial):
return np.sum(partial)
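A minimal usage sketch for the Gaussian likelihood above; the data is synthetic and purely illustrative, and only calls defined in this file are used:

import numpy as np
from GPy.likelihoods import Gaussian

Y = 5. + 2.*np.random.randn(10, 1)
lik = Gaussian(Y, variance=0.1, normalize=True)
print lik.Y.mean(), lik.Y.std()  # the GP sees normalised targets: ~0 and ~1
#map a predictive mean/variance (in the normalised space) back to the data scale
mean, _5pc, _95pc = lik.predictive_values(np.zeros((10, 1)), 0.5*np.ones((10, 1)))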

View file

@@ -0,0 +1,4 @@
from EP import EP
from Gaussian import Gaussian
# TODO: from Laplace import Laplace
import likelihood_functions as functions

View file

@@ -0,0 +1,35 @@
import numpy as np
class likelihood:
"""
The atom for a likelihood class
This object interfaces the GP and the data. The most basic likelihood
(Gaussian) inherits directly from this, as does the EP algorithm
Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape
self.covariance_matrix : the effective (noise) covariance of the GP targets
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.is_heteroscedastic : enables significant computational savings in GP
self.precision : a scalar or vector representation of the effective target precision
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
"""
def __init__(self,data):
raise ValueError, "this class is not to be instantiated"
def _get_params(self):
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def _set_params(self,x):
raise NotImplementedError
def fit(self):
raise NotImplementedError
def _gradients(self,partial):
raise NotImplementedError
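To make the contract above concrete, here is a minimal hypothetical subclass satisfying it; the class name, the fixed-variance choice and the module path GPy.likelihoods.likelihood are illustrative assumptions, not part of this commit:

import numpy as np
from GPy.likelihoods.likelihood import likelihood

class fixed_noise_Gaussian(likelihood):
    """Toy likelihood: Gaussian noise with a fixed, non-optimised variance"""
    def __init__(self, data, variance=1.):
        self.data = self.Y = data
        self.N, self.D = data.shape
        self.Z = 0.                      # no correction: this is not an approximation
        self.Nparams = 0
        self.is_heteroscedastic = False
        self.precision = 1./variance
        self.covariance_matrix = np.eye(self.N)*variance
        self.YYT = None
    def _get_params(self):
        return np.zeros(0)               # nothing to optimise
    def _get_param_names(self):
        return []
    def _set_params(self, x):
        pass
    def fit_full(self, K):
        pass                             # Gaussian: no iterative fitting needed
    def _gradients(self, partial):
        return np.zeros(0)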

View file

@@ -0,0 +1,134 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
class likelihood_function:
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.ndarray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self,location=0,scale=1):
self.location = location
self.scale = scale
class probit(likelihood_function):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def moments_match(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param data_i: observed datum y_i (float)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance ratio of the cavity distribution (float)
"""
# TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1"
if data_i == 0: data_i = -1 #NOTE Binary classification works better with classes {-1,1}; 1D-plotting works better with classes {0,1}.
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat
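#The expressions above are the standard probit-EP moments (cf. Rasmussen & Williams,
#2006, section 3.6): with cavity mean mu_ = v_i/tau_i and variance s2_ = 1/tau_i,
#   z          = y*mu_/sqrt(1 + s2_)
#   Z_hat      = Phi(z)
#   mu_hat     = mu_ + y*s2_*N(z)/(Z_hat*sqrt(1 + s2_))
#   sigma2_hat = s2_ - s2_**2*N(z)*(z + N(z)/Z_hat)/((1 + s2_)*Z_hat)
#which the code writes directly in terms of (tau_i, v_i), using sqrt(tau_i**2+tau_i) = tau_i*sqrt(1+1/tau_i).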
def predictive_values(self,mu,var):
"""
Compute mean, and confidence interval (percentiles 2.5 and 97.5) of the prediction
"""
mu = mu.flatten()
var = var.flatten()
mean = stats.norm.cdf(mu/np.sqrt(1+var))
p_025 = np.zeros(mu.shape)
p_975 = np.ones(mu.shape)
return mean, p_025, p_975
class Poisson(likelihood_function):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
$$
"""
def moments_match(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param data_i: observed datum y_i (float)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance ratio of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(data_i),rate)
return pdf_norm_f*poisson
def log_pnm(f):
"""
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(data_i,2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
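#Composite Simpson's rule: integral ~= h/3 * (f(A) + 4*sum f(x_odd) + 2*sum f(x_even) + f(B));
#zeroth/first/second above apply these weights to the integrands f, x*f and x**2*f.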
return float(Z_hat), float(mu_hat), float(sigma2_hat)
def predictive_values(self,mu,var):
"""
Compute mean, and confidence interval (percentiles 2.5 and 97.5) of the prediction
"""
mean = np.exp(mu.flatten()*self.scale + self.location)
tmp = stats.poisson.ppf(np.array([[.025],[.975]]),mean)
p_025 = tmp[0,:]
p_975 = tmp[1,:]
return mean,p_025,p_975
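For illustration, this is how the EP loop drives these moment-matching routines; the numbers are arbitrary, and `functions` is the alias exported by GPy.likelihoods in this commit:

from GPy.likelihoods import functions

lik = functions.probit()
#cavity with mean v_i/tau_i = 0.3 and variance 1/tau_i = 2.0, observation y = 1
Z_hat, mu_hat, sigma2_hat = lik.moments_match(1., 0.5, 0.15)

pois = functions.Poisson()
Z_hat, mu_hat, sigma2_hat = pois.moments_match(3., 1., 0.5)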

View file

@@ -5,10 +5,12 @@ import numpy as np
 import pylab as pb
 import sys, pdb
 from GPLVM import GPLVM
-from sparse_GP_regression import sparse_GP_regression
+from sparse_GP import sparse_GP
 from GPy.util.linalg import pdinv
+from ..likelihoods import Gaussian
+from .. import kern

-class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
     Bayesian Gaussian Process Latent Variable Model
@@ -20,15 +22,23 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
     :type init: 'PCA'|'random'
     """
-    def __init__(self, Y, Q, init='PCA', **kwargs):
+    def __init__(self, Y, Q, init='PCA', M=10, Z=None, **kwargs):
         X = self.initialise_latent(init, Q, Y)
-        S = np.ones_like(X) * 1e-2
-        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+        if Z is None:
+            Z = np.random.permutation(X.copy())[:M]
+        else:
+            assert Z.shape[1]==X.shape[1]
+        kernel = kern.rbf(Q) + kern.white(Q)
+        S = np.ones_like(X) * 1e-2
+        sparse_GP.__init__(self, X, Gaussian(Y), X_uncertainty=S, Z=Z, **kwargs)

     def _get_param_names(self):
         X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
         S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-        return (X_names + S_names + sparse_GP_regression._get_param_names(self))
+        return (X_names + S_names + sparse_GP._get_param_names(self))

     def _get_params(self):
         """
@@ -36,17 +46,17 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         The resulting 1-D array has this structure:
         ===============================================================
-        | mu | S | Z | beta | theta |
+        | mu | S | Z | theta | beta |
         ===============================================================
         """
-        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self)))
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP._get_params(self)))

     def _set_params(self,x):
         N, Q = self.N, self.Q
         self.X = x[:self.X.size].reshape(N,Q).copy()
         self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP_regression._set_params(self, x[(2*N*Q):])
+        sparse_GP._set_params(self, x[(2*N*Q):])

     def dL_dmuS(self):
         dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
@@ -58,5 +68,5 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))

     def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))

274
GPy/models/GP.py Normal file
View file

@@ -0,0 +1,274 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
from ..likelihoods import EP
class GP(model):
"""
Gaussian Process model for regression and EP
:param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
#FIXME normalize vs normalise
def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
# parse arguments
self.Xslices = Xslices
self.X = X
assert len(self.X.shape)==2
self.N, self.Q = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
#here's some simple normalisation for the inputs
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
self.likelihood = likelihood
#assert self.X.shape[0] == self.likelihood.Y.shape[0]
#self.N, self.D = self.likelihood.Y.shape
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p[:self.kern.Nparam])
#self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
#the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
alpha = np.dot(self.Ki,self.likelihood.Y)
self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
self.dL_dK = 0.5*(tmp - self.D*self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian (or direct: TODO) likelihood no iteration is required:
the fit_full call below is then a no-op.
"""
self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
The log marginal likelihood of the GP.
For an EP model, can be written as the log likelihood of a regression
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
For the kernel parameters, use the chain rule via dL_dK
For the likelihood parameters, pass in alpha = K^-1 y
"""
return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self,_Xnew,slices=None, full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalisation or likelihood
"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
var = var[:,None]
return mu, var
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
#now push through likelihood TODO
mean, _025pm, _975pm = self.likelihood.predictive_values(mu, var)
return mean, var, _025pm, _975pm
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
:param samples: the number of a posteriori samples to plot
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m,v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
else:
m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
for i in range(samples):
pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax)
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
pb.ylim(ymin,ymax)
if hasattr(self,'Z'):
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
m,v = self._raw_predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
pb.scatter(self.X[:,0],self.X[:,1],40,self.likelihood.Y,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
# TODO include samples
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper)
pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
pb.xlim(xmin,xmax)
pb.ylim(ymin,ymax)
if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
elif self.X.shape[1]==2: #FIXME
resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
Yf = self.likelihood.Y.flatten()
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
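A minimal regression sketch against the GP class above; the data is synthetic, and kern.rbf/kern.white plus the optimiser inherited from the core model class are assumed available as elsewhere in this commit:

import numpy as np
from GPy import kern
from GPy.models import GP
from GPy.likelihoods import Gaussian

X = np.linspace(0., 10., 50)[:, None]
Y = np.sin(X) + 0.1*np.random.randn(50, 1)
m = GP(X, Gaussian(Y), kern.rbf(1) + kern.white(1))
m.update_likelihood_approximation()  # a no-op for the Gaussian likelihood
m.optimize()
mean, var, lower, upper = m.predict(X)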

View file

@@ -8,9 +8,10 @@ import sys, pdb
 from .. import kern
 from ..core import model
 from ..util.linalg import pdinv, PCA
-from GP_regression import GP_regression
+from GP import GP
+from ..likelihoods import Gaussian

-class GPLVM(GP_regression):
+class GPLVM(GP):
     """
     Gaussian Process Latent Variable Model
@@ -22,10 +23,13 @@ class GPLVM(GP_regression):
     :type init: 'PCA'|'random'
     """
-    def __init__(self, Y, Q, init='PCA', X = None, **kwargs):
+    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
-        GP_regression.__init__(self, X, Y, **kwargs)
+        if kernel is None:
+            kernel = kern.rbf(Q) + kern.bias(Q)
+        likelihood = Gaussian(Y)
+        GP.__init__(self, X, likelihood, kernel, **kwargs)

     def initialise_latent(self, init, Q, Y):
         if init == 'PCA':
@@ -34,23 +38,19 @@ class GPLVM(GP_regression):
         return np.random.randn(Y.shape[0], Q)

     def _get_param_names(self):
-        return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-                + self.kern._get_param_names_transformed())
+        return sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) + GP._get_param_names(self)

     def _get_params(self):
-        return np.hstack((self.X.flatten(), self.kern._get_params_transformed()))
+        return np.hstack((self.X.flatten(), GP._get_params(self)))

     def _set_params(self,x):
         self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
-        GP_regression._set_params(self, x[self.X.size:])
+        GP._set_params(self, x[self.X.size:])

     def _log_likelihood_gradients(self):
-        dL_dK = self.dL_dK()
-        dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X)
-        dL_dX = 2*self.kern.dK_dX(dL_dK,self.X)
-        return np.hstack((dL_dX.flatten(),dL_dtheta))
+        dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
+        return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))

     def plot(self):
         assert self.Y.shape[1]==2
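And a corresponding sketch for the refactored GPLVM; the data is synthetic, and the default rbf+bias kernel from the new __init__ is used:

import numpy as np
from GPy.models import GPLVM

Y = np.random.randn(30, 5)  # 30 points, 5 observed dimensions
m = GPLVM(Y, 2)             # 2 latent dimensions, PCA initialisation
m.optimize()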

View file

@@ -1,160 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..inference.Expectation_Propagation import Full
from ..inference.likelihoods import likelihood,probit#,poisson,gaussian
from ..core import model
from ..util.linalg import pdinv,jitchol
from ..util.plot import gpplot
class GP_EP(model):
def __init__(self,X,likelihood,kernel=None,epsilon_ep=1e-3,epsion_em=.1,powerep=[1.,1.]):
"""
Simple Gaussian Process with Non-Gaussian likelihood
Arguments
---------
:param X: input observations (NxD numpy.darray)
:param likelihood: a GPy likelihood (likelihood class)
:param kernel: a GPy kernel (kern class)
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 (float)
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list)
:rtype: GPy model class.
"""
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
assert isinstance(kernel,kern.kern), 'kernel is not a kern instance'
self.likelihood = likelihood
self.Y = self.likelihood.Y
self.kernel = kernel
self.X = X
self.N, self.D = self.X.shape
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.jitter = 1e-12
self.K = self.kernel.K(self.X)
model.__init__(self)
def _set_params(self,p):
self.kernel._set_params_transformed(p)
def _get_params(self):
return self.kernel._get_params_transformed()
def _get_param_names(self):
return self.kernel._get_param_names_transformed()
def approximate_likelihood(self):
self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.ep_approx.fit_EP()
def posterior_param(self):
self.K = self.kernel.K(self.X)
self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K
B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)[None,:]*self.Sroot_tilde_K
#self.L = np.linalg.cholesky(B)
self.L = jitchol(B)
V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.ep_approx.v_tilde)
def log_likelihood(self):
"""
Returns
-------
The EP approximation to the log-marginal likelihood
"""
self.posterior_param()
mu_ = self.ep_approx.v_/self.ep_approx.tau_
L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L)))
L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:]))
L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde)
L3 = sum(np.log(self.ep_approx.Z_hat))
return L1 + L2A + L2B + L3
def _log_likelihood_gradients(self):
dK_dp = self.kernel.dK_dtheta(self.X)
self.dK_dp = dK_dp
aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
b = self.ep_approx.v_tilde - np.sqrt(self.ep_approx.tau_tilde)*linalg.flapack.dtrtrs(self.L.T,aux1)[0]
U,info_u = linalg.flapack.dtrtrs(self.L,np.diag(np.sqrt(self.ep_approx.tau_tilde)),lower=1)
dL_dK = 0.5*(np.outer(b,b)-np.dot(U.T,U))
self.dL_dK = dL_dK
return np.array([np.sum(dK_dpi*dL_dK) for dK_dpi in dK_dp.T])
def predict(self,X):
#TODO: check output dimensions
self.posterior_param()
K_x = self.kernel.K(self.X,X)
Kxx = self.kernel.K(X)
aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
aux2,info = linalg.flapack.dtrtrs(self.L.T, aux1,lower=0)
zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2
f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta)
v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1)
variance = Kxx - np.dot(v.T,v)
vdiag = np.diag(variance)
y=self.likelihood.predictive_mean(f,vdiag)
return f,vdiag,y
def plot(self):
"""
Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
"""
if self.X.shape[1]==1:
pb.figure()
xmin,xmax = self.X.min(),self.X.max()
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
Xnew = np.linspace(xmin,xmax,100)[:,None]
mu_f, var_f, mu_phi = self.predict(Xnew)
pb.subplot(211)
self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma))
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,mu_phi)
elif self.X.shape[1]==2:
pb.figure()
x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x1max-x1min)
axis1 = np.linspace(x1min,x1max,50)
axis2 = np.linspace(x2min,x2max,50)
XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
Xnew = np.c_[XX1.flatten(),XX2.flatten()]
f,v,p = self.predict(Xnew)
self.likelihood.plot2D(self.X,Xnew,p)
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
"""
Fits sparse_EP and optimizes the hyperparametes iteratively until convergence is achieved.
"""
self.epsilon_em = epsilon
log_likelihood_change = self.epsilon_em + 1.
self.parameters_path = [self.kernel._get_params()]
self.approximate_likelihood()
self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
self.log_likelihood_path = [self.log_likelihood()]
iteration = 0
while log_likelihood_change > self.epsilon_em:
print 'EM iteration', iteration
self.optimize(max_f_eval = max_f_eval)
log_likelihood_new = self.log_likelihood()
log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
if log_likelihood_change < 0:
print 'log_likelihood decrement'
self.kernel._set_params_transformed(self.parameters_path[-1])
self.kernM._set_params_transformed(self.parameters_path[-1])
else:
self.approximate_likelihood()
self.log_likelihood_path.append(self.log_likelihood())
self.parameters_path.append(self.kernel._get_params())
self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
iteration += 1

View file

@@ -1,18 +1,18 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
-import pylab as pb
+from GP import GP
+from .. import likelihoods
 from .. import kern
-from ..core import model
-from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot, Tango

-class GP_regression(model):
+class GP_regression(GP):
     """
     Gaussian Process model for regression
+    This is a thin wrapper around the GP class, with a set of sensible defaults
     :param X: input observations
     :param Y: observed values
     :param kernel: a GPy kernel, defaults to rbf+white
@@ -29,199 +29,8 @@ class GP_regression(model):
     def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
+            kernel = kern.rbf(X.shape[1])
+        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
+        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
-        # parse arguments
-        self.Xslices = Xslices
-        assert isinstance(kernel, kern.kern)
-        self.kern = kernel
-        self.X = X
-        self.Y = Y
-        assert len(self.X.shape)==2
-        assert len(self.Y.shape)==2
-        assert self.X.shape[0] == self.Y.shape[0]
-        self.N, self.D = self.Y.shape
-        self.N, self.Q = self.X.shape
-        #here's some simple normalisation
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
if normalize_Y:
self._Ymean = Y.mean(0)[None,:]
self._Ystd = Y.std(0)[None,:]
self.Y = (Y.copy()- self._Ymean) / self._Ystd
else:
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
if self.D > self.N:
# then it's more efficient to store YYT
self.YYT = np.dot(self.Y, self.Y.T)
else:
self.YYT = None
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p)
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
def _get_params(self):
return self.kern._get_params_transformed()
def _get_param_names(self):
return self.kern._get_param_names_transformed()
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
def log_likelihood(self):
complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
return complexity_term + self._model_fit_term()
def dL_dK(self):
if self.YYT is None:
alpha = np.dot(self.Ki,self.Y)
dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
return dL_dK
def _log_likelihood_gradients(self):
return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
#un-normalise
mu = mu*self._Ystd + self._Ymean
if full_cov:
if self.D==1:
var *= np.square(self._Ystd)
else:
var = var[:,:,None] * np.square(self._Ystd)
else:
if self.D==1:
var *= np.square(np.squeeze(self._Ystd))
else:
var = var[:,None] * np.square(self._Ystd)
return mu,var
def _raw_predict(self,_Xnew,slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
return mu, var
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.Y[which_data,:]
Xorig = X*self._Xstd + self._Xmean
Yorig = Y*self._Ystd + self._Ymean
if plot_limits is None:
xmin,xmax = Xorig.min(0),Xorig.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
m,v = self.predict(Xnew,slices=which_functions)
gpplot(Xnew,m,v)
if samples:
s = np.random.multivariate_normal(m.flatten(),v,samples)
pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
pb.plot(Xorig,Yorig,'kx',mew=1.5)
pb.xlim(xmin,xmax)
elif self.X.shape[1]==2:
resolution = 50 or resolution
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xtest = np.vstack((xx.flatten(),yy.flatten())).T
zz,vv = self.predict(Xtest,slices=which_functions)
zz = zz.reshape(resolution,resolution)
pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"

View file

@@ -2,12 +2,12 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
+from GP import GP
 from GP_regression import GP_regression
+from sparse_GP import sparse_GP
 from sparse_GP_regression import sparse_GP_regression
 from GPLVM import GPLVM
 from warped_GP import warpedGP
-from GP_EP import GP_EP
-from generalized_FITC import generalized_FITC
 from sparse_GPLVM import sparse_GPLVM
-from uncollapsed_sparse_GP import uncollapsed_sparse_GP
+#from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from BGPLVM import Bayesian_GPLVM

View file

@@ -1,241 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot
from ..inference.Expectation_Propagation import FITC
from ..inference.likelihoods import likelihood,probit
class generalized_FITC(model):
def __init__(self,X,likelihood,kernel=None,inducing=10,epsilon_ep=1e-3,powerep=[1.,1.]):
"""
Naish-Guzman, A. and Holden, S. (2008) implementation of EP with FITC.
:param X: input observations
:param likelihood: Output's likelihood (likelihood class)
:param kernel: a GPy kernel
:param inducing: Either an array specifying the inducing points location or a scalar defining their number.
:param epsilon_ep: EP convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
assert isinstance(kernel,kern.kern)
self.likelihood = likelihood
self.Y = self.likelihood.Y
self.kernel = kernel
self.X = X
self.N, self.D = self.X.shape
assert self.Y.shape[0] == self.N
if type(inducing) == int:
self.M = inducing
self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1)
elif type(inducing) == np.ndarray:
self.Z = inducing
self.M = self.Z.shape[0]
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.jitter = 1e-12
model.__init__(self)
def _set_params(self,p):
self.kernel._set_params_transformed(p[0:-self.Z.size])
self.Z = p[-self.Z.size:].reshape(self.M,self.D)
def _get_params(self):
return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()])
def _get_param_names(self):
return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)]
def approximate_likelihood(self):
self.Kmm = self.kernel.K(self.Z)
self.Knm = self.kernel.K(self.X,self.Z)
self.Knn_diag = self.kernel.Kdiag(self.X)
self.ep_approx = FITC(self.Kmm,self.likelihood,self.Knm.T,self.Knn_diag,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.ep_approx.fit_EP()
def posterior_param(self):
self.Knn_diag = self.kernel.Kdiag(self.X)
self.Kmm = self.kernel.K(self.Z)
self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm)
self.Knm = self.kernel.K(self.X,self.Z)
self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T)
self.Qnn = np.dot(self.Knm,self.KmmiKmn)
self.Diag0 = self.Knn_diag - np.diag(self.Qnn)
self.R0 = np.linalg.cholesky(self.Kmmi).T
self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0)
self.KmnTaut = self.Knm.T*self.Taut[None,:]
self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm)
self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm)
self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde
self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut)
self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet
self.Diag = self.Diag0/(1.+ self.Diag0 * self.ep_approx.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.Knm
self.RPT0 = np.dot(self.R0,self.Knm.T)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.ep_approx.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.ep_approx.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
self.mu_tilde = (self.ep_approx.v_tilde/self.ep_approx.tau_tilde)[:,None]
def log_likelihood(self):
self.posterior_param()
self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T)
A = -self.hld
B = -.5*np.sum(self.Qi*self.YYT)
C = sum(np.log(self.ep_approx.Z_hat))
D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_))
E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde))
return A + B + C + D + E
def _log_likelihood_gradients(self):
dKmm_dtheta = self.kernel.dK_dtheta(self.Z)
dKnn_dtheta = self.kernel.dK_dtheta(self.X)
dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X)
dKmm_dZ = -self.kernel.dK_dX(self.Z)
dKnm_dZ = -self.kernel.dK_dX(self.X,self.Z)
tmp = [np.dot(dKmn_dtheta_i,self.KmmiKmn) for dKmn_dtheta_i in dKmn_dtheta.T]
dQnn_dtheta = [tmp_i + tmp_i.T - np.dot(np.dot(self.KmmiKmn.T,dKmm_dtheta_i),self.KmmiKmn) for tmp_i,dKmm_dtheta_i in zip(tmp,dKmm_dtheta.T)]
dDiag0_dtheta = [np.diag(dKnn_dtheta_i) - np.diag(dQnn_dtheta_i) for dKnn_dtheta_i,dQnn_dtheta_i in zip(dKnn_dtheta.T,dQnn_dtheta)]
dQ_dtheta = [np.diag(dDiag0_dtheta_i) + dQnn_dtheta_i for dDiag0_dtheta_i,dQnn_dtheta_i in zip(dDiag0_dtheta,dQnn_dtheta)]
dW_dtheta = [dKmm_dtheta_i + 2*np.dot(self.KmnTaut,dKmn_dtheta_i) - np.dot(self.KmnTaut*dDiag0_dtheta_i,self.KmnTaut.T) for dKmm_dtheta_i,dDiag0_dtheta_i,dKmn_dtheta_i in zip(dKmm_dtheta.T,dDiag0_dtheta,dKmn_dtheta.T)]
QiY = np.dot(self.Qi, self.mu_tilde)
QiYYQi = np.outer(QiY,QiY)
WiKmnTaut = np.dot(self.Woodbury_inv,self.KmnTaut)
K_Y = np.dot(self.KmmiKmn,QiY)
# gradient - theta
Atheta = [-0.5*np.dot(self.Taut,dDiag0_dtheta_i) + 0.5*np.sum(self.Kmmi*dKmm_dtheta_i) - 0.5*np.sum(self.Woodbury_inv*dW_dtheta_i) for dDiag0_dtheta_i,dKmm_dtheta_i,dW_dtheta_i in zip(dDiag0_dtheta,dKmm_dtheta.T,dW_dtheta)]
Btheta = np.array([0.5*np.sum(QiYYQi*dQ_dtheta_i) for dQ_dtheta_i in dQ_dtheta])
dL_dtheta = Atheta + Btheta
# gradient - Z
# Az
dQnn_dZ_diag_a2 = (np.array([d[:,:,None]*self.KmmiKmn[:,:,None] for d in dKnm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0)
dQnn_dZ_diag_b2 = (np.array([(self.KmmiKmn*np.sum(d[:,:,None]*self.KmmiKmn,-2))[:,:,None] for d in dKmm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0)
dQnn_dZ_diag = dQnn_dZ_diag_a2 - dQnn_dZ_diag_b2
d_hld_Diag1_dZ = -np.sum(np.dot(self.KmmiKmn*self.Taut,self.KmmiKmn.T)[:,:,None]*dKmm_dZ,-2) + np.sum((self.KmmiKmn*self.Taut)[:,:,None]*dKnm_dZ,-2)
d_hld_Kmm_dZ = np.sum(self.Kmmi[:,:,None]*dKmm_dZ,-2)
d_hld_W_dZ1 = np.sum(WiKmnTaut[:,:,None]*dKnm_dZ,-2)
d_hld_W_dZ3 = np.sum(self.Woodbury_inv[:,:,None]*dKmm_dZ,-2)
d_hld_W_dZ2 = np.array([np.sum(np.sum(WiKmnTaut.T*d[:,:,None]*self.KmnTaut.T,-2),-1) for d in dQnn_dZ_diag.transpose(2,0,1)]).T
Az = d_hld_Diag1_dZ + d_hld_Kmm_dZ - d_hld_W_dZ1 - d_hld_W_dZ2 - d_hld_W_dZ3
# Bz
Bz2 = np.sum(np.dot(K_Y,QiY.T)[:,:,None]*dKnm_dZ,-2)
Bz3 = - np.sum(np.dot(K_Y,K_Y.T)[:,:,None]*dKmm_dZ,-2)
Bz1 = -np.array([np.sum((QiY**2)*d[:,:,None],-2) for d in dQnn_dZ_diag.transpose(2,0,1)]).reshape(self.D,self.M).T
Bz = Bz1 + Bz2 + Bz3
dL_dZ = (Az + Bz).flatten()
return np.hstack([dL_dtheta, dL_dZ])
def predict(self,X):
"""
Make a prediction for the vsGP model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
#TODO: check output dimensions
K_x = self.kernel.K(self.Z,X)
Kxx = self.kernel.K(X)
#K_x = self.kernM.cross.K(X)
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
KR0T = np.dot(K_x.T,self.R0.T)
mu_star = np.dot(KR0T,mu_H)
sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
vdiag = np.diag(sigma2_star)
# q(y_star|y) = non-gaussian posterior probability of class membership
p = self.likelihood.predictive_mean(mu_star,vdiag)
return mu_star,vdiag,p
def plot(self):
"""
Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
"""
if self.X.shape[1]==1:
pb.figure()
xmin,xmax = np.r_[self.X,self.Z].min(),np.r_[self.X,self.Z].max()
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
Xnew = np.linspace(xmin,xmax,100)[:,None]
mu_f, var_f, mu_phi = self.predict(Xnew)
self.mu_inducing,self.var_diag_inducing,self.phi_inducing = self.predict(self.Z)
pb.subplot(211)
self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.Z,Mean_u=self.mu_inducing,Var_u=self.var_diag_inducing)
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,mu_phi,self.Z)
elif self.X.shape[1]==2:
pb.figure()
x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x1max-x1min)
axis1 = np.linspace(x1min,x1max,50)
axis2 = np.linspace(x2min,x2max,50)
XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
Xnew = np.c_[XX1.flatten(),XX2.flatten()]
f,v,p = self.predict(Xnew)
self.likelihood.plot2D(self.X,Xnew,p,self.Z)
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
"""
Fits sparse_EP and optimizes the hyperparametes iteratively until convergence is achieved.
"""
self.epsilon_em = epsilon
log_likelihood_change = self.epsilon_em + 1.
self.parameters_path = [self.kernel._get_params()]
self.approximate_likelihood()
self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
self.inducing_inputs_path = [self.Z]
self.log_likelihood_path = [self.log_likelihood()]
iteration = 0
while log_likelihood_change > self.epsilon_em:
print 'EM iteration', iteration
self.optimize(max_f_eval = max_f_eval)
log_likelihood_new = self.log_likelihood()
log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
if log_likelihood_change < 0:
print 'log_likelihood decrement'
self.kernel._set_params_transformed(self.parameters_path[-1])
self.kernM = self.kernel.copy()
self.kernM.expand_X(self.inducing_inputs_path[-1])
self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em)
else:
self.approximate_likelihood()
self.log_likelihood_path.append(self.log_likelihood())
self.parameters_path.append(self.kernel._get_params())
self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
self.inducing_inputs_path.append(self.Z)
iteration += 1

217
GPy/models/sparse_GP.py Normal file
View file

@@ -0,0 +1,217 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from GP import GP
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP):
"""
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (N x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
self.scale_factor = 1.0# a scaling factor to help keep the algorithm stable
self.Z = Z
self.Zslices = Zslices
self.Xslices = Xslices
self.M = Z.shape[0]
self.likelihood = likelihood
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
if self.likelihood.is_heteroscedastic:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
#TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
else:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
self.psi1 = self.kern.K(self.Z,self.X)
if self.likelihood.is_heteroscedastic:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi # FIXME: this is untested for the het. case
self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
if self.likelihood.is_heteroscedastic:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
else:
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
#the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# no likelihood parameters, so skip the computation entirely
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedastic derivatives not implemented"
#self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
#self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else:
# the likelihood is not heteroscedastic
beta = self.likelihood.precision
dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
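# (editor's note, not in the commit) the -beta**2 factor below looks like the
# chain rule d/d(sigma^2) = -beta**2 * d/d(beta), assuming the Gaussian
# likelihood parameterises its noise as a variance sigma^2 = 1/beta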
self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),GP._get_params(self)])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) # factor of two because of the vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
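
As a reading aid for the class above (an editor's sketch, not part of the commit): a minimal example of how sparse_GP is driven, using the Gaussian likelihood and the rbf/white kernels that appear elsewhere in this diff. The toy data and the choice of 10 inducing points are made up, and it is assumed that GPy.models re-exports the class (as the test file does for GPy.models.GP).

import numpy as np
import GPy

# toy 1-D regression data (illustrative only)
X = np.random.rand(50, 1)
Y = np.sin(2 * np.pi * X) + 0.05 * np.random.randn(50, 1)

kernel = GPy.kern.rbf(1) + GPy.kern.white(1, 1e-3)
likelihood = GPy.likelihoods.Gaussian(Y, normalize=True)
Z = np.random.permutation(X.copy())[:10]    # inducing inputs: a random subset of X

m = GPy.models.sparse_GP(X, likelihood, kernel, Z)
m.ensure_default_constraints()
print m.checkgrad()    # compare analytic and numeric gradients before optimising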


@ -1,205 +1,44 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from sparse_GP import sparse_GP
from ..util.linalg import mdot, jitchol, chol_inv, pdinv from .. import likelihoods
from ..util.plot import gpplot
from .. import kern from .. import kern
from ..inference.likelihoods import likelihood
from GP_regression import GP_regression
#Still TODO: class sparse_GP_regression(sparse_GP):
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP_regression(GP_regression):
""" """
Variational sparse GP model (Regression) Gaussian Process model for regression
This is a thin wrapper around the GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
""" """
def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
self.scale_factor = 1000.0 #kern defaults to rbf
self.beta = beta if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
#Z defaults to a subset of the data
if Z is None: if Z is None:
self.Z = np.random.permutation(X.copy())[:M] Z = np.random.permutation(X.copy())[:M]
self.M = M
else: else:
assert Z.shape[1]==X.shape[1] assert Z.shape[1]==X.shape[1]
self.Z = Z
self.M = Z.shape[0]
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y) #likelihood defaults to Gaussian
self.trYYT = np.sum(np.square(self.Y)) likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
#normalise X uncertainty also sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X)
#self.psi2 = np.dot(self.psi1,self.psi1.T)
#self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
sf = self.scale_factor
sf2 = sf**2
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
# TODO: support heteroscedastic noise
sf2 = self.scale_factor**2
dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) # factor of two because of the vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
return mu,var
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP_regression.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
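
The net effect of the rewrite above (an editor's sketch, not part of the commit): the class is now a thin convenience wrapper, so up to the random draw of Z the two constructions below are equivalent. X and Y are the toy arrays from the earlier sketch; the explicit form mirrors the new __init__ line for line.

m1 = GPy.models.sparse_GP_regression(X, Y, M=10)

# ...is shorthand for:
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1], 1e-3)
likelihood = GPy.likelihoods.Gaussian(Y, normalize=False)
Z = np.random.permutation(X.copy())[:10]
m2 = GPy.models.sparse_GP(X, likelihood, kernel, Z)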


@ -6,7 +6,7 @@ import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from ..inference.likelihoods import likelihood from ..likelihoods import likelihood
from sparse_GP_regression import sparse_GP_regression from sparse_GP_regression import sparse_GP_regression
class uncollapsed_sparse_GP(sparse_GP_regression): class uncollapsed_sparse_GP(sparse_GP_regression):
@ -136,8 +136,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
#dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean) #dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0] dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0]
#dL_dSim = #dL_dSim =
#dL_dmhSi = #dL_dmhSi =
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO


@ -154,17 +154,16 @@ class GradientTests(unittest.TestCase):
m.constrain_positive('(linear|bias|white)') m.constrain_positive('(linear|bias|white)')
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_GP_EP(self): def test_GP_EP_probit(self):
return # Disabled TODO
N = 20 N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] kernel = GPy.kern.rbf(1)
likelihood = GPy.inference.likelihoods.probit(Y) distribution = GPy.likelihoods.likelihood_functions.probit()
m = GPy.models.GP_EP(X,likelihood,k) likelihood = GPy.likelihoods.EP(Y, distribution)
m.constrain_positive('(var|len)') m = GPy.models.GP(X, likelihood, kernel)
m.approximate_likelihood() m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.EPEM)
@unittest.skip("FITC will be broken for a while") @unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self): def test_generalized_FITC(self):


@ -6,30 +6,26 @@ import Tango
import pylab as pb import pylab as pb
import numpy as np import numpy as np
def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
if axes is None: if axes is None:
axes = pb.gca() axes = pb.gca()
mu = mu.flatten() mu = mu.flatten()
x = x.flatten() x = x.flatten()
lower = lower.flatten()
upper = upper.flatten()
#here's the mean #here's the mean
axes.plot(x,mu,color=edgecol,linewidth=2) axes.plot(x,mu,color=edgecol,linewidth=2)
#ensure variance is a vector #here's the box
if len(var.shape)>1:
err = 2*np.sqrt(np.diag(var))
else:
err = 2*np.sqrt(var)
#here's the 2*std box
kwargs['linewidth']=0.5 kwargs['linewidth']=0.5
if not 'alpha' in kwargs.keys(): if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 0.3 kwargs['alpha'] = 0.3
axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs) axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)
#this is the edge: #this is the edge:
axes.plot(x,mu+err,color=edgecol,linewidth=0.2) axes.plot(x,upper,color=edgecol,linewidth=0.2)
axes.plot(x,mu-err,color=edgecol,linewidth=0.2) axes.plot(x,lower,color=edgecol,linewidth=0.2)
def removeRightTicks(ax=None): def removeRightTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()
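
Note the gpplot signature change above: it used to take (x, mu, var) and derive a 2-sigma band internally; it now takes (x, mu, lower, upper), so callers build the band themselves. A sketch of an updated call (editor's illustration, with mu and var standing in for a model's predictive mean and variance):

import numpy as np

err = 2. * np.sqrt(var)            # the 2*std band the old gpplot computed itself
gpplot(x, mu, mu - err, mu + err)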
@ -74,4 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None):
else: else:
removeUpperTicks() removeUpperTicks()
def x_frame1D(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs"
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
return Xnew, xmin, xmax
def x_frame2D(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
assert X.shape[1] ==2, "x_frame2D is defined for two-dimensional inputs"
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
return Xnew, xx, yy, xmin, xmax
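
These two helpers give plotting code a padded prediction grid in one call: a 20% margin beyond the data range, with 200 points in 1-D and a 50x50 mesh in 2-D by default. A self-contained sketch (editor's illustration):

import numpy as np

X1 = np.random.rand(30, 1)
Xnew, xmin, xmax = x_frame1D(X1)             # (200, 1) grid over the padded range

X2 = np.random.rand(30, 2)
Xnew2, xx, yy, xmin2, xmax2 = x_frame2D(X2)  # (2500, 2) grid from a 50x50 mesh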

Binary image files changed (contents not shown):
doc/Figures/kern-def.png: added (38 KiB)
(file name not shown): modified (30 KiB -> 32 KiB)
(file name not shown): modified (45 KiB -> 51 KiB)
(file name not shown): modified (78 KiB -> 77 KiB)
(file name not shown): added (63 KiB)
(file name not shown): added (129 KiB)
(file name not shown): added (38 KiB)
(file name not shown): added (55 KiB)
(file name not shown): added (84 KiB)

@ -33,6 +33,14 @@ examples Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`poisson` Module
---------------------
.. automodule:: GPy.examples.poisson
:members:
:undoc-members:
:show-inheritance:
:mod:`regression` Module :mod:`regression` Module
------------------------ ------------------------
@ -57,6 +65,14 @@ examples Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_ep_fix` Module
---------------------------
.. automodule:: GPy.examples.sparse_ep_fix
:members:
:undoc-members:
:show-inheritance:
:mod:`uncertain_input_GP_regression_demo` Module :mod:`uncertain_input_GP_regression_demo` Module
------------------------------------------------ ------------------------------------------------


@ -1,22 +1,6 @@
inference Package inference Package
================= =================
:mod:`Expectation_Propagation` Module
-------------------------------------
.. automodule:: GPy.inference.Expectation_Propagation
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihoods` Module
-------------------------
.. automodule:: GPy.inference.likelihoods
:members:
:undoc-members:
:show-inheritance:
:mod:`optimization` Module :mod:`optimization` Module
-------------------------- --------------------------

doc/GPy.likelihoods.rst Normal file

@ -0,0 +1,43 @@
likelihoods Package
===================
:mod:`likelihoods` Package
--------------------------
.. automodule:: GPy.likelihoods
:members:
:undoc-members:
:show-inheritance:
:mod:`EP` Module
----------------
.. automodule:: GPy.likelihoods.EP
:members:
:undoc-members:
:show-inheritance:
:mod:`Gaussian` Module
----------------------
.. automodule:: GPy.likelihoods.Gaussian
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihood` Module
------------------------
.. automodule:: GPy.likelihoods.likelihood
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihood_functions` Module
----------------------------------
.. automodule:: GPy.likelihoods.likelihood_functions
:members:
:undoc-members:
:show-inheritance:


@ -17,18 +17,18 @@ models Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`GPLVM` Module :mod:`GP` Module
------------------- ----------------
.. automodule:: GPy.models.GPLVM .. automodule:: GPy.models.GP
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`GP_EP` Module :mod:`GPLVM` Module
------------------- -------------------
.. automodule:: GPy.models.GP_EP .. automodule:: GPy.models.GPLVM
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
@ -41,10 +41,10 @@ models Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`generalized_FITC` Module :mod:`sparse_GP` Module
------------------------------ -----------------------
.. automodule:: GPy.models.generalized_FITC .. automodule:: GPy.models.sparse_GP
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


@ -18,6 +18,7 @@ Subpackages
GPy.examples GPy.examples
GPy.inference GPy.inference
GPy.kern GPy.kern
GPy.likelihoods
GPy.models GPy.models
GPy.util GPy.util


@ -41,6 +41,7 @@ help:
clean: clean:
-rm -rf $(BUILDDIR)/* -rm -rf $(BUILDDIR)/*
html: html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo @echo


@ -11,101 +11,49 @@
# All configuration values have a default; values that are commented out # All configuration values have a default; values that are commented out
# serve to show the default. # serve to show the default.
import sys, os import sys
import os
#Mocking uninstalled modules: https://read-the-docs.readthedocs.org/en/latest/faq.html print "python exec:", sys.executable
print "sys.path:", sys.path
#class Mock(object): try:
#__all__ = [] import numpy
#def __init__(self, *args, **kwargs): print "numpy: %s, %s" % (numpy.__version__, numpy.__file__)
#for key, value in kwargs.iteritems(): except ImportError:
#setattr(self, key, value) print "no numpy"
try:
#def __call__(self, *args, **kwargs): import matplotlib
#return Mock() print "matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__)
except ImportError:
#__add__ = __mul__ = __getitem__ = __setitem__ = \ print "no matplotlib"
#__delitem__ = __sub__ = __floordiv__ = __mod__ = __divmod__ = \ try:
#__pow__ = __lshift__ = __rshift__ = __and__ = __xor__ = __or__ = \ import ipython
#__rmul__ = __rsub__ = __rfloordiv__ = __rmod__ = __rdivmod__ = \ print "ipython: %s, %s" % (ipython.__version__, ipython.__file__)
#__rpow__ = __rlshift__ = __rrshift__ = __rand__ = __rxor__ = __ror__ = \ except ImportError:
#__imul__ = __isub__ = __ifloordiv__ = __imod__ = __idivmod__ = \ print "no ipython"
#__ipow__ = __ilshift__ = __irshift__ = __iand__ = __ixor__ = __ior__ = \ try:
#__neg__ = __pos__ = __abs__ = __invert__ = __call__ import sphinx
print "sphinx: %s, %s" % (sphinx.__version__, sphinx.__file__)
#def __getattr__(self, name): except ImportError:
#if name in ('__file__', '__path__'): print "no sphinx"
#return '/dev/null'
#if name == 'sqrt':
#return math.sqrt
#elif name[0] != '_' and name[0] == name[0].upper():
#return type(name, (), {})
#else:
#return Mock(**vars(self))
#def __lt__(self, *args, **kwargs):
#return True
#__nonzero__ = __le__ = __eq__ = __ne__ = __gt__ = __ge__ = __contains__ = \
#__lt__
#def __repr__(self):
## Use _mock_repr to fake the __repr__ call
#res = getattr(self, "_mock_repr")
#return res if isinstance(res, str) else "Mock"
#def __hash__(self):
#return 1
#__len__ = __int__ = __long__ = __index__ = __hash__
#def __oct__(self):
#return '01'
#def __hex__(self):
#return '0x1'
#def __float__(self):
#return 0.1
#def __complex__(self):
#return 1j
#MOCK_MODULES = [
#'pylab', 'scipy', 'matplotlib', 'matplotlib.pyplot', 'pyfits',
#'scipy.constants.constants', 'matplotlib.cm',
#'matplotlib.image', 'matplotlib.colors', 'sunpy.cm',
#'pandas', 'pandas.io', 'pandas.io.parsers',
#'suds', 'matplotlib.ticker', 'matplotlib.colorbar',
#'matplotlib.dates', 'scipy.optimize', 'scipy.ndimage',
#'matplotlib.figure', 'scipy.ndimage.interpolation', 'bs4']
#for mod_name in MOCK_MODULES:
#sys.modules[mod_name] = Mock()
#sys.modules['numpy'] = Mock(pi=math.pi, G=6.67364e-11,
#ndarray=type('ndarray', (), {}),
#dtype=lambda _: Mock(_mock_repr='np.dtype(\'float32\')'))
#sys.modules['scipy.constants'] = Mock(pi=math.pi, G=6.67364e-11)
print "sys.path:", sys.path
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('..')) #sys.path.insert(0, os.path.abspath('../GPy'))
#print "sys.path.after:", sys.path
print "Adding path"
# If your extensions are in another directory, add it here. If the directory # If your extensions are in another directory, add it here. If the directory
# is relative to the documentation root, use os.path.abspath to make it # is relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here. # absolute, like shown here.
#sys.path.append(os.path.abspath('./sphinxext')) sys.path.append(os.path.abspath('sphinxext'))
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
#sys.path.insert(0, os.path.abspath('./sphinxext')) #sys.path.insert(0, os.path.abspath('./sphinxext'))
# -- General configuration ----------------------------------------------------- # -- General configuration -----------------------------------------------------
@ -116,23 +64,24 @@ print "Adding path"
# Add any Sphinx extension module names here, as strings. They can be extensions # Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
print "Importing extensions" print "Importing extensions"
extensions = [#'ipython_directive', extensions = ['sphinx.ext.autodoc',
'sphinx.ext.autodoc', 'sphinx.ext.viewcode' #'sphinx.ext.doctest'
#'matplotlib.sphinxext.mathmpl', 'sphinx.ext.viewcode',
#'matplotlib.sphinxext.only_directives', 'sphinx.ext.pngmath',
#'matplotlib.sphinxext.plot_directive', 'ipython_directive',
#'ipython_directive' 'ipython_console_highlighting'
] #'matplotlib.sphinxext.plot_directive'
#'sphinx.ext.doctest', ]
#'ipython_console_highlighting', plot_formats = [('png', 80), ('pdf', 50)]
#'inheritance_diagram',
#'numpydoc']
print "finished importing" print "finished importing"
############################################################################## ##############################################################################
## ##
## Mock out imports with C dependencies because ReadTheDocs can't build them. ## Mock out imports with C dependencies because ReadTheDocs can't build them.
#############################################################################
class Mock(object): class Mock(object):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
pass pass
@ -151,23 +100,38 @@ class Mock(object):
else: else:
return Mock() return Mock()
#sys.path.append("../GPy")
#import mock #import mock
print "Mocking" print "Mocking"
MOCK_MODULES = ['pylab', 'matplotlib', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser']#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ] MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib']
#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ]
for mod_name in MOCK_MODULES: for mod_name in MOCK_MODULES:
sys.modules[mod_name] = Mock() sys.modules[mod_name] = Mock()
# ----------------------- READTHEDOCS ------------------ # ----------------------- READTHEDOCS ------------------
on_rtd = os.environ.get('READTHEDOCS', None) == 'True' on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
on_rtd = True
if on_rtd: if on_rtd:
sys.path.append("../GPy") sys.path.append(os.path.abspath('../GPy'))
os.system("pwd")
os.system("sphinx-apidoc -f -o . ../GPy") import subprocess
#os.system("cd ..")
#os.system("cd ./docs") proc = subprocess.Popen("pwd", stdout=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
print "program output:", out
proc = subprocess.Popen("ls ../", stdout=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
print "program output:", out
proc = subprocess.Popen("sphinx-apidoc -f -o . ../GPy", stdout=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
print "program output:", out
#proc = subprocess.Popen("whereis numpy", stdout=subprocess.PIPE, shell=True)
#(out, err) = proc.communicate()
#print "program output:", out
#proc = subprocess.Popen("whereis matplotlib", stdout=subprocess.PIPE, shell=True)
#(out, err) = proc.communicate()
#print "program output:", out
print "Compiled files" print "Compiled files"
@ -317,21 +281,21 @@ htmlhelp_basename = 'GPydoc'
# -- Options for LaTeX output -------------------------------------------------- # -- Options for LaTeX output --------------------------------------------------
latex_elements = { latex_elements = {
# The paper size ('letterpaper' or 'a4paper'). # The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper', #'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt'). # The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt', #'pointsize': '10pt',
# Additional stuff for the LaTeX preamble. # Additional stuff for the LaTeX preamble.
#'preamble': '', #'preamble': '',
} }
# Grouping the document tree into LaTeX files. List of tuples # Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]). # (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [ latex_documents = [
('index', 'GPy.tex', u'GPy Documentation', ('index', 'GPy.tex', u'GPy Documentation',
u'Author', 'manual'), u'Author', 'manual'),
] ]
# The name of an image file (relative to this directory) to place at the top of # The name of an image file (relative to this directory) to place at the top of
@ -374,9 +338,9 @@ man_pages = [
# (source start file, target name, title, author, # (source start file, target name, title, author,
# dir menu entry, description, category) # dir menu entry, description, category)
texinfo_documents = [ texinfo_documents = [
('index', 'GPy', u'GPy Documentation', ('index', 'GPy', u'GPy Documentation',
u'Author', 'GPy', 'One line description of project.', u'Author', 'GPy', 'One line description of project.',
'Miscellaneous'), 'Miscellaneous'),
] ]
# Documents to append as an appendix to all manuals. # Documents to append as an appendix to all manuals.
@ -431,17 +395,4 @@ epub_copyright = u'2013, Author'
# Allow duplicate toc entries. # Allow duplicate toc entries.
#epub_tocdup = True #epub_tocdup = True
############################################################################# autodoc_member_order = "source"
#
# Include constructors in all the docs
# Got this method from:
# http://stackoverflow.com/questions/5599254/how-to-use-sphinxs-autodoc-to-document-a-classs-init-self-method
#def skip(app, what, name, obj, skip, options):
#if name == "__init__":
#return False
#return skip
#def setup(app):
#app.connect("autodoc-skip-member", skip)

doc/doc-requirements.txt Normal file

@ -0,0 +1,3 @@
ipython
numpy
scipy


@ -8,8 +8,8 @@ Welcome to GPy's documentation!
For a quick start, you can have a look at one of the tutorials: For a quick start, you can have a look at one of the tutorials:
* `Basic Gaussian process regression <tuto_GP_regression.html>`_ * `Basic Gaussian process regression <tuto_GP_regression.html>`_
* `A kernel overview <tuto_kernel_overview.html>`_
* Advanced GP regression (Forthcoming) * Advanced GP regression (Forthcoming)
* Kernel manipulation (Forthcoming)
* Writing kernels (Forthcoming) * Writing kernels (Forthcoming)
You may also be interested by some examples in the GPy/examples folder. You may also be interested by some examples in the GPy/examples folder.
@ -28,4 +28,3 @@ Indices and tables
* :ref:`genindex` * :ref:`genindex`
* :ref:`modindex` * :ref:`modindex`
* :ref:`search` * :ref:`search`

File diff suppressed because it is too large.


@ -1,2 +0,0 @@
from __future__ import print_function


@ -1,427 +0,0 @@
"""Attempt to generate templates for module reference with Sphinx
XXX - we exclude extension modules
To include extension modules, first identify them as valid in the
``_uri2path`` method, then handle them in the ``_parse_module`` script.
We get functions and classes by parsing the text of .py files.
Alternatively we could import the modules for discovery, and we'd have
to do that for extension modules. This would involve changing the
``_parse_module`` method to work via import and introspection, and
might involve changing ``discover_modules`` (which determines which
files are modules, and therefore which module URIs will be passed to
``_parse_module``).
NOTE: this is a modified version of a script originally shipped with the
PyMVPA project, which we've adapted for NIPY use. PyMVPA is an MIT-licensed
project."""
# Stdlib imports
import os
import re
# Functions and classes
class ApiDocWriter(object):
''' Class for automatic detection and parsing of API docs
to Sphinx-parsable reST format'''
# only separating first two levels
rst_section_levels = ['*', '=', '-', '~', '^']
def __init__(self,
package_name,
rst_extension='.rst',
package_skip_patterns=None,
module_skip_patterns=None,
):
''' Initialize package for parsing
Parameters
----------
package_name : string
Name of the top-level package. *package_name* must be the
name of an importable package
rst_extension : string, optional
Extension for reST files, default '.rst'
package_skip_patterns : None or sequence of {strings, regexps}
Sequence of strings giving URIs of packages to be excluded
Operates on the package path, starting at (including) the
first dot in the package path, after *package_name* - so,
if *package_name* is ``sphinx``, then ``sphinx.util`` will
result in ``.util`` being passed for searching by these
regexps. If is None, gives default. Default is:
['\.tests$']
module_skip_patterns : None or sequence
Sequence of strings giving URIs of modules to be excluded
Operates on the module name including preceding URI path,
back to the first dot after *package_name*. For example
``sphinx.util.console`` results in the string to search of
``.util.console``
If is None, gives default. Default is:
['\.setup$', '\._']
'''
if package_skip_patterns is None:
package_skip_patterns = ['\\.tests$']
if module_skip_patterns is None:
module_skip_patterns = ['\\.setup$', '\\._']
self.package_name = package_name
self.rst_extension = rst_extension
self.package_skip_patterns = package_skip_patterns
self.module_skip_patterns = module_skip_patterns
def get_package_name(self):
return self._package_name
def set_package_name(self, package_name):
''' Set package_name
>>> docwriter = ApiDocWriter('sphinx')
>>> import sphinx
>>> docwriter.root_path == sphinx.__path__[0]
True
>>> docwriter.package_name = 'docutils'
>>> import docutils
>>> docwriter.root_path == docutils.__path__[0]
True
'''
# It's also possible to imagine caching the module parsing here
self._package_name = package_name
self.root_module = __import__(package_name)
self.root_path = self.root_module.__path__[0]
self.written_modules = None
package_name = property(get_package_name, set_package_name, None,
'get/set package_name')
def _get_object_name(self, line):
''' Get second token in line
>>> docwriter = ApiDocWriter('sphinx')
>>> docwriter._get_object_name(" def func(): ")
'func'
>>> docwriter._get_object_name(" class Klass(object): ")
'Klass'
>>> docwriter._get_object_name(" class Klass: ")
'Klass'
'''
name = line.split()[1].split('(')[0].strip()
# in case we have classes which are not derived from object
# ie. old style classes
return name.rstrip(':')
def _uri2path(self, uri):
''' Convert uri to absolute filepath
Parameters
----------
uri : string
URI of python module to return path for
Returns
-------
path : None or string
Returns None if there is no valid path for this URI
Otherwise returns absolute file system path for URI
Examples
--------
>>> docwriter = ApiDocWriter('sphinx')
>>> import sphinx
>>> modpath = sphinx.__path__[0]
>>> res = docwriter._uri2path('sphinx.builder')
>>> res == os.path.join(modpath, 'builder.py')
True
>>> res = docwriter._uri2path('sphinx')
>>> res == os.path.join(modpath, '__init__.py')
True
>>> docwriter._uri2path('sphinx.does_not_exist')
'''
if uri == self.package_name:
return os.path.join(self.root_path, '__init__.py')
path = uri.replace('.', os.path.sep)
path = path.replace(self.package_name + os.path.sep, '')
path = os.path.join(self.root_path, path)
# XXX maybe check for extensions as well?
if os.path.exists(path + '.py'): # file
path += '.py'
elif os.path.exists(os.path.join(path, '__init__.py')):
path = os.path.join(path, '__init__.py')
else:
return None
return path
def _path2uri(self, dirpath):
''' Convert directory path to uri '''
relpath = dirpath.replace(self.root_path, self.package_name)
if relpath.startswith(os.path.sep):
relpath = relpath[1:]
return relpath.replace(os.path.sep, '.')
def _parse_module(self, uri):
''' Parse module defined in *uri* '''
filename = self._uri2path(uri)
if filename is None:
# nothing that we could handle here.
return ([],[])
f = open(filename, 'rt')
functions, classes = self._parse_lines(f)
f.close()
return functions, classes
def _parse_lines(self, linesource):
''' Parse lines of text for functions and classes '''
functions = []
classes = []
for line in linesource:
if line.startswith('def ') and line.count('('):
# exclude private stuff
name = self._get_object_name(line)
if not name.startswith('_'):
functions.append(name)
elif line.startswith('class '):
# exclude private stuff
name = self._get_object_name(line)
if not name.startswith('_'):
classes.append(name)
else:
pass
functions.sort()
classes.sort()
return functions, classes
def generate_api_doc(self, uri):
'''Make autodoc documentation template string for a module
Parameters
----------
uri : string
python location of module - e.g 'sphinx.builder'
Returns
-------
S : string
Contents of API doc
'''
# get the names of all classes and functions
functions, classes = self._parse_module(uri)
if not len(functions) and not len(classes):
print 'WARNING: Empty -',uri # dbg
return ''
# Make a shorter version of the uri that omits the package name for
# titles
uri_short = re.sub(r'^%s\.' % self.package_name,'',uri)
ad = '.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n'
chap_title = uri_short
ad += (chap_title+'\n'+ self.rst_section_levels[1] * len(chap_title)
+ '\n\n')
# Set the chapter title to read 'module' for all modules except for the
# main packages
if '.' in uri:
title = 'Module: :mod:`' + uri_short + '`'
else:
title = ':mod:`' + uri_short + '`'
ad += title + '\n' + self.rst_section_levels[2] * len(title)
if len(classes):
ad += '\nInheritance diagram for ``%s``:\n\n' % uri
ad += '.. inheritance-diagram:: %s \n' % uri
ad += ' :parts: 3\n'
ad += '\n.. automodule:: ' + uri + '\n'
ad += '\n.. currentmodule:: ' + uri + '\n'
multi_class = len(classes) > 1
multi_fx = len(functions) > 1
if multi_class:
ad += '\n' + 'Classes' + '\n' + \
self.rst_section_levels[2] * 7 + '\n'
elif len(classes) and multi_fx:
ad += '\n' + 'Class' + '\n' + \
self.rst_section_levels[2] * 5 + '\n'
for c in classes:
ad += '\n:class:`' + c + '`\n' \
+ self.rst_section_levels[multi_class + 2 ] * \
(len(c)+9) + '\n\n'
ad += '\n.. autoclass:: ' + c + '\n'
# must NOT exclude from index to keep cross-refs working
ad += ' :members:\n' \
' :undoc-members:\n' \
' :show-inheritance:\n' \
' :inherited-members:\n' \
'\n' \
' .. automethod:: __init__\n'
if multi_fx:
ad += '\n' + 'Functions' + '\n' + \
self.rst_section_levels[2] * 9 + '\n\n'
elif len(functions) and multi_class:
ad += '\n' + 'Function' + '\n' + \
self.rst_section_levels[2] * 8 + '\n\n'
for f in functions:
# must NOT exclude from index to keep cross-refs working
ad += '\n.. autofunction:: ' + uri + '.' + f + '\n\n'
return ad
def _survives_exclude(self, matchstr, match_type):
''' Returns True if *matchstr* does not match patterns
``self.package_name`` removed from front of string if present
Examples
--------
>>> dw = ApiDocWriter('sphinx')
>>> dw._survives_exclude('sphinx.okpkg', 'package')
True
>>> dw.package_skip_patterns.append('^\\.badpkg$')
>>> dw._survives_exclude('sphinx.badpkg', 'package')
False
>>> dw._survives_exclude('sphinx.badpkg', 'module')
True
>>> dw._survives_exclude('sphinx.badmod', 'module')
True
>>> dw.module_skip_patterns.append('^\\.badmod$')
>>> dw._survives_exclude('sphinx.badmod', 'module')
False
'''
if match_type == 'module':
patterns = self.module_skip_patterns
elif match_type == 'package':
patterns = self.package_skip_patterns
else:
raise ValueError('Cannot interpret match type "%s"'
% match_type)
# Match to URI without package name
L = len(self.package_name)
if matchstr[:L] == self.package_name:
matchstr = matchstr[L:]
for pat in patterns:
try:
pat.search
except AttributeError:
pat = re.compile(pat)
if pat.search(matchstr):
return False
return True
def discover_modules(self):
''' Return module sequence discovered from ``self.package_name``
Parameters
----------
None
Returns
-------
mods : sequence
Sequence of module names within ``self.package_name``
Examples
--------
>>> dw = ApiDocWriter('sphinx')
>>> mods = dw.discover_modules()
>>> 'sphinx.util' in mods
True
>>> dw.package_skip_patterns.append('\.util$')
>>> 'sphinx.util' in dw.discover_modules()
False
>>>
'''
modules = [self.package_name]
# raw directory parsing
for dirpath, dirnames, filenames in os.walk(self.root_path):
# Check directory names for packages
root_uri = self._path2uri(os.path.join(self.root_path,
dirpath))
for dirname in dirnames[:]: # copy list - we modify inplace
package_uri = '.'.join((root_uri, dirname))
if (self._uri2path(package_uri) and
self._survives_exclude(package_uri, 'package')):
modules.append(package_uri)
else:
dirnames.remove(dirname)
# Check filenames for modules
for filename in filenames:
module_name = filename[:-3]
module_uri = '.'.join((root_uri, module_name))
if (self._uri2path(module_uri) and
self._survives_exclude(module_uri, 'module')):
modules.append(module_uri)
return sorted(modules)
def write_modules_api(self, modules,outdir):
# write the list
written_modules = []
for m in modules:
api_str = self.generate_api_doc(m)
if not api_str:
continue
# write out to file
outfile = os.path.join(outdir,
m + self.rst_extension)
fileobj = open(outfile, 'wt')
fileobj.write(api_str)
fileobj.close()
written_modules.append(m)
self.written_modules = written_modules
def write_api_docs(self, outdir):
"""Generate API reST files.
Parameters
----------
outdir : string
Directory name in which to store files
We create automatic filenames for each module
Returns
-------
None
Notes
-----
Sets self.written_modules to list of written modules
"""
if not os.path.exists(outdir):
os.mkdir(outdir)
# compose list of modules
modules = self.discover_modules()
self.write_modules_api(modules,outdir)
def write_index(self, outdir, froot='gen', relative_to=None):
"""Make a reST API index file from written files
Parameters
----------
path : string
Filename to write index to
outdir : string
Directory to which to write generated index file
froot : string, optional
root (filename without extension) of filename to write to
Defaults to 'gen'. We add ``self.rst_extension``.
relative_to : string
path to which written filenames are relative. This
component of the written file path will be removed from
outdir, in the generated index. Default is None, meaning,
leave path as it is.
"""
if self.written_modules is None:
raise ValueError('No modules written')
# Get full filename path
path = os.path.join(outdir, froot+self.rst_extension)
# Path written into index is relative to rootpath
if relative_to is not None:
relpath = outdir.replace(relative_to + os.path.sep, '')
else:
relpath = outdir
idx = open(path,'wt')
w = idx.write
w('.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n')
w('.. toctree::\n\n')
for f in self.written_modules:
w(' %s\n' % os.path.join(relpath,f))
idx.close()
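
For reference (the module is deleted here in favour of the sphinx-apidoc call that conf.py now makes), this writer was driven like so; a sketch assembled from the class's own docstrings and doctests:

docwriter = ApiDocWriter('GPy', package_skip_patterns=['\\.tests$'])
docwriter.write_api_docs('api')      # one .rst file per discovered module
docwriter.write_index('api', froot='gen', relative_to='api')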


@ -1,497 +0,0 @@
"""Extract reference documentation from the NumPy source tree.
"""
import inspect
import textwrap
import re
import pydoc
from StringIO import StringIO
from warnings import warn
class Reader(object):
"""A line-based string reader.
"""
def __init__(self, data):
"""
Parameters
----------
data : str
String with lines separated by '\n'.
"""
if isinstance(data,list):
self._str = data
else:
self._str = data.split('\n') # store string as list of lines
self.reset()
def __getitem__(self, n):
return self._str[n]
def reset(self):
self._l = 0 # current line nr
def read(self):
if not self.eof():
out = self[self._l]
self._l += 1
return out
else:
return ''
def seek_next_non_empty_line(self):
for l in self[self._l:]:
if l.strip():
break
else:
self._l += 1
def eof(self):
return self._l >= len(self._str)
def read_to_condition(self, condition_func):
start = self._l
for line in self[start:]:
if condition_func(line):
return self[start:self._l]
self._l += 1
if self.eof():
return self[start:self._l+1]
return []
def read_to_next_empty_line(self):
self.seek_next_non_empty_line()
def is_empty(line):
return not line.strip()
return self.read_to_condition(is_empty)
def read_to_next_unindented_line(self):
def is_unindented(line):
return (line.strip() and (len(line.lstrip()) == len(line)))
return self.read_to_condition(is_unindented)
def peek(self,n=0):
if self._l + n < len(self._str):
return self[self._l + n]
else:
return ''
def is_empty(self):
return not ''.join(self._str).strip()
class NumpyDocString(object):
def __init__(self,docstring):
docstring = textwrap.dedent(docstring).split('\n')
self._doc = Reader(docstring)
self._parsed_data = {
'Signature': '',
'Summary': [''],
'Extended Summary': [],
'Parameters': [],
'Returns': [],
'Raises': [],
'Warns': [],
'Other Parameters': [],
'Attributes': [],
'Methods': [],
'See Also': [],
'Notes': [],
'Warnings': [],
'References': '',
'Examples': '',
'index': {}
}
self._parse()
def __getitem__(self,key):
return self._parsed_data[key]
def __setitem__(self,key,val):
if not self._parsed_data.has_key(key):
warn("Unknown section %s" % key)
else:
self._parsed_data[key] = val
def _is_at_section(self):
self._doc.seek_next_non_empty_line()
if self._doc.eof():
return False
l1 = self._doc.peek().strip() # e.g. Parameters
if l1.startswith('.. index::'):
return True
l2 = self._doc.peek(1).strip() # ---------- or ==========
return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
def _strip(self,doc):
i = 0
j = 0
for i,line in enumerate(doc):
if line.strip(): break
for j,line in enumerate(doc[::-1]):
if line.strip(): break
return doc[i:len(doc)-j]
def _read_to_next_section(self):
section = self._doc.read_to_next_empty_line()
while not self._is_at_section() and not self._doc.eof():
if not self._doc.peek(-1).strip(): # previous line was empty
section += ['']
section += self._doc.read_to_next_empty_line()
return section
def _read_sections(self):
while not self._doc.eof():
data = self._read_to_next_section()
name = data[0].strip()
if name.startswith('..'): # index section
yield name, data[1:]
elif len(data) < 2:
yield StopIteration
else:
yield name, self._strip(data[2:])
def _parse_param_list(self,content):
r = Reader(content)
params = []
while not r.eof():
header = r.read().strip()
if ' : ' in header:
arg_name, arg_type = header.split(' : ')[:2]
else:
arg_name, arg_type = header, ''
desc = r.read_to_next_unindented_line()
desc = dedent_lines(desc)
params.append((arg_name,arg_type,desc))
return params
_name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)
def _parse_see_also(self, content):
"""
func_name : Descriptive text
continued text
another_func_name : Descriptive text
func_name1, func_name2, :meth:`func_name`, func_name3
"""
items = []
def parse_item_name(text):
"""Match ':role:`name`' or 'name'"""
m = self._name_rgx.match(text)
if m:
g = m.groups()
if g[1] is None:
return g[3], None
else:
return g[2], g[1]
raise ValueError("%s is not a item name" % text)
def push_item(name, rest):
if not name:
return
name, role = parse_item_name(name)
items.append((name, list(rest), role))
del rest[:]
current_func = None
rest = []
for line in content:
if not line.strip(): continue
m = self._name_rgx.match(line)
if m and line[m.end():].strip().startswith(':'):
push_item(current_func, rest)
current_func, line = line[:m.end()], line[m.end():]
rest = [line.split(':', 1)[1].strip()]
if not rest[0]:
rest = []
elif not line.startswith(' '):
push_item(current_func, rest)
current_func = None
if ',' in line:
for func in line.split(','):
push_item(func, [])
elif line.strip():
current_func = line
elif current_func is not None:
rest.append(line.strip())
push_item(current_func, rest)
return items
def _parse_index(self, section, content):
"""
.. index: default
:refguide: something, else, and more
"""
def strip_each_in(lst):
return [s.strip() for s in lst]
out = {}
section = section.split('::')
if len(section) > 1:
out['default'] = strip_each_in(section[1].split(','))[0]
for line in content:
line = line.split(':')
if len(line) > 2:
out[line[1]] = strip_each_in(line[2].split(','))
return out
def _parse_summary(self):
"""Grab signature (if given) and summary"""
if self._is_at_section():
return
summary = self._doc.read_to_next_empty_line()
summary_str = " ".join([s.strip() for s in summary]).strip()
if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
self['Signature'] = summary_str
if not self._is_at_section():
self['Summary'] = self._doc.read_to_next_empty_line()
else:
self['Summary'] = summary
if not self._is_at_section():
self['Extended Summary'] = self._read_to_next_section()
def _parse(self):
self._doc.reset()
self._parse_summary()
for (section,content) in self._read_sections():
if not section.startswith('..'):
section = ' '.join([s.capitalize() for s in section.split(' ')])
if section in ('Parameters', 'Attributes', 'Methods',
'Returns', 'Raises', 'Warns'):
self[section] = self._parse_param_list(content)
elif section.startswith('.. index::'):
self['index'] = self._parse_index(section, content)
elif section == 'See Also':
self['See Also'] = self._parse_see_also(content)
else:
self[section] = content
# string conversion routines
def _str_header(self, name, symbol='-'):
return [name, len(name)*symbol]
def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' '*indent + line]
return out
def _str_signature(self):
if self['Signature']:
return [self['Signature'].replace('*','\*')] + ['']
else:
return ['']
def _str_summary(self):
if self['Summary']:
return self['Summary'] + ['']
else:
return []
def _str_extended_summary(self):
if self['Extended Summary']:
return self['Extended Summary'] + ['']
else:
return []
def _str_param_list(self, name):
out = []
if self[name]:
out += self._str_header(name)
for param,param_type,desc in self[name]:
out += ['%s : %s' % (param, param_type)]
out += self._str_indent(desc)
out += ['']
return out
def _str_section(self, name):
out = []
if self[name]:
out += self._str_header(name)
out += self[name]
out += ['']
return out
def _str_see_also(self, func_role):
if not self['See Also']: return []
out = []
out += self._str_header("See Also")
last_had_desc = True
for func, desc, role in self['See Also']:
if role:
link = ':%s:`%s`' % (role, func)
elif func_role:
link = ':%s:`%s`' % (func_role, func)
else:
link = "`%s`_" % func
if desc or last_had_desc:
out += ['']
out += [link]
else:
out[-1] += ", %s" % link
if desc:
out += self._str_indent([' '.join(desc)])
last_had_desc = True
else:
last_had_desc = False
out += ['']
return out
def _str_index(self):
idx = self['index']
out = []
out += ['.. index:: %s' % idx.get('default','')]
for section, references in idx.iteritems():
if section == 'default':
continue
out += [' :%s: %s' % (section, ', '.join(references))]
return out
def __str__(self, func_role=''):
out = []
out += self._str_signature()
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters','Returns','Raises'):
out += self._str_param_list(param_list)
out += self._str_section('Warnings')
out += self._str_see_also(func_role)
for s in ('Notes','References','Examples'):
out += self._str_section(s)
out += self._str_index()
return '\n'.join(out)
def indent(str,indent=4):
indent_str = ' '*indent
if str is None:
return indent_str
lines = str.split('\n')
return '\n'.join(indent_str + l for l in lines)
def dedent_lines(lines):
"""Deindent a list of lines maximally"""
return textwrap.dedent("\n".join(lines)).split("\n")
def header(text, style='-'):
return text + '\n' + style*len(text) + '\n'
class FunctionDoc(NumpyDocString):
def __init__(self, func, role='func', doc=None):
self._f = func
self._role = role # e.g. "func" or "meth"
if doc is None:
doc = inspect.getdoc(func) or ''
try:
NumpyDocString.__init__(self, doc)
except ValueError, e:
print '*'*78
print "ERROR: '%s' while parsing `%s`" % (e, self._f)
print '*'*78
#print "Docstring follows:"
#print doclines
#print '='*78
if not self['Signature']:
func, func_name = self.get_func()
try:
# try to read signature
argspec = inspect.getargspec(func)
argspec = inspect.formatargspec(*argspec)
argspec = argspec.replace('*','\*')
signature = '%s%s' % (func_name, argspec)
except TypeError, e:
signature = '%s()' % func_name
self['Signature'] = signature
def get_func(self):
func_name = getattr(self._f, '__name__', self.__class__.__name__)
if inspect.isclass(self._f):
func = getattr(self._f, '__call__', self._f.__init__)
else:
func = self._f
return func, func_name
def __str__(self):
out = ''
func, func_name = self.get_func()
roles = {'func': 'function',
'meth': 'method'}
if self._role:
if self._role not in roles:
print "Warning: invalid role %s" % self._role
out += '.. %s:: %s\n \n\n' % (roles.get(self._role,''),
func_name)
out += super(FunctionDoc, self).__str__(func_role=self._role)
return out
class ClassDoc(NumpyDocString):
def __init__(self,cls,modulename='',func_doc=FunctionDoc,doc=None):
if not inspect.isclass(cls):
raise ValueError("Initialise using a class. Got %r" % cls)
self._cls = cls
if modulename and not modulename.endswith('.'):
modulename += '.'
self._mod = modulename
self._name = cls.__name__
self._func_doc = func_doc
if doc is None:
doc = pydoc.getdoc(cls)
NumpyDocString.__init__(self, doc)
@property
def methods(self):
return [name for name,func in inspect.getmembers(self._cls)
if not name.startswith('_') and callable(func)]
def __str__(self):
out = ''
out += super(ClassDoc, self).__str__()
out += "\n\n"
#for m in self.methods:
# print "Parsing `%s`" % m
# out += str(self._func_doc(getattr(self._cls,m), 'meth')) + '\n\n'
# out += '.. index::\n single: %s; %s\n\n' % (self._name, m)
return out

View file

@ -1,136 +0,0 @@
import re, inspect, textwrap, pydoc
from docscrape import NumpyDocString, FunctionDoc, ClassDoc
class SphinxDocString(NumpyDocString):
# string conversion routines
def _str_header(self, name, symbol='`'):
return ['.. rubric:: ' + name, '']
def _str_field_list(self, name):
return [':' + name + ':']
def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' '*indent + line]
return out
def _str_signature(self):
# Sphinx renders the signature itself, so emit only a blank line.
return ['']
def _str_summary(self):
return self['Summary'] + ['']
def _str_extended_summary(self):
return self['Extended Summary'] + ['']
def _str_param_list(self, name):
out = []
if self[name]:
out += self._str_field_list(name)
out += ['']
for param,param_type,desc in self[name]:
out += self._str_indent(['**%s** : %s' % (param.strip(),
param_type)])
out += ['']
out += self._str_indent(desc,8)
out += ['']
return out
def _str_section(self, name):
out = []
if self[name]:
out += self._str_header(name)
out += ['']
content = textwrap.dedent("\n".join(self[name])).split("\n")
out += content
out += ['']
return out
def _str_see_also(self, func_role):
out = []
if self['See Also']:
see_also = super(SphinxDocString, self)._str_see_also(func_role)
out = ['.. seealso::', '']
out += self._str_indent(see_also[2:])
return out
def _str_warnings(self):
out = []
if self['Warnings']:
out = ['.. warning::', '']
out += self._str_indent(self['Warnings'])
return out
def _str_index(self):
idx = self['index']
out = []
if len(idx) == 0:
return out
out += ['.. index:: %s' % idx.get('default','')]
for section, references in idx.iteritems():
if section == 'default':
continue
elif section == 'refguide':
out += [' single: %s' % (', '.join(references))]
else:
out += [' %s: %s' % (section, ','.join(references))]
return out
def _str_references(self):
out = []
if self['References']:
out += self._str_header('References')
if isinstance(self['References'], str):
self['References'] = [self['References']]
out.extend(self['References'])
out += ['']
return out
def __str__(self, indent=0, func_role="obj"):
out = []
out += self._str_signature()
out += self._str_index() + ['']
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters', 'Attributes', 'Methods',
'Returns','Raises'):
out += self._str_param_list(param_list)
out += self._str_warnings()
out += self._str_see_also(func_role)
out += self._str_section('Notes')
out += self._str_references()
out += self._str_section('Examples')
out = self._str_indent(out,indent)
return '\n'.join(out)
class SphinxFunctionDoc(SphinxDocString, FunctionDoc):
pass
class SphinxClassDoc(SphinxDocString, ClassDoc):
pass
def get_doc_object(obj, what=None, doc=None):
if what is None:
if inspect.isclass(obj):
what = 'class'
elif inspect.ismodule(obj):
what = 'module'
elif callable(obj):
what = 'function'
else:
what = 'object'
if what == 'class':
return SphinxClassDoc(obj, '', func_doc=SphinxFunctionDoc, doc=doc)
elif what in ('function', 'method'):
return SphinxFunctionDoc(obj, '', doc=doc)
else:
if doc is None:
doc = pydoc.getdoc(obj)
return SphinxDocString(doc)

View file

@ -1,407 +0,0 @@
"""
Defines a docutils directive for inserting inheritance diagrams.
Provide the directive with one or more classes or modules (separated
by whitespace). For modules, all of the classes in that module will
be used.
Example::
Given the following classes:
class A: pass
class B(A): pass
class C(A): pass
class D(B, C): pass
class E(B): pass
.. inheritance-diagram:: D E
Produces a graph like the following:
A
/ \
B C
/ \ /
E D
The graph is inserted as a PNG+image map into HTML and a PDF in
LaTeX.
"""
import inspect
import os
import re
import subprocess
try:
from hashlib import md5
except ImportError:
from md5 import md5
from docutils.nodes import Body, Element
from docutils.parsers.rst import directives
from sphinx.roles import xfileref_role
def my_import(name):
"""Module importer - taken from the python documentation.
This function allows importing names with dots in them."""
mod = __import__(name)
components = name.split('.')
for comp in components[1:]:
mod = getattr(mod, comp)
return mod
class DotException(Exception):
pass
class InheritanceGraph(object):
"""
Given a list of classes, determines the set of classes that
they inherit from all the way to the root "object", and then
is able to generate a graphviz dot graph from them.
"""
def __init__(self, class_names, show_builtins=False):
"""
*class_names* is a list of child classes to show bases from.
If *show_builtins* is True, then Python builtins will be shown
in the graph.
"""
self.class_names = class_names
self.classes = self._import_classes(class_names)
self.all_classes = self._all_classes(self.classes)
if len(self.all_classes) == 0:
raise ValueError("No classes found for inheritance diagram")
self.show_builtins = show_builtins
py_sig_re = re.compile(r'''^([\w.]*\.)? # optional module path
(\w+) \s* $ # class name
''', re.VERBOSE)
def _import_class_or_module(self, name):
"""
Import a class using its fully-qualified *name*.
"""
try:
path, base = self.py_sig_re.match(name).groups()
except AttributeError:
raise ValueError(
"Invalid class or module '%s' specified for inheritance diagram" % name)
fullname = (path or '') + base
path = (path and path.rstrip('.'))
if not path:
path = base
try:
module = __import__(path, None, None, [])
# We must do an import of the fully qualified name. Otherwise if a
# subpackage 'a.b' is requested where 'import a' does NOT provide
# 'a.b' automatically, then 'a.b' will not be found below. This
# second call will force the equivalent of 'import a.b' to happen
# after the top-level import above.
my_import(fullname)
except ImportError:
raise ValueError(
"Could not import class or module '%s' specified for inheritance diagram" % name)
try:
todoc = module
for comp in fullname.split('.')[1:]:
todoc = getattr(todoc, comp)
except AttributeError:
raise ValueError(
"Could not find class or module '%s' specified for inheritance diagram" % name)
# If a class, just return it
if inspect.isclass(todoc):
return [todoc]
elif inspect.ismodule(todoc):
classes = []
for cls in todoc.__dict__.values():
if inspect.isclass(cls) and cls.__module__ == todoc.__name__:
classes.append(cls)
return classes
raise ValueError(
"'%s' does not resolve to a class or module" % name)
def _import_classes(self, class_names):
"""
Import a list of classes.
"""
classes = []
for name in class_names:
classes.extend(self._import_class_or_module(name))
return classes
def _all_classes(self, classes):
"""
Return a list of all classes that are ancestors of *classes*.
"""
all_classes = {}
def recurse(cls):
all_classes[cls] = None
for c in cls.__bases__:
if c not in all_classes:
recurse(c)
for cls in classes:
recurse(cls)
return all_classes.keys()
def class_name(self, cls, parts=0):
"""
Given a class object, return a fully-qualified name. This
works for things I've tested in matplotlib so far, but may not
be completely general.
"""
module = cls.__module__
if module == '__builtin__':
fullname = cls.__name__
else:
fullname = "%s.%s" % (module, cls.__name__)
if parts == 0:
return fullname
name_parts = fullname.split('.')
return '.'.join(name_parts[-parts:])
def get_all_class_names(self):
"""
Get all of the class names involved in the graph.
"""
return [self.class_name(x) for x in self.all_classes]
# These are the default options for graphviz
default_graph_options = {
"rankdir": "LR",
"size": '"8.0, 12.0"'
}
default_node_options = {
"shape": "box",
"fontsize": 10,
"height": 0.25,
"fontname": "Vera Sans, DejaVu Sans, Liberation Sans, Arial, Helvetica, sans",
"style": '"setlinewidth(0.5)"'
}
default_edge_options = {
"arrowsize": 0.5,
"style": '"setlinewidth(0.5)"'
}
def _format_node_options(self, options):
return ','.join(["%s=%s" % x for x in options.items()])
def _format_graph_options(self, options):
return ''.join(["%s=%s;\n" % x for x in options.items()])
def generate_dot(self, fd, name, parts=0, urls={},
graph_options={}, node_options={},
edge_options={}):
"""
Generate a graphviz dot graph from the classes that
were passed in to __init__.
*fd* is a Python file-like object to write to.
*name* is the name of the graph
*urls* is a dictionary mapping class names to http urls
*graph_options*, *node_options*, *edge_options* are
dictionaries containing key/value pairs to pass on as graphviz
properties.
"""
g_options = self.default_graph_options.copy()
g_options.update(graph_options)
n_options = self.default_node_options.copy()
n_options.update(node_options)
e_options = self.default_edge_options.copy()
e_options.update(edge_options)
fd.write('digraph %s {\n' % name)
fd.write(self._format_graph_options(g_options))
for cls in self.all_classes:
if not self.show_builtins and cls in __builtins__.values():
continue
name = self.class_name(cls, parts)
# Write the node
this_node_options = n_options.copy()
url = urls.get(self.class_name(cls))
if url is not None:
this_node_options['URL'] = '"%s"' % url
fd.write(' "%s" [%s];\n' %
(name, self._format_node_options(this_node_options)))
# Write the edges
for base in cls.__bases__:
if not self.show_builtins and base in __builtins__.values():
continue
base_name = self.class_name(base, parts)
fd.write(' "%s" -> "%s" [%s];\n' %
(base_name, name,
self._format_node_options(e_options)))
fd.write('}\n')
def run_dot(self, args, name, parts=0, urls={},
graph_options={}, node_options={}, edge_options={}):
"""
Run graphviz 'dot' over this graph, returning whatever 'dot'
writes to stdout.
*args* will be passed along as commandline arguments.
*name* is the name of the graph
*urls* is a dictionary mapping class names to http urls
Raises DotException for any of the many os and
installation-related errors that may occur.
"""
try:
dot = subprocess.Popen(['dot'] + list(args),
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
close_fds=True)
except OSError:
raise DotException("Could not execute 'dot'. Are you sure you have 'graphviz' installed?")
except ValueError:
raise DotException("'dot' called with invalid arguments")
except:
raise DotException("Unexpected error calling 'dot'")
self.generate_dot(dot.stdin, name, parts, urls, graph_options,
node_options, edge_options)
dot.stdin.close()
result = dot.stdout.read()
returncode = dot.wait()
if returncode != 0:
raise DotException("'dot' returned the errorcode %d" % returncode)
return result
class inheritance_diagram(Body, Element):
"""
A docutils node to use as a placeholder for the inheritance
diagram.
"""
pass
def inheritance_diagram_directive(name, arguments, options, content, lineno,
content_offset, block_text, state,
state_machine):
"""
Run when the inheritance_diagram directive is first encountered.
"""
node = inheritance_diagram()
class_names = arguments
# Create a graph starting with the list of classes
graph = InheritanceGraph(class_names)
# Create xref nodes for each target of the graph's image map and
# add them to the doc tree so that Sphinx can resolve the
# references to real URLs later. These nodes will eventually be
# removed from the doctree after we're done with them.
for name in graph.get_all_class_names():
refnodes, x = xfileref_role(
'class', ':class:`%s`' % name, name, 0, state)
node.extend(refnodes)
# Store the graph object so we can use it to generate the
# dot file later
node['graph'] = graph
# Store the original content for use as a hash
node['parts'] = options.get('parts', 0)
node['content'] = " ".join(class_names)
return [node]
def get_graph_hash(node):
return md5(node['content'] + str(node['parts'])).hexdigest()[-10:]
def html_output_graph(self, node):
"""
Output the graph for HTML. This will insert a PNG with clickable
image map.
"""
graph = node['graph']
parts = node['parts']
graph_hash = get_graph_hash(node)
name = "inheritance%s" % graph_hash
path = '_images'
dest_path = os.path.join(setup.app.builder.outdir, path)
if not os.path.exists(dest_path):
os.makedirs(dest_path)
png_path = os.path.join(dest_path, name + ".png")
path = setup.app.builder.imgpath
# Create a mapping from fully-qualified class names to URLs.
urls = {}
for child in node:
if child.get('refuri') is not None:
urls[child['reftitle']] = child.get('refuri')
elif child.get('refid') is not None:
urls[child['reftitle']] = '#' + child.get('refid')
# These arguments to dot will save a PNG file to disk and write
# an HTML image map to stdout.
image_map = graph.run_dot(['-Tpng', '-o%s' % png_path, '-Tcmapx'],
name, parts, urls)
return ('<img src="%s/%s.png" usemap="#%s" class="inheritance"/>%s' %
(path, name, name, image_map))
def latex_output_graph(self, node):
"""
Output the graph for LaTeX. This will insert a PDF.
"""
graph = node['graph']
parts = node['parts']
graph_hash = get_graph_hash(node)
name = "inheritance%s" % graph_hash
dest_path = os.path.abspath(os.path.join(setup.app.builder.outdir, '_images'))
if not os.path.exists(dest_path):
os.makedirs(dest_path)
pdf_path = os.path.abspath(os.path.join(dest_path, name + ".pdf"))
graph.run_dot(['-Tpdf', '-o%s' % pdf_path],
name, parts, graph_options={'size': '"6.0,6.0"'})
return '\n\\includegraphics{%s}\n\n' % pdf_path
def visit_inheritance_diagram(inner_func):
"""
This is just a wrapper around html/latex_output_graph to make it
easier to handle errors and insert warnings.
"""
def visitor(self, node):
try:
content = inner_func(self, node)
except DotException, e:
# Insert the exception as a warning in the document
warning = self.document.reporter.warning(str(e), line=node.line)
warning.parent = node
node.children = [warning]
else:
self.body.append(content)
node.children = []
return visitor
def do_nothing(self, node):
pass
def setup(app):
setup.app = app
setup.confdir = app.confdir
app.add_node(
inheritance_diagram,
latex=(visit_inheritance_diagram(latex_output_graph), do_nothing),
html=(visit_inheritance_diagram(html_output_graph), do_nothing))
app.add_directive(
'inheritance-diagram', inheritance_diagram_directive,
False, (1, 100, 0), parts = directives.nonnegative_int)

View file

@ -4,7 +4,6 @@ XXX - See what improvements can be made based on the new (as of Sept 2009)
 'pycon' lexer for the python console. At the very least it will give better
 highlighted tracebacks.
 """
-from __future__ import print_function
 #-----------------------------------------------------------------------------
 # Needed modules
@ -113,3 +112,4 @@ def setup(app):
 #-----------------------------------------------------------------------------
 # Register the extension as a valid pygments lexer
 highlighting.lexers['ipython'] = IPythonConsoleLexer()
View file

@ -71,14 +71,17 @@ except ImportError:
     from md5 import md5

 # Third-party
-import matplotlib
+try:
+    import matplotlib
+    matplotlib.use('Agg')
+except ImportError:
+    print "Couldn't find matplotlib"
 import sphinx
 from docutils.parsers.rst import directives
 from docutils import nodes
 from sphinx.util.compat import Directive
-matplotlib.use('Agg')
 # Our own
 from IPython import Config, InteractiveShell
 from IPython.core.profiledir import ProfileDir
@ -828,3 +831,5 @@ if __name__=='__main__':
     os.mkdir('_static')
     test()
     print 'All OK? Check figures in _static/'

View file

@ -1,120 +0,0 @@
from __future__ import print_function
import os
import sys
try:
from hashlib import md5
except ImportError:
from md5 import md5
from docutils import nodes
from docutils.parsers.rst import directives
import warnings
from matplotlib import rcParams
from matplotlib.mathtext import MathTextParser
rcParams['mathtext.fontset'] = 'cm'
mathtext_parser = MathTextParser("Bitmap")
# Define LaTeX math node:
class latex_math(nodes.General, nodes.Element):
pass
def fontset_choice(arg):
return directives.choice(arg, ['cm', 'stix', 'stixsans'])
options_spec = {'fontset': fontset_choice}
def math_role(role, rawtext, text, lineno, inliner,
options={}, content=[]):
i = rawtext.find('`')
latex = rawtext[i+1:-1]
node = latex_math(rawtext)
node['latex'] = latex
node['fontset'] = options.get('fontset', 'cm')
return [node], []
math_role.options = options_spec
def math_directive(name, arguments, options, content, lineno,
content_offset, block_text, state, state_machine):
latex = ''.join(content)
node = latex_math(block_text)
node['latex'] = latex
node['fontset'] = options.get('fontset', 'cm')
return [node]
# This uses mathtext to render the expression
def latex2png(latex, filename, fontset='cm'):
latex = "$%s$" % latex
orig_fontset = rcParams['mathtext.fontset']
rcParams['mathtext.fontset'] = fontset
if os.path.exists(filename):
depth = mathtext_parser.get_depth(latex, dpi=100)
else:
try:
depth = mathtext_parser.to_png(filename, latex, dpi=100)
except Exception:
warnings.warn("Could not render math expression %s" % latex,
Warning)
depth = 0
rcParams['mathtext.fontset'] = orig_fontset
sys.stdout.write("#")
sys.stdout.flush()
return depth
# LaTeX to HTML translation stuff:
def latex2html(node, source):
inline = isinstance(node.parent, nodes.TextElement)
latex = node['latex']
name = 'math-%s' % md5(latex).hexdigest()[-10:]
destdir = os.path.join(setup.app.builder.outdir, '_images', 'mathmpl')
if not os.path.exists(destdir):
os.makedirs(destdir)
dest = os.path.join(destdir, '%s.png' % name)
path = os.path.join(setup.app.builder.imgpath, 'mathmpl')
depth = latex2png(latex, dest, node['fontset'])
if inline:
cls = ''
else:
cls = 'class="center" '
if inline and depth != 0:
style = 'style="position: relative; bottom: -%dpx"' % (depth + 1)
else:
style = ''
return '<img src="%s/%s.png" %s%s/>' % (path, name, cls, style)
def setup(app):
setup.app = app
# Add visit/depart methods to HTML-Translator:
def visit_latex_math_html(self, node):
source = self.document.attributes['source']
self.body.append(latex2html(node, source))
def depart_latex_math_html(self, node):
pass
# Add visit/depart methods to LaTeX-Translator:
def visit_latex_math_latex(self, node):
inline = isinstance(node.parent, nodes.TextElement)
if inline:
self.body.append('$%s$' % node['latex'])
else:
self.body.extend(['\\begin{equation}',
node['latex'],
'\\end{equation}'])
def depart_latex_math_latex(self, node):
pass
app.add_node(latex_math, html=(visit_latex_math_html,
depart_latex_math_html))
app.add_node(latex_math, latex=(visit_latex_math_latex,
depart_latex_math_latex))
app.add_role('math', math_role)
app.add_directive('math', math_directive,
True, (0, 0, 0), **options_spec)

View file

@ -1,116 +0,0 @@
"""
========
numpydoc
========
Sphinx extension that handles docstrings in the Numpy standard format. [1]
It will:
- Convert Parameters etc. sections to field lists.
- Convert See Also section to a See also entry.
- Renumber references.
- Extract the signature from the docstring, if it can't be determined otherwise.
.. [1] http://projects.scipy.org/scipy/numpy/wiki/CodingStyleGuidelines#docstring-standard
"""
import os, re, pydoc
from docscrape_sphinx import get_doc_object, SphinxDocString
import inspect
def mangle_docstrings(app, what, name, obj, options, lines,
reference_offset=[0]):
if what == 'module':
# Strip top title
title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*',
re.I|re.S)
lines[:] = title_re.sub('', "\n".join(lines)).split("\n")
else:
doc = get_doc_object(obj, what, "\n".join(lines))
lines[:] = str(doc).split("\n")
if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \
obj.__name__:
if hasattr(obj, '__module__'):
v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__))
else:
v = dict(full_name=obj.__name__)
lines += ['', '.. htmlonly::', '']
lines += [' %s' % x for x in
(app.config.numpydoc_edit_link % v).split("\n")]
# replace reference numbers so that there are no duplicates
references = []
for l in lines:
l = l.strip()
if l.startswith('.. ['):
try:
references.append(int(l[len('.. ['):l.index(']')]))
except ValueError:
print "WARNING: invalid reference in %s docstring" % name
# Start renaming from the biggest number, otherwise we may
# overwrite references.
references.sort()
if references:
for i, line in enumerate(lines):
for r in references:
new_r = reference_offset[0] + r
lines[i] = lines[i].replace('[%d]_' % r,
'[%d]_' % new_r)
lines[i] = lines[i].replace('.. [%d]' % r,
'.. [%d]' % new_r)
reference_offset[0] += len(references)
def mangle_signature(app, what, name, obj, options, sig, retann):
# Do not try to inspect classes that don't define `__init__`
if (inspect.isclass(obj) and
'initializes x; see ' in pydoc.getdoc(obj.__init__)):
return '', ''
if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return
if not hasattr(obj, '__doc__'): return
doc = SphinxDocString(pydoc.getdoc(obj))
if doc['Signature']:
sig = re.sub("^[^(]*", "", doc['Signature'])
return sig, ''
def initialize(app):
try:
app.connect('autodoc-process-signature', mangle_signature)
except:
monkeypatch_sphinx_ext_autodoc()
def setup(app, get_doc_object_=get_doc_object):
global get_doc_object
get_doc_object = get_doc_object_
app.connect('autodoc-process-docstring', mangle_docstrings)
app.connect('builder-inited', initialize)
app.add_config_value('numpydoc_edit_link', None, True)
#------------------------------------------------------------------------------
# Monkeypatch sphinx.ext.autodoc to accept argspecless autodocs (Sphinx < 0.5)
#------------------------------------------------------------------------------
def monkeypatch_sphinx_ext_autodoc():
global _original_format_signature
import sphinx.ext.autodoc
if sphinx.ext.autodoc.format_signature is our_format_signature:
return
print "[numpydoc] Monkeypatching sphinx.ext.autodoc ..."
_original_format_signature = sphinx.ext.autodoc.format_signature
sphinx.ext.autodoc.format_signature = our_format_signature
def our_format_signature(what, obj):
r = mangle_signature(None, what, None, obj, None, None, None)
if r is not None:
return r[0]
else:
return _original_format_signature(what, obj)

View file

@ -1,64 +0,0 @@
#
# A pair of directives for inserting content that will only appear in
# either html or latex.
#
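# Hypothetical usage in a .rst source (illustration only; the directive
# names match those registered in setup() below):
#
#     .. htmlonly::
#
#         This paragraph appears only in the HTML build.
#
#     .. latexonly::
#
#         This paragraph appears only in the LaTeX build.
#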
from __future__ import print_function
from docutils.nodes import Body, Element
from docutils.parsers.rst import directives
class only_base(Body, Element):
def dont_traverse(self, *args, **kwargs):
return []
class html_only(only_base):
pass
class latex_only(only_base):
pass
def run(content, node_class, state, content_offset):
text = '\n'.join(content)
node = node_class(text)
state.nested_parse(content, content_offset, node)
return [node]
def html_only_directive(name, arguments, options, content, lineno,
content_offset, block_text, state, state_machine):
return run(content, html_only, state, content_offset)
def latex_only_directive(name, arguments, options, content, lineno,
content_offset, block_text, state, state_machine):
return run(content, latex_only, state, content_offset)
def builder_inited(app):
if app.builder.name == 'html':
latex_only.traverse = only_base.dont_traverse
else:
html_only.traverse = only_base.dont_traverse
def setup(app):
app.add_directive('htmlonly', html_only_directive, True, (0, 0, 0))
app.add_directive('latexonly', latex_only_directive, True, (0, 0, 0))
app.add_node(html_only)
app.add_node(latex_only)
# This will *really* never see the light of day. As it turns out,
# this results in "broken" image nodes, since they never get
# processed, so it is best not to do this.
# app.connect('builder-inited', builder_inited)
# Add visit/depart methods to HTML-Translator:
def visit_perform(self, node):
pass
def depart_perform(self, node):
pass
def visit_ignore(self, node):
node.children = []
def depart_ignore(self, node):
node.children = []
app.add_node(html_only, html=(visit_perform, depart_perform))
app.add_node(html_only, latex=(visit_ignore, depart_ignore))
app.add_node(latex_only, latex=(visit_perform, depart_perform))
app.add_node(latex_only, html=(visit_ignore, depart_ignore))

View file

@ -1,819 +0,0 @@
"""
A directive for including a matplotlib plot in a Sphinx document.
By default, in HTML output, `plot` will include a .png file with a
link to a high-res .png and .pdf. In LaTeX output, it will include a
.pdf.
The source code for the plot may be included in one of three ways:
1. **A path to a source file** as the argument to the directive::
.. plot:: path/to/plot.py
When a path to a source file is given, the content of the
directive may optionally contain a caption for the plot::
.. plot:: path/to/plot.py
This is the caption for the plot
Additionally, one may specify the name of a function to call (with
no arguments) immediately after importing the module::
.. plot:: path/to/plot.py plot_function1
2. Included as **inline content** to the directive::
.. plot::
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
img = mpimg.imread('_static/stinkbug.png')
imgplot = plt.imshow(img)
3. Using **doctest** syntax::
.. plot::
A plotting example:
>>> import matplotlib.pyplot as plt
>>> plt.plot([1,2,3], [4,5,6])
Options
-------
The ``plot`` directive supports the following options:
format : {'python', 'doctest'}
Specify the format of the input
include-source : bool
Whether to display the source code. The default can be changed
using the `plot_include_source` variable in conf.py
encoding : str
If this source file is in a non-UTF8 or non-ASCII encoding,
the encoding must be specified using the `:encoding:` option.
The encoding will not be inferred using the ``-*- coding -*-``
metacomment.
context : bool
If provided, the code will be run in the context of all
previous plot directives for which the `:context:` option was
specified. This only applies to inline code plot directives,
not those run from files.
nofigs : bool
If specified, the code block will be run, but no figures will
be inserted. This is usually useful with the ``:context:``
option.
Additionally, this directive supports all of the options of the
`image` directive, except for `target` (since plot will add its own
target). These include `alt`, `height`, `width`, `scale`, `align` and
`class`.
Configuration options
---------------------
The plot directive has the following configuration options:
plot_include_source
Default value for the include-source option
plot_pre_code
Code that should be executed before each plot.
plot_basedir
Base directory to which ``plot::`` file names are relative.
(If None or empty, file names are relative to the
directory where the file containing the directive is.)
plot_formats
File formats to generate. List of tuples or strings::
[(suffix, dpi), suffix, ...]
that determine the file format and the DPI. For entries whose
DPI was omitted, sensible defaults are chosen.
plot_html_show_formats
Whether to show links to the files in HTML.
plot_rcparams
A dictionary containing any non-standard rcParams that should
be applied before each plot.
plot_apply_rcparams
By default, rcParams are applied when `context` option is not used in
a plot directive. This configuration option overrides this behaviour
and applies rcParams before each plot.
plot_working_directory
By default, the working directory will be changed to the directory of
the example, so the code can get at its data files, if any. Also its
path will be added to `sys.path` so it can import any helper modules
sitting beside it. This configuration option can be used to specify
a central directory (also added to `sys.path`) where data files and
helper modules for all code are located.
plot_template
Provide a customized template for preparing restructured text.
"""
from __future__ import print_function
import sys, os, glob, shutil, imp, warnings, cStringIO, re, textwrap, \
traceback, exceptions
from docutils.parsers.rst import directives
from docutils import nodes
from docutils.parsers.rst.directives.images import Image
align = Image.align
import sphinx
sphinx_version = sphinx.__version__.split(".")
# The split is necessary for sphinx beta versions where the string is
# '6b1'
sphinx_version = tuple([int(re.split('[a-z]', x)[0])
for x in sphinx_version[:2]])
try:
# Sphinx depends on either Jinja or Jinja2
import jinja2
def format_template(template, **kw):
return jinja2.Template(template).render(**kw)
except ImportError:
import jinja
def format_template(template, **kw):
return jinja.from_string(template, **kw)
import matplotlib
import matplotlib.cbook as cbook
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib import _pylab_helpers
__version__ = 2
#------------------------------------------------------------------------------
# Relative pathnames
#------------------------------------------------------------------------------
# os.path.relpath is new in Python 2.6
try:
from os.path import relpath
except ImportError:
# Copied from Python 2.7
if 'posix' in sys.builtin_module_names:
def relpath(path, start=os.path.curdir):
"""Return a relative version of a path"""
from os.path import sep, curdir, join, abspath, commonprefix, \
pardir
if not path:
raise ValueError("no path specified")
start_list = abspath(start).split(sep)
path_list = abspath(path).split(sep)
# Work out how much of the filepath is shared by start and path.
i = len(commonprefix([start_list, path_list]))
rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
if not rel_list:
return curdir
return join(*rel_list)
elif 'nt' in sys.builtin_module_names:
def relpath(path, start=os.path.curdir):
"""Return a relative version of a path"""
from os.path import sep, curdir, join, abspath, commonprefix, \
pardir, splitunc
if not path:
raise ValueError("no path specified")
start_list = abspath(start).split(sep)
path_list = abspath(path).split(sep)
if start_list[0].lower() != path_list[0].lower():
unc_path, rest = splitunc(path)
unc_start, rest = splitunc(start)
if bool(unc_path) ^ bool(unc_start):
raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
% (path, start))
else:
raise ValueError("path is on drive %s, start on drive %s"
% (path_list[0], start_list[0]))
# Work out how much of the filepath is shared by start and path.
for i in range(min(len(start_list), len(path_list))):
if start_list[i].lower() != path_list[i].lower():
break
else:
i += 1
rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
if not rel_list:
return curdir
return join(*rel_list)
else:
raise RuntimeError("Unsupported platform (no relpath available!)")
#------------------------------------------------------------------------------
# Registration hook
#------------------------------------------------------------------------------
def plot_directive(name, arguments, options, content, lineno,
content_offset, block_text, state, state_machine):
return run(arguments, content, options, state_machine, state, lineno)
plot_directive.__doc__ = __doc__
def _option_boolean(arg):
if not arg or not arg.strip():
# no argument given, assume used as a flag
return True
elif arg.strip().lower() in ('no', '0', 'false'):
return False
elif arg.strip().lower() in ('yes', '1', 'true'):
return True
else:
raise ValueError('"%s" unknown boolean' % arg)
def _option_format(arg):
return directives.choice(arg, ('python', 'doctest'))
def _option_align(arg):
return directives.choice(arg, ("top", "middle", "bottom", "left", "center",
"right"))
def mark_plot_labels(app, document):
"""
To make plots referenceable, we need to move the reference from
the "htmlonly" (or "latexonly") node to the actual figure node
itself.
"""
for name, explicit in document.nametypes.iteritems():
if not explicit:
continue
labelid = document.nameids[name]
if labelid is None:
continue
node = document.ids[labelid]
if node.tagname in ('html_only', 'latex_only'):
for n in node:
if n.tagname == 'figure':
sectname = name
for c in n:
if c.tagname == 'caption':
sectname = c.astext()
break
node['ids'].remove(labelid)
node['names'].remove(name)
n['ids'].append(labelid)
n['names'].append(name)
document.settings.env.labels[name] = \
document.settings.env.docname, labelid, sectname
break
def setup(app):
setup.app = app
setup.config = app.config
setup.confdir = app.confdir
options = {'alt': directives.unchanged,
'height': directives.length_or_unitless,
'width': directives.length_or_percentage_or_unitless,
'scale': directives.nonnegative_int,
'align': _option_align,
'class': directives.class_option,
'include-source': _option_boolean,
'format': _option_format,
'context': directives.flag,
'nofigs': directives.flag,
'encoding': directives.encoding
}
app.add_directive('plot', plot_directive, True, (0, 2, False), **options)
app.add_config_value('plot_pre_code', None, True)
app.add_config_value('plot_include_source', False, True)
app.add_config_value('plot_formats', ['png', 'hires.png', 'pdf'], True)
app.add_config_value('plot_basedir', None, True)
app.add_config_value('plot_html_show_formats', True, True)
app.add_config_value('plot_rcparams', {}, True)
app.add_config_value('plot_apply_rcparams', False, True)
app.add_config_value('plot_working_directory', None, True)
app.add_config_value('plot_template', None, True)
app.connect('doctree-read', mark_plot_labels)
#------------------------------------------------------------------------------
# Doctest handling
#------------------------------------------------------------------------------
def contains_doctest(text):
try:
# check if it's valid Python as-is
compile(text, '<string>', 'exec')
return False
except SyntaxError:
pass
r = re.compile(r'^\s*>>>', re.M)
m = r.search(text)
return bool(m)
def unescape_doctest(text):
"""
Extract code from a piece of text, which contains either Python code
or doctests.
"""
if not contains_doctest(text):
return text
code = ""
for line in text.split("\n"):
m = re.match(r'^\s*(>>>|\.\.\.) (.*)$', line)
if m:
code += m.group(2) + "\n"
elif line.strip():
code += "# " + line.strip() + "\n"
else:
code += "\n"
return code
def split_code_at_show(text):
"""
Split code at plt.show()
"""
parts = []
is_doctest = contains_doctest(text)
part = []
for line in text.split("\n"):
if (not is_doctest and line.strip() == 'plt.show()') or \
(is_doctest and line.strip() == '>>> plt.show()'):
part.append(line)
parts.append("\n".join(part))
part = []
else:
part.append(line)
if "\n".join(part).strip():
parts.append("\n".join(part))
return parts
#------------------------------------------------------------------------------
# Template
#------------------------------------------------------------------------------
TEMPLATE = """
{{ source_code }}
{{ only_html }}
{% if source_link or (html_show_formats and not multi_image) %}
(
{%- if source_link -%}
`Source code <{{ source_link }}>`__
{%- endif -%}
{%- if html_show_formats and not multi_image -%}
{%- for img in images -%}
{%- for fmt in img.formats -%}
{%- if source_link or not loop.first -%}, {% endif -%}
`{{ fmt }} <{{ dest_dir }}/{{ img.basename }}.{{ fmt }}>`__
{%- endfor -%}
{%- endfor -%}
{%- endif -%}
)
{% endif %}
{% for img in images %}
.. figure:: {{ build_dir }}/{{ img.basename }}.png
{%- for option in options %}
{{ option }}
{% endfor %}
{% if html_show_formats and multi_image -%}
(
{%- for fmt in img.formats -%}
{%- if not loop.first -%}, {% endif -%}
`{{ fmt }} <{{ dest_dir }}/{{ img.basename }}.{{ fmt }}>`__
{%- endfor -%}
)
{%- endif -%}
{{ caption }}
{% endfor %}
{{ only_latex }}
{% for img in images %}
.. image:: {{ build_dir }}/{{ img.basename }}.pdf
{% endfor %}
{{ only_texinfo }}
{% for img in images %}
.. image:: {{ build_dir }}/{{ img.basename }}.png
{%- for option in options %}
{{ option }}
{% endfor %}
{% endfor %}
"""
exception_template = """
.. htmlonly::
[`source code <%(linkdir)s/%(basename)s.py>`__]
Exception occurred rendering plot.
"""
# the context of the plot for all directives specified with the
# :context: option
plot_context = dict()
class ImageFile(object):
def __init__(self, basename, dirname):
self.basename = basename
self.dirname = dirname
self.formats = []
def filename(self, format):
return os.path.join(self.dirname, "%s.%s" % (self.basename, format))
def filenames(self):
return [self.filename(fmt) for fmt in self.formats]
def out_of_date(original, derived):
"""
Returns True if derivative is out-of-date wrt original,
both of which are full file paths.
"""
return (not os.path.exists(derived) or
(os.path.exists(original) and
os.stat(derived).st_mtime < os.stat(original).st_mtime))
class PlotError(RuntimeError):
pass
def run_code(code, code_path, ns=None, function_name=None):
"""
Import a Python module from a path, and run the function given by
name, if function_name is not None.
"""
# Change the working directory to the directory of the example, so
# it can get at its data files, if any. Add its path to sys.path
# so it can import any helper modules sitting beside it.
pwd = os.getcwd()
old_sys_path = list(sys.path)
if setup.config.plot_working_directory is not None:
try:
os.chdir(setup.config.plot_working_directory)
except OSError as err:
raise OSError(str(err) + '\n`plot_working_directory` option in '
'Sphinx configuration file must be a valid '
'directory path')
except TypeError as err:
raise TypeError(str(err) + '\n`plot_working_directory` option in '
'Sphinx configuration file must be a string or '
'None')
sys.path.insert(0, setup.config.plot_working_directory)
elif code_path is not None:
dirname = os.path.abspath(os.path.dirname(code_path))
os.chdir(dirname)
sys.path.insert(0, dirname)
# Redirect stdout
stdout = sys.stdout
sys.stdout = cStringIO.StringIO()
# Reset sys.argv
old_sys_argv = sys.argv
sys.argv = [code_path]
try:
try:
code = unescape_doctest(code)
if ns is None:
ns = {}
if not ns:
if setup.config.plot_pre_code is None:
exec "import numpy as np\nfrom matplotlib import pyplot as plt\n" in ns
else:
exec setup.config.plot_pre_code in ns
if "__main__" in code:
exec "__name__ = '__main__'" in ns
exec code in ns
if function_name is not None:
exec function_name + "()" in ns
except (Exception, SystemExit), err:
raise PlotError(traceback.format_exc())
finally:
os.chdir(pwd)
sys.argv = old_sys_argv
sys.path[:] = old_sys_path
sys.stdout = stdout
return ns
def clear_state(plot_rcparams):
plt.close('all')
matplotlib.rc_file_defaults()
matplotlib.rcParams.update(plot_rcparams)
def render_figures(code, code_path, output_dir, output_base, context,
function_name, config):
"""
Run a pyplot script and save the low and high res PNGs and a PDF
in outdir.
Save the images under *output_dir* with file names derived from
*output_base*
"""
# -- Parse format list
default_dpi = {'png': 80, 'hires.png': 200, 'pdf': 200}
formats = []
plot_formats = config.plot_formats
if isinstance(plot_formats, (str, unicode)):
plot_formats = eval(plot_formats)
for fmt in plot_formats:
if isinstance(fmt, str):
formats.append((fmt, default_dpi.get(fmt, 80)))
elif type(fmt) in (tuple, list) and len(fmt)==2:
formats.append((str(fmt[0]), int(fmt[1])))
else:
raise PlotError('invalid image format "%r" in plot_formats' % fmt)
# -- Try to determine if all images already exist
code_pieces = split_code_at_show(code)
# Look for single-figure output files first
all_exists = True
img = ImageFile(output_base, output_dir)
for format, dpi in formats:
if out_of_date(code_path, img.filename(format)):
all_exists = False
break
img.formats.append(format)
if all_exists:
return [(code, [img])]
# Then look for multi-figure output files
results = []
all_exists = True
for i, code_piece in enumerate(code_pieces):
images = []
for j in xrange(1000):
if len(code_pieces) > 1:
img = ImageFile('%s_%02d_%02d' % (output_base, i, j), output_dir)
else:
img = ImageFile('%s_%02d' % (output_base, j), output_dir)
for format, dpi in formats:
if out_of_date(code_path, img.filename(format)):
all_exists = False
break
img.formats.append(format)
# assume that if we have one, we have them all
if not all_exists:
all_exists = (j > 0)
break
images.append(img)
if not all_exists:
break
results.append((code_piece, images))
if all_exists:
return results
# We didn't find the files, so build them
results = []
if context:
ns = plot_context
else:
ns = {}
for i, code_piece in enumerate(code_pieces):
if not context or config.plot_apply_rcparams:
clear_state(config.plot_rcparams)
run_code(code_piece, code_path, ns, function_name)
images = []
fig_managers = _pylab_helpers.Gcf.get_all_fig_managers()
for j, figman in enumerate(fig_managers):
if len(fig_managers) == 1 and len(code_pieces) == 1:
img = ImageFile(output_base, output_dir)
elif len(code_pieces) == 1:
img = ImageFile("%s_%02d" % (output_base, j), output_dir)
else:
img = ImageFile("%s_%02d_%02d" % (output_base, i, j),
output_dir)
images.append(img)
for format, dpi in formats:
try:
figman.canvas.figure.savefig(img.filename(format), dpi=dpi)
except Exception,err:
raise PlotError(traceback.format_exc())
img.formats.append(format)
results.append((code_piece, images))
if not context or config.plot_apply_rcparams:
clear_state(config.plot_rcparams)
return results
def run(arguments, content, options, state_machine, state, lineno):
# The user may provide a filename *or* Python code content, but not both
if arguments and content:
raise RuntimeError("plot:: directive can't have both args and content")
document = state_machine.document
config = document.settings.env.config
nofigs = 'nofigs' in options
options.setdefault('include-source', config.plot_include_source)
context = 'context' in options
rst_file = document.attributes['source']
rst_dir = os.path.dirname(rst_file)
if len(arguments):
if not config.plot_basedir:
source_file_name = os.path.join(setup.app.builder.srcdir,
directives.uri(arguments[0]))
else:
source_file_name = os.path.join(setup.confdir, config.plot_basedir,
directives.uri(arguments[0]))
# If there is content, it will be passed as a caption.
caption = '\n'.join(content)
# If the optional function name is provided, use it
if len(arguments) == 2:
function_name = arguments[1]
else:
function_name = None
with open(source_file_name, 'r') as fd:
code = fd.read()
output_base = os.path.basename(source_file_name)
else:
source_file_name = rst_file
code = textwrap.dedent("\n".join(map(str, content)))
counter = document.attributes.get('_plot_counter', 0) + 1
document.attributes['_plot_counter'] = counter
base, ext = os.path.splitext(os.path.basename(source_file_name))
output_base = '%s-%d.py' % (base, counter)
function_name = None
caption = ''
base, source_ext = os.path.splitext(output_base)
if source_ext in ('.py', '.rst', '.txt'):
output_base = base
else:
source_ext = ''
# ensure that LaTeX includegraphics doesn't choke on foo.bar.pdf filenames
output_base = output_base.replace('.', '-')
# is it in doctest format?
is_doctest = contains_doctest(code)
if 'format' in options:
if options['format'] == 'python':
is_doctest = False
else:
is_doctest = True
# determine output directory name fragment
source_rel_name = relpath(source_file_name, setup.confdir)
source_rel_dir = os.path.dirname(source_rel_name)
while source_rel_dir.startswith(os.path.sep):
source_rel_dir = source_rel_dir[1:]
# build_dir: where to place output files (temporarily)
build_dir = os.path.join(os.path.dirname(setup.app.doctreedir),
'plot_directive',
source_rel_dir)
# get rid of .. in paths, also changes pathsep
# see note in Python docs for warning about symbolic links on Windows.
# need to compare source and dest paths at end
build_dir = os.path.normpath(build_dir)
if not os.path.exists(build_dir):
os.makedirs(build_dir)
# output_dir: final location in the builder's directory
dest_dir = os.path.abspath(os.path.join(setup.app.builder.outdir,
source_rel_dir))
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
# how to link to files from the RST file
dest_dir_link = os.path.join(relpath(setup.confdir, rst_dir),
source_rel_dir).replace(os.path.sep, '/')
build_dir_link = relpath(build_dir, rst_dir).replace(os.path.sep, '/')
source_link = dest_dir_link + '/' + output_base + source_ext
# make figures
try:
results = render_figures(code, source_file_name, build_dir, output_base,
context, function_name, config)
errors = []
except PlotError, err:
reporter = state.memo.reporter
sm = reporter.system_message(
2, "Exception occurred in plotting %s\n from %s:\n%s" % (output_base,
source_file_name, err),
line=lineno)
results = [(code, [])]
errors = [sm]
# Properly indent the caption
caption = '\n'.join(' ' + line.strip()
for line in caption.split('\n'))
# generate output restructuredtext
total_lines = []
for j, (code_piece, images) in enumerate(results):
if options['include-source']:
if is_doctest:
lines = ['']
lines += [row.rstrip() for row in code_piece.split('\n')]
else:
lines = ['.. code-block:: python', '']
lines += [' %s' % row.rstrip()
for row in code_piece.split('\n')]
source_code = "\n".join(lines)
else:
source_code = ""
if nofigs:
images = []
opts = [':%s: %s' % (key, val) for key, val in options.items()
if key in ('alt', 'height', 'width', 'scale', 'align', 'class')]
only_html = ".. only:: html"
only_latex = ".. only:: latex"
only_texinfo = ".. only:: texinfo"
if j == 0:
src_link = source_link
else:
src_link = None
result = format_template(
config.plot_template or TEMPLATE,
dest_dir=dest_dir_link,
build_dir=build_dir_link,
source_link=src_link,
multi_image=len(images) > 1,
only_html=only_html,
only_latex=only_latex,
only_texinfo=only_texinfo,
options=opts,
images=images,
source_code=source_code,
html_show_formats=config.plot_html_show_formats,
caption=caption)
total_lines.extend(result.split("\n"))
total_lines.extend("\n")
if total_lines:
state_machine.insert_input(total_lines, source=source_file_name)
# copy image files to builder's output directory, if necessary
if not os.path.exists(dest_dir):
cbook.mkdirs(dest_dir)
for code_piece, images in results:
for img in images:
for fn in img.filenames():
destimg = os.path.join(dest_dir, os.path.basename(fn))
if fn != destimg:
shutil.copyfile(fn, destimg)
# copy script (if necessary)
target_name = os.path.join(dest_dir, output_base + source_ext)
with open(target_name, 'w') as f:
if source_file_name == rst_file:
code_escaped = unescape_doctest(code)
else:
code_escaped = code
f.write(code_escaped)
return errors

View file

@ -1,4 +1,3 @@
 *************************************
 Gaussian process regression tutorial
 *************************************
@ -12,7 +11,7 @@ We first import the libraries we will need: ::
     import numpy as np
     import GPy

-1 dimensional model
+1-dimensional model
 ===================

 For this toy example, we assume we have the following inputs and outputs::
@ -22,13 +21,11 @@ For this toy example, we assume we have the following inputs and outputs::
 Note that the observations Y include some noise.

-The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential) plus some white noise::
+The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on the Gaussian kernel (i.e. rbf or squared exponential)::

-    Gaussian = GPy.kern.rbf(D=1)
-    noise = GPy.kern.white(D=1)
-    kernel = Gaussian + noise
+    kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)

-The parameter ``D`` stands for the dimension of the input space. Note that many other kernels are implemented such as:
+The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Note that many other kernels are implemented, such as:

 * linear (``GPy.kern.linear``)
 * exponential kernel (``GPy.kern.exponential``)
@ -41,19 +38,19 @@ The inputs required for building the model are the observations and the kernel::

     m = GPy.models.GP_regression(X,Y,kernel)

-The functions ``print`` and ``plot`` give an insight of the model we have just build. The code::
+By default, some observation noise is added to the model. The functions ``print`` and ``plot`` give an insight into the model we have just built. The code::

     print m
     m.plot()

 gives the following output: ::

-    Marginal log-likelihood: -2.281e+01
+    Marginal log-likelihood: -4.479e+00
     Name            | Value  | Constraints | Ties | Prior
     -----------------------------------------------------------------
     rbf_variance    | 1.0000 |             |      |
     rbf_lengthscale | 1.0000 |             |      |
-    white_variance  | 1.0000 |             |      |
+    noise variance  | 1.0000 |             |      |

 .. figure:: Figures/tuto_GP_regression_m1.png
     :align: center
@ -75,24 +72,24 @@ but it is also possible to set a range or to constrain one parameter to be fixed

     m.unconstrain('')  # Required to remove the previous constraints
     m.constrain_positive('rbf_variance')
     m.constrain_bounded('lengthscale',1.,10.)
-    m.constrain_fixed('white',0.0025)
+    m.constrain_fixed('noise',0.0025)

 Once the constraints have been imposed, the model can be optimized::

     m.optimize()

 If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::

     m.optimize_restarts(Nrestarts = 10)

 Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model::

-    Marginal log-likelihood: 2.001e+01
+    Marginal log-likelihood: 3.603e+01
     Name            | Value  | Constraints | Ties | Prior
     -----------------------------------------------------------------
-    rbf_variance    | 0.8033 | (+ve)       |      |
-    rbf_lengthscale | 1.8033 | (1.0, 10.0) |      |
-    white_variance  | 0.0025 | Fixed       |      |
+    rbf_variance    | 0.8151 | (+ve)       |      |
+    rbf_lengthscale | 1.8037 | (1.0, 10.0) |      |
+    noise variance  | 0.0025 | Fixed       |      |

 .. figure:: Figures/tuto_GP_regression_m2.png
     :align: center
@ -101,7 +98,7 @@ Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting mo

     GP regression model after optimization of the parameters.

-2 dimensional example
+2-dimensional example
 =====================

 Here is a 2-dimensional example::
@ -131,15 +128,16 @@ Here is a 2 dimensional example::

     m.plot()
     print(m)

 The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic). The output of the last two lines is::

-    Marginal log-likelihood: 2.893e+01
+    Marginal log-likelihood: 6.682e+01
     Name                | Value  | Constraints | Ties | Prior
     ---------------------------------------------------------------------
-    Mat52_ARD_variance      | 0.4094 | (+ve) |      |
-    Mat52_ARD_lengthscale_0 | 2.1060 | (+ve) |      |
-    Mat52_ARD_lengthscale_1 | 2.0546 | (+ve) |      |
-    white_variance          | 0.0012 | (+ve) |      |
+    Mat52_variance      | 0.3860 | (+ve)       |      |
+    Mat52_lengthscale_0 | 2.0578 | (+ve)       |      |
+    Mat52_lengthscale_1 | 1.8542 | (+ve)       |      |
+    white_variance      | 0.0023 | (+ve)       |      |
+    noise variance      | 0.0000 | (+ve)       |      |

 .. figure:: Figures/tuto_GP_regression_m3.png
     :align: center

View file

@ -0,0 +1,177 @@
****************************
Tutorial: a kernel overview
****************************
First we import the libraries we will need ::
import pylab as pb
import numpy as np
import GPy
pb.ion()
For most kernels, the dimension is the only mandatory parameter needed to define a kernel object. However, it is also possible to specify the values of the parameters. For example, the following three commands are all valid ways to define a squared exponential kernel (i.e. rbf, or Gaussian) ::
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = 1.5, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
Both a ``plot`` and a ``print`` function are implemented to represent kernel objects ::
print ker1
ker1.plot()
ker2.plot()
ker3.plot()
.. figure:: Figures/tuto_kern_overview_basicdef.png
:align: center
:height: 350px
Implemented kernels
===================
Many kernels are already implemented in GPy. Here is a summary of most of them:
.. figure:: Figures/tuto_kern_overview_allkern.png
:align: center
:height: 800px
It is also possible to use the ``sympy`` package to build new kernels. This will be the subject of another tutorial.
Operations to combine kernels
=============================
In ``GPy``, kernel objects can be combined with the usual ``+`` and ``*`` operators. ::
k1 = GPy.kern.rbf(1,variance=1., lengthscale=2)
k2 = GPy.kern.Matern32(1,variance=1., lengthscale=2)
ker_add = k1 + k2
print ker_add
ker_prod = k1 * k2
print ker_prod
Note that by default, the operator ``+`` adds kernels defined on the same input space whereas ``*`` assumes that the kernels are defined on different input spaces. Here for example ``ker_add.D`` will return ``1`` whereas ``ker_prod.D`` will return ``2``.
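This can be checked directly (a small illustrative snippet, reusing the kernels defined above; the expected values follow from the behaviour just described) ::

    print k1.D        # 1 : k1 is defined on a 1-dimensional input space
    print ker_add.D   # 1 : the sum lives on the same input space
    print ker_prod.D  # 2 : the product stacks the two input spaces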
In order to add kernels defined on different input spaces, the required command is::
ker_add_orth = k1.add_orthogonal(k2)
.. figure:: Figures/tuto_kern_overview_add_orth.png
:align: center
:height: 350px
Output of ``ker_add_orth.plot(plot_limits=[[-10,-10],[10,10]])``.
Example: building an ANOVA kernel
==================================
In two dimensions, ANOVA kernels have the following form:
.. math::
k_{ANOVA}(x,y) = \prod_{i=1}^2 (1 + k_i(x_i,y_i)) = 1 + k_1(x_1,y_1) + k_2(x_2,y_2) + k_1(x_1,y_1) \times k_2(x_2,y_2).
Let us assume that we want to define an ANOVA kernel with a Matern 5/2 kernel for :math:`k_i`. As seen previously, we can define this kernel as follows ::
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat) * (k_cst + k_mat)
print Kanova
Printing the resulting kernel outputs the following ::
Name | Value | Constraints | Ties
---------------------------------------------------------------------------
bias<times>bias_variance | 1.0000 | |
bias<times>Mat52_variance | 1.0000 | |
bias<times>Mat52_Mat52_lengthscale | 3.0000 | | (1)
Mat52<times>bias_variance | 1.0000 | |
Mat52<times>bias_Mat52_lengthscale | 3.0000 | | (0)
Mat52<times>Mat52_variance | 1.0000 | |
Mat52<times>Mat52_Mat52_lengthscale | 3.0000 | | (0)
Mat52<times>Mat52_Mat52_lengthscale | 3.0000 | | (1)
Note that the lengthscales of ``Kanova`` are tied together so that the number of lengthscale parameters remains equal to two. On the other hand, there are four variance terms in the new parameterization: one for each term on the right-hand side of the above equation. We can illustrate the use of this kernel on a toy example whose true function has exactly this additive-plus-interaction structure::
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GP_regression(X,Y,Kanova)
m.plot()
.. figure:: Figures/tuto_kern_overview_mANOVA.png
:align: center
:height: 350px
As :math:`k_{ANOVA}` corresponds to a sum of four kernels, the best predictor can be split into a sum of four functions:
.. math::
bp(x) & = k(x)^t K^{-1} Y \\
& = (1 + k_1(x_1) + k_2(x_2) + k_1(x_1)k_2(x_2))^t K^{-1} Y \\
& = 1^t K^{-1} Y + k_1(x_1)^t K^{-1} Y + k_2(x_2)^t K^{-1} Y + (k_1(x_1)k_2(x_2))^t K^{-1} Y
The sub-models, one per term of this decomposition, can be plotted using the ``which_functions`` option of ``plot``::
pb.figure(figsize=(20,5))
pb.subplots_adjust(wspace=0.5)
pb.subplot(1,5,1)
m.plot()
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True])
.. figure:: Figures/tuto_kern_overview_mANOVAdec.png
:align: center
:height: 200px
.. import pylab as pb
import numpy as np
import GPy
pb.ion()
ker1 = GPy.kern.rbf(D=1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
ker3 = GPy.kern.rbf(1, .5, .25)
ker1.plot()
ker2.plot()
ker3.plot()
#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
pb.figure(figsize=(16,12))
pb.subplots_adjust(wspace=.5, hspace=.5)
for i, kern in enumerate(kernels):
pb.subplot(3,4,i+1)
kern.plot(x=7.5,plot_limits=[0.00001,15.])
pb.title(kernel_names[i]+ '\n')
# actual plot for the noise
i = 11
X = np.linspace(0.,15.,201)
WN = 0*X
WN[100] = 1.
pb.subplot(3,4,i+1)
pb.plot(X,WN,'b')

View file

@ -1,64 +0,0 @@
import numpy as np
import pylab as pb
pb.ion()
import sys
import GPy
pb.close('all')
N = 200
M = 15
resolution=5
X = np.linspace(0,12,N)[:,None]
Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
#k = GPy.kern.rbf(1)
k = GPy.kern.Matern32(1) + GPy.kern.white(1)
models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
models[0].scale_factor = 1.
models[1].scale_factor = 10.
models[2].scale_factor = 100.
models[3].scale_factor = 1000.
#GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
#GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
#GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
[m.constrain_fixed('white',0.1) for m in models]
#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
lls = []
cgs = []
grads = []
count = 0
for l,v in zip(xx.flatten(),yy.flatten()):
count += 1
print count, 'of', resolution**2
sys.stdout.flush()
[m.set('lengthscale',l) for m in models]
[m.set('_variance',10.**v) for m in models]
lls.append([m.log_likelihood() for m in models])
grads.append([m.log_likelihood_gradients() for m in models])
cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
for ll,cg in zip(lls,cgs):
pb.figure()
pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
pb.colorbar()
try:
pb.contour(xx,yy,np.exp(ll),colors='k')
except:
pass
pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
pb.colorbar()

View file

@ -3,7 +3,7 @@
import os import os
from numpy.distutils.core import Extension, setup from numpy.distutils.core import Extension, setup
from sphinx.setup_command import BuildDoc #from sphinx.setup_command import BuildDoc
# Version number # Version number
version = '0.1.3' version = '0.1.3'
@ -19,16 +19,19 @@ setup(name = 'GPy',
license = "BSD 3-clause", license = "BSD 3-clause",
keywords = "machine-learning gaussian-processes kernels", keywords = "machine-learning gaussian-processes kernels",
url = "http://ml.sheffield.ac.uk/GPy/", url = "http://ml.sheffield.ac.uk/GPy/",
packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples'], packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
package_dir={'GPy': 'GPy'}, package_dir={'GPy': 'GPy'},
package_data = {'GPy': ['GPy/examples']}, package_data = {'GPy': ['GPy/examples']},
py_modules = ['GPy.__init__'], py_modules = ['GPy.__init__'],
long_description=read('README.md'), long_description=read('README.md'),
#ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
# sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
install_requires=['mock', 'sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
setup_requires=['sphinx'], extras_require = {
cmdclass = {'build_sphinx': BuildDoc}, 'docs':['Sphinx', 'ipython'],
},
#setup_requires=['sphinx'],
#cmdclass = {'build_sphinx': BuildDoc},
classifiers=[ classifiers=[
"Development Status :: 1 - Alpha", "Development Status :: 1 - Alpha",
"Topic :: Machine Learning", "Topic :: Machine Learning",