Revert "Minor edits to reading Lee data in datasets.py"

This reverts commit 730e229238.
2026-05-21 14:05:14 +02:00 · 2014-06-08 21:02:42 +01:00 · 2014-06-08 21:02:42 +01:00 · 0812a0e15c
commit 0812a0e15c
parent 1dfe7ed0a8
57 changed files with 5 additions and 5969 deletions
--- a/GPy/kern/_src/notes.txt
+++ b/GPy/kern/_src/notes.txt
@ -1,5 +0,0 @@
-Combination covariances, with a particular flag (partition?) allow interogation of the individual covariance contribution. So if we have k_3 = k_1 + k_2 because f_3 = f_1 + f_2 then we allow a multi output covariance over f_1, f_2 and f_3 jointly. Then users can make observations of f_3 and f_1 and build a posterior over f_2 using multiple output encoding. When this flag is set an extra input is added to deal with it. We allow for a new 'combination covariance' of the form scale and add so that
-
-f_3 = f_1*a + f_2
-
-Where a can also be negative.
--- a/GPy/kern/_src/ode1_eq_lfm.py
+++ b/GPy/kern/_src/ode1_eq_lfm.py
@ -1,52 +0,0 @@
-try:
-    import sympy as sym
-    sympy_available=True
-except ImportError:
-    sympy_available=False
-
-import numpy as np
-from GPy.util.symbolic import differfln
-from symbolic import Symbolic
-from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
-
-class Ode1_eq_lfm(Symbolic):
-    """
-    A symbolic covariance based on a first order differential equation being driven by a latent force that is an exponentiated quadratic. 
-
-    """
-    def __init__(self, output_dim=1, param=None, name='Ode1_eq_lfm'):
-
-        input_dim = 2
-        x_0, z_0, decay_i, decay_j, lengthscale = sym.symbols('x_0, z_0, decay_i, decay_j, lengthscale', positive=True)
-        scale_i, scale_j = sym.symbols('scale_i, scale_j')
-        # note that covariance only valid for positive time.
-        
-        class sim_h(Function):
-            nargs = 5
-            @classmethod
-            def eval(cls, t, tprime, d_i, d_j, l):
-                half_l_di = 0.5*l*d_i
-                arg_1 = half_l_di + tprime/l
-                arg_2 = half_l_di - (t-tprime)/l
-                ln_part_1 = differfln(arg_1, arg_2)
-                arg_1 = half_l_di 
-                arg_2 = half_l_di - t/l
-                ln_part_2 = differfln(half_l_di, half_l_di - t/l)
-
-                
-                return (exp(half_l_di*half_l_di
-                                     - d_i*(t-tprime)
-                                     + ln_part_1
-                                     - log(d_i + d_j))
-                        - exp(half_l_di*half_l_di
-                              - d_i*t - d_j*tprime
-                              + ln_part_2
-                              - log(d_i + d_j)))
-            
-
-        f = scale_i*scale_j*(sim_h(x_0, z_0, decay_i, decay_j, lengthscale)
-                             + sim_h(z_0, x_0, decay_j, decay_i, lengthscale))
-        # extra input dim is to signify the output dimension. 
-        super(Ode1_eq_lfm, self).__init__(input_dim, k=f, output_dim=output_dim, name=name)
-        self.lengthscale.constrain_positive()
-        self.decay.constrain_positive()
--- a/GPy/kern/_src/old_symbolic.py
+++ b/GPy/kern/_src/old_symbolic.py
@ -1,445 +0,0 @@
-# Check Matthew Rocklin's blog post.
-try:
-    import sympy as sym
-    sympy_available=True
-    from sympy.utilities.lambdify import lambdify
-    from GPy.util.symbolic import stabilise
-except ImportError:
-    sympy_available=False
-
-import numpy as np
-from kern import Kern
-from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
-from GPy.util.functions import normcdf, normcdfln, logistic, logisticln, differfln
-from ...core.parameterization import Param
-
-class Symbolic(Kern):
-    """
-    A kernel object, where all the hard work is done by sympy.
-
-    :param k: the covariance function
-    :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
-
-    To construct a new sympy kernel, you'll need to define:
-     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
-     - that's it! we'll extract the variables from the function k.
-
-    Note:
-     - to handle multiple inputs, call them x_1, z_1, etc
-     - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
-    """
-    def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]):
-
-        if k is None:
-            raise ValueError, "You must provide an argument for the covariance function."
-
-        self.func_modules = func_modules
-        self.func_modules += [{'gamma':gamma,
-                               'gammaln':gammaln,
-                               'erf':erf, 'erfc':erfc,
-                               'erfcx':erfcx,
-                               'polygamma':polygamma,
-                               'differfln':differfln,
-                               'normcdf':normcdf,
-                               'normcdfln':normcdfln,
-                               'logistic':logistic,
-                               'logisticln':logisticln},
-                              'numpy']
-
-        super(Symbolic, self).__init__(input_dim, active_dims, name)
-
-        self._sym_k = k
-
-        # pull the variable names out of the symbolic covariance function.
-        sym_vars = [e for e in k.atoms() if e.is_Symbol]
-        self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
-        self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
-
-        # Check that variable names make sense.
-        assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)])
-        assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)])
-        assert len(self._sym_x)==len(self._sym_z)
-        x_dim=len(self._sym_x)
-
-        self._sym_kdiag = k
-        for x, z in zip(self._sym_x, self._sym_z):
-            self._sym_kdiag = self._sym_kdiag.subs(z, x)
-
-        # If it is a multi-output covariance, add an input for indexing the outputs.
-        self._real_input_dim = x_dim
-        # Check input dim is number of xs + 1 if output_dim is >1
-        assert self.input_dim == x_dim + int(output_dim > 1)
-        self.output_dim = output_dim
-
-        # extract parameter names from the covariance
-        thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
-
-        # Look for parameters with index (subscripts), they are associated with different outputs.
-        if self.output_dim>1:
-            self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
-            self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
-
-            # Make sure parameter appears with both indices!
-            assert len(self._sym_theta_i)==len(self._sym_theta_j)
-            assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)])
-
-            # Extract names of shared parameters (those without a subscript)
-            self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j]
-
-            self.num_split_params = len(self._sym_theta_i)
-            self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i]
-            # Add split parameters to the model.
-            for theta in self._split_theta_names:
-                # TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
-                setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
-                self.add_parameter(getattr(self, theta))
-
-
-            self.num_shared_params = len(self._sym_theta)
-            for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
-                self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i)
-
-        else:
-            self.num_split_params = 0
-            self._split_theta_names = []
-            self._sym_theta = thetas
-            self.num_shared_params = len(self._sym_theta)
-
-        # Add parameters to the model.
-        for theta in self._sym_theta:
-            val = 1.0
-            # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
-            if param is not None:
-                if param.has_key(theta.name):
-                    val = param[theta.name]
-            setattr(self, theta.name, Param(theta.name, val, None))
-            self.add_parameters(getattr(self, theta.name))
-
-        # Differentiate with respect to parameters.
-        derivative_arguments = self._sym_x + self._sym_theta
-        if self.output_dim > 1:
-            derivative_arguments += self._sym_theta_i
-
-        self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments}
-        self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments}
-
-        # This gives the parameters for the arg list.
-        self.arg_list = self._sym_x + self._sym_z + self._sym_theta
-        self.diag_arg_list = self._sym_x + self._sym_theta
-        if self.output_dim > 1:
-            self.arg_list += self._sym_theta_i + self._sym_theta_j
-            self.diag_arg_list += self._sym_theta_i
-
-        # Check if there are additional linear operators on the covariance.
-        self._sym_operators = operators
-        # TODO: Deal with linear operators
-        #if self._sym_operators:
-        #    for operator in self._sym_operators:
-                
-        # psi_stats aren't yet implemented.
-        if False:
-            self.compute_psi_stats()
-
-        # generate the code for the covariance functions
-        self._gen_code()
-
-    def __add__(self,other):
-        return spkern(self._sym_k+other._sym_k)
-
-    def _gen_code(self):
-        #fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
-        self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules)
-        self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()}
-        self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules)
-        self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()}
-
-    def K(self,X,X2=None):
-        self._K_computations(X, X2)
-        return self._K_function(**self._arguments)
-
-
-    def Kdiag(self,X):
-        self._K_computations(X)
-        return self._Kdiag_function(**self._diag_arguments)
-
-    def _param_grad_helper(self,partial,X,Z,target):
-        pass
-
-
-    def gradients_X(self, dL_dK, X, X2=None):
-        #if self._X is None or X.base is not self._X.base or X2 is not None:
-        self._K_computations(X, X2)
-        gradients_X = np.zeros_like(X)
-        for i, x in enumerate(self._sym_x):
-            gf = self._K_derivatives_code[x.name]
-            gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
-        if X2 is None:
-            gradients_X *= 2
-        return gradients_X
-
-    def gradients_X_diag(self, dL_dK, X):
-        self._K_computations(X)
-        dX = np.zeros_like(X)
-        for i, x in enumerate(self._sym_x):
-            gf = self._Kdiag_derivatives_code[x.name]
-            dX[:, i] = gf(**self._diag_arguments)*dL_dK
-        return dX
-
-    def update_gradients_full(self, dL_dK, X, X2=None):
-        # Need to extract parameters to local variables first
-        self._K_computations(X, X2)
-        for theta in self._sym_theta:
-            parameter = getattr(self, theta.name)
-            gf = self._K_derivatives_code[theta.name]
-            gradient = (gf(**self._arguments)*dL_dK).sum()
-            if X2 is not None:
-                gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
-            setattr(parameter, 'gradient', gradient)
-        if self.output_dim>1:
-            for theta in self._sym_theta_i:
-                parameter = getattr(self, theta.name[:-2])
-                gf = self._K_derivatives_code[theta.name]
-                A = gf(**self._arguments)*dL_dK
-                gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
-                                       for i in np.arange(self.output_dim)])
-                if X2 is None:
-                    gradient *= 2
-                else:
-                    A = gf(**self._reverse_arguments)*dL_dK.T
-                    gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
-                                 for i in np.arange(self.output_dim)])
-                setattr(parameter, 'gradient', gradient)
-
-
-    def update_gradients_diag(self, dL_dKdiag, X):
-        self._K_computations(X)
-        for theta in self._sym_theta:
-            parameter = getattr(self, theta.name)
-            gf = self._Kdiag_derivatives_code[theta.name]
-            setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
-        if self.output_dim>1:
-            for theta in self._sym_theta_i:
-                parameter = getattr(self, theta.name[:-2])
-                gf = self._Kdiag_derivatives_code[theta.name]
-                a = gf(**self._diag_arguments)*dL_dKdiag
-                setattr(parameter, 'gradient',
-                        np.asarray([a[np.where(self._output_ind==i)].sum()
-                         for i in np.arange(self.output_dim)]))
-
-    def _K_computations(self, X, X2=None):
-        """Set up argument lists for the derivatives."""
-        # Could check if this needs doing or not, there could
-        # definitely be some computational savings by checking for
-        # parameter updates here.
-        self._arguments = {}
-        self._diag_arguments = {}
-        for i, x in enumerate(self._sym_x):
-            self._arguments[x.name] =  X[:, i][:, None]
-            self._diag_arguments[x.name] =  X[:, i][:, None]
-        if self.output_dim > 1:
-            self._output_ind = np.asarray(X[:, -1], dtype='int')
-            for i, theta in enumerate(self._sym_theta_i):
-                self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
-                self._diag_arguments[theta.name] = self._arguments[theta.name]
-        for theta in self._sym_theta:
-            self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
-            self._diag_arguments[theta.name] = self._arguments[theta.name]
-
-        if X2 is not None:
-            for i, z in enumerate(self._sym_z):
-                self._arguments[z.name] =  X2[:, i][None, :]
-            if self.output_dim > 1:
-                self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
-                for i, theta in enumerate(self._sym_theta_j):
-                    self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
-        else:
-            for z in self._sym_z:
-                self._arguments[z.name] =  self._arguments['x_'+z.name[2:]].T
-            if self.output_dim > 1:
-                self._output_ind2 = self._output_ind
-                for theta in self._sym_theta_j:
-                    self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
-        if X2 is not None:
-            # These arguments are needed in gradients when X2 is not equal to X.
-            self._reverse_arguments = self._arguments
-            for x, z in zip(self._sym_x, self._sym_z):
-                self._reverse_arguments[x.name] = self._arguments[z.name].T
-                self._reverse_arguments[z.name] = self._arguments[x.name].T
-            if self.output_dim > 1:
-                for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
-                    self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
-                    self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
-
-if False:
-    class Symcombine(CombinationKernel):
-        """
-        Combine list of given sympy covariances together with the provided operations.
-        """
-        def __init__(self, subkerns, operations, name='sympy_combine'):
-            super(Symcombine, self).__init__(subkerns, name)
-            for subkern, operation in zip(subkerns, operations):
-                self._sym_k += self._k_double_operate(subkern._sym_k, operation)
-
-        #def _double_operate(self, k, operation):
-
-
-        @Cache_this(limit=2, force_kwargs=['which_parts'])
-        def K(self, X, X2=None, which_parts=None):
-            """
-            Combine covariances with a linear operator.
-            """
-            assert X.shape[1] == self.input_dim
-            if which_parts is None:
-                which_parts = self.parts
-            elif not isinstance(which_parts, (list, tuple)):
-                # if only one part is given
-                which_parts = [which_parts]
-            return reduce(np.add, (p.K(X, X2) for p in which_parts))
-
-        @Cache_this(limit=2, force_kwargs=['which_parts'])
-        def Kdiag(self, X, which_parts=None):
-            assert X.shape[1] == self.input_dim
-            if which_parts is None:
-                which_parts = self.parts
-            elif not isinstance(which_parts, (list, tuple)):
-                # if only one part is given
-                which_parts = [which_parts]
-            return reduce(np.add, (p.Kdiag(X) for p in which_parts))
-
-        def update_gradients_full(self, dL_dK, X, X2=None):
-            [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
-
-        def update_gradients_diag(self, dL_dK, X):
-            [p.update_gradients_diag(dL_dK, X) for p in self.parts]
-
-        def gradients_X(self, dL_dK, X, X2=None):
-            """Compute the gradient of the objective function with respect to X.
-
-            :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
-            :type dL_dK: np.ndarray (num_samples x num_inducing)
-            :param X: Observed data inputs
-            :type X: np.ndarray (num_samples x input_dim)
-            :param X2: Observed data inputs (optional, defaults to X)
-            :type X2: np.ndarray (num_inducing x input_dim)"""
-
-            target = np.zeros(X.shape)
-            [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
-            return target
-
-        def gradients_X_diag(self, dL_dKdiag, X):
-            target = np.zeros(X.shape)
-            [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
-            return target
-
-        def psi0(self, Z, variational_posterior):
-            return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
-
-        def psi1(self, Z, variational_posterior):
-            return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
-
-        def psi2(self, Z, variational_posterior):
-            psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
-            #return psi2
-            # compute the "cross" terms
-            from static import White, Bias
-            from rbf import RBF
-            #from rbf_inv import RBFInv
-            from linear import Linear
-            #ffrom fixed import Fixed
-
-            for p1, p2 in itertools.combinations(self.parts, 2):
-                # i1, i2 = p1.active_dims, p2.active_dims
-                # white doesn;t combine with anything
-                if isinstance(p1, White) or isinstance(p2, White):
-                    pass
-                # rbf X bias
-                #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
-                elif isinstance(p1,  Bias) and isinstance(p2, (RBF, Linear)):
-                    tmp = p2.psi1(Z, variational_posterior)
-                    psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
-                #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
-                elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
-                    tmp = p1.psi1(Z, variational_posterior)
-                    psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
-                elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
-                    assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
-                    tmp1 = p1.psi1(Z, variational_posterior)
-                    tmp2 = p2.psi1(Z, variational_posterior)
-                    psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
-                else:
-                    raise NotImplementedError, "psi2 cannot be computed for this kernel"
-            return psi2
-
-        def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-            from static import White, Bias
-            for p1 in self.parts:
-                #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
-                eff_dL_dpsi1 = dL_dpsi1.copy()
-                for p2 in self.parts:
-                    if p2 is p1:
-                        continue
-                    if isinstance(p2, White):
-                        continue
-                    elif isinstance(p2, Bias):
-                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-                    else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
-                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
-                p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
-
-        def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-            from static import White, Bias
-            target = np.zeros(Z.shape)
-            for p1 in self.parts:
-                #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
-                eff_dL_dpsi1 = dL_dpsi1.copy()
-                for p2 in self.parts:
-                    if p2 is p1:
-                        continue
-                    if isinstance(p2, White):
-                        continue
-                    elif isinstance(p2, Bias):
-                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-                    else:
-                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
-                target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
-            return target
-
-        def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-            from static import White, Bias
-            target_mu = np.zeros(variational_posterior.shape)
-            target_S = np.zeros(variational_posterior.shape)
-            for p1 in self._parameters_:
-                #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
-                eff_dL_dpsi1 = dL_dpsi1.copy()
-                for p2 in self._parameters_:
-                    if p2 is p1:
-                        continue
-                    if isinstance(p2, White):
-                        continue
-                    elif isinstance(p2, Bias):
-                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
-                    else:
-                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
-                a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
-                target_mu += a
-                target_S += b
-            return target_mu, target_S
-
-        def _getstate(self):
-            """
-            Get the current state of the class,
-            here just all the indices, rest can get recomputed
-            """
-            return super(Add, self)._getstate()
-
-        def _setstate(self, state):
-            super(Add, self)._setstate(state)
-
-        def add(self, other, name='sum'):
-            if isinstance(other, Add):
-                other_params = other._parameters_.copy()
-                for p in other_params:
-                    other.remove_parameter(p)
-                self.add_parameters(*other_params)
-            else: self.add_parameter(other)
-            return self
--- a/GPy/kern/_src/old_sympykern.py
+++ b/GPy/kern/_src/old_sympykern.py
@ -1,564 +0,0 @@
-# Check Matthew Rocklin's blog post.
-try: 
-    import sympy as sp
-    sympy_available=True
-    from sympy.utilities.autowrap import ufuncify
-except ImportError:
-    sympy_available=False
-    exit()
-
-from sympy.core.cache import clear_cache
-from sympy.utilities.codegen import codegen
-
-try:
-    from scipy import weave
-    weave_available = True
-except ImportError:
-    weave_available = False
-
-import os
-current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-import sys
-import numpy as np
-import re
-import tempfile
-import pdb
-import ast
-
-from kernpart import Kernpart
-from ...core.parameterization import Param
-from ...core.parameterization.transformations import Logexp
-# TODO have this set up in a set up file!
-user_code_storage = tempfile.gettempdir()
-
-class spkern(Kern):
-    """
-    A kernel object, where all the hard work in done by sympy.
-
-    :param k: the covariance function
-    :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
-
-    To construct a new sympy kernel, you'll need to define:
-     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
-     - that's it! we'll extract the variables from the function k.
-
-    Note:
-     - to handle multiple inputs, call them x_1, z_1, etc
-     - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
-    """
-    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
-
-        if name is None:
-            name='sympykern'
-        if k is None:
-            raise ValueError, "You must provide an argument for the covariance function."
-        super(spkern, self).__init__(input_dim, name)
-
-        self._sp_k = k
-
-        # pull the variable names out of the symbolic covariance function.
-        sp_vars = [e for e in k.atoms() if e.is_Symbol]
-        self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
-        self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
-
-        # Check that variable names make sense.
-        assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
-        assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
-        assert len(self._sp_x)==len(self._sp_z)
-        x_dim=len(self._sp_x)
-
-        # If it is a multi-output covariance, add an input for indexing the outputs.
-        self._real_input_dim = x_dim
-        # Check input dim is number of xs + 1 if output_dim is >1
-        assert self.input_dim == x_dim + int(output_dim > 1)
-        self.output_dim = output_dim
-
-        # extract parameter names from the covariance
-        thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
-
-
-        # Look for parameters with index (subscripts), they are associated with different outputs.
-        if self.output_dim>1:
-            self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
-            self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
-
-            # Make sure parameter appears with both indices!
-            assert len(self._sp_theta_i)==len(self._sp_theta_j)
-            assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
-
-            # Extract names of shared parameters (those without a subscript)
-            self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
-            
-            self.num_split_params = len(self._sp_theta_i)
-            self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
-            for theta in self._split_theta_names:
-                setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
-                self.add_parameters(getattr(self, theta))
-
-                #setattr(self, theta, np.ones(self.output_dim))
-            
-            self.num_shared_params = len(self._sp_theta)
-            #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
-            
-        else:
-            self.num_split_params = 0
-            self._split_theta_names = []
-            self._sp_theta = thetas
-            self.num_shared_params = len(self._sp_theta)
-            #self.num_params = self.num_shared_params
-
-        # Add parameters to the model.
-        for theta in self._sp_theta:
-            val = 1.0
-            if param is not None:
-                if param.has_key(theta):
-                    val = param[theta]
-            #setattr(self, theta.name, val)
-            setattr(self, theta.name, Param(theta.name, val, None))
-            self.add_parameters(getattr(self, theta.name))
-        #deal with param            
-        #self._set_params(self._get_params())
-
-        # Differentiate with respect to parameters.
-        self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
-        if self.output_dim > 1:
-            self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
-
-        # differentiate with respect to input variables.
-        self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
-
-        # psi_stats aren't yet implemented.
-        if False:
-            self.compute_psi_stats()
-
-        self._code = {}
-
-        # generate the code for the covariance functions
-        self._gen_code()
-
-        if weave_available:
-            if False:
-                extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
-            else:
-                extra_compile_args = []
-            
-                self.weave_kwargs = {
-                    'support_code': None, #self._function_code,
-                    'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
-                    'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
-                    'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
-                    'extra_compile_args':extra_compile_args,
-                    'extra_link_args':['-lgomp'],
-                    'verbose':True}
-        self.parameters_changed() # initializes caches
-
-
-    def __add__(self,other):
-        return spkern(self._sp_k+other._sp_k)
-
-    def _gen_code(self):
-
-        argument_sequence = self._sp_x+self._sp_z+self._sp_theta
-        code_list = [('k',self._sp_k)]
-        # gradients with respect to covariance input
-        code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
-        # gradient with respect to parameters
-        code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
-        # gradient with respect to multiple output parameters
-        if self.output_dim > 1:
-            argument_sequence += self._sp_theta_i + self._sp_theta_j
-            code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
-        # generate c functions from sympy objects
-        if weave_available:
-            code_type = "C"
-        else:
-            code_type = "PYTHON"
-        # Need to add the sympy_helpers header in here.
-        (foo_c,self._function_code), (foo_h,self._function_header) = \
-                                     codegen(code_list,
-                                             code_type,
-                                             self.name,
-                                             argument_sequence=argument_sequence)
-
-
-        # Use weave to compute the underlying functions.
-        if weave_available:
-            # put the header file where we can find it
-            f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
-            f.write(self._function_header)
-            f.close()
-    
-
-        if weave_available:
-            # Substitute any known derivatives which sympy doesn't compute
-            self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
-            # put the cpp file in user code storage (defaults to temp file location)
-            f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
-        else:
-            # put the python file in user code storage
-            f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
-        f.write(self._function_code)
-        f.close()
-
-        if weave_available:
-            # arg_list will store the arguments required for the C code.
-            input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
-                        + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
-
-            # for multiple outputs reverse argument list is also required
-            if self.output_dim>1:
-                reverse_input_arg_list = list(input_arg_list)
-                reverse_input_arg_list.reverse()
-
-            # This gives the parameters for the arg list.
-            param_arg_list = [shared_params.name for shared_params in self._sp_theta]
-            arg_list = input_arg_list + param_arg_list
-
-            precompute_list=[]
-            if self.output_dim > 1:
-                reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
-                # For multiple outputs, also need the split parameters.
-                split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
-                split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
-                arg_list += split_param_arg_list
-                reverse_arg_list += split_param_reverse_arg_list
-                # Extract the right output indices from the inputs.
-                c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
-                precompute_list += c_define_output_indices
-                reverse_arg_string = ", ".join(reverse_arg_list)
-            arg_string = ", ".join(arg_list)
-            precompute_string = "\n".join(precompute_list)
-
-            # Now we use the arguments in code that computes the separate parts.
-
-            # Any precomputations will be done here eventually.
-            self._precompute = \
-                """
-                // Precompute code would go here. It will be called when parameters are updated. 
-                """
-
-            # Here's the code to do the looping for K
-            self._code['K'] =\
-            """
-            // _K_code
-            // Code for computing the covariance function.
-            int i;
-            int j;
-            int n = target_array->dimensions[0];
-            int num_inducing = target_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n;i++){
-                for (j=0;j<num_inducing;j++){
-                    %s
-                    //target[i*num_inducing+j] = 
-                    TARGET2(i, j) += k(%s);
-                }
-            }
-            %s
-            """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
-           # adding a string representation of the function in the
-           # comment forces recompile when needed
-            self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
-
-
-            # Code to compute diagonal of covariance.
-            diag_arg_string = re.sub('Z','X',arg_string)
-            diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
-            diag_arg_string = re.sub('j','i',diag_arg_string)
-            diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
-            diag_precompute_string = re.sub('Z','X',diag_precompute_string)
-            diag_precompute_string = re.sub('j','i',diag_precompute_string)
-            # Code to do the looping for Kdiag
-            self._code['Kdiag'] =\
-            """
-            // _code['Kdiag']
-            // Code for computing diagonal of covariance function.
-            int i;
-            int n = target_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for
-            for (i=0;i<n;i++){
-                    %s
-                    //target[i] =
-                    TARGET1(i)=k(%s);
-            }
-            %s
-            """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-
-            # Code to compute gradients
-            if self.output_dim>1:
-                for i, theta in enumerate(self._sp_theta_i):
-                    grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
-                    grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
-                    grad_func_list = c_define_output_indices+grad_func_list
-
-                    grad_func_string = '\n'.join(grad_func_list) 
-                    self._code['dK_d' + theta.name] =\
-                      """
-                      int i;
-                      int j;
-                      int n = partial_array->dimensions[0];
-                      int num_inducing = partial_array->dimensions[1];
-                      int input_dim = X_array->dimensions[1];
-                      //#pragma omp parallel for private(j)
-                      for (i=0;i<n;i++){
-                        for (j=0;j<num_inducing;j++){
-%s
-                        }
-                      }
-                      %s
-                      """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
-                    self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
-                    # Code to compute gradients for Kdiag TODO: needs clean up
-                    diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
-                    diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
-                    diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
-                    diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
-                    self._code['dKdiag_d' + theta.name] =\
-                      """
-                      // _dKdiag_dtheta_code
-                      // Code for computing gradient of diagonal with respect to parameters.
-                      int i;
-                      int n = partial_array->dimensions[0];
-                      int input_dim = X_array->dimensions[1];
-                      for (i=0;i<n;i++){
-                        %s
-                      }
-                      %s
-                      """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-
-            for i, theta in enumerate(self._sp_theta):
-                grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
-                grad_func_string = '\n'.join(grad_func_list) 
-
-                self._code['dK_d' + theta.name] =\
-                  """
-                  // _dK_dtheta_code
-                  // Code for computing gradient of covariance with respect to parameters.
-                  int i;
-                  int j;
-                  int n = partial_array->dimensions[0];
-                  int num_inducing = partial_array->dimensions[1];
-                  int input_dim = X_array->dimensions[1];
-                  //#pragma omp parallel for private(j)
-                  for (i=0;i<n;i++){
-                    for (j=0;j<num_inducing;j++){
-                      %s
-                    }
-                  }
-                  %s
-                  """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
-                self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
-                # Code to compute gradients for Kdiag TODO: needs clean up
-                diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
-                diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
-                diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
-                diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
-                self._code['dKdiag_d' + theta.name] =\
-                   """
-                   // _dKdiag_dtheta_code
-                   // Code for computing gradient of diagonal with respect to parameters.
-                   int i;
-                   int n = partial_array->dimensions[0];
-                   int input_dim = X_array->dimensions[1];
-                   for (i=0;i<n;i++){
-                     %s
-                   }
-                   %s
-                   """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-
-
-
-            # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
-            gradX_func_list = []
-            if self.output_dim>1:
-                gradX_func_list += c_define_output_indices
-            gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
-            gradX_func_string = "\n".join(gradX_func_list)
-
-            self._code['dK_dX'] = \
-            """
-            // _dK_dX_code
-            // Code for computing gradient of covariance with respect to inputs.
-            int i;
-            int j;
-            int n = partial_array->dimensions[0];
-            int num_inducing = partial_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n; i++){
-              for (j=0; j<num_inducing; j++){
-                %s
-              }
-            }
-            %s
-            """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-            self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
-
-
-            diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
-            diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
-            diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
-            diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
-
-            # Code for gradients of Kdiag wrt X
-            self._code['dKdiag_dX'] = \
-            """
-            // _dKdiag_dX_code
-            // Code for computing gradient of diagonal with respect to inputs.
-            int n = partial_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            for (int i=0;i<n; i++){
-                %s
-            }
-            %s
-            """%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
-            # string representation forces recompile when needed Get rid
-            # of Zs in argument for diagonal. TODO: Why wasn't
-            # diag_func_string called here? Need to check that.
-
-            
-
-
-            #TODO: insert multiple functions here via string manipulation
-            #TODO: similar functions for psi_stats
-            #TODO: similar functions when cython available.
-            #TODO: similar functions when only python available.
-            
-    def _get_arg_names(self, target=None, Z=None, partial=None):
-        arg_names = ['X']
-        if target is not None:
-            arg_names += ['target']
-        for shared_params in self._sp_theta:
-            arg_names += [shared_params.name]
-        if Z is not None:
-            arg_names += ['Z']
-        if partial is not None:
-            arg_names += ['partial']
-        if self.output_dim>1:
-            arg_names += self._split_theta_names
-            arg_names += ['output_dim']
-        return arg_names
-
-    def _generate_inline(self, code, X, target=None, Z=None, partial=None):
-        output_dim = self.output_dim
-        # Need to extract parameters to local variables first
-        for shared_params in self._sp_theta:
-            locals()[shared_params.name] = getattr(self, shared_params.name)
-            
-        for split_params in self._split_theta_names:
-            locals()[split_params] = np.asarray(getattr(self, split_params))
-        arg_names = self._get_arg_names(target, Z, partial)        
-
-        if weave_available:
-            return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
-        else:
-            raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
-
-    def K(self,X,Z,target):        
-        if Z is None:
-            self._generate_inline(self._code['K_X'], X, target)
-        else:
-            self._generate_inline(self._code['K'], X, target, Z)
-
-
-    def Kdiag(self,X,target):
-        self._generate_inline(self._code['Kdiag'], X, target)
-
-    def _param_grad_helper(self,partial,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
-        else:
-            self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
-
-    def dKdiag_dtheta(self,partial,X,target):
-        self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial).namelocals()[shared_params.name] = getattr(self, shared_params.name)
-               
-    def gradients_X(self,partial,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
-        else:
-            self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
-
-    def dKdiag_dX(self,partial,X,target):
-        self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial)
-
-    def compute_psi_stats(self):
-        #define some normal distributions
-        mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
-        Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
-        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
-
-        #do some integration!
-        #self._sp_psi0 = ??
-        self._sp_psi1 = self._sp_k
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi1 *= normals[i]
-            self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi1 = self._sp_psi1.simplify()
-
-        #and here's psi2 (eek!)
-        zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
-        self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi2 *= normals[i]
-            self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi2 = self._sp_psi2.simplify()
-
-    def parameters_changed(self):
-        # Reset the caches
-        self._cache, self._cache2 = np.empty(shape=(2, 1))
-        self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1)) 
-
-    def update_gradients_full(self, dL_dK, X):
-        # Need to extract parameters to local variables first
-        self._K_computations(X, None)
-        for shared_params in self._sp_theta:
-            parameter = getattr(self, shared_params.name)
-            code = self._code['dK_d' + shared_params.name]
-            setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
-            
-        for split_params in self._split_theta_names:
-            parameter = getattr(self, split_params.name)
-            code = self._code['dK_d' + split_params.name]
-            setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
-    
-
-    # def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-    #     #contributions from Kdiag
-    #     self.variance.gradient = np.sum(dL_dKdiag)
-        
-    #     #from Knm
-    #     self._K_computations(X, Z)
-    #     self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
-    #     if self.ARD:
-    #         self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
-            
-    #     else:
-    #         self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
-            
-    #     #from Kmm
-    #     self._K_computations(Z, None)
-    #     self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
-    #     if self.ARD:
-    #         self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
-    #     else:
-    #         self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
-            
-            
-    #---------------------------------------#
-    #            Precomputations            #
-    #---------------------------------------#
-
-    def _K_computations(self, X, Z):
-        if Z is None:
-            self._generate_inline(self._precompute, X)
-        else:
-            self._generate_inline(self._precompute, X, Z=Z)
--- a/GPy/kern/_src/poly_tmp.py
+++ b/GPy/kern/_src/poly_tmp.py
@ -1,134 +0,0 @@
-# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-from kernpart import Kernpart
-import numpy as np
-four_over_tau = 2./np.pi
-
-class POLY(Kernpart):
-    """
-
-    Polynomial kernel parameter initialisation.  Included for completeness, but generally not recommended, is the polynomial kernel:
-
-    .. math::
-        k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
-
-    The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
-    (weight_variance), :math:`\sigma^2_b` (bias_variance) and d
-    (degree). Only gradients of the first three are provided for
-    kernel optimisation, it is assumed that polynomial degree would
-    be set by hand.
-
-    The kernel is not recommended as it is badly behaved when the
-    :math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
-    there is an automatic relevance determination version of this
-    kernel provided (NOTE YET IMPLEMENTED!).
-    :param input_dim: the number of input dimensions
-    :type input_dim: int 
-    :param variance: the variance :math:`\sigma^2`
-    :type variance: float
-    :param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
-    :type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
-    :param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
-    :param degree: the degree of the polynomial.
-    :type degree: int
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
-    :type ARD: Boolean
-    :rtype: Kernpart object
-
-    """
-
-    def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
-        self.input_dim = input_dim
-        self.ARD = ARD
-        if not ARD:
-            self.num_params=3
-            if weight_variance is not None:
-                weight_variance = np.asarray(weight_variance)
-                assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
-            else:
-                weight_variance = 1.*np.ones(1)
-        else:
-            self.num_params = self.input_dim + 2
-            if weight_variance is not None:
-                weight_variance = np.asarray(weight_variance)
-                assert weight_variance.size == self.input_dim, "bad number of weight variances"
-            else:
-                weight_variance = np.ones(self.input_dim)
-            raise NotImplementedError
-        self.degree=degree
-        self.name='poly_deg' + str(self.degree)
-        self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
-
-    def _get_params(self):
-        return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
-
-    def _set_params(self, x):
-        assert x.size == (self.num_params)
-        self.variance = x[0]
-        self.weight_variance = x[1:-1]
-        self.weight_std = np.sqrt(self.weight_variance)
-        self.bias_variance = x[-1]
-
-    def _get_param_names(self):
-        if self.num_params == 3:
-            return ['variance', 'weight_variance', 'bias_variance']
-        else:
-            return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
-
-    def K(self, X, X2, target):
-        """Return covariance between X and X2."""
-        self._K_computations(X, X2)
-        target += self.variance*self._K_dvar
-
-    def Kdiag(self, X, target):
-        """Compute the diagonal of the covariance matrix for X."""
-        self._K_diag_computations(X)
-        target+= self.variance*self._K_diag_dvar
-
-    def dK_dtheta(self, dL_dK, X, X2, target):
-        """Derivative of the covariance with respect to the parameters."""
-        self._K_computations(X, X2)
-        base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
-        base_cov_grad = base*dL_dK
-
-
-            
-        target[0] += np.sum(self._K_dvar*dL_dK)
-        target[1] += (self._K_inner_prod*base_cov_grad).sum()
-        target[2] += base_cov_grad.sum()
-
-
-    def dK_dX(self, dL_dK, X, X2, target):
-        """Derivative of the covariance matrix with respect to X"""
-        self._K_computations(X, X2)
-        arg = self._K_poly_arg
-        if X2 is None:
-            target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
-        else:
-            target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
-            
-    def dKdiag_dX(self, dL_dKdiag, X, target):
-        """Gradient of diagonal of covariance with respect to X"""
-        self._K_diag_computations(X)
-        arg = self._K_diag_poly_arg
-        target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
-    
-    
-    def _K_computations(self, X, X2):
-        if self.ARD:
-            pass
-        else:
-            if X2 is None:
-                self._K_inner_prod = np.dot(X,X.T)
-            else:
-                self._K_inner_prod = np.dot(X,X2.T)
-            self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
-        self._K_dvar = self._K_poly_arg**self.degree
-
-    def _K_diag_computations(self, X):
-        if self.ARD:
-            pass
-        else:
-            self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
-        self._K_diag_dvar = self._K_diag_poly_arg**self.degree
--- a/GPy/kern/_src/symbolic2.py
+++ b/GPy/kern/_src/symbolic2.py
@ -1,70 +0,0 @@
-# Check Matthew Rocklin's blog post.
-import sympy as sym
-import numpy as np
-from kern import Kern
-from ...core.symbolic import Symbolic_core
-
-
-class Symbolic(Kern, Symbolic_core):
-    """
-    """
-    def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
-
-        if k is None:
-            raise ValueError, "You must provide an argument for the covariance function."
-
-        Kern.__init__(self, input_dim, active_dims, name=name)
-        kdiag = k
-        self.cacheable = ['X', 'Z']
-        Symbolic_core.__init__(self, {'k':k,'kdiag':kdiag}, cacheable=self.cacheable, derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)        
-        self.output_dim = output_dim
-
-    def __add__(self,other):
-        return spkern(self._sym_k+other._sym_k)
-
-    def _set_expressions(self, expressions):
-        """This method is overwritten because we need to modify kdiag by substituting z for x. We do this by calling the parent expression method to extract variables from expressions, then subsitute the z variables that are present with x."""
-        Symbolic_core._set_expressions(self, expressions)
-        Symbolic_core._set_variables(self, self.cacheable)
-        # Substitute z with x to obtain kdiag.
-        for x, z in zip(self.variables['X'], self.variables['Z']):
-            expressions['kdiag'] = expressions['kdiag'].subs(z, x)
-        Symbolic_core._set_expressions(self, expressions)
-            
-        
-    def K(self,X,X2=None):
-        if X2 is None:
-            return self.eval_function('k', X=X, Z=X)
-        else:
-            return self.eval_function('k', X=X, Z=X2)
-
-
-    def Kdiag(self,X):
-        d = self.eval_function('kdiag', X=X)
-        if not d.shape[0] == X.shape[0]:
-            d = np.tile(d, (X.shape[0], 1))
-        return d
-
-
-    def gradients_X(self, dL_dK, X, X2=None):
-        #if self._X is None or X.base is not self._X.base or X2 is not None:
-        g = self.eval_gradients_X('k', dL_dK, X=X, Z=X2)
-        if X2 is None:
-            g *= 2
-        return g
-
-    def gradients_X_diag(self, dL_dK, X):
-        return self.eval_gradients_X('kdiag', dL_dK, X=X)
-
-    def update_gradients_full(self, dL_dK, X, X2=None):
-        # Need to extract parameters to local variables first
-        if X2 is None:
-            # need to double this inside ...
-            self.eval_update_gradients('k', dL_dK, X=X)
-        else:
-            self.eval_update_gradients('k', dL_dK, X=X, Z=X2)
-
-
-    def update_gradients_diag(self, dL_dKdiag, X):
-        self.eval_update_gradients('kdiag', dL_dKdiag, X)
-
--- a/GPy/kern/matlab_construct.py
+++ b/GPy/kern/matlab_construct.py
@ -1,17 +0,0 @@
-# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-# Construct covariance functions from matlab saves.
-import numpy as np
-from kern import kern
-import parts
-import scipy.io
-
-def read_matlab(mat_data)
-    mat_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'frey_rawface.mat'))
-    if mat_data['type']=='cmpnd':
-        # cmpnd kernel
-        types = []
-        for i in range(mat_data['comp'][0][0]):
-            types.append(mat_data['comp'][0][0][i]
-        
--- a/GPy/kern/parts/eq.py
+++ b/GPy/kern/parts/eq.py
@ -1,318 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-from kernpart import Kernpart
-import numpy as np
-from scipy import weave
-from ...util.linalg import tdot
-from ...util.misc import fast_array_equal
-
-class EQ(Kernpart_stationary):
-    """
-    Exponentiated Quadratic covariance function als known as radial basis function, squared-exponential or Gaussian kernel.
-
-    .. math::
-
-       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \  \\text{ where  } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
-
-    where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
-
-    :param input_dim: the number of input dimensions
-    :type input_dim: int
-    :param variance: the variance of the kernel
-    :type variance: float
-    :param lengthscale: the vector of lengthscale of the kernel
-    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
-    :type ARD: Boolean
-    :rtype: kernel object
-
-    .. Note: this object implements both the ARD and 'spherical' version of the function
-    """
-
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
-        Kernpart_stationary.__init__(self, input_dim, lengthscale, ARD)
-        self.name = 'rbf'
-        self._set_params(np.hstack((variance, self.lengthscale.flatten())))
-
-        # a set of optional args to pass to weave
-        self.weave_options = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
-                         'extra_link_args'   : ['-lgomp']}
-
-
-
-    def _get_params(self):
-        return np.hstack((self.variance, self.lengthscale))
-
-    def _set_params(self, x):
-        assert x.size == (self.num_params)
-        self.variance = x[0]
-        Kernpart_stationary._set_params(self, x[1:])
-
-    def _get_param_names(self):
-        if self.num_params == 2:
-            return ['variance', 'lengthscale']
-        else:
-            return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
-
-    def K(self, X, X2, target):
-        self._K_computations(X, X2)
-        target += self.variance * self._K_dvar
-
-    def Kdiag(self, X, target):
-        np.add(target, self.variance, target)
-
-    def dK_dtheta(self, dL_dK, X, X2, target):
-        self._K_computations(X, X2)
-        target[0] += np.sum(self._K_dvar * dL_dK)
-        if self.ARD:
-            dvardLdK = self._K_dvar * dL_dK
-            var_len3 = self.variance / np.power(self.lengthscale, 3)
-            if X2 is None:
-                # save computation for the symmetrical case
-                dvardLdK = dvardLdK + dvardLdK.T
-                code = """
-                int q,i,j;
-                double tmp;
-                for(q=0; q<input_dim; q++){
-                  tmp = 0;
-                  for(i=0; i<num_data; i++){
-                    for(j=0; j<i; j++){
-                      tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
-                    }
-                  }
-                  target(q+1) += var_len3(q)*tmp;
-                }
-                """
-                num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
-                weave.inline(code, arg_names=['num_data',
-                                              'num_inducing',
-                                              'input_dim',
-                                              'X', 'X2',
-                                              'target',
-                                              'dvardLdK',
-                                              'var_len3'],
-                             type_converters=weave.converters.blitz,
-                             **self.weave_options)
-            else:
-                code = """
-                int q,i,j;
-                double tmp;
-                for(q=0; q<input_dim; q++){
-                  tmp = 0;
-                  for(i=0; i<num_data; i++){
-                    for(j=0; j<num_inducing; j++){
-                      tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
-                    }
-                  }
-                  target(q+1) += var_len3(q)*tmp;
-                }
-                """
-                num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
-                # [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
-                weave.inline(code, arg_names=['num_data',
-                                              'num_inducing',
-                                              'input_dim',
-                                              'X', 'X2',
-                                              'target',
-                                              'dvardLdK',
-                                              'var_len3'],
-                             type_converters=weave.converters.blitz,
-                             **self.weave_options)
-        else:
-            target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
-
-
-    def dK_dX(self, dL_dK, X, X2, target):
-        self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
-        dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
-        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
-
-    def dKdiag_dX(self, dL_dKdiag, X, target):
-        pass
-
-
-    #---------------------------------------#
-    #             PSI statistics            #
-    #---------------------------------------#
-
-    def psi0(self, Z, mu, S, target):
-        target += self.variance
-
-    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
-        target[0] += np.sum(dL_dpsi0)
-
-    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
-        pass
-
-    def psi1(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi1
-
-    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
-        d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
-        dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
-        if not self.ARD:
-            target[1] += dpsi1_dlength.sum()
-        else:
-            target[1:] += dpsi1_dlength.sum(0).sum(0)
-
-    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        denominator = (self.lengthscale2 * (self._psi1_denom))
-        dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
-        target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
-
-    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
-        self._psi_computations(Z, mu, S)
-        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
-        target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
-        target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
-
-    def psi2(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi2
-
-    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
-        """Shape N,num_inducing,num_inducing,Ntheta"""
-        self._psi_computations(Z, mu, S)
-        d_var = 2.*self._psi2 / self.variance
-        d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
-
-        target[0] += np.sum(dL_dpsi2 * d_var)
-        dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
-        if not self.ARD:
-            target[1] += dpsi2_dlength.sum()
-        else:
-            target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
-
-    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
-        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
-        dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
-        target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
-
-    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
-        """Think N,num_inducing,num_inducing,input_dim """
-        self._psi_computations(Z, mu, S)
-        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
-        target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
-        target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
-
-    #---------------------------------------#
-    #            Precomputations            #
-    #---------------------------------------#
-
-    def _K_computations(self, X, X2):
-        params = self._get_params()
-        if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2) and fast_array_equal(self._params , params)):
-            self._X = X.copy()
-            self._params = params.copy()
-            if X2 is None:
-                self._X2 = None
-                X = X / self.lengthscale
-                Xsquare = np.sum(np.square(X), 1)
-                self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
-            else:
-                self._X2 = X2.copy()
-                X = X / self.lengthscale
-                X2 = X2 / self.lengthscale
-                self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
-            self._K_dvar = np.exp(-0.5 * self._K_dist2)
-
-    def _psi_computations(self, Z, mu, S):
-        # here are the "statistics" for psi1 and psi2
-        if not fast_array_equal(Z, self._Z):
-            # Z has changed, compute Z specific stuff
-            self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
-            self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
-            self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
-
-        if not fast_array_equal(Z, self._Z) or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
-            # something's changed. recompute EVERYTHING
-
-            # psi1
-            self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
-            self._psi1_dist = Z[None, :, :] - mu[:, None, :]
-            self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
-            self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
-            self._psi1 = self.variance * np.exp(self._psi1_exponent)
-
-            # psi2
-            self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
-            self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
-            # self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
-            # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
-            # self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
-            self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
-
-            # store matrices for caching
-            self._Z, self._mu, self._S = Z, mu, S
-
-    def weave_psi2(self, mu, Zhat):
-        N, input_dim = mu.shape
-        num_inducing = Zhat.shape[0]
-
-        mudist = np.empty((N, num_inducing, num_inducing, input_dim))
-        mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
-        psi2_exponent = np.zeros((N, num_inducing, num_inducing))
-        psi2 = np.empty((N, num_inducing, num_inducing))
-
-        psi2_Zdist_sq = self._psi2_Zdist_sq
-        _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
-        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
-        variance_sq = float(np.square(self.variance))
-        if self.ARD:
-            lengthscale2 = self.lengthscale2
-        else:
-            lengthscale2 = np.ones(input_dim) * self.lengthscale2
-        code = """
-        double tmp;
-
-        #pragma omp parallel for private(tmp)
-        for (int n=0; n<N; n++){
-            for (int m=0; m<num_inducing; m++){
-               for (int mm=0; mm<(m+1); mm++){
-                   for (int q=0; q<input_dim; q++){
-                       //compute mudist
-                       tmp = mu(n,q) - Zhat(m,mm,q);
-                       mudist(n,m,mm,q) = tmp;
-                       mudist(n,mm,m,q) = tmp;
-
-                       //now mudist_sq
-                       tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
-                       mudist_sq(n,m,mm,q) = tmp;
-                       mudist_sq(n,mm,m,q) = tmp;
-
-                       //now psi2_exponent
-                       tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
-                       psi2_exponent(n,mm,m) += tmp;
-                       if (m !=mm){
-                           psi2_exponent(n,m,mm) += tmp;
-                       }
-                   //psi2 would be computed like this, but np is faster
-                   //tmp = variance_sq*exp(psi2_exponent(n,m,mm));
-                   //psi2(n,m,mm) = tmp;
-                   //psi2(n,mm,m) = tmp;
-                   }
-                }
-            }
-        }
-
-        """
-
-        support_code = """
-        #include <omp.h>
-        #include <math.h>
-        """
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
-                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
-                     type_converters=weave.converters.blitz, **self.weave_options)
-
-        return mudist, mudist_sq, psi2_exponent, psi2
--- a/GPy/kern/parts/ode1.py
+++ b/GPy/kern/parts/ode1.py
@ -1,428 +0,0 @@
-# Copyright (c) 2012, James Hensman and Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-from kernpart import Kernpart
-import numpy as np
-from GPy.util.linalg import mdot, pdinv
-from GPy.util import ln_diff_erfs
-import pdb
-from scipy import weave
-
-class Ode1(Kernpart):
-    """
-    Covariance function for first order differential equation driven by an exponetiated quadratic covariance.
-
-    This kernel has the form
-    .. math::
-
-    :param num_outputs: number of outputs driven by latent function.
-    :type num_outputs: int
-
-    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
-    """
-    def __init__(self,num_outputs, W=None, rank=1, delay=None, kappa=None):
-        self.rank = rank
-        self.input_dim = 1
-        self.name = 'ode1'
-        self.num_outputs = num_outputs
-        self.num_params = self.num_outputs*(1. + self.rank) + 1
-        if kappa is not None:
-            self.num_params+=num_outputs
-        if delay is not None:
-            self.num_params+=num_outputs
-        self.rank = rank
-        if W is None:
-            self.W = 0.5*np.random.randn(self.num_outputs,self.rank)/np.sqrt(self.rank)
-        else:
-            assert W.shape==(self.num_outputs,self.rank)
-            self.W = W
-            
-        if kappa is not None:
-            assert kappa.shape==(self.num_outputs,)
-        self.kappa = kappa
-
-        if delay is not None:
-            assert delay.shape==(self.num_outputs,)
-        self.delay = delay
-        self._set_params(self._get_params())
-
-    def _get_params(self):
-        param_list = [self.W.flatten()]
-        if self.kappa is not None:
-            param_list.append(self.kappa)
-        param_list.append(self.decay)
-        if self.delay is not None:
-            param_list.append(self.delay)
-        param_list.append(self.length_scale)
-        return np.hstack(param_list)
-
-    def _set_params(self,x):
-        assert x.size == self.num_params
-        end = self.num_outputs*self.rank
-        self.W = x[:end].reshape(self.num_outputs,self.rank)
-        start = end
-        self.B = np.dot(self.W,self.W.T)
-        if self.kappa is not None:
-            end+=self.num_outputs
-            self.kappa = x[start:end]
-            self.B += np.diag(self.kappa)
-            start=end
-        end+=num_outputs
-        self.decay = x[start:end]
-        start=end
-        if self.delay is not None:
-            end+=num_outputs
-            self.delay = x[start:end]
-            start=end
-        end+=1
-        self.length_scale = x[start]
-        self.sigma = np.sqrt(2)*self.length_scale
-
-
-    def _get_param_names(self):
-        param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.num_outputs)],[])
-        if self.kappa is not None:
-            param_names += ['kappa_%i'%i for i in range(self.num_outputs)]
-        param_names += ['decay_%i'%i for i in range(self.num_outputs)]
-        if self.delay is not None:
-            param_names += ['delay_%i'%i for i in range(self.num_outputs)]
-        param_names+= ['length_scale'] 
-        return param_names
-
-    def K(self,X,X2,target):
-        
-        if X.shape[1] > 2:
-            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
-
-        self._K_computations()
-        target += self._scales*self._dK_dvar
-
-        if self.gaussian_initial:
-            # Add covariance associated with initial condition.
-            t1_mat = self._t[self._rorder, None]
-            t2_mat = self._t2[None, self._rorder2]
-            target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
-
-
-
-    def Kdiag(self,index,target):
-        #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
-        pass
-    
-    def dK_dtheta(self,dL_dK,index,index2,target):
-        pass
-
-    def dKdiag_dtheta(self,dL_dKdiag,index,target):
-        pass
-
-    def dK_dX(self,dL_dK,X,X2,target):
-        pass
-
-    def _extract_t_indices(X, X2=None):
-        """Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
-
-        # TODO: some fast checking here to see if this needs recomputing?
-        self._t = X[:, 0]
-        if X.shape[1]==1:
-            # No index passed, assume single output of ode model.
-            self._index = np.ones_like(X[:, 1],dtype=np.int)
-        self._index = np.asarray(X[:, 1],dtype=np.int)
-        # Sort indices so that outputs are in blocks for computational
-        # convenience.
-        self._order = self._index.argsort()
-        self._index = self._index[self._order]
-        self._t = self._t[self._order]
-        self._rorder = self._order.argsort() # rorder is for reversing the order
-        
-        if X2 is None:
-            self._t2 = None
-            self._index2 = None
-            self._rorder2 = self._rorder
-        else:
-            if X2.shape[1] > 2:
-                raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
-            self._t2 = X2[:, 0]
-            if X.shape[1]==1:
-                # No index passed, assume single output of ode model.
-                self._index2 = np.ones_like(X2[:, 1],dtype=np.int)
-            self._index2 = np.asarray(X2[:, 1],dtype=np.int)
-            self._order2 = self._index2.argsort()
-            slef._index2 = self._index2[self._order2]
-            self._t2 = self._t2[self._order2]
-            self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
-
-    def _K_computations(X, X2):
-        """Perform main body of computations for the ode1 covariance function."""
-        # First extract times and indices.
-        self._extract_t_indices(X, X2)
-
-        self._K_compute_eq()
-        self._K_compute_eq_x_ode()
-        if X2 is None:
-            self._K_ode_eq = self._K_ode_eq.T
-        else:
-            self._K_compute_eq_x_ode(transpose=True)
-        self._K_compute_ode()
-        
-        # Reorder values of blocks for placing back into _K_dvar.
-        self._K_dvar[self._rorder, :] = np.vstack((
-            np.hstack((self._K_eq, self._Keq_ode))
-            np.hstack((self._K_ode_eq, self.K_ode))))[:, self._rorder2]
-
-
-    def _K_compute_eq():
-        """Compute covariance for latent covariance."""
-        t_eq = self._t[self._index==0]
-        if t_eq.shape[0]==0:
-            self._K_eq = np.zeros((0, 0))
-            return
-        
-        if self._t2 is None:
-            self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
-        else:
-            t2_eq = self._t2[self._index2==0]
-            if t2_eq.shape[0]==0:
-                self._K_eq_eq = np.zeros((0, 0))
-                return
-            self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
-        
-        self._K_eq = np.exp(-self._dist2/(2*self.length_scale*self.length_scale))
-        if self.is_normalise:
-            self._K_eq/=(np.sqrt(2*np.pi)*self.length_scale)
-
-    def _K_compute_ode_eq(transpose=False):
-        """Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
-
-        :param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
-        :type transpose: bool"""
-
-        if transpose:
-            t_ode = self._t2[self._index>0]
-            index_ode = self._index2[self._index>0]-1
-            if t_ode.shape[0]==0:
-                self._K_ode = np.zeros((0, 0))
-                return
-        
-            if self._t2 is not None:            
-                t2_ode = self._t2[self._index2>0]
-                index2_ode = self._index2[self._index2>0]-1
-                if t2_eq.shape[0]==0:
-                    self._K_ode = np.zeros((0, 0))
-                    return
-        else:
-
-        # Matrix giving scales of each output
-        self._scale = np.zeros((t_ode.shape[0], t_eq.shape[0]))
-        code="""
-        for(int i=0;i<N; i++){
-              for(int j=0; j<N2; j++){
-                  scale_mat[i+j*N] = self.W[index_sim[i]+index_eq[j]*num_outputs];
-                }
-              }
-            """
-            scale_mat, B = self._scale, self._B
-            N, N2, num_outputs = index_ode.size, index_eq.size, self.num_outputs
-            weave.inline(code,['index_ode', 'index_eq',
-                               'scale_mat', 'B',
-                               'N', 'N2', 'num_outputs'])
-        else:
-            self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
-            code = """
-            for(int i=0; i<N; i++){
-              for(int j=0; j<N2; j++){
-                scale_mat[i+j*N] = B[index_ode[i]+num_outputs*index2_ode[j]]
-              }
-            }
-            """
-            scale_mat, B = self._scale, self._B
-            N, N2, num_outputs = index_ode.size, index2.size, self.num_outputs
-            weave.inline(code, ['index_ode', 'index2_ode',
-                                'scale_mat', 'B',
-                                'N', 'N2', 'num_outputs'])
-        if transpose:
-            t_ode = t2 - self.delay
-            t_eq_mat = t1[None, :]
-        else:
-            t_ode = t1 - self.delay
-            t_eq_mat = t2[None, :]
-        t_ode_mat = t_ode[:, None]
-        diff_t = (t_ode_mat - t_eq_mat)
-        sigma = sqrt(2/self.inverseWidth)
-
-        invSigmaDiffT = 1/sigma*diff_t
-        halfSigmaD_i = 0.5*sigma*self.decay
-
-        if self.isStationary == false
-          [ln_part, signs] = ln_diff_erfs(halfSigmaD_i + t2Mat/sigma, halfSigmaD_i - invSigmaDiffT)
-        else
-          [ln_part, signs] = ln_diff_erfs(inf, halfSigmaD_i - invSigmaDiffT)
-        end
-        sK = signs .* exp(halfSigmaD_i*halfSigmaD_i - self.decay*diff_t + ln_part)
-
-        sK *= 0.5
-
-        if not self.is_normalised:
-            sK *= sqrt(pi)*self.sigma
-
-
-        if transpose:
-            self._K_eq_ode = 
-        else:
-            self._K_ode_eq = sK
-        return K
-        
-    def _K_compute_ode():
-        # Compute covariances between outputs of the ODE models.
-
-        t_ode = self._t[self._index>0]
-        index_ode = self._index[self._index>0]-1
-        if t_ode.shape[0]==0:
-            self._K_ode = np.zeros((0, 0))
-            return
-        
-        if self._t2 is not None:            
-            t2_ode = self._t2[self._index2>0]
-            index2_ode = self._index2[self._index2>0]-1
-            if t2_eq.shape[0]==0:
-                self._K_ode = np.zeros((0, 0))
-                return
-        
-        if self._index2 is None:
-            # Matrix giving scales of each output
-            self._scale = np.zeros((t_ode.shape[0], t_ode.shape[0]))
-            code="""
-            for(int i=0;i<N; i++){
-              scale_mat[i+i*N] = B[index_ode[i]+num_outputs*(index_ode[i])];
-              for(int j=0; j<i; j++){
-                  scale_mat[j+i*N] = B[index_ode[i]+num_outputs*index_ode[j]];
-                  scale_mat[i+j*N] = scale_mat[j+i*N];
-                }
-              }
-            """
-            scale_mat, B = self._scale, self._B
-            N, num_outputs = index_ode.size, self.num_outputs
-            weave.inline(code,['index_ode',
-                               'scale_mat', 'B',
-                               'N', 'num_outputs'])
-        else:
-            self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
-            code = """
-            for(int i=0; i<N; i++){
-              for(int j=0; j<N2; j++){
-                scale_mat[i+j*N] = B[index_ode[i]+num_outputs*index2_ode[j]]
-              }
-            }
-            """
-            scale_mat, B = self._scale, self._B
-            N, N2, num_outputs = index_ode.size, index2.size, self.num_outputs
-            weave.inline(code, ['index_ode', 'index2_ode',
-                                'scale_mat', 'B',
-                                'N', 'N2', 'num_outputs'])
-        
-        # When index is identical
-        if self.is_stationary:
-            h = self._compute_H_stat(t_ode, index_ode, t2_ode, index2_ode)
-        else:
-            h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode)
-
-        if t2 is None:
-            self._K_ode = 0.5 * (h + h.T)
-        else:
-            if self.is_stationary:
-                h2 = self._compute_H_stat(t2, index2, t, index)
-            else:
-                h2 = self._compute_H(t2, index2, t, index)                
-            self._K_ode += 0.5 * (h + h2.T)
-
-        if not self.is_normalized:
-            self._K_ode *= np.sqrt(np.pi)*sigma
-    
-    def _compute_H(t, index, t2, index2, update_derivatives=False):
-        """Helper function for computing part of the ode1 covariance function.
-
-        :param t: first time input.
-        :type t: array
-        :param index: Indices of first output.
-        :type index: array of int
-        :param t2: second time input.
-        :type t2: array
-        :param index2: Indices of second output.
-        :type index2: array of int
-        :param update_derivatives: whether to update derivatives (default is False)
-        :return h : result of this subcomponent of the kernel for the given values.
-        :rtype: ndarray
-"""
-
-        if t.shape[1] > 1 or t2.shape[1] > 1:
-            raise error('Input can only have one column')
-
-        # Vector of decays and delays associated with each output.
-        Decay = np.zeros(t.shape[0])
-        Delay = np.zeros(t.shape[0])
-        Decay2 = np.zeros(t2.shape[0])
-        Delay2 = np.zeros(t2.shape[0])
-        code="""
-        for(int i=0;i<N; i++){
-          Delay[i] = decays[index[i]];
-          Decay[i] = delays[index[i]];
-          }
-        for(int i=0; i<N2; i++){
-          Delay2[i] = decays[index2[i]];
-          Decay2[i] = delays[index2[i]];
-          }
-        """
-        delays, decays = self.delays, self.decays
-        N, N2 = index.size, index2.size
-        weave.inline(code,['index_ode',
-                           'Delay', 'Decay',
-                           'Delay2', 'Decay2',
-                           'delays', 'decays',
-                           'N', 'N2'])
-
-        t_mat = t[:, None]-Delay[:, None]
-        t2_mat = t2[None, :]-Delay2[None, :]
-        diff_t = (t_mat - t2_mat)
-        inv_sigma_diff_t = 1./self.sigma*diff_t
-        half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
-
-        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma, 
-                                        half_sigma_decay_i - inv_sigma_diff_t)
-        ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
-                                        half_sigma_decay_i - t_mat/self.sigma)
-
-        h = sign1*np.exp(half_sigma_decay_i
-                         *half_sigma_decay_i
-                         -Decay[:, None]*diff_t+ln_part_1
-                         -np.log(Decay[:, None] + Decay2[None, :]))
-        h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
-                          -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
-                          -np.log(Decay[:, None] + Decay2[None, :]))
-
-
-        # if update_derivatives:
-        #     sigma2 = self.sigma*self.sigma
-        #     # Update ith decay gradient
-        #     dh_ddecay += (0.5*self.decay[i]*sigma2*(self.decay[i] + decay[j])-1)*h
-        #     + (-diff_t*sign1*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*diff_t+ln_part_1) 
-        #        +t_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*t_mat - decay[j]*t2_mat+ln_part_2)) ...
-        #     +self.sigma/sqrt(pi)*(-np.exp(-diff_t*diff_t/sigma2)
-        #                           +np.exp(-t2_mat*t2_mat/sigma2-self.decay[i]*t_mat) 
-        #                           +np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) ...
-        #                           -np.exp(-(self.decay[i]*t_mat + decay[j]*t2_mat)))
-        #     self._dh_ddecay[i] += real(dh_ddecay/(self.decay[i]+decay[j]))
-
-        #     # Update jth decay gradient
-        #     dh_ddecay = t2_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-(self.decay[i]*t_mat + decay[j]*t2_mat)+ln_part_2)-h
-        #     self._dh_ddecay[j] += real(dh_ddecay/(self.decay[i] + decay[j]))
-            
-        #     # Update sigma gradient
-        #     self._dh_dsigma += 0.5*self.decay[i]*self.decay[i]*self.sigma*h + 2/(np.sqrt(np.pi)*(self.decay[i]+decay[j]))*((-diff_t/sigma2-self.decay[i]/ 
-        #                                                                                                                     2)*np.exp(-diff_t*
-        #                                                                                                                               diff_t/sigma2) 
-        #                                                                                                                    + (-t2_mat/sigma2+self.decay[i]/2)
-        #                                                                                                                    *np.exp(-t2_mat*t2_mat/sigma2 
-        #                                                                                                                            -self.decay[i]*t_mat) 
-        #                                                                                                                    - (-t_mat/sigma2-self.decay[i]/2) 
-        #                                                                                                                    *np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) 
-        #                                                                                                                    - self.decay[i]/2*np.exp(-(self.decay[i]*t_mat+decay[j]*t2_mat)))
-                
--- a/GPy/kern/parts/sympykern_old.py
+++ b/GPy/kern/parts/sympykern_old.py
@ -1,461 +0,0 @@
-import numpy as np
-import sympy as sp
-from sympy.utilities.codegen import codegen
-from sympy.core.cache import clear_cache
-from scipy import weave
-import re
-import os
-import sys
-current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-import tempfile
-import pdb
-import ast
-from kernpart import Kernpart
-from ...util.config import config
-
-class spkern(Kernpart):
-    """
-    A kernel object, where all the hard work in done by sympy.
-
-    :param k: the covariance function
-    :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
-
-    To construct a new sympy kernel, you'll need to define:
-     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
-     - that's it! we'll extract the variables from the function k.
-
-    Note:
-     - to handle multiple inputs, call them x_1, z_1, etc
-     - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
-    """
-    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
-        if name is None:
-            self.name='sympykern'
-        else:
-            self.name = name
-        if k is None:
-            raise ValueError, "You must provide an argument for the covariance function."
-        self._sp_k = k
-        sp_vars = [e for e in k.atoms() if e.is_Symbol]
-        self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
-        self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
-        # Check that variable names make sense.
-        assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
-        assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
-        assert len(self._sp_x)==len(self._sp_z)
-        self.input_dim = len(self._sp_x)
-        self._real_input_dim = self.input_dim
-        if output_dim > 1:
-            self.input_dim += 1
-        assert self.input_dim == input_dim
-        self.output_dim = output_dim
-        # extract parameter names
-        thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
-
-
-        # Look for parameters with index.
-        if self.output_dim>1:
-            self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
-            self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
-            # Make sure parameter appears with both indices!
-            assert len(self._sp_theta_i)==len(self._sp_theta_j)
-            assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
-
-            # Extract names of shared parameters
-            self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
-            
-            self.num_split_params = len(self._sp_theta_i)
-            self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
-            for theta in self._split_theta_names:
-                setattr(self, theta, np.ones(self.output_dim))
-            
-            self.num_shared_params = len(self._sp_theta)
-            self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
-            
-        else:
-            self.num_split_params = 0
-            self._split_theta_names = []
-            self._sp_theta = thetas
-            self.num_shared_params = len(self._sp_theta)
-            self.num_params = self.num_shared_params
-        
-        for theta in self._sp_theta:
-            val = 1.0
-            if param is not None:
-                if param.has_key(theta):
-                    val = param[theta]
-            setattr(self, theta.name, val)
-        #deal with param            
-        self._set_params(self._get_params())
-
-        #Differentiate!
-        self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
-        if self.output_dim > 1:
-            self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
-            
-        self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
-
-        if False:
-            self.compute_psi_stats()
-
-        self._gen_code()
-
-        if False:
-            extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
-        else:
-            extra_compile_args = []
-            
-        self.weave_kwargs = {
-            'support_code':self._function_code,
-            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
-            'headers':['"sympy_helpers.h"'],
-            'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
-            'extra_compile_args':extra_compile_args,
-            'extra_link_args':[],
-            'verbose':True}
-        if config.getboolean('parallel', 'openmp'): self.weave_kwargs.append('-lgomp')
-
-    def __add__(self,other):
-        return spkern(self._sp_k+other._sp_k)
-
-    def _gen_code(self):
-        """Generates the C functions necessary for computing the covariance function using the sympy objects as input."""
-        #TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances??
-
-        #generate c functions from sympy objects        
-        argument_sequence = self._sp_x+self._sp_z+self._sp_theta
-        code_list = [('k',self._sp_k)]
-        # gradients with respect to covariance input
-        code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
-        # gradient with respect to parameters
-        code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
-        # gradient with respect to multiple output parameters
-        if self.output_dim > 1:
-            argument_sequence += self._sp_theta_i + self._sp_theta_j
-            code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
-        (foo_c,self._function_code), (foo_h,self._function_header) = \
-                                     codegen(code_list, "C",'kernel_code',argument_sequence=argument_sequence)
-        #put the header file where we can find it
-        f = file(os.path.join(tempfile.gettempdir(),'kernel_code.h'),'w')
-        f.write(self._function_header)
-        f.close()
-
-        # Substitute any known derivatives which sympy doesn't compute
-        self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
-
-
-        ############################################################
-        # This is the basic argument construction for the C code.  #
-        ############################################################
-        
-        arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
-                    + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
-
-        # for multiple outputs need to also provide these arguments reversed.
-        if self.output_dim>1:
-            reverse_arg_list = list(arg_list)
-            reverse_arg_list.reverse()
-
-        # Add in any 'shared' parameters to the list.
-        param_arg_list = [shared_params.name for shared_params in self._sp_theta]
-        arg_list += param_arg_list
-
-        precompute_list=[]
-        if self.output_dim > 1:
-            reverse_arg_list+=list(param_arg_list)
-            split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
-            split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
-            arg_list += split_param_arg_list
-            reverse_arg_list += split_param_reverse_arg_list
-            # Extract the right output indices from the inputs.
-            c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
-            precompute_list += c_define_output_indices
-            reverse_arg_string = ", ".join(reverse_arg_list)
-        arg_string = ", ".join(arg_list)
-        precompute_string = "\n".join(precompute_list)
-
-        # Code to compute argments string needed when only X is provided.
-        X_arg_string = re.sub('Z','X',arg_string)
-        # Code to compute argument string when only diagonal is required.
-        diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
-        diag_arg_string = re.sub('j','i',diag_arg_string)
-        if precompute_string == '':
-            # if it's not multioutput, the precompute strings are set to zero
-            diag_precompute_string = ''
-            diag_precompute_replace = ''
-        else:
-            # for multioutput we need to extract the index of the output form the input.
-            diag_precompute_string = precompute_list[0]
-            diag_precompute_replace = precompute_list[1]
-        
-
-        # Here's the code to do the looping for K
-        self._K_code =\
-        """
-        // _K_code
-        // Code for computing the covariance function.
-        int i;
-        int j;
-        int N = target_array->dimensions[0];
-        int num_inducing = target_array->dimensions[1];
-        int input_dim = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
-        for (i=0;i<N;i++){
-            for (j=0;j<num_inducing;j++){
-%s
-                //target[i*num_inducing+j] = 
-                TARGET2(i, j) += k(%s);
-            }
-        }
-        %s
-        """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-
-        self._K_code_X = """
-        // _K_code_X
-        // Code for computing the covariance function.
-        int i;
-        int j;
-        int N = target_array->dimensions[0];
-        int num_inducing = target_array->dimensions[1];
-        int input_dim = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
-        for (i=0;i<N;i++){
-            %s // int ii=(int)X2(i, 1);
-            TARGET2(i, i) += k(%s);
-            for (j=0;j<i;j++){
-              %s //int jj=(int)X2(j, 1);
-              double kval = k(%s); //double kval = k(X2(i, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii));
-              TARGET2(i, j) += kval;
-              TARGET2(j, i) += kval;
-            }
-        }
-        /*%s*/
-        """%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', diag_precompute_replace), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
-
-        # Code to do the looping for Kdiag
-        self._Kdiag_code =\
-        """
-        // _Kdiag_code
-        // Code for computing diagonal of covariance function.
-        int i;
-        int N = target_array->dimensions[0];
-        int input_dim = X_array->dimensions[1];
-        //#pragma omp parallel for
-        for (i=0;i<N;i++){
-                %s
-                //target[i] =
-                TARGET1(i)=k(%s);
-        }
-        %s
-        """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-
-        # Code to compute gradients
-        grad_func_list = []
-        if self.output_dim>1:
-            grad_func_list += c_define_output_indices
-            grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
-            grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
-        grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in  enumerate(self._sp_theta)])
-        grad_func_string = '\n'.join(grad_func_list) 
-
-        self._dK_dtheta_code =\
-        """
-        // _dK_dtheta_code
-        // Code for computing gradient of covariance with respect to parameters.
-        int i;
-        int j;
-        int N = partial_array->dimensions[0];
-        int num_inducing = partial_array->dimensions[1];
-        int input_dim = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
-        for (i=0;i<N;i++){
-            for (j=0;j<num_inducing;j++){
-%s
-            }
-        }
-        %s
-        """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
-
-
-        # Code to compute gradients for Kdiag TODO: needs clean up
-        diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
-        diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
-        diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
-        diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string)
-        self._dKdiag_dtheta_code =\
-        """
-        // _dKdiag_dtheta_code
-        // Code for computing gradient of diagonal with respect to parameters.
-        int i;
-        int N = partial_array->dimensions[0];
-        int input_dim = X_array->dimensions[1];
-        for (i=0;i<N;i++){
-                %s
-        }
-        %s
-        """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-
-        # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
-        gradX_func_list = []
-        if self.output_dim>1:
-            gradX_func_list += c_define_output_indices
-        gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
-        gradX_func_string = "\n".join(gradX_func_list)
-
-        self._dK_dX_code = \
-        """
-        // _dK_dX_code
-        // Code for computing gradient of covariance with respect to inputs.
-        int i;
-        int j;
-        int N = partial_array->dimensions[0];
-        int num_inducing = partial_array->dimensions[1];
-        int input_dim = X_array->dimensions[1];
-        //#pragma omp parallel for private(j)
-        for (i=0;i<N; i++){
-          for (j=0; j<num_inducing; j++){
-            %s
-          }
-        }
-        %s
-        """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-  
-
-        diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
-        diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
-        diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
-        diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string)
-
-        # Code for gradients of Kdiag wrt X
-        self._dKdiag_dX_code= \
-        """
-        // _dKdiag_dX_code
-        // Code for computing gradient of diagonal with respect to inputs.
-        int N = partial_array->dimensions[0];
-        int input_dim = X_array->dimensions[1];
-        for (int i=0;i<N; i++){
-            %s
-        }
-        %s
-        """%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
-        # string representation forces recompile when needed Get rid
-        # of Zs in argument for diagonal. TODO: Why wasn't
-        # diag_func_string called here? Need to check that.
-        #self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
-
-        # Code to use when only X is provided. 
-        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
-        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= PARTIAL2(', '+= 2*PARTIAL2(') 
-        self._dK_dtheta_code_X = self._dK_dtheta_code_X.replace('Z2(', 'X2(')
-        self._dK_dX_code_X = self._dK_dX_code_X.replace('Z2(', 'X2(')
-
-
-        #TODO: insert multiple functions here via string manipulation
-        #TODO: similar functions for psi_stats
-    def _get_arg_names(self, Z=None, partial=None):
-        arg_names = ['target','X']
-        for shared_params in self._sp_theta:
-            arg_names += [shared_params.name]
-        if Z is not None:
-            arg_names += ['Z']
-        if partial is not None:
-            arg_names += ['partial']
-        if self.output_dim>1:
-            arg_names += self._split_theta_names
-            arg_names += ['output_dim']
-        return arg_names
-        
-    def _weave_inline(self, code, X, target, Z=None, partial=None):
-        output_dim = self.output_dim
-        for shared_params in self._sp_theta:
-            locals()[shared_params.name] = getattr(self, shared_params.name)
-
-        # Need to extract parameters first
-        for split_params in self._split_theta_names:
-            locals()[split_params] = getattr(self, split_params)
-        arg_names = self._get_arg_names(Z, partial)        
-        weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
-
-    def K(self,X,Z,target):        
-        if Z is None:
-            self._weave_inline(self._K_code_X, X, target)
-        else:
-            self._weave_inline(self._K_code, X, target, Z)
-
-
-    def Kdiag(self,X,target):
-        self._weave_inline(self._Kdiag_code, X, target)
-
-    def dK_dtheta(self,partial,X,Z,target):
-        if Z is None:
-            self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
-        else:
-            self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
-            
-    def dKdiag_dtheta(self,partial,X,target):
-        self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
-               
-    def dK_dX(self,partial,X,Z,target):
-        if Z is None:
-            self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
-        else:
-            self._weave_inline(self._dK_dX_code, X, target, Z, partial)
-
-    def dKdiag_dX(self,partial,X,target):
-        self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
-
-    def compute_psi_stats(self):
-        #define some normal distributions
-        mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
-        Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
-        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
-
-        #do some integration!
-        #self._sp_psi0 = ??
-        self._sp_psi1 = self._sp_k
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi1 *= normals[i]
-            self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi1 = self._sp_psi1.simplify()
-
-        #and here's psi2 (eek!)
-        zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
-        self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi2 *= normals[i]
-            self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi2 = self._sp_psi2.simplify()
-
-
-    def _set_params(self,param):        
-        assert param.size == (self.num_params)
-        for i, shared_params in enumerate(self._sp_theta):
-            setattr(self, shared_params.name, param[i])
-            
-        if self.output_dim>1:
-            for i, split_params in enumerate(self._split_theta_names):
-                start = self.num_shared_params + i*self.output_dim
-                end = self.num_shared_params + (i+1)*self.output_dim
-                setattr(self, split_params, param[start:end])
-
-
-    def _get_params(self):
-        params = np.zeros(0)
-        for shared_params in self._sp_theta:
-            params = np.hstack((params, getattr(self, shared_params.name)))
-        if self.output_dim>1:
-            for split_params in self._split_theta_names:
-                params = np.hstack((params, getattr(self, split_params).flatten()))
-        return params
-
-    def _get_param_names(self):
-        if self.output_dim>1:
-            return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i)  for x in self._sp_theta_i for i in range(self.output_dim)]
-        else:
-            return [x.name for x in self._sp_theta]
--- a/GPy/kern/ratquad.py
+++ b/GPy/kern/ratquad.py
@ -1,179 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-from kernpart import kernpart
-import numpy as np
-import hashlib
-
-class rbf(kernpart):
-    """
-    Rational quadratic kernel.
-    .. math::
-
-       k(r) = \sigma^2 \left(\frac{(\ell^{-2} +  r^2)}{2*\alpha}\right)^{-\alpha} \qquad \qquad \\text{ where  } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}}
-
-    where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input.
-
-    :param D: the number of input dimensions
-    :type D: int
-    :param variance: the variance of the kernel
-    :type variance: float
-    :param lengthscale: the lengthscale of the kernel (l)
-    :type lengthscale: float
-    :param smoothness: the smoothness parameter of the kernel
-    :type lengthscale: float
-
-    .. Note: for rational quadratic with different lengthscale on each dimension, see ratquad_ARD
-    """
-
-    def __init__(self,D,variance=1.,lengthscale=1.,smoothness)=1.:
-        self.D = D
-        self.Nparam = 3
-        self.name = 'ratquad'
-        self._set_params(np.hstack((variance,lengthscale,smoothness)))
-
-        #initialize cache
-        self._Z, self._mu, self._S = np.empty(shape=(3,1))
-        self._X, self._X2, self._params = np.empty(shape=(3,1))
-
-    def _get_params(self):
-        return np.hstack((self.variance,self.lengthscale,self.smoothness))
-
-    def _set_params(self,x):
-        self.variance, self.lengthscale, self.smoothness = x
-        self.lengthscale2 = np.square(self.lengthscale)
-        #reset cached results
-        self._X, self._X2, self._params = np.empty(shape=(3,1))
-        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
-
-    def _get_param_names(self):
-        return ['variance','lengthscale','smoothness']
-
-    def K(self,X,X2,target):
-        if X2 is None:
-            X2 = X
-        self._K_computations(X,X2)
-        np.add(self.variance*self._K_dvar, target,target)
-
-    def Kdiag(self,X,target):
-        np.add(target,self.variance,target)
-
-    def dK_dtheta(self,partial,X,X2,target):
-        self._K_computations(X,X2)
-        target[0] += np.sum(self._K_dvar*partial)
-        target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
-        target[2] += 
-    def dKdiag_dtheta(self,partial,X,target):
-        #NB: derivative of diagonal elements wrt lengthscale is 0
-        target[0] += np.sum(partial)
-
-    def dK_dX(self,partial,X,X2,target):
-        self._K_computations(X,X2)
-        if X2 is None:
-            _K_dist = 2*X[:, None, :] - X[None, :, :]
-        else:
-            _K_dist = X[:,None,:]-X2[None,:,:]
-        dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
-        target += np.sum(dK_dX*partial.T[:,:,None],0)
-
-    def dKdiag_dX(self,partial,X,target):
-        pass
-
-    def _K_computations(self,X,X2):
-        if not (np.all(X==self._X) and np.all(X2==self._X2)):
-            self._X = X
-            self._X2 = X2
-            if X2 is None: X2 = X
-            XXT = np.dot(X,X2.T)
-            if X is X2:
-                self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
-            else:
-                self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,np.newaxis] + np.sum(np.square(X2),1)[np.newaxis,:])/self.lengthscale2
-            self._K_dvar = (0.5./self.smoothness*(1./self.lengthscale2 + self._K_dist2))**(-self.smoothness)
-
-    def psi0(self,Z,mu,S,target):
-        target += self.variance
-
-    def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
-
-    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
-        pass
-
-    def psi1(self,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        target += self._psi1
-
-    def dpsi1_dtheta(self,partial,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
-        d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
-        target[0] += np.sum(partial*self._psi1/self.variance)
-        target[1] += np.sum(d_length*partial[:,:,None])
-
-    def dpsi1_dZ(self,partial,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
-
-    def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
-        self._psi_computations(Z,mu,S)
-        tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
-        target_mu += np.sum(partial*tmp*self._psi1_dist,1)
-        target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1)
-
-    def psi2(self,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise)
-
-    def dpsi2_dtheta(self,partial,Z,mu,S,target):
-        """Shape N,M,M,Ntheta"""
-        self._psi_computations(Z,mu,S)
-        d_var = np.sum(2.*self._psi2/self.variance,0)
-        d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
-        d_length = d_length.sum(0)
-        target[0] += np.sum(partial*d_var)
-        target[1] += np.sum(d_length*partial)
-
-    def dpsi2_dZ(self,partial,Z,mu,S,target):
-        """Returns shape N,M,M,Q"""
-        self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
-
-    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
-        """Think N,M,M,Q """
-        self._psi_computations(Z,mu,S)
-        tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
-        target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
-
-    def _psi_computations(self,Z,mu,S):
-        #here are the "statistics" for psi1 and psi2
-        if not np.all(Z==self._Z):
-            #Z has changed, compute Z specific stuff
-            self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
-            self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
-            self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscale2 # M,M,Q
-            self._Z = Z
-
-        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
-            #something's changed. recompute EVERYTHING
-
-            #TODO: make more efficient for large Q (using NDL's dot product trick)
-            #psi1
-            self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
-            self._psi1_dist = Z[None,:,:]-mu[:,None,:]
-            self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscale2/self._psi1_denom
-            self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
-            self._psi1 = self.variance*np.exp(self._psi1_exponent)
-
-            #psi2
-            self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
-            self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
-            self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
-            self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
-            self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
-
-            self._Z, self._mu, self._S = Z, mu,S
-