Merge branch 'params' of github.com:SheffieldML/GPy into params

This commit is contained in:
mzwiessele 2014-04-28 15:14:26 +01:00
commit 2b7b0a543c
12 changed files with 361 additions and 500 deletions

View file

@ -9,9 +9,10 @@ import sympy as sym
from ..core.parameterization import Param from ..core.parameterization import Param
from sympy.utilities.lambdify import lambdastr, _imp_namespace, _get_namespace from sympy.utilities.lambdify import lambdastr, _imp_namespace, _get_namespace
from sympy.utilities.iterables import numbered_symbols from sympy.utilities.iterables import numbered_symbols
from numpy import exp import scipy
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma import GPy
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln #from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
from GPy.util.symbolic import normcdf, normcdfln, logistic, logisticln, erfcx, erfc, gammaln
def getFromDict(dataDict, mapList): def getFromDict(dataDict, mapList):
return reduce(lambda d, k: d[k], mapList, dataDict) return reduce(lambda d, k: d[k], mapList, dataDict)
@ -27,15 +28,15 @@ class Symbolic_core():
# Base class init, do some basic derivatives etc. # Base class init, do some basic derivatives etc.
# Func_modules sets up the right mapping for functions. # Func_modules sets up the right mapping for functions.
func_modules += [{'gamma':gamma, func_modules += [{'gamma':scipy.special.gamma,
'gammaln':gammaln, 'gammaln':scipy.special.gammaln,
'erf':erf, 'erfc':erfc, 'erf':scipy.special.erf, 'erfc':scipy.special.erfc,
'erfcx':erfcx, 'erfcx':scipy.special.erfcx,
'polygamma':polygamma, 'polygamma':scipy.special.polygamma,
'normcdf':normcdf, 'normcdf':GPy.util.functions.normcdf,
'normcdfln':normcdfln, 'normcdfln':GPy.util.functions.normcdfln,
'logistic':logistic, 'logistic':GPy.util.functions.logistic,
'logisticln':logisticln}, 'logisticln':GPy.util.functions.logisticln},
'numpy'] 'numpy']
self._set_expressions(expressions) self._set_expressions(expressions)
@ -73,12 +74,13 @@ class Symbolic_core():
def _set_variables(self, cacheable): def _set_variables(self, cacheable):
"""Pull the variable names out of the provided expressions and separate into cacheable expressions and normal parameters. Those that are only stored in the cache, the parameters are stored in this object.""" """Pull the variable names out of the provided expressions and separate into cacheable expressions and normal parameters. Those that are only stored in the cache, the parameters are stored in this object."""
# pull the parameters and inputs out of the symbolic pdf # pull the parameters and inputs out of the symbolic pdf
def extract_vars(expr):
return [e for e in expr.atoms() if e.is_Symbol and e not in vars]
self.cacheable = cacheable self.cacheable = cacheable
self.variables = {} self.variables = {}
vars = [] vars = []
for expression in self.expressions.values(): for expression in self.expressions.values():
vars += [e for e in expression['function'].atoms() if e.is_Symbol and e not in vars] vars += extract_vars(expression['function'])
print vars
# inputs are assumed to be those things that are # inputs are assumed to be those things that are
# cacheable. I.e. those things that aren't stored within the # cacheable. I.e. those things that aren't stored within the
# object except as cached. For covariance functions this is X # object except as cached. For covariance functions this is X
@ -96,6 +98,8 @@ class Symbolic_core():
def _set_derivatives(self, derivatives): def _set_derivatives(self, derivatives):
# these are arguments for computing derivatives. # these are arguments for computing derivatives.
def extract_derivative(function, derivative_arguments):
return {theta.name : self.stabilize(sym.diff(function,theta)) for theta in derivative_arguments}
derivative_arguments = [] derivative_arguments = []
if derivatives is not None: if derivatives is not None:
for derivative in derivatives: for derivative in derivatives:
@ -103,7 +107,15 @@ class Symbolic_core():
# Do symbolic work to compute derivatives. # Do symbolic work to compute derivatives.
for key, func in self.expressions.items(): for key, func in self.expressions.items():
self.expressions[key]['derivative'] = {theta.name : self.stabilize(sym.diff(func['function'],theta)) for theta in derivative_arguments} if func['function'].is_Matrix:
rows = func['function'].shape[0]
cols = func['function'].shape[1]
self.expressions[key]['derivative'] = sym.zeros(rows, cols)
for i in xrange(rows):
for j in xrange(cols):
self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
else:
self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments)
def _set_parameters(self, parameters): def _set_parameters(self, parameters):
"""Add parameters to the model and initialize with given values.""" """Add parameters to the model and initialize with given values."""
@ -127,8 +139,7 @@ class Symbolic_core():
# TODO: place checks for inf/nan in here # TODO: place checks for inf/nan in here
# for all provided keywords # for all provided keywords
for var in sorted(self.code['parameters_changed'].keys(), key=lambda x: int(re.findall(r'\d+$', x)[0])): for var, code in self.variable_sort(self.code['parameters_changed']):
code = self.code['parameters_changed'][var]
self._set_attribute(var, eval(code, self.namespace)) self._set_attribute(var, eval(code, self.namespace))
for var, value in kwargs.items(): for var, value in kwargs.items():
@ -152,18 +163,17 @@ class Symbolic_core():
value = np.atleast_1d(value) value = np.atleast_1d(value)
for i, theta in enumerate(self.variables[var]): for i, theta in enumerate(self.variables[var]):
self._set_attribute(theta.name, value[i]) self._set_attribute(theta.name, value[i])
for var in sorted(self.code['update_cache'].keys(), key=lambda x: int(re.findall(r'\d+$', x)[0])): for var, code in self.variable_sort(self.code['update_cache']):
code = self.code['update_cache'][var]
self._set_attribute(var, eval(code, self.namespace)) self._set_attribute(var, eval(code, self.namespace))
def eval_update_gradients(self, function, partial, **kwargs): def eval_update_gradients(self, function, partial, **kwargs):
# TODO: place checks for inf/nan in here # TODO: place checks for inf/nan in here?
self.eval_update_cache(**kwargs) self.eval_update_cache(**kwargs)
gradient = {}
for theta in self.variables['theta']: for theta in self.variables['theta']:
code = self.code[function]['derivative'][theta.name] code = self.code[function]['derivative'][theta.name]
setattr(getattr(self, theta.name), gradient[theta.name] = (partial*eval(code, self.namespace)).sum()
'gradient', return gradient
(partial*eval(code, self.namespace)).sum())
def eval_gradients_X(self, function, partial, **kwargs): def eval_gradients_X(self, function, partial, **kwargs):
if kwargs.has_key('X'): if kwargs.has_key('X'):
@ -181,7 +191,7 @@ class Symbolic_core():
def code_parameters_changed(self): def code_parameters_changed(self):
# do all the precomputation codes. # do all the precomputation codes.
lcode = '' lcode = ''
for variable, code in sorted(self.code['parameters_changed'].iteritems()): for variable, code in self.variable_sort(self.code['parameters_changed']):
lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n' lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
return lcode return lcode
@ -196,10 +206,10 @@ class Symbolic_core():
else: else:
reorder = '' reorder = ''
for i, theta in enumerate(self.variables[var]): for i, theta in enumerate(self.variables[var]):
lcode+= "\t" + var + '= np.atleast_2d(' + var + ')' lcode+= "\t" + var + '= np.atleast_2d(' + var + ')\n'
lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n" lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"
for variable, code in sorted(self.code['update_cache'].iteritems()): for variable, code in self.variable_sort(self.code['update_cache']):
lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n" lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n"
return lcode return lcode
@ -331,20 +341,6 @@ class Symbolic_core():
self.expressions['parameters_changed'][replace_dict[var.name].name] = expr self.expressions['parameters_changed'][replace_dict[var.name].name] = expr
# for var, expr in common_sub_expressions:
# if var in list(cacheable_list):
# self.expressions['update_cache'].append({var.name: expr.subarg_list = [e for e in sub_expr_pair[1].atoms() if e.is_Symbol]
# cacheable_symbols = [e for e in arg_list if e in cacheable_list or e in self.cacheable_vars]
# if cacheable_symbols:
# self.expressions['update_cache'].append((sub_expr_pair[0].name, self._expr2code(arg_list, sub_expr_pair[1])))
# # list which ensures dependencies are cacheable.
# cacheable_list.append(sub_expr_pair[0])
# cache_expressions_list.append(sub_expr_pair[0].name)
# else:
# self.expressions['parameters_change'].append((sub_expr_pair[0].name, self._expr2code(arg_list, sub_expr_pair[1])))
# sub_expression_list.append(sub_expr_pair[0].name)
def _gen_code(self): def _gen_code(self):
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times.""" """Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
# This is the dictionary that stores all the generated code. # This is the dictionary that stores all the generated code.
@ -363,32 +359,6 @@ class Symbolic_core():
self.code = match_key(self.expressions) self.code = match_key(self.expressions)
# for keys in self.expression_keys:
# expr = getFromDict(self.expressions, keys)
# arg_list = [e for e in expr.atoms() if e.is_Symbol]
# setInDict(self.code, keys, self._expr2code(arg_list, expr))
# # Set up precompute code as either cacheN or subN.
# self.code['update_cache'] = {}
# for key, val in self.expressions['update_cache']:
# expr = val
# for key2, val2 in cache_dict.iteritems():
# expr = expr.replace(key2, val2.name)
# for key2, val2 in sub_dict.iteritems():
# expr = expr.replace(key2, val2.name)
# self.code['update_cache'][cache_dict[key]] = expr
# self.expressions['update_cache'] = dict(self.expressions['update_cache'])
# self.code['parameters_change'] = {}
# for key, val in self.expressions['parameters_change']:
# expr = val
# for key2, val2 in cache_dict.iteritems():
# expr = expr.replace(key2, val2.name)
# for key2, val2 in sub_dict.iteritems():
# expr = expr.replace(key2, val2.name)
# self.code['parameters_change'][sub_dict[key]] = expr
# self.expressions['parameters_change'] = dict(self.expressions['parameters_change'])
def _expr2code(self, arg_list, expr): def _expr2code(self, arg_list, expr):
"""Convert the given symbolic expression into code.""" """Convert the given symbolic expression into code."""
code = lambdastr(arg_list, expr) code = lambdastr(arg_list, expr)
@ -405,3 +375,46 @@ class Symbolic_core():
for arg in self.variables[key]: for arg in self.variables[key]:
code = code.replace(arg.name, 'self.'+arg.name) code = code.replace(arg.name, 'self.'+arg.name)
return code return code
def _display_expression(self, keys, user_substitutes={}):
"""Helper function for human friendly display of the symbolic components."""
# Create some pretty maths symbols for the display.
sigma, alpha, nu, omega, l, variance = sym.var('\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
substitutes = {'scale': sigma, 'shape': alpha, 'lengthscale': l, 'variance': variance}
substitutes.update(user_substitutes)
function_substitutes = {normcdfln : lambda arg : sym.log(normcdf(arg)),
logisticln : lambda arg : -sym.log(1+sym.exp(-arg)),
logistic : lambda arg : 1/(1+sym.exp(-arg)),
erfcx : lambda arg : erfc(arg)/sym.exp(arg*arg),
gammaln : lambda arg : sym.log(sym.gamma(arg))}
expr = getFromDict(self.expressions, keys)
for var_name, sub in self.variable_sort(self.expressions['update_cache'], reverse=True):
for var in self.variables['cache']:
if var_name == var.name:
expr = expr.subs(var, sub)
break
for var_name, sub in self.variable_sort(self.expressions['parameters_changed'], reverse=True):
for var in self.variables['sub']:
if var_name == var.name:
expr = expr.subs(var, sub)
break
for var_name, sub in self.variable_sort(substitutes, reverse=True):
for var in self.variables['theta']:
if var_name == var.name:
expr = expr.subs(var, sub)
break
for m, r in function_substitutes.iteritems():
expr = expr.replace(m, r)#normcdfln, lambda arg : sym.log(normcdf(arg)))
return expr.simplify()
def variable_sort(self, var_dict, reverse=False):
def sort_key(x):
digits = re.findall(r'\d+$', x[0])
if digits:
return int(digits[0])
else:
return x[0]
return sorted(var_dict.iteritems(), key=sort_key, reverse=reverse)

View file

@ -12,6 +12,7 @@ from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
from _src.ODE_UY import ODE_UY from _src.ODE_UY import ODE_UY
# TODO: put this in an init file somewhere # TODO: put this in an init file somewhere
#I'm commenting this out because the files were not added. JH. Remember to add the files before commiting
try: try:
import sympy as sym import sympy as sym
sympy_available=True sympy_available=True
@ -22,5 +23,5 @@ if sympy_available:
from _src.symbolic2 import Symbolic from _src.symbolic2 import Symbolic
from _src.eq import Eq from _src.eq import Eq
from _src.heat_eqinit import Heat_eqinit from _src.heat_eqinit import Heat_eqinit
from _src.ode1_eq_lfm import Ode1_eq_lfm #from _src.ode1_eq_lfm import Ode1_eq_lfm

30
GPy/kern/_src/eq.py Normal file
View file

@ -0,0 +1,30 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from symbolic import Symbolic
class Eq(Symbolic):
"""
The exponentiated quadratic covariance as a symbolic function.
"""
def __init__(self, input_dim, output_dim=1, variance=1.0, lengthscale=1.0, name='Eq'):
parameters = {'variance' : variance, 'lengthscale' : lengthscale}
x = sym.symbols('x_:' + str(input_dim))
z = sym.symbols('z_:' + str(input_dim))
variance = sym.var('variance',positive=True)
lengthscale = sym.var('lengthscale', positive=True)
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
from sympy.parsing.sympy_parser import parse_expr
dist = parse_expr(dist_string)
# this is the covariance function
f = variance*sym.exp(-dist/(2*lengthscale**2))
# extra input dim is to signify the output dimension.
super(Eq, self).__init__(input_dim=input_dim, k=f, output_dim=output_dim, parameters=parameters, name=name)

View file

@ -0,0 +1,30 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from symbolic import Symbolic
class Heat_eqinit(Symbolic):
"""
A symbolic covariance based on laying down an initial condition of the heat equation with an exponentiated quadratic covariance. The covariance then has multiple outputs which are interpreted as observations of the diffused process with different diffusion coefficients (or at different times).
"""
def __init__(self, input_dim, output_dim=1, param=None, name='Heat_eqinit'):
x = sym.symbols('x_:' + str(input_dim))
z = sym.symbols('z_:' + str(input_dim))
scale = sym.var('scale_i scale_j',positive=True)
lengthscale = sym.var('lengthscale_i lengthscale_j', positive=True)
shared_lengthscale = sym.var('shared_lengthscale', positive=True)
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
from sympy.parsing.sympy_parser import parse_expr
dist = parse_expr(dist_string)
# this is the covariance function
f = scale_i*scale_j*sym.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
# extra input dim is to signify the output dimension.
super(Heat_eqinit, self).__init__(input_dim=input_dim+1, k=f, output_dim=output_dim, name=name)

View file

@ -1,445 +1,75 @@
# Check Matthew Rocklin's blog post. # Check Matthew Rocklin's blog post.
try: import sympy as sym
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import stabilise
except ImportError:
sympy_available=False
import numpy as np import numpy as np
from kern import Kern from kern import Kern
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma from ...core.symbolic import Symbolic_core
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln, differfln
from ...core.parameterization import Param
class Symbolic(Kern):
class Symbolic(Kern, Symbolic_core):
""" """
A kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
""" """
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError, "You must provide an argument for the covariance function."
self.func_modules = func_modules Kern.__init__(self, input_dim, active_dims, name=name)
self.func_modules += [{'gamma':gamma, kdiag = k
'gammaln':gammaln, self.cacheable = ['X', 'Z']
'erf':erf, 'erfc':erfc, Symbolic_core.__init__(self, {'k':k,'kdiag':kdiag}, cacheable=self.cacheable, derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
'erfcx':erfcx,
'polygamma':polygamma,
'differfln':differfln,
'normcdf':normcdf,
'normcdfln':normcdfln,
'logistic':logistic,
'logisticln':logisticln},
'numpy']
super(Symbolic, self).__init__(input_dim, active_dims, name)
self._sym_k = k
# pull the variable names out of the symbolic covariance function.
sym_vars = [e for e in k.atoms() if e.is_Symbol]
self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)])
assert len(self._sym_x)==len(self._sym_z)
x_dim=len(self._sym_x)
self._sym_kdiag = k
for x, z in zip(self._sym_x, self._sym_z):
self._sym_kdiag = self._sym_kdiag.subs(z, x)
# If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim
# Check input dim is number of xs + 1 if output_dim is >1
assert self.input_dim == x_dim + int(output_dim > 1)
self.output_dim = output_dim self.output_dim = output_dim
# extract parameter names from the covariance
thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1:
self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sym_theta_i)==len(self._sym_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)])
# Extract names of shared parameters (those without a subscript)
self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j]
self.num_split_params = len(self._sym_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i]
# Add split parameters to the model.
for theta in self._split_theta_names:
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
self.add_parameter(getattr(self, theta))
self.num_shared_params = len(self._sym_theta)
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i)
else:
self.num_split_params = 0
self._split_theta_names = []
self._sym_theta = thetas
self.num_shared_params = len(self._sym_theta)
# Add parameters to the model.
for theta in self._sym_theta:
val = 1.0
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None:
if param.has_key(theta.name):
val = param[theta.name]
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))
# Differentiate with respect to parameters.
derivative_arguments = self._sym_x + self._sym_theta
if self.output_dim > 1:
derivative_arguments += self._sym_theta_i
self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments}
self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments}
# This gives the parameters for the arg list.
self.arg_list = self._sym_x + self._sym_z + self._sym_theta
self.diag_arg_list = self._sym_x + self._sym_theta
if self.output_dim > 1:
self.arg_list += self._sym_theta_i + self._sym_theta_j
self.diag_arg_list += self._sym_theta_i
# Check if there are additional linear operators on the covariance.
self._sym_operators = operators
# TODO: Deal with linear operators
#if self._sym_operators:
# for operator in self._sym_operators:
# psi_stats aren't yet implemented.
if False:
self.compute_psi_stats()
# generate the code for the covariance functions
self._gen_code()
def __add__(self,other): def __add__(self,other):
return spkern(self._sym_k+other._sym_k) return spkern(self._sym_k+other._sym_k)
def _gen_code(self): def _set_expressions(self, expressions):
#fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'}) """This method is overwritten because we need to modify kdiag by substituting z for x. We do this by calling the parent expression method to extract variables from expressions, then subsitute the z variables that are present with x."""
self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules) Symbolic_core._set_expressions(self, expressions)
self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()} Symbolic_core._set_variables(self, self.cacheable)
self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules) # Substitute z with x to obtain kdiag.
self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()} for x, z in zip(self.variables['X'], self.variables['Z']):
expressions['kdiag'] = expressions['kdiag'].subs(z, x)
Symbolic_core._set_expressions(self, expressions)
def K(self,X,X2=None): def K(self,X,X2=None):
self._K_computations(X, X2) if X2 is None:
return self._K_function(**self._arguments) return self.eval_function('k', X=X, Z=X)
else:
return self.eval_function('k', X=X, Z=X2)
def Kdiag(self,X): def Kdiag(self,X):
self._K_computations(X) d = self.eval_function('kdiag', X=X)
return self._Kdiag_function(**self._diag_arguments) if not d.shape[0] == X.shape[0]:
d = np.tile(d, (X.shape[0], 1))
def _param_grad_helper(self,partial,X,Z,target): return d
pass
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None: #if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2) g = self.eval_gradients_X('k', dL_dK, X=X, Z=X2)
gradients_X = np.zeros_like(X)
for i, x in enumerate(self._sym_x):
gf = self._K_derivatives_code[x.name]
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
if X2 is None: if X2 is None:
gradients_X *= 2 g *= 2
return gradients_X return g
def gradients_X_diag(self, dL_dK, X): def gradients_X_diag(self, dL_dK, X):
self._K_computations(X) return self.eval_gradients_X('kdiag', dL_dK, X=X)
dX = np.zeros_like(X)
for i, x in enumerate(self._sym_x):
gf = self._Kdiag_derivatives_code[x.name]
dX[:, i] = gf(**self._diag_arguments)*dL_dK
return dX
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first # Need to extract parameters to local variables first
self._K_computations(X, X2) if X2 is None:
for theta in self._sym_theta: # need to double this inside ...
parameter = getattr(self, theta.name) gradients = self.eval_update_gradients('k', dL_dK, X=X)
gf = self._K_derivatives_code[theta.name] else:
gradient = (gf(**self._arguments)*dL_dK).sum() gradients = self.eval_update_gradients('k', dL_dK, X=X, Z=X2)
if X2 is not None:
gradient += (gf(**self._reverse_arguments)*dL_dK).sum() for name, val in gradients:
setattr(parameter, 'gradient', gradient) setattr(getattr(self, name), 'gradient', val)
if self.output_dim>1:
for theta in self._sym_theta_i:
parameter = getattr(self, theta.name[:-2])
gf = self._K_derivatives_code[theta.name]
A = gf(**self._arguments)*dL_dK
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
for i in np.arange(self.output_dim)])
if X2 is None:
gradient *= 2
else:
A = gf(**self._reverse_arguments)*dL_dK.T
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
for i in np.arange(self.output_dim)])
setattr(parameter, 'gradient', gradient)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self._K_computations(X) gradients = self.eval_update_gradients('kdiag', dL_dKdiag, X)
for theta in self._sym_theta: for name, val in gradients:
parameter = getattr(self, theta.name) setattr(getattr(self, name), 'gradient', val)
gf = self._Kdiag_derivatives_code[theta.name]
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
if self.output_dim>1:
for theta in self._sym_theta_i:
parameter = getattr(self, theta.name[:-2])
gf = self._Kdiag_derivatives_code[theta.name]
a = gf(**self._diag_arguments)*dL_dKdiag
setattr(parameter, 'gradient',
np.asarray([a[np.where(self._output_ind==i)].sum()
for i in np.arange(self.output_dim)]))
def _K_computations(self, X, X2=None):
"""Set up argument lists for the derivatives."""
# Could check if this needs doing or not, there could
# definitely be some computational savings by checking for
# parameter updates here.
self._arguments = {}
self._diag_arguments = {}
for i, x in enumerate(self._sym_x):
self._arguments[x.name] = X[:, i][:, None]
self._diag_arguments[x.name] = X[:, i][:, None]
if self.output_dim > 1:
self._output_ind = np.asarray(X[:, -1], dtype='int')
for i, theta in enumerate(self._sym_theta_i):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
self._diag_arguments[theta.name] = self._arguments[theta.name]
for theta in self._sym_theta:
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
self._diag_arguments[theta.name] = self._arguments[theta.name]
if X2 is not None:
for i, z in enumerate(self._sym_z):
self._arguments[z.name] = X2[:, i][None, :]
if self.output_dim > 1:
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
for i, theta in enumerate(self._sym_theta_j):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
else:
for z in self._sym_z:
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
if self.output_dim > 1:
self._output_ind2 = self._output_ind
for theta in self._sym_theta_j:
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
if X2 is not None:
# These arguments are needed in gradients when X2 is not equal to X.
self._reverse_arguments = self._arguments
for x, z in zip(self._sym_x, self._sym_z):
self._reverse_arguments[x.name] = self._arguments[z.name].T
self._reverse_arguments[z.name] = self._arguments[x.name].T
if self.output_dim > 1:
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
if False:
class Symcombine(CombinationKernel):
"""
Combine list of given sympy covariances together with the provided operations.
"""
def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations):
self._sym_k += self._k_double_operate(subkern._sym_k, operation)
#def _double_operate(self, k, operation):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Combine covariances with a linear operator.
"""
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        """Update each part's parameter gradients from the psi-statistic gradients.

        Because psi2 of a sum kernel contains cross terms between parts (see
        psi2), each part receives an *effective* dL_dpsi1 that folds in the
        cross-term contributions of every other part: Bias parts contribute
        through their variance, other parts through their psi1 statistic;
        White parts contribute nothing.

        :param dL_dpsi0: gradient of the objective w.r.t. psi0
        :param dL_dpsi1: gradient of the objective w.r.t. psi1
        :param dL_dpsi2: gradient of the objective w.r.t. psi2
        :param Z: inducing inputs
        :param variational_posterior: variational posterior over the inputs
        """
        from static import White, Bias
        for p1 in self.parts:
            #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
            eff_dL_dpsi1 = dL_dpsi1.copy()
            for p2 in self.parts:
                if p2 is p1:
                    continue  # no cross term of a part with itself
                if isinstance(p2, White):
                    continue  # white has no psi2 cross terms
                elif isinstance(p2, Bias):
                    # factor 2 because both orderings (p1,p2) and (p2,p1) contribute
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
                else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
            p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        """Gradient of the objective w.r.t. the inducing inputs Z, summed over parts.

        As in update_gradients_expectations, each part is passed an effective
        dL_dpsi1 that absorbs the psi2 cross-term contributions of the other
        parts (Bias via variance, others via psi1; White contributes nothing).

        :param dL_dpsi1: gradient of the objective w.r.t. psi1
        :param dL_dpsi2: gradient of the objective w.r.t. psi2
        :param Z: inducing inputs
        :param variational_posterior: variational posterior over the inputs
        :returns: array with the same shape as Z
        """
        from static import White, Bias
        target = np.zeros(Z.shape)
        for p1 in self.parts:
            #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
            eff_dL_dpsi1 = dL_dpsi1.copy()
            for p2 in self.parts:
                if p2 is p1:
                    continue  # no cross term of a part with itself
                if isinstance(p2, White):
                    continue  # white has no psi2 cross terms
                elif isinstance(p2, Bias):
                    # factor 2 because both orderings (p1,p2) and (p2,p1) contribute
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
                else:
                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
            target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
        return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1 in self._parameters_:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self._parameters_:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu += a
target_S += b
return target_mu, target_S
    def _getstate(self):
        """
        Get the current state of the class for pickling; nothing beyond the
        parent's state is needed here, the rest can get recomputed.
        """
        return super(Add, self)._getstate()
    def _setstate(self, state):
        # Restore state previously captured by _getstate; delegates to the parent.
        super(Add, self)._setstate(state)
def add(self, other, name='sum'):
if isinstance(other, Add):
other_params = other._parameters_.copy()
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self

View file

@ -7,16 +7,17 @@ from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise from mixed_noise import MixedNoise
# TODO need to fix this in a config file. # TODO need to fix this in a config file.
try: # TODO need to add the files to the git repo!
import sympy as sym #try:
sympy_available=True #import sympy as sym
except ImportError: #sympy_available=True
sympy_available=False #except ImportError:
if sympy_available: #sympy_available=False
# These are likelihoods that rely on symbolic. #if sympy_available:
from symbolic import Symbolic ## These are likelihoods that rely on symbolic.
from sstudent_t import SstudentT #from symbolic import Symbolic
from negative_binomial import Negative_binomial #from sstudent_t import SstudentT
from skew_normal import Skew_normal #from negative_binomial import Negative_binomial
from skew_exponential import Skew_exponential ##from skew_normal import Skew_normal
#from skew_exponential import Skew_exponential
#from null_category import Null_category #from null_category import Null_category

View file

@ -0,0 +1,45 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from GPy.util.symbolic import normcdfln
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
#from GPy.util.functions import clip_exp
import link_functions
from symbolic import Symbolic
from scipy import stats
class Skew_exponential(Symbolic):
    """
    Skew exponential distribution (symbolic likelihood).

    .. math::

    .. Note::
        Y takes real values.
        link function is identity

    .. See also::
        symbolic.py, for the parent class
    """
    def __init__(self, gp_link=None, shape=1.0, scale=1.0):
        # record the float hyperparameter values before the names below are
        # rebound to sympy symbols of the same name
        parameters={'scale':scale, 'shape':shape}
        if gp_link is None:
            gp_link = link_functions.Identity()
        #func_modules = [{'exp':clip_exp}]
        scale = sym.Symbol('scale', positive=True, real=True)
        shape = sym.Symbol('shape', real=True)
        y_0 = sym.Symbol('y_0', real=True)  # observed output
        f_0 = sym.Symbol('f_0', real=True)  # latent function value
        log_pdf=sym.log(shape)-sym.log(scale)-((y_0-f_0)/scale) + normcdfln(shape*(y_0-f_0)/scale)
        super(Skew_exponential, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Skew_exponential', parameters=parameters)
        # TODO: Check this.
        self.log_concave = True

View file

@ -0,0 +1,53 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import normcdfln, normcdf
except ImportError:
sympy_available=False
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
from GPy.util.functions import clip_exp
import link_functions
from symbolic import Symbolic
from scipy import stats
class Skew_normal(Symbolic):
    """
    Skew Normal distribution.

    .. math::

    .. Note::
        Y takes real values.
        link function is identity

    .. See also::
        symbolic.py, for the parent class
    """
    def __init__(self, gp_link=None, shape=1.0, scale=1.0):
        # record the float hyperparameter values before the names below are
        # rebound to sympy symbols of the same name
        parameters = {'scale': scale, 'shape':shape}
        if gp_link is None:
            gp_link = link_functions.Identity()
        # # this likelihood has severe problems with likelihoods saturating exponentials, so clip_exp is used in place of the true exp as a solution for dealing with the numerics.
        # func_modules = [{'exp':clip_exp}]
        func_modules = []
        scale = sym.Symbol('scale', positive=True, real=True)
        shape = sym.Symbol('shape', real=True)
        y_0 = sym.Symbol('y_0', real=True)  # observed output
        f_0 = sym.Symbol('f_0', real=True)  # latent function value
        # log density: Gaussian log-pdf plus log(2) plus log of the normal CDF skew factor
        log_pdf=-sym.log(scale)-1./2*sym.log(2*sym.pi)-1./2*((y_0-f_0)/scale)**2 + sym.log(2) + normcdfln(shape*(y_0-f_0)/scale)
        super(Skew_normal, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Skew_normal', func_modules=func_modules)
        # NOTE(review): log-concavity is asserted here without derivation — confirm
        self.log_concave = True

View file

@ -0,0 +1,45 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln
import numpy as np
import link_functions
from symbolic import Symbolic
from scipy import stats
class SstudentT(Symbolic):
    """
    Symbolic variant of the Student-t distribution.

    .. math::

    .. Note::
        Y takes real values.
        link function is identity

    .. See also::
        symbolic.py, for the parent class
    """
    def __init__(self, gp_link=None, deg_free=5.0, t_scale2=1.0):
        # Bug fix: the parameter dict previously hard-coded 5.0 and 1.0,
        # silently ignoring the caller-supplied deg_free and t_scale2.
        parameters = {'deg_free': deg_free, 't_scale2': t_scale2}
        if gp_link is None:
            gp_link = link_functions.Identity()
        # this likelihood has severe problems with likelihoods saturating ...
        y_0 = sym.Symbol('y_0', real=True)  # observed output
        f_0 = sym.Symbol('f_0', real=True)  # latent function value
        # NOTE(review): the symbol is named 'nu' while the parameter dict key is
        # 'deg_free' — verify the parent class maps these correctly
        nu = sym.Symbol('nu', positive=True, real=True)
        t_scale2 = sym.Symbol('t_scale2', positive=True, real=True)
        # standard Student-t log density in y_0 - f_0 with nu degrees of freedom
        log_pdf = (gammaln((nu + 1) * 0.5)
                   - gammaln(nu * 0.5)
                   - 0.5*sym.log(t_scale2 * nu * sym.pi)
                   - 0.5*(nu + 1)*sym.log(1 + (1/nu)*(((y_0-f_0)**2)/t_scale2)))
        super(SstudentT, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='SstudentT')
        self.log_concave = False

View file

@ -36,7 +36,8 @@ class Symbolic(Mapping, Symbolic_core):
self.eval_update_cache(X=X) self.eval_update_cache(X=X)
def update_gradients(self, partial, X=None): def update_gradients(self, partial, X=None):
self.eval_update_gradients('f', partial, X=X) for name, val in self.eval_update_gradients('f', partial, X=X).iteritems():
setattr(getattr(self, name), 'gradient', val)
def gradients_X(self, partial, X=None): def gradients_X(self, partial, X=None):
return self.eval_gradients_X('f', partial, X=X) return self.eval_gradients_X('f', partial, X=X)

View file

@ -16,10 +16,9 @@ def ax_default(fignum, ax):
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw): def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
_, axes = ax_default(fignum, ax) _, axes = ax_default(fignum, ax)
#here's the mean
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw) return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],ax=None,fignum=None,xlabel='x',ylabel='y',**kwargs): def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue'], ax=None, fignum=None, **kwargs):
_, axes = ax_default(fignum, ax) _, axes = ax_default(fignum, ax)
mu = mu.flatten() mu = mu.flatten()
@ -42,9 +41,6 @@ def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.co
plots.append(meanplot(x, upper,color=edgecol,linewidth=0.2,ax=axes)) plots.append(meanplot(x, upper,color=edgecol,linewidth=0.2,ax=axes))
plots.append(meanplot(x, lower,color=edgecol,linewidth=0.2,ax=axes)) plots.append(meanplot(x, lower,color=edgecol,linewidth=0.2,ax=axes))
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
return plots return plots
@ -53,11 +49,13 @@ def removeRightTicks(ax=None):
for i, line in enumerate(ax.get_yticklines()): for i, line in enumerate(ax.get_yticklines()):
if i%2 == 1: # odd indices if i%2 == 1: # odd indices
line.set_visible(False) line.set_visible(False)
def removeUpperTicks(ax=None): def removeUpperTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()
for i, line in enumerate(ax.get_xticklines()): for i, line in enumerate(ax.get_xticklines()):
if i%2 == 1: # odd indices if i%2 == 1: # odd indices
line.set_visible(False) line.set_visible(False)
def fewerXticks(ax=None,divideby=2): def fewerXticks(ax=None,divideby=2):
ax = ax or pb.gca() ax = ax or pb.gca()
ax.set_xticks(ax.get_xticks()[::divideby]) ax.set_xticks(ax.get_xticks()[::divideby])

View file

@ -3,6 +3,20 @@ import numpy as np
import sympy as sym import sympy as sym
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
########################################
## Try to do some matrix functions: problem, you can't do derivatives
## with respect to matrix functions :-(
class selector(Function):
    """A function that returns an element of a Matrix depending on input indices."""
    # sympy Function arity: selector(X, i, j)
    nargs = 3
    @classmethod
    def eval(cls, X, i, j):
        # collapse to a concrete matrix entry only when both indices are
        # numeric; otherwise the expression remains symbolic
        if i.is_Number and j.is_Number:
            return X[i, j]
##################################################
class logistic(Function): class logistic(Function):
"""The logistic function as a symbolic function.""" """The logistic function as a symbolic function."""
nargs = 1 nargs = 1