diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/sympykern.py index a09d4bfc..3d6517a8 100644 --- a/GPy/kern/_src/sympykern.py +++ b/GPy/kern/_src/sympykern.py @@ -1,16 +1,34 @@ -import numpy as np -import sympy as sp -from sympy.utilities.codegen import codegen +# Check Matthew Rocklin's blog post. +try: + import sympy as sp + sympy_available=True +except ImportError: + sympy_available=False + exit() + from sympy.core.cache import clear_cache -from scipy import weave -import re +from sympy.utilities.codegen import codegen + +try: + from scipy import weave + weave_available = True +except ImportError: + weave_available = False + import os -import sys current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) +import sys +import numpy as np +import re import tempfile import pdb import ast + from kernpart import Kernpart +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +# TODO have this set up in a set up file! +user_code_storage = tempfile.gettempdir() class spkern(Kernpart): """ @@ -28,96 +46,117 @@ class spkern(Kernpart): - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. """ def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None): + if name is None: - self.name='sympykern' - else: - self.name = name + name='sympykern' if k is None: raise ValueError, "You must provide an argument for the covariance function." + super(spkern, self).__init__(input_dim, name) + self._sp_k = k + + # pull the variable names out of the symbolic covariance function. sp_vars = [e for e in k.atoms() if e.is_Symbol] self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) + # Check that variable names make sense. assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert len(self._sp_x)==len(self._sp_z) - self.input_dim = len(self._sp_x) - self._real_input_dim = self.input_dim - if output_dim > 1: - self.input_dim += 1 - assert self.input_dim == input_dim + x_dim=len(self._sp_x) + + # If it is a multi-output covariance, add an input for indexing the outputs. + self._real_input_dim = x_dim + # Check input dim is number of xs + 1 if output_dim is >1 + assert self.input_dim == x_dim + int(output_dim > 1) self.output_dim = output_dim - # extract parameter names + + # extract parameter names from the covariance thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) - # Look for parameters with index. + # Look for parameters with index (subscripts), they are associated with different outputs. if self.output_dim>1: self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) + # Make sure parameter appears with both indices! assert len(self._sp_theta_i)==len(self._sp_theta_j) assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) - # Extract names of shared parameters + # Extract names of shared parameters (those without a subscript) self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] self.num_split_params = len(self._sp_theta_i) self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] for theta in self._split_theta_names: - setattr(self, theta, np.ones(self.output_dim)) + setattr(self, theta, Param(theta, np.ones(self.output_dim), None)) + self.add_parameters(getattr(self, theta)) + + #setattr(self, theta, np.ones(self.output_dim)) self.num_shared_params = len(self._sp_theta) - self.num_params = self.num_shared_params+self.num_split_params*self.output_dim + #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim else: self.num_split_params = 0 self._split_theta_names = [] self._sp_theta = thetas self.num_shared_params = len(self._sp_theta) - self.num_params = self.num_shared_params - + #self.num_params = self.num_shared_params + + # Add parameters to the model. for theta in self._sp_theta: val = 1.0 if param is not None: if param.has_key(theta): val = param[theta] - setattr(self, theta.name, val) + #setattr(self, theta.name, val) + setattr(self, theta.name, Param(theta.name, val, None)) + self.add_parameters(getattr(self, theta.name)) #deal with param - self._set_params(self._get_params()) + #self._set_params(self._get_params()) - #Differentiate! + # Differentiate with respect to parameters. self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] if self.output_dim > 1: self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] - + + # differentiate with respect to input variables. self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] + # psi_stats aren't yet implemented. if False: self.compute_psi_stats() + self._code = {} + + # generate the code for the covariance functions self._gen_code() - if False: - extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] - else: - extra_compile_args = [] + if weave_available: + if False: + extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] + else: + extra_compile_args = [] - self.weave_kwargs = { - 'support_code':self._function_code, - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], - 'headers':['"sympy_helpers.h"'], - 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], - 'extra_compile_args':extra_compile_args, - 'extra_link_args':['-lgomp'], - 'verbose':True} + self.weave_kwargs = { + 'support_code': None, #self._function_code, + 'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')], + 'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], + 'verbose':True} + self.parameters_changed() # initializes caches + def __add__(self,other): return spkern(self._sp_k+other._sp_k) def _gen_code(self): - #generate c functions from sympy objects + argument_sequence = self._sp_x+self._sp_z+self._sp_theta code_list = [('k',self._sp_k)] # gradients with respect to covariance input @@ -128,194 +167,268 @@ class spkern(Kernpart): if self.output_dim > 1: argument_sequence += self._sp_theta_i + self._sp_theta_j code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)] + # generate c functions from sympy objects + if weave_available: + code_type = "C" + else: + code_type = "PYTHON" + # Need to add the sympy_helpers header in here. (foo_c,self._function_code), (foo_h,self._function_header) = \ - codegen(code_list, "C",'foobar',argument_sequence=argument_sequence) - #put the header file where we can find it - f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w') - f.write(self._function_header) + codegen(code_list, + code_type, + self.name, + argument_sequence=argument_sequence) + + + # Use weave to compute the underlying functions. + if weave_available: + # put the header file where we can find it + f = file(os.path.join(user_code_storage, self.name + '.h'),'w') + f.write(self._function_header) + f.close() + + + if weave_available: + # Substitute any known derivatives which sympy doesn't compute + self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) + # put the cpp file in user code storage (defaults to temp file location) + f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w') + else: + # put the python file in user code storage + f = file(os.path.join(user_code_storage, self.name + '.py'),'w') + f.write(self._function_code) f.close() - # Substitute any known derivatives which sympy doesn't compute - self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) + if weave_available: + # arg_list will store the arguments required for the C code. + input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) - # This is the basic argument construction for the C code. - #arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] - # + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) - arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] - + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) - if self.output_dim>1: - reverse_arg_list = list(arg_list) - reverse_arg_list.reverse() + # for multiple outputs reverse argument list is also required + if self.output_dim>1: + reverse_input_arg_list = list(input_arg_list) + reverse_input_arg_list.reverse() - param_arg_list = [shared_params.name for shared_params in self._sp_theta] - arg_list += param_arg_list + # This gives the parameters for the arg list. + param_arg_list = [shared_params.name for shared_params in self._sp_theta] + arg_list = input_arg_list + param_arg_list - precompute_list=[] - if self.output_dim > 1: - reverse_arg_list+=list(param_arg_list) - split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] - split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] - arg_list += split_param_arg_list - reverse_arg_list += split_param_reverse_arg_list - # Extract the right output indices from the inputs. - c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] - precompute_list += c_define_output_indices - reverse_arg_string = ", ".join(reverse_arg_list) - arg_string = ", ".join(arg_list) - precompute_string = "\n".join(precompute_list) - # Here's the code to do the looping for K - self._K_code =\ - """ - // _K_code - // Code for computing the covariance function. - int i; - int j; - int N = target_array->dimensions[0]; - int num_inducing = target_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;i 1: + reverse_arg_list= reverse_input_arg_list + list(param_arg_list) + # For multiple outputs, also need the split parameters. + split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] + split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] + arg_list += split_param_arg_list + reverse_arg_list += split_param_reverse_arg_list + # Extract the right output indices from the inputs. + c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] + precompute_list += c_define_output_indices + reverse_arg_string = ", ".join(reverse_arg_list) + arg_string = ", ".join(arg_list) + precompute_string = "\n".join(precompute_list) + + # Now we use the arguments in code that computes the separate parts. + + # Any precomputations will be done here eventually. + self._precompute = \ + """ + // Precompute code would go here. It will be called when parameters are updated. + """ + + # Here's the code to do the looping for K + self._code['K'] =\ + """ + // _K_code + // Code for computing the covariance function. + int i; + int j; + int n = target_array->dimensions[0]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for - for (i=0;i1: - grad_func_list += c_define_output_indices - grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) - grad_func_string = '\n'.join(grad_func_list) - - self._dK_dtheta_code =\ - """ - // _dK_dtheta_code - // Code for computing gradient of covariance with respect to parameters. - int i; - int j; - int N = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for + for (i=0;i1: + for i, theta in enumerate(self._sp_theta_i): + grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)] + grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)] + grad_func_list = c_define_output_indices+grad_func_list + + grad_func_string = '\n'.join(grad_func_list) + self._code['dK_d' + theta.name] =\ + """ + int i; + int j; + int n = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;idimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (i=0;i1: + gradX_func_list += c_define_output_indices + gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] + gradX_func_string = "\n".join(gradX_func_list) + + self._code['dK_dX'] = \ + """ + // _dK_dX_code + // Code for computing gradient of covariance with respect to inputs. + int i; + int j; + int n = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;i1: - gradX_func_list += c_define_output_indices - gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] - gradX_func_string = "\n".join(gradX_func_list) - - self._dK_dX_code = \ - """ - // _dK_dX_code - // Code for computing gradient of covariance with respect to inputs. - int i; - int j; - int N = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (int i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (int i=0;i1: - for i, split_params in enumerate(self._split_theta_names): - start = self.num_shared_params + i*self.output_dim - end = self.num_shared_params + (i+1)*self.output_dim - setattr(self, split_params, param[start:end]) - - - def _get_params(self): - params = np.zeros(0) + def update_gradients_full(self, dL_dK, X): + # Need to extract parameters to local variables first + self._K_computations(X, None) for shared_params in self._sp_theta: - params = np.hstack((params, getattr(self, shared_params.name))) - if self.output_dim>1: - for split_params in self._split_theta_names: - params = np.hstack((params, getattr(self, split_params).flatten())) - return params + parameter = getattr(self, shared_params.name) + code = self._code['dK_d' + shared_params.name] + setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK)) + + for split_params in self._split_theta_names: + parameter = getattr(self, split_params.name) + code = self._code['dK_d' + split_params.name] + setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK)) + - def _get_param_names(self): - if self.output_dim>1: - return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)] + # def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + # #contributions from Kdiag + # self.variance.gradient = np.sum(dL_dKdiag) + + # #from Knm + # self._K_computations(X, Z) + # self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) + # if self.ARD: + # self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) + + # else: + # self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm) + + # #from Kmm + # self._K_computations(Z, None) + # self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) + # if self.ARD: + # self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) + # else: + # self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) + + + #---------------------------------------# + # Precomputations # + #---------------------------------------# + + def _K_computations(self, X, Z): + if Z is None: + self._generate_inline(self._precompute, X) else: - return [x.name for x in self._sp_theta] + self._generate_inline(self._precompute, X, Z=Z)