diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 15a49b77..62ea24f9 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -283,13 +283,13 @@ def Brownian(input_dim, variance=1.): part = parts.Brownian.Brownian(input_dim, variance) return kern(input_dim, [part]) -try: - import sympy as sp - from sympykern import spkern - from sympy.parsing.sympy_parser import parse_expr - sympy_available = True -except ImportError: - sympy_available = False +#try: +import sympy as sp +from parts.sympykern import spkern +from sympy.parsing.sympy_parser import parse_expr +sympy_available = True +#except ImportError: +# sympy_available = False if sympy_available: def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.): @@ -298,24 +298,35 @@ if sympy_available: """ X = [sp.var('x%i' % i) for i in range(input_dim)] Z = [sp.var('z%i' % i) for i in range(input_dim)] - rbf_variance = sp.var('rbf_variance',positive=True) + variance = sp.var('variance',positive=True) if ARD: - rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(input_dim)] - dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) + lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)] + dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) - f = rbf_variance*sp.exp(-dist/2.) + f = variance*sp.exp(-dist/2.) else: - rbf_lengthscale = sp.var('rbf_lengthscale',positive=True) + lengthscale = sp.var('lengthscale',positive=True) dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) - f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2)) - return kern(input_dim, [spkern(input_dim, f)]) + f = variance*sp.exp(-dist/(2*lengthscale**2)) + return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')]) - def sympykern(input_dim, k): + def sympykern(input_dim, k,name=None): """ - A kernel from a symbolic sympy representation + A base kernel object, where all the hard work in done by sympy. + + :param k: the covariance function + :type k: a positive definite sympy function of x1, z1, x2, z2... + + To construct a new sympy kernel, you'll need to define: + - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z). + - that's it! we'll extract the variables from the function k. + + Note: + - to handle multiple inputs, call them x1, z1, etc + - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO """ - return kern(input_dim, [spkern(input_dim, k)]) + return kern(input_dim, [spkern(input_dim, k,name)]) del sympy_available def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi): diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/parts/eq_ode1.py index 6fd5fb91..70e3c49d 100644 --- a/GPy/kern/parts/eq_ode1.py +++ b/GPy/kern/parts/eq_ode1.py @@ -120,14 +120,75 @@ class Eq_ode1(Kernpart): t2_mat = self._t2[None, self._rorder2] target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat)) - - def Kdiag(self,index,target): #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] pass - def dK_dtheta(self,dL_dK,index,index2,target): - pass + def dK_dtheta(self,dL_dK,X,X2,target): + + # First extract times and indices. + self._extract_t_indices(X, X2, dL_dK=dL_dK) + self._dK_ode_dtheta(target) + + + def _dK_ode_dtheta(self, target): + """Do all the computations for the ode parts of the covariance function.""" + t_ode = self._t[self._index>0] + dL_dK_ode = self._dL_dK[self._index>0, :] + index_ode = self._index[self._index>0]-1 + if self._t2 is None: + if t_ode.size==0: + return + t2_ode = t_ode + dL_dK_ode = dL_dK_ode[:, self._index>0] + index2_ode = index_ode + else: + t2_ode = self._t2[self._index2>0] + dL_dK_ode = dL_dK_ode[:, self._index2>0] + if t_ode.size==0 or t2_ode.size==0: + return + index2_ode = self._index2[self._index2>0]-1 + + h1 = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True) + #self._dK_ddelay = self._dh_ddelay + self._dK_dsigma = self._dh_dsigma + + if self._t2 is None: + h2 = h1 + else: + h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True) + + #self._dK_ddelay += self._dh_ddelay.T + self._dK_dsigma += self._dh_dsigma.T + # C1 = self.sensitivity + # C2 = self.sensitivity + + # K = 0.5 * (h1 + h2.T) + # var2 = C1*C2 + # if self.is_normalized: + # dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2 + # dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2 + # dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma)) + # dk_dC1 = C2 * sum(sum(dL_dK.*K)) + # dk_dC2 = C1 * sum(sum(dL_dK.*K)) + # else: + # K = np.sqrt(np.pi) * K + # dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K)) + # dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K)) + + + # dk_dSim1Variance = dk_dC1 + # Last element is the length scale. + (dL_dK_ode[:, :, None]*self._dh_ddelay[:, None, :]).sum(2) + + target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum() + + + # # only pass the gradient with respect to the inverse width to one + # # of the gradient vectors ... otherwise it is counted twice. + # g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance]) + # g2 = real([dk_dD2 0 dk_dSim2Variance]) + # return g1, g2""" def dKdiag_dtheta(self,dL_dKdiag,index,target): pass @@ -135,7 +196,7 @@ class Eq_ode1(Kernpart): def dK_dX(self,dL_dK,X,X2,target): pass - def _extract_t_indices(self, X, X2=None): + def _extract_t_indices(self, X, X2=None, dL_dK=None): """Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """ # TODO: some fast checking here to see if this needs recomputing? @@ -153,6 +214,7 @@ class Eq_ode1(Kernpart): if X2 is None: self._t2 = None self._index2 = None + self._order2 = self._order self._rorder2 = self._rorder else: if not X2.shape[1] == 2: @@ -164,6 +226,10 @@ class Eq_ode1(Kernpart): self._t2 = self._t2[self._order2] self._rorder2 = self._order2.argsort() # rorder2 is for reversing order + if dL_dK is not None: + self._dL_dK = dL_dK[self._order, :] + self._dL_dK = self._dL_dK[:, self._order2] + def _K_computations(self, X, X2): """Perform main body of computations for the ode1 covariance function.""" # First extract times and indices. @@ -279,16 +345,16 @@ class Eq_ode1(Kernpart): decay_vals = self.decay[index_ode][:, None] half_sigma_d_i = 0.5*self.sigma*decay_vals - if self.is_stationary == False: - ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True) - else: + if self.is_stationary: ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True) + else: + ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True) sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part) sK *= 0.5 if not self.is_normalized: - sK *= sqrt(pi)*self.sigma + sK *= np.sqrt(np.pi)*self.sigma if transpose: @@ -315,24 +381,84 @@ class Eq_ode1(Kernpart): index2_ode = self._index2[self._index2>0]-1 # When index is identical - if self.is_stationary: - h = self._compute_H_stat(t_ode, index_ode, t2_ode, index2_ode) - else: - h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode) + h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary) if self._t2 is None: self._K_ode = 0.5 * (h + h.T) else: - if self.is_stationary: - h2 = self._compute_H_stat(t2_ode, index2_ode, t_ode, index_ode) - else: - h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode) + h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary) self._K_ode = 0.5 * (h + h2.T) if not self.is_normalized: self._K_ode *= np.sqrt(np.pi)*self.sigma + def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False): + """Helper function for computing H for the diagonal only. + :param t: time input. + :type t: array + :param index: first output indices + :type index: array of int. + :param index: second output indices + :type index: array of int. + :param update_derivatives: whether or not to update the derivative portions (default False). + :type update_derivatives: bool + :param stationary: whether to compute the stationary version of the covariance (default False). + :type stationary: bool""" + + """if delta_i~=delta_j: + [h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary)) + else: + Decay = self.decay[index] + if self.delay is not None: + t = t - self.delay[index] + + t_squared = t*t + half_sigma_decay = 0.5*self.sigma*Decay + [ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma, + half_sigma_decay) - def _compute_H(self, t, index, t2, index2, update_derivatives=False): + [ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay, + half_sigma_decay - t/self.sigma) + + h = (sign1*np.exp(half_sigma_decay*half_sigma_decay + + ln_part_1 + - log(Decay + D_j)) + - sign2*np.exp(half_sigma_decay*half_sigma_decay + - (Decay + D_j)*t + + ln_part_2 + - log(Decay + D_j))) + + sigma2 = self.sigma*self.sigma + + if update_derivatives: + + dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h + + t*sign2*np.exp( + half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2 + ) + + self.sigma/np.sqrt(np.pi)* + (-1 + np.exp(-t_squared/sigma2-Decay*t) + + np.exp(-t_squared/sigma2-D_j*t) + - np.exp(-(Decay + D_j)*t))) + + dh_dD_i = (dh_dD_i/(Decay+D_j)).real + + + + dh_dD_j = (t*sign2*np.exp( + half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2 + ) + -h) + dh_dD_j = (dh_dD_j/(Decay + D_j)).real + + dh_dsigma = 0.5*Decay*Decay*self.sigma*h \ + + 2/(np.sqrt(np.pi)*(Decay+D_j))\ + *((-Decay/2) \ + + (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \ + - (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \ + - Decay/2*np.exp(-(Decay+D_j)*t))""" + pass + + def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False): """Helper function for computing part of the ode1 covariance function. :param t: first time input. @@ -348,44 +474,16 @@ class Eq_ode1(Kernpart): :rtype: ndarray """ - + if stationary: + raise NotImplementedError, "Error, stationary version of this covariance not yet implemented." # Vector of decays and delays associated with each output. - Decay = np.zeros(t.shape[0]) - Decay2 = np.zeros(t2.shape[0]) - Delay = np.zeros(t.shape[0]) - Delay2 = np.zeros(t2.shape[0]) - code=""" - for(int i=0;i double DiracDelta(double x){ - if((x<0.000001) & (x>-0.000001))//go on, laught at my c++ skills + // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil + if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills return 1.0; else return 0.0; diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index def1bc5f..9755e37b 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -26,8 +26,11 @@ class spkern(Kernpart): - to handle multiple inputs, call them x1, z1, etc - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO """ - def __init__(self,input_dim,k,param=None): - self.name='sympykern' + def __init__(self,input_dim,k,name=None,param=None): + if name is None: + self.name='sympykern' + else: + self.name = name self._sp_k = k sp_vars = [e for e in k.atoms() if e.is_Symbol] self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:])) @@ -56,9 +59,9 @@ class spkern(Kernpart): self.weave_kwargs = {\ 'support_code':self._function_code,\ - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\ + 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\ 'headers':['"sympy_helpers.h"'],\ - 'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\ + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\ #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\ 'extra_compile_args':[],\ 'extra_link_args':['-lgomp'],\ @@ -109,14 +112,15 @@ class spkern(Kernpart): f.write(self._function_header) f.close() - #get rid of derivatives of DiracDelta + # Substitute any known derivatives which sympy doesn't compute self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) - #Here's some code to do the looping for K - arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\ - + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\ - + ["param[%i]"%i for i in range(self.num_params)]) + # Here's the code to do the looping for K + arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x] + + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z] + + ["param[%i]"%i for i in range(self.num_params)]) + self._K_code =\ """ int i; @@ -133,9 +137,14 @@ class spkern(Kernpart): %s """%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed + # Similar code when only X is provided. + self._K_code_X = self._K_code.replace('Z[', 'X[') + + + # Code to compute diagonal of covariance. diag_arglist = re.sub('Z','X',arglist) diag_arglist = re.sub('j','i',diag_arglist) - #Here's some code to do the looping for Kdiag + # Code to do the looping for Kdiag self._Kdiag_code =\ """ int i; @@ -148,8 +157,9 @@ class spkern(Kernpart): %s """%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed - #here's some code to compute gradients + # Code to compute gradients funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)]) + self._dK_dtheta_code =\ """ int i; @@ -164,9 +174,12 @@ class spkern(Kernpart): } } %s - """%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed + """%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed - #here's some code to compute gradients for Kdiag TODO: thius is yucky. + # Similar code when only X is provided, change argument lists. + self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[') + + # Code to compute gradients for Kdiag TODO: needs clean up diag_funclist = re.sub('Z','X',funclist,count=0) diag_funclist = re.sub('j','i',diag_funclist) diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist) @@ -181,8 +194,12 @@ class spkern(Kernpart): %s """%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed - #Here's some code to do gradients wrt x + # Code for gradients wrt X gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)]) + if False: + gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));} + if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}""" + self._dK_dX_code = \ """ int i; @@ -192,30 +209,34 @@ class spkern(Kernpart): int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int num_inducing = 0; int input_dim = X_array->dimensions[1]; - for (i=0;i