diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index e6952186..a1252052 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -302,8 +302,8 @@ if sympy_available: Z = sp.symbols('z_:' + str(input_dim)) variance = sp.var('variance',positive=True) if ARD: - lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)] - dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) + lengthscales = sp.symbols('lengthscale_:' + str(input_dim)) + dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale%i**2' % (i, i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) f = variance*sp.exp(-dist/2.) else: @@ -313,6 +313,25 @@ if sympy_available: f = variance*sp.exp(-dist/(2*lengthscale**2)) return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')]) + def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.): + """ + Exponentiated quadratic with multiple outputs. + """ + X = sp.symbols('x_:' + str(input_dim)) + Z = sp.symbols('z_:' + str(input_dim)) + variance = sp.var('variance',positive=True) + if ARD: + lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(input_dim)] + dist_string = ' + '.join(['(x_%i-z_%i)**2/(lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(input_dim)]) + dist = parse_expr(dist_string) + f = variance*sp.exp(-dist/2.) + else: + lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True) + dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)]) + dist = parse_expr(dist_string) + f = variance*sp.exp(-dist/(2*lengthscale**2)) + return kern(input_dim, [spkern(input_dim, f, name='eq_sympy')]) + def sinc(input_dim, ARD=False, variance=1., lengthscale=1.): """ TODO: Not clear why this isn't working, suggests argument of sinc is not a number. diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 5a8882dd..97084aa9 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -672,8 +672,13 @@ def kern_test(kern, X=None, X2=None, verbose=False): pass_checks = True if X==None: X = np.random.randn(10, kern.input_dim) + for ind in kern.output_indicator: + X[:, ind] = np.random.randint(kern.output_dim, X.shape[0]) if X2==None: X2 = np.random.randn(20, kern.input_dim) + for ind in kern.output_indicator: + X2[:, ind] = np.random.randint(kern.output_dim, X2.shape[0]) + if verbose: print("Checking covariance function is positive definite.") result = Kern_check_model(kern, X=X).is_positive_definite() diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/parts/kernpart.py index 475d835f..95deeb81 100644 --- a/GPy/kern/parts/kernpart.py +++ b/GPy/kern/parts/kernpart.py @@ -5,15 +5,20 @@ class Kernpart(object): def __init__(self,input_dim): """ - The base class for a kernpart: a positive definite function which forms part of a kernel + The base class for a kernpart: a positive definite function which forms part of a covariance function (kernel). :param input_dim: the number of input dimensions to the function :type input_dim: int Do not instantiate. """ + # stores indices of any inputs that are for indicating outputs + self.output_indicator = [] + # the input dimensionality for the covariance self.input_dim = input_dim + # the number of optimisable parameters self.num_params = 1 + # the name of the covariance function. self.name = 'unnamed' def _get_params(self): diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index dc6a5390..a9f73436 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -27,7 +27,7 @@ class spkern(Kernpart): - to handle multiple inputs, call them x_1, z_1, etc - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. """ - def __init__(self,input_dim, k=None, output_dim=1, name=None, param=None): + def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None): if name is None: self.name='sympykern' else: @@ -44,7 +44,9 @@ class spkern(Kernpart): assert len(self._sp_x)==len(self._sp_z) self.input_dim = len(self._sp_x) if output_dim > 1: + self.output_indicator=[self.input_dim] self.input_dim += 1 + assert self.input_dim == input_dim self.output_dim = output_dim # extract parameter names @@ -63,26 +65,28 @@ class spkern(Kernpart): self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] self.num_split_params = len(self._sp_theta_i) - self._split_param_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] - for params in self._split_param_names: - setattr(self, params, np.ones(self.output_dim)) + self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] + for theta in self._split_theta_names: + setattr(self, theta, np.ones(self.output_dim)) self.num_shared_params = len(self._sp_theta) self.num_params = self.num_shared_params+self.num_split_params*self.output_dim else: self.num_split_params = 0 - self._split_param_names = [] + self._split_theta_names = [] self._sp_theta = thetas self.num_shared_params = len(self._sp_theta) self.num_params = self.num_shared_params - - #deal with param - if param is None: - param = np.ones(self.num_params) - - assert param.size==self.num_params - self._set_params(param) + + for theta in self._sp_theta: + val = 1.0 + if param is not None: + if param.has_key(theta): + val = param[theta] + setattr(self, theta, val) + #deal with param + self._set_params(self._get_params()) #Differentiate! self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] @@ -90,53 +94,29 @@ class spkern(Kernpart): self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] - #self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z] - #self.compute_psi_stats() + if False: + self.compute_psi_stats() + self._gen_code() - self.weave_kwargs = {\ - 'support_code':self._function_code,\ - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\ - 'headers':['"sympy_helpers.h"'],\ - 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\ - #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\ - 'extra_compile_args':[],\ - 'extra_link_args':['-lgomp'],\ + if False: + extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] + else: + extra_compile_args = [] + + self.weave_kwargs = { + 'support_code':self._function_code, + 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], + 'headers':['"sympy_helpers.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], 'verbose':True} def __add__(self,other): return spkern(self._sp_k+other._sp_k) - def compute_psi_stats(self): - #define some normal distributions - mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)] - Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)] - normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)] - - #do some integration! - #self._sp_psi0 = ?? - self._sp_psi1 = self._sp_k - for i in range(self.input_dim): - print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim) - sys.stdout.flush() - self._sp_psi1 *= normals[i] - self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo)) - clear_cache() - self._sp_psi1 = self._sp_psi1.simplify() - - #and here's psi2 (eek!) - zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)] - self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime)) - for i in range(self.input_dim): - print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim) - sys.stdout.flush() - self._sp_psi2 *= normals[i] - self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo)) - clear_cache() - self._sp_psi2 = self._sp_psi2.simplify() - - def _gen_code(self): #generate c functions from sympy objects argument_sequence = self._sp_x+self._sp_z+self._sp_theta @@ -201,8 +181,10 @@ class spkern(Kernpart): # Code to compute diagonal of covariance. diag_arg_string = re.sub('Z','X',arg_string) + diag_arg_string = re.sub('int jj','//int jj',diag_arg_string) diag_arg_string = re.sub('j','i',diag_arg_string) - diag_precompute_string = re.sub('Z','X',precompute_string) + diag_precompute_string = re.sub('int jj','//int jj',precompute_string) + diag_precompute_string = re.sub('Z','X',diag_precompute_string) diag_precompute_string = re.sub('j','i',diag_precompute_string) # Code to do the looping for Kdiag self._Kdiag_code =\ @@ -245,6 +227,7 @@ class spkern(Kernpart): # Code to compute gradients for Kdiag TODO: needs clean up diag_func_string = re.sub('Z','X',func_string,count=0) + diag_func_string = re.sub('int jj','//int jj',diag_func_string) diag_func_string = re.sub('j','i',diag_func_string) diag_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_func_string) self._dKdiag_dtheta_code =\ @@ -279,6 +262,7 @@ class spkern(Kernpart): diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0) + diag_gradient_funcs = re.sub('int jj','//int jj',diag_gradient_funcs) diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs) diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs) @@ -312,7 +296,7 @@ class spkern(Kernpart): if partial is not None: arg_names += ['partial'] if self.output_dim>1: - arg_names += self._split_param_names + arg_names += self._split_theta_names arg_names += ['output_dim'] return arg_names @@ -320,7 +304,7 @@ class spkern(Kernpart): param, output_dim = self._shared_params, self.output_dim # Need to extract parameters first - for split_params in self._split_param_names: + for split_params in self._split_theta_names: locals()[split_params] = getattr(self, split_params) arg_names = self._get_arg_names(Z, partial) weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs) @@ -353,21 +337,55 @@ class spkern(Kernpart): def dKdiag_dX(self,partial,X,target): self._weave.inline(self._dKdiag_dX_code, X, target, Z, partial) - def _set_params(self,param): - #print param.flags['C_CONTIGUOUS'] + def compute_psi_stats(self): + #define some normal distributions + mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)] + Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)] + normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)] + + #do some integration! + #self._sp_psi0 = ?? + self._sp_psi1 = self._sp_k + for i in range(self.input_dim): + print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim) + sys.stdout.flush() + self._sp_psi1 *= normals[i] + self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo)) + clear_cache() + self._sp_psi1 = self._sp_psi1.simplify() + + #and here's psi2 (eek!) + zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)] + self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime)) + for i in range(self.input_dim): + print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim) + sys.stdout.flush() + self._sp_psi2 *= normals[i] + self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo)) + clear_cache() + self._sp_psi2 = self._sp_psi2.simplify() + + + def _set_params(self,param): assert param.size == (self.num_params) - self._shared_params = param[0:self.num_shared_params] + for i, shared_params in enumerate(self._sp_theta): + start = i + end = i+1 + setattr(self, shared_params, param[start:end]) + if self.output_dim>1: - for i, split_params in enumerate(self._split_param_names): + for i, split_params in enumerate(self._split_theta_names): start = self.num_shared_params + i*self.output_dim end = self.num_shared_params + (i+1)*self.output_dim setattr(self, split_params, param[start:end]) def _get_params(self): - params = self._shared_params + params = np.zeros(0) + for shared_params in self._sp_theta: + params = np.hstack((params, getattr(self, shared_params))) if self.output_dim>1: - for split_params in self._split_param_names: + for split_params in self._split_theta_names: params = np.hstack((params, getattr(self, split_params).flatten())) return params diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 87d4a20e..e0a87169 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -25,6 +25,14 @@ class KernelTests(unittest.TestCase): kern = GPy.kern.rbf_sympy(5) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + def test_eq_sympykernel(self): + kern = GPy.kern.eq_sympy(5, 3) + self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + + def test_sinckernel(self): + kern = GPy.kern.sinc(5) + self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + def test_rbf_invkernel(self): kern = GPy.kern.rbf_inv(5) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 79bc3fc3..2ff168b3 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -491,11 +491,11 @@ def ripley_synth(data_set='ripley_prnn_data'): def osu_run1(data_set='osu_run1', sample_every=4): if not data_available(data_set): download_data(data_set) - zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r') + zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r') path = os.path.join(data_path, data_set) for name in zip.namelist(): zip.extract(name, path) - Y, connect = GPy.util.mocap.load_text_data('Aug210107', path) + Y, connect = GPy.util.mocap.load_text_data('Aug210106', path) Y = Y[0:-1:sample_every, :] return data_details_return({'Y': Y, 'connect' : connect}, data_set)