From d1b6d18ddf341ee879083ded17d62ce90e4aa120 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 18 Feb 2014 18:49:13 -0500 Subject: [PATCH 01/38] Changes to sympy covariance. --- GPy/kern/parts/sympykern.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index a09d4bfc..2d015b27 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -2,6 +2,7 @@ import numpy as np import sympy as sp from sympy.utilities.codegen import codegen from sympy.core.cache import clear_cache + from scipy import weave import re import os @@ -28,39 +29,47 @@ class spkern(Kernpart): - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. """ def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None): + if name is None: - self.name='sympykern' - else: - self.name = name + name='sympykern' if k is None: raise ValueError, "You must provide an argument for the covariance function." + super(spkern, self).__init__(input_dim, name) + self._sp_k = k + + # pull the variable names out of the symbolic covariance function. sp_vars = [e for e in k.atoms() if e.is_Symbol] self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) + # Check that variable names make sense. assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert len(self._sp_x)==len(self._sp_z) - self.input_dim = len(self._sp_x) + assert len(self._sp_x)==input_dim + + # If it is a multi-output covariance, add an input for indexing the outputs. self._real_input_dim = self.input_dim if output_dim > 1: self.input_dim += 1 assert self.input_dim == input_dim self.output_dim = output_dim - # extract parameter names + + # extract parameter names from the covariance thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) - # Look for parameters with index. + # Look for parameters with index (subscripts), they are associated with different outputs. if self.output_dim>1: self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) + # Make sure parameter appears with both indices! assert len(self._sp_theta_i)==len(self._sp_theta_j) assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) - # Extract names of shared parameters + # Extract names of shared parameters (those without a subscript) self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] self.num_split_params = len(self._sp_theta_i) @@ -77,7 +86,8 @@ class spkern(Kernpart): self._sp_theta = thetas self.num_shared_params = len(self._sp_theta) self.num_params = self.num_shared_params - + + # Add parameters to the model. for theta in self._sp_theta: val = 1.0 if param is not None: @@ -87,18 +97,22 @@ class spkern(Kernpart): #deal with param self._set_params(self._get_params()) - #Differentiate! + # Differentiate with respect to parameters. 
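# --- Editor's sketch (not part of the patch): how the symbol bookkeeping and
# differentiation above behave for a concrete covariance.  Assumes only that
# sympy is installed; the names follow the x_*/z_* convention this class requires.
import sympy as sp

x_0, z_0 = sp.symbols('x_0 z_0')
lengthscale, variance = sp.symbols('lengthscale variance', positive=True)

# an RBF-style covariance written as a sympy expression
k = variance * sp.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))

# split the free symbols into inputs and parameters, as the constructor does
sp_vars = [e for e in k.atoms() if e.is_Symbol]
xs = sorted([e for e in sp_vars if e.name[0:2] == 'x_'], key=lambda e: int(e.name[2:]))
zs = sorted([e for e in sp_vars if e.name[0:2] == 'z_'], key=lambda e: int(e.name[2:]))
thetas = sorted([e for e in sp_vars if e not in xs + zs], key=lambda e: e.name)

# symbolic parameter gradients, as computed for _sp_dk_dtheta
dk_dtheta = [sp.diff(k, theta).simplify() for theta in thetas]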
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] if self.output_dim > 1: self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] - + + # differentiate with respect to input variables. self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] + # psi_stats aren't yet implemented. if False: self.compute_psi_stats() + # generate the code for the covariance functions self._gen_code() + if weave if False: extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] else: From f6484bcbd03110e5d7a0d27a84463e803038a9fc Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 18 Feb 2014 19:37:53 -0500 Subject: [PATCH 02/38] Using params class with sympy covariance. Adding conditional statements for presence of weave. --- GPy/kern/parts/sympykern.py | 511 ++++++++++++++++++++---------------- 1 file changed, 284 insertions(+), 227 deletions(-) diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index 2d015b27..a5bb7b1d 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -1,17 +1,31 @@ -import numpy as np -import sympy as sp -from sympy.utilities.codegen import codegen -from sympy.core.cache import clear_cache +try: + import sympy as sp + sympy_available=True +except ImportError: + sympy_available=False + exit() + +from sympy.core.cache import clear_cache +from sympy.utilities.codegen import codegen + +try: + from scipy import weave + weave_available = True +except ImportError: + weave_available = False -from scipy import weave -import re import os -import sys current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) +import sys +import numpy as np +import re import tempfile import pdb import ast + from kernpart import Kernpart +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class spkern(Kernpart): """ @@ -75,17 +89,20 @@ class spkern(Kernpart): self.num_split_params = len(self._sp_theta_i) self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] for theta in self._split_theta_names: - setattr(self, theta, np.ones(self.output_dim)) + setattr(self, theta, Param(theta, np.ones(self.output_dim), None)) + self.add_parameters(getattr(self, theta)) + + #setattr(self, theta, np.ones(self.output_dim)) self.num_shared_params = len(self._sp_theta) - self.num_params = self.num_shared_params+self.num_split_params*self.output_dim + #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim else: self.num_split_params = 0 self._split_theta_names = [] self._sp_theta = thetas self.num_shared_params = len(self._sp_theta) - self.num_params = self.num_shared_params + #self.num_params = self.num_shared_params # Add parameters to the model. for theta in self._sp_theta: @@ -93,9 +110,12 @@ class spkern(Kernpart): if param is not None: if param.has_key(theta): val = param[theta] - setattr(self, theta.name, val) + #setattr(self, theta.name, val) + setattr(self, theta.name, Param(theta.name, val, None)) + self.add_parameters(getattr(self, theta.name)) + self.parameters_changed() # initializes cache #deal with param - self._set_params(self._get_params()) + #self._set_params(self._get_params()) # Differentiate with respect to parameters. 
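# --- Editor's sketch (assumption: the parameterization API imported at the top
# of this patch).  A positive kernel parameter would typically be wrapped in a
# Param, optionally with a Logexp transform to keep it positive, and registered
# via add_parameters(); the loop above passes None (no transform).
from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logexp

variance = Param('variance', 1.0, Logexp())  # constrained positive
# inside a Parameterized/Kernpart subclass one would then call:
#     self.add_parameters(variance)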
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] @@ -112,26 +132,26 @@ class spkern(Kernpart): # generate the code for the covariance functions self._gen_code() - if weave - if False: - extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] - else: - extra_compile_args = [] + if weave_available: + if False: + extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] + else: + extra_compile_args = [] - self.weave_kwargs = { - 'support_code':self._function_code, - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], - 'headers':['"sympy_helpers.h"'], - 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], - 'extra_compile_args':extra_compile_args, - 'extra_link_args':['-lgomp'], - 'verbose':True} + self.weave_kwargs = { + 'support_code':self._function_code, + 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], + 'headers':['"sympy_helpers.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], + 'verbose':True} def __add__(self,other): return spkern(self._sp_k+other._sp_k) def _gen_code(self): - #generate c functions from sympy objects + argument_sequence = self._sp_x+self._sp_z+self._sp_theta code_list = [('k',self._sp_k)] # gradients with respect to covariance input @@ -142,193 +162,224 @@ class spkern(Kernpart): if self.output_dim > 1: argument_sequence += self._sp_theta_i + self._sp_theta_j code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)] + # generate c functions from sympy objects + if weave_available: + code_type = "C" + else: + code_type = "PYTHON" (foo_c,self._function_code), (foo_h,self._function_header) = \ - codegen(code_list, "C",'foobar',argument_sequence=argument_sequence) - #put the header file where we can find it - f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w') - f.write(self._function_header) - f.close() + codegen(code_list, + code_type, + self.name, + argument_sequence=argument_sequence) + + # Use weave to compute the underlying functions. + if weave_available: + # put the header file where we can find it + f = file(os.path.join(tempfile.gettempdir(), self.name + '.h'),'w') + f.write(self._function_header) + f.close() + + # Substitute any known derivatives which sympy doesn't compute self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) - # This is the basic argument construction for the C code. - #arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] - # + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) - arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] - + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) - if self.output_dim>1: - reverse_arg_list = list(arg_list) - reverse_arg_list.reverse() + if weave_available: + # arg_list will store the arguments required for the C code. 
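# --- Editor's sketch (not part of the patch): a standalone call to sympy's
# codegen, which _gen_code relies on above; the expression and argument names
# here are toy stand-ins.
import sympy as sp
from sympy.utilities.codegen import codegen

x_0, z_0, lengthscale = sp.symbols('x_0 z_0 lengthscale')
k = sp.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))

(c_name, c_code), (h_name, c_header) = codegen(
    [('k', k)], "C", 'sympykern_example',
    argument_sequence=[x_0, z_0, lengthscale])
# c_code now contains a C function roughly of the form
#     double k(double x_0, double z_0, double lengthscale) { ... }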
+ arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) - param_arg_list = [shared_params.name for shared_params in self._sp_theta] - arg_list += param_arg_list + # for multiple outputs reverse argument list is also required + if self.output_dim>1: + reverse_arg_list = list(arg_list) + reverse_arg_list.reverse() - precompute_list=[] - if self.output_dim > 1: - reverse_arg_list+=list(param_arg_list) - split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] - split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] - arg_list += split_param_arg_list - reverse_arg_list += split_param_reverse_arg_list - # Extract the right output indices from the inputs. - c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] - precompute_list += c_define_output_indices - reverse_arg_string = ", ".join(reverse_arg_list) - arg_string = ", ".join(arg_list) - precompute_string = "\n".join(precompute_list) - # Here's the code to do the looping for K - self._K_code =\ - """ - // _K_code - // Code for computing the covariance function. - int i; - int j; - int N = target_array->dimensions[0]; - int num_inducing = target_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;i 1: + reverse_arg_list+=list(param_arg_list) + # For multiple outputs, also need the split parameters. + split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] + split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] + arg_list += split_param_arg_list + reverse_arg_list += split_param_reverse_arg_list + # Extract the right output indices from the inputs. + c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] + precompute_list += c_define_output_indices + reverse_arg_string = ", ".join(reverse_arg_list) + arg_string = ", ".join(arg_list) + precompute_string = "\n".join(precompute_list) + + # Now we use the arguments in code that computes the separate parts. + + # Any precomputations will be done here eventually. + self._precompute = \ + """ + // Precompute code would go here. It will be called when parameters are updated. + """ + + # Here's the code to do the looping for K + self._K_code =\ + """ + // _K_code + // Code for computing the covariance function. 
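        // --- Editor's note (illustration, not part of the patch): weave exposes
        // each NumPy argument through uppercase macros named after the Python
        // variable, one index per array dimension, e.g.
        //     X2(i, q)       -> X[i, q]       (2-d input array)
        //     TARGET2(i, j)  -> target[i, j]  (2-d output array)
        // which is why the loop below writes TARGET2 and reads X2/Z2 directly.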
+ int i; + int j; + int N = target_array->dimensions[0]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for - for (i=0;i1: - grad_func_list += c_define_output_indices - grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) - grad_func_string = '\n'.join(grad_func_list) - - self._dK_dtheta_code =\ - """ - // _dK_dtheta_code - // Code for computing gradient of covariance with respect to parameters. - int i; - int j; - int N = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for + for (i=0;i1: + grad_func_list += c_define_output_indices + grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) + grad_func_string = '\n'.join(grad_func_list) + + self._dK_dtheta_code =\ + """ + // _dK_dtheta_code + // Code for computing gradient of covariance with respect to parameters. + int i; + int j; + int N = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;i1: + gradX_func_list += c_define_output_indices + gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] + gradX_func_string = "\n".join(gradX_func_list) + + self._dK_dX_code = \ + """ + // _dK_dX_code + // Code for computing gradient of covariance with respect to inputs. + int i; + int j; + int N = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;i1: - gradX_func_list += c_define_output_indices - gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] - gradX_func_string = "\n".join(gradX_func_list) - - self._dK_dX_code = \ - """ - // _dK_dX_code - // Code for computing gradient of covariance with respect to inputs. 
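        // --- Editor's note (clarification, not part of the patch): these gradient
        // loops all apply the chain rule through the covariance entries, e.g. for a
        // shared parameter theta_p the code above accumulates
        //     dL/dtheta_p = sum_{i,j} partial[i, j] * d k(X_i, Z_j) / d theta_p,
        // and the gradient-wrt-X code accumulates the analogous sum with dk_dx_q.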
- int i; - int j; - int N = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (int i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (int i=0;i1: - for i, split_params in enumerate(self._split_theta_names): - start = self.num_shared_params + i*self.output_dim - end = self.num_shared_params + (i+1)*self.output_dim - setattr(self, split_params, param[start:end]) + # if self.output_dim>1: + # for i, split_params in enumerate(self._split_theta_names): + # start = self.num_shared_params + i*self.output_dim + # end = self.num_shared_params + (i+1)*self.output_dim + # setattr(self, split_params, param[start:end]) - def _get_params(self): - params = np.zeros(0) - for shared_params in self._sp_theta: - params = np.hstack((params, getattr(self, shared_params.name))) - if self.output_dim>1: - for split_params in self._split_theta_names: - params = np.hstack((params, getattr(self, split_params).flatten())) - return params + # def _get_params(self): + # params = np.zeros(0) + # for shared_params in self._sp_theta: + # params = np.hstack((params, getattr(self, shared_params.name))) + # if self.output_dim>1: + # for split_params in self._split_theta_names: + # params = np.hstack((params, getattr(self, split_params).flatten())) + # return params - def _get_param_names(self): - if self.output_dim>1: - return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)] - else: - return [x.name for x in self._sp_theta] + # def _get_param_names(self): + # if self.output_dim>1: + # return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)] + # else: + # return [x.name for x in self._sp_theta] From 0082acad6392f98fd1d5c6335a7adb19d7679aca Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 10:51:12 +0000 Subject: [PATCH 03/38] minor edits --- .../latent_function_inference/dtc.py | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index dbbff6d0..1a811de6 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -32,7 +32,7 @@ class DTC(object): #make sure the noise is not hetero beta = 1./np.squeeze(likelihood.variance) if beta.size <1: - raise NotImplementedError, "no hetero noise with this implementatino of DTC" + raise NotImplementedError, "no hetero noise with this implementation of DTC" Kmm = kern.K(Z) Knn = kern.Kdiag(X) @@ -89,4 +89,85 @@ class DTC(object): return post, log_marginal, grad_dict +class vDTC(object): + def __init__(self): + self.const_jitter = 1e-6 + + def inference(self, kern, X, X_variance, Z, likelihood, Y): + assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." + + #TODO: MAX! fix this! 
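        # --- Editor's note (assumption): the quantity assembled below looks like the
        # standard variational-DTC lower bound (Titsias-style),
        #     log p(Y) >= log N(Y | 0, Knm Kmm^{-1} Kmn + (1/beta) I)
        #                 - (beta/2) * trace(Knn - Knm Kmm^{-1} Kmn),
        # with the trace correction carried in `trace_term`.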
+ from ...util.misc import param_to_array + Y = param_to_array(Y) + + num_inducing, _ = Z.shape + num_data, output_dim = Y.shape + + #make sure the noise is not hetero + beta = 1./np.squeeze(likelihood.variance) + if beta.size <1: + raise NotImplementedError, "no hetero noise with this implementation of DTC" + + Kmm = kern.K(Z) + Knn = kern.Kdiag(X) + Knm = kern.K(X, Z) + U = Knm + Uy = np.dot(U.T,Y) + + #factor Kmm + Kmmi, L, Li, _ = pdinv(Kmm) + + # Compute A + LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta) + A_ = tdot(LiUTbeta) + trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_)) + A = A_ + np.eye(num_inducing) + + # factor A + LA = jitchol(A) + + # back substutue to get b, P, v + tmp, _ = dtrtrs(L, Uy, lower=1) + b, _ = dtrtrs(LA, tmp*beta, lower=1) + tmp, _ = dtrtrs(LA, b, lower=1, trans=1) + v, _ = dtrtrs(L, tmp, lower=1, trans=1) + tmp, _ = dtrtrs(LA, Li, lower=1, trans=0) + P = tdot(tmp.T) + + #compute log marginal + log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \ + -np.sum(np.log(np.diag(LA)))*output_dim + \ + 0.5*num_data*output_dim*np.log(beta) + \ + -0.5*beta*np.sum(np.square(Y)) + \ + 0.5*np.sum(np.square(b)) + \ + trace_term + + # Compute dL_dKmm + vvT_P = tdot(v.reshape(-1,1)) + P + LAL = Li.T.dot(A).dot(Li) + dL_dK = Kmmi - 0.5*(vvT_P + LAL) + + # Compute dL_dU + vY = np.dot(v.reshape(-1,1),Y.T) + #dL_dU = vY - np.dot(vvT_P, U.T) + dL_dU = vY - np.dot(vvT_P - Kmmi, U.T) + dL_dU *= beta + + #compute dL_dR + Uv = np.dot(U, v) + dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2 + dL_dR -=beta*trace_term/num_data + + grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T} + + #update gradients + kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + likelihood.update_gradients(dL_dR) + + #construct a posterior object + post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) + + + return post, log_marginal, grad_dict + From 89e216b6a67cf7c8dd0c2e274299239e94d90ebe Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 13:38:36 +0000 Subject: [PATCH 04/38] moved stuff. much breakage. Ow. 
--- GPy/kern/{parts => }/Brownian.py | 0 GPy/kern/{parts => }/Matern32.py | 0 GPy/kern/{parts => }/Matern52.py | 0 GPy/kern/{parts => }/ODE_1.py | 0 GPy/kern/__init__.py | 38 +++++++++++++++----- GPy/kern/__init__old.py | 9 +++++ GPy/kern/{parts => }/bias.py | 0 GPy/kern/{parts => }/coregionalize.py | 0 GPy/kern/{parts => }/eq_ode1.py | 0 GPy/kern/{parts => }/exponential.py | 0 GPy/kern/{parts => }/finite_dimensional.py | 0 GPy/kern/{parts => }/fixed.py | 0 GPy/kern/{parts => }/gibbs.py | 0 GPy/kern/{parts => }/hetero.py | 0 GPy/kern/{parts => }/hierarchical.py | 0 GPy/kern/{parts => }/independent_outputs.py | 0 GPy/kern/{parts => }/kernpart.py | 0 GPy/kern/{parts => }/linear.py | 0 GPy/kern/{parts => }/mlp.py | 0 GPy/kern/{parts => }/odekern1.c | 0 GPy/kern/parts/__init__.py | 29 --------------- GPy/kern/{parts => }/periodic_Matern32.py | 0 GPy/kern/{parts => }/periodic_Matern52.py | 0 GPy/kern/{parts => }/periodic_exponential.py | 0 GPy/kern/{parts => }/poly.py | 0 GPy/kern/{parts => }/prod.py | 0 GPy/kern/{parts => }/prod_orthogonal.py | 0 GPy/kern/{parts => }/rational_quadratic.py | 0 GPy/kern/{parts => }/rbf.py | 0 GPy/kern/{parts => }/rbf_inv.py | 0 GPy/kern/{parts => }/rbfcos.py | 0 GPy/kern/{parts => }/spline.py | 0 GPy/kern/{parts => }/ss_rbf.py | 0 GPy/kern/{parts => }/symmetric.py | 0 GPy/kern/{parts => }/sympy_helpers.cpp | 0 GPy/kern/{parts => }/sympy_helpers.h | 0 GPy/kern/{parts => }/sympykern.py | 0 GPy/kern/{parts => }/white.py | 0 38 files changed, 38 insertions(+), 38 deletions(-) rename GPy/kern/{parts => }/Brownian.py (100%) rename GPy/kern/{parts => }/Matern32.py (100%) rename GPy/kern/{parts => }/Matern52.py (100%) rename GPy/kern/{parts => }/ODE_1.py (100%) create mode 100644 GPy/kern/__init__old.py rename GPy/kern/{parts => }/bias.py (100%) rename GPy/kern/{parts => }/coregionalize.py (100%) rename GPy/kern/{parts => }/eq_ode1.py (100%) rename GPy/kern/{parts => }/exponential.py (100%) rename GPy/kern/{parts => }/finite_dimensional.py (100%) rename GPy/kern/{parts => }/fixed.py (100%) rename GPy/kern/{parts => }/gibbs.py (100%) rename GPy/kern/{parts => }/hetero.py (100%) rename GPy/kern/{parts => }/hierarchical.py (100%) rename GPy/kern/{parts => }/independent_outputs.py (100%) rename GPy/kern/{parts => }/kernpart.py (100%) rename GPy/kern/{parts => }/linear.py (100%) rename GPy/kern/{parts => }/mlp.py (100%) rename GPy/kern/{parts => }/odekern1.c (100%) delete mode 100644 GPy/kern/parts/__init__.py rename GPy/kern/{parts => }/periodic_Matern32.py (100%) rename GPy/kern/{parts => }/periodic_Matern52.py (100%) rename GPy/kern/{parts => }/periodic_exponential.py (100%) rename GPy/kern/{parts => }/poly.py (100%) rename GPy/kern/{parts => }/prod.py (100%) rename GPy/kern/{parts => }/prod_orthogonal.py (100%) rename GPy/kern/{parts => }/rational_quadratic.py (100%) rename GPy/kern/{parts => }/rbf.py (100%) rename GPy/kern/{parts => }/rbf_inv.py (100%) rename GPy/kern/{parts => }/rbfcos.py (100%) rename GPy/kern/{parts => }/spline.py (100%) rename GPy/kern/{parts => }/ss_rbf.py (100%) rename GPy/kern/{parts => }/symmetric.py (100%) rename GPy/kern/{parts => }/sympy_helpers.cpp (100%) rename GPy/kern/{parts => }/sympy_helpers.h (100%) rename GPy/kern/{parts => }/sympykern.py (100%) rename GPy/kern/{parts => }/white.py (100%) diff --git a/GPy/kern/parts/Brownian.py b/GPy/kern/Brownian.py similarity index 100% rename from GPy/kern/parts/Brownian.py rename to GPy/kern/Brownian.py diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/Matern32.py similarity index 100% rename 
from GPy/kern/parts/Matern32.py rename to GPy/kern/Matern32.py diff --git a/GPy/kern/parts/Matern52.py b/GPy/kern/Matern52.py similarity index 100% rename from GPy/kern/parts/Matern52.py rename to GPy/kern/Matern52.py diff --git a/GPy/kern/parts/ODE_1.py b/GPy/kern/ODE_1.py similarity index 100% rename from GPy/kern/parts/ODE_1.py rename to GPy/kern/ODE_1.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index eb4076c3..0a758f1e 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,9 +1,29 @@ -# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from constructors import * -try: - from constructors import rbf_sympy, sympykern # these depend on sympy -except: - pass -from kern import * +import bias +import Brownian +import coregionalize +import exponential +import eq_ode1 +import finite_dimensional +import fixed +import gibbs +import hetero +import hierarchical +import independent_outputs +import linear +import Matern32 +import Matern52 +import mlp +import ODE_1 +import periodic_exponential +import periodic_Matern32 +import periodic_Matern52 +import poly +import prod_orthogonal +import prod +import rational_quadratic +import rbfcos +import rbf +import rbf_inv +import spline +import symmetric +import white diff --git a/GPy/kern/__init__old.py b/GPy/kern/__init__old.py new file mode 100644 index 00000000..eb4076c3 --- /dev/null +++ b/GPy/kern/__init__old.py @@ -0,0 +1,9 @@ +# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from constructors import * +try: + from constructors import rbf_sympy, sympykern # these depend on sympy +except: + pass +from kern import * diff --git a/GPy/kern/parts/bias.py b/GPy/kern/bias.py similarity index 100% rename from GPy/kern/parts/bias.py rename to GPy/kern/bias.py diff --git a/GPy/kern/parts/coregionalize.py b/GPy/kern/coregionalize.py similarity index 100% rename from GPy/kern/parts/coregionalize.py rename to GPy/kern/coregionalize.py diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/eq_ode1.py similarity index 100% rename from GPy/kern/parts/eq_ode1.py rename to GPy/kern/eq_ode1.py diff --git a/GPy/kern/parts/exponential.py b/GPy/kern/exponential.py similarity index 100% rename from GPy/kern/parts/exponential.py rename to GPy/kern/exponential.py diff --git a/GPy/kern/parts/finite_dimensional.py b/GPy/kern/finite_dimensional.py similarity index 100% rename from GPy/kern/parts/finite_dimensional.py rename to GPy/kern/finite_dimensional.py diff --git a/GPy/kern/parts/fixed.py b/GPy/kern/fixed.py similarity index 100% rename from GPy/kern/parts/fixed.py rename to GPy/kern/fixed.py diff --git a/GPy/kern/parts/gibbs.py b/GPy/kern/gibbs.py similarity index 100% rename from GPy/kern/parts/gibbs.py rename to GPy/kern/gibbs.py diff --git a/GPy/kern/parts/hetero.py b/GPy/kern/hetero.py similarity index 100% rename from GPy/kern/parts/hetero.py rename to GPy/kern/hetero.py diff --git a/GPy/kern/parts/hierarchical.py b/GPy/kern/hierarchical.py similarity index 100% rename from GPy/kern/parts/hierarchical.py rename to GPy/kern/hierarchical.py diff --git a/GPy/kern/parts/independent_outputs.py b/GPy/kern/independent_outputs.py similarity index 100% rename from GPy/kern/parts/independent_outputs.py rename to GPy/kern/independent_outputs.py diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/kernpart.py similarity index 100% rename from GPy/kern/parts/kernpart.py rename to GPy/kern/kernpart.py diff --git 
a/GPy/kern/parts/linear.py b/GPy/kern/linear.py similarity index 100% rename from GPy/kern/parts/linear.py rename to GPy/kern/linear.py diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/mlp.py similarity index 100% rename from GPy/kern/parts/mlp.py rename to GPy/kern/mlp.py diff --git a/GPy/kern/parts/odekern1.c b/GPy/kern/odekern1.c similarity index 100% rename from GPy/kern/parts/odekern1.c rename to GPy/kern/odekern1.c diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py deleted file mode 100644 index 0a758f1e..00000000 --- a/GPy/kern/parts/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -import bias -import Brownian -import coregionalize -import exponential -import eq_ode1 -import finite_dimensional -import fixed -import gibbs -import hetero -import hierarchical -import independent_outputs -import linear -import Matern32 -import Matern52 -import mlp -import ODE_1 -import periodic_exponential -import periodic_Matern32 -import periodic_Matern52 -import poly -import prod_orthogonal -import prod -import rational_quadratic -import rbfcos -import rbf -import rbf_inv -import spline -import symmetric -import white diff --git a/GPy/kern/parts/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py similarity index 100% rename from GPy/kern/parts/periodic_Matern32.py rename to GPy/kern/periodic_Matern32.py diff --git a/GPy/kern/parts/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py similarity index 100% rename from GPy/kern/parts/periodic_Matern52.py rename to GPy/kern/periodic_Matern52.py diff --git a/GPy/kern/parts/periodic_exponential.py b/GPy/kern/periodic_exponential.py similarity index 100% rename from GPy/kern/parts/periodic_exponential.py rename to GPy/kern/periodic_exponential.py diff --git a/GPy/kern/parts/poly.py b/GPy/kern/poly.py similarity index 100% rename from GPy/kern/parts/poly.py rename to GPy/kern/poly.py diff --git a/GPy/kern/parts/prod.py b/GPy/kern/prod.py similarity index 100% rename from GPy/kern/parts/prod.py rename to GPy/kern/prod.py diff --git a/GPy/kern/parts/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py similarity index 100% rename from GPy/kern/parts/prod_orthogonal.py rename to GPy/kern/prod_orthogonal.py diff --git a/GPy/kern/parts/rational_quadratic.py b/GPy/kern/rational_quadratic.py similarity index 100% rename from GPy/kern/parts/rational_quadratic.py rename to GPy/kern/rational_quadratic.py diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/rbf.py similarity index 100% rename from GPy/kern/parts/rbf.py rename to GPy/kern/rbf.py diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/rbf_inv.py similarity index 100% rename from GPy/kern/parts/rbf_inv.py rename to GPy/kern/rbf_inv.py diff --git a/GPy/kern/parts/rbfcos.py b/GPy/kern/rbfcos.py similarity index 100% rename from GPy/kern/parts/rbfcos.py rename to GPy/kern/rbfcos.py diff --git a/GPy/kern/parts/spline.py b/GPy/kern/spline.py similarity index 100% rename from GPy/kern/parts/spline.py rename to GPy/kern/spline.py diff --git a/GPy/kern/parts/ss_rbf.py b/GPy/kern/ss_rbf.py similarity index 100% rename from GPy/kern/parts/ss_rbf.py rename to GPy/kern/ss_rbf.py diff --git a/GPy/kern/parts/symmetric.py b/GPy/kern/symmetric.py similarity index 100% rename from GPy/kern/parts/symmetric.py rename to GPy/kern/symmetric.py diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/sympy_helpers.cpp similarity index 100% rename from GPy/kern/parts/sympy_helpers.cpp rename to GPy/kern/sympy_helpers.cpp diff --git a/GPy/kern/parts/sympy_helpers.h b/GPy/kern/sympy_helpers.h similarity index 100% rename from 
GPy/kern/parts/sympy_helpers.h rename to GPy/kern/sympy_helpers.h diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/sympykern.py similarity index 100% rename from GPy/kern/parts/sympykern.py rename to GPy/kern/sympykern.py diff --git a/GPy/kern/parts/white.py b/GPy/kern/white.py similarity index 100% rename from GPy/kern/parts/white.py rename to GPy/kern/white.py From 20f02a80b420696c131222e0fbf44046dcd2c3ab Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 15:00:48 +0000 Subject: [PATCH 05/38] rbf and white seem to work --- GPy/core/gp.py | 2 +- GPy/kern/__init__.py | 61 +- GPy/kern/__init__old.py | 9 - GPy/kern/{ => _src}/Brownian.py | 0 GPy/kern/{ => _src}/Matern32.py | 0 GPy/kern/{ => _src}/Matern52.py | 0 GPy/kern/{ => _src}/ODE_1.py | 0 GPy/kern/_src/add.py | 264 ++++++++ GPy/kern/{ => _src}/bias.py | 0 GPy/kern/{ => _src}/constructors.py | 0 GPy/kern/{ => _src}/coregionalize.py | 0 GPy/kern/{ => _src}/eq_ode1.py | 0 GPy/kern/{ => _src}/exponential.py | 0 GPy/kern/{ => _src}/finite_dimensional.py | 0 GPy/kern/{ => _src}/fixed.py | 0 GPy/kern/{ => _src}/gibbs.py | 0 GPy/kern/{ => _src}/hetero.py | 0 GPy/kern/{ => _src}/hierarchical.py | 0 GPy/kern/{ => _src}/independent_outputs.py | 0 GPy/kern/_src/kern.py | 336 ++++++++++ GPy/kern/_src/kernpart.py | 60 ++ GPy/kern/{ => _src}/linear.py | 12 +- GPy/kern/{ => _src}/mlp.py | 0 GPy/kern/{ => _src}/odekern1.c | 0 GPy/kern/{ => _src}/periodic_Matern32.py | 0 GPy/kern/{ => _src}/periodic_Matern52.py | 0 GPy/kern/{ => _src}/periodic_exponential.py | 0 GPy/kern/{ => _src}/poly.py | 0 GPy/kern/{ => _src}/prod.py | 6 +- GPy/kern/{ => _src}/prod_orthogonal.py | 0 GPy/kern/{ => _src}/rational_quadratic.py | 0 GPy/kern/{ => _src}/rbf.py | 49 +- GPy/kern/{ => _src}/rbf_inv.py | 0 GPy/kern/{ => _src}/rbfcos.py | 0 GPy/kern/{ => _src}/spline.py | 0 GPy/kern/{ => _src}/ss_rbf.py | 0 GPy/kern/{ => _src}/symmetric.py | 0 GPy/kern/{ => _src}/sympy_helpers.cpp | 0 GPy/kern/{ => _src}/sympy_helpers.h | 0 GPy/kern/{ => _src}/sympykern.py | 0 GPy/kern/{ => _src}/white.py | 28 +- GPy/kern/kern.py | 680 -------------------- GPy/kern/kernpart.py | 176 ----- GPy/models/mrd.py | 6 +- GPy/plotting/matplot_dep/kernel_plots.py | 2 +- 45 files changed, 737 insertions(+), 954 deletions(-) delete mode 100644 GPy/kern/__init__old.py rename GPy/kern/{ => _src}/Brownian.py (100%) rename GPy/kern/{ => _src}/Matern32.py (100%) rename GPy/kern/{ => _src}/Matern52.py (100%) rename GPy/kern/{ => _src}/ODE_1.py (100%) create mode 100644 GPy/kern/_src/add.py rename GPy/kern/{ => _src}/bias.py (100%) rename GPy/kern/{ => _src}/constructors.py (100%) rename GPy/kern/{ => _src}/coregionalize.py (100%) rename GPy/kern/{ => _src}/eq_ode1.py (100%) rename GPy/kern/{ => _src}/exponential.py (100%) rename GPy/kern/{ => _src}/finite_dimensional.py (100%) rename GPy/kern/{ => _src}/fixed.py (100%) rename GPy/kern/{ => _src}/gibbs.py (100%) rename GPy/kern/{ => _src}/hetero.py (100%) rename GPy/kern/{ => _src}/hierarchical.py (100%) rename GPy/kern/{ => _src}/independent_outputs.py (100%) create mode 100644 GPy/kern/_src/kern.py create mode 100644 GPy/kern/_src/kernpart.py rename GPy/kern/{ => _src}/linear.py (98%) rename GPy/kern/{ => _src}/mlp.py (100%) rename GPy/kern/{ => _src}/odekern1.c (100%) rename GPy/kern/{ => _src}/periodic_Matern32.py (100%) rename GPy/kern/{ => _src}/periodic_Matern52.py (100%) rename GPy/kern/{ => _src}/periodic_exponential.py (100%) rename GPy/kern/{ => _src}/poly.py (100%) rename GPy/kern/{ => _src}/prod.py (98%) rename GPy/kern/{ => 
_src}/prod_orthogonal.py (100%) rename GPy/kern/{ => _src}/rational_quadratic.py (100%) rename GPy/kern/{ => _src}/rbf.py (92%) rename GPy/kern/{ => _src}/rbf_inv.py (100%) rename GPy/kern/{ => _src}/rbfcos.py (100%) rename GPy/kern/{ => _src}/spline.py (100%) rename GPy/kern/{ => _src}/ss_rbf.py (100%) rename GPy/kern/{ => _src}/symmetric.py (100%) rename GPy/kern/{ => _src}/sympy_helpers.cpp (100%) rename GPy/kern/{ => _src}/sympy_helpers.h (100%) rename GPy/kern/{ => _src}/sympykern.py (100%) rename GPy/kern/{ => _src}/white.py (77%) delete mode 100644 GPy/kern/kern.py delete mode 100644 GPy/kern/kernpart.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index d769678e..10ba8e6b 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -43,7 +43,7 @@ class GP(Model): else: self.Y_metadata = None - assert isinstance(kernel, kern.kern) + assert isinstance(kernel, kern.Kern) self.kern = kernel assert isinstance(likelihood, likelihoods.Likelihood) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 0a758f1e..2098bd76 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,29 +1,32 @@ -import bias -import Brownian -import coregionalize -import exponential -import eq_ode1 -import finite_dimensional -import fixed -import gibbs -import hetero -import hierarchical -import independent_outputs -import linear -import Matern32 -import Matern52 -import mlp -import ODE_1 -import periodic_exponential -import periodic_Matern32 -import periodic_Matern52 -import poly -import prod_orthogonal -import prod -import rational_quadratic -import rbfcos -import rbf -import rbf_inv -import spline -import symmetric -import white +from rbf import RBF +from white import White +from kern import Kern +#import bias +#import Brownian +#import coregionalize +#import exponential +#import eq_ode1 +#import finite_dimensional +#import fixed +#import gibbs +#import hetero +#import hierarchical +#import independent_outputs +#import linear +#import Matern32 +#import Matern52 +#import mlp +#import ODE_1 +#import periodic_exponential +#import periodic_Matern32 +#import periodic_Matern52 +#import poly +#import prod_orthogonal +#import prod +#import rational_quadratic +#import rbfcos +#import rbf +#import rbf_inv +#import spline +#import symmetric +#import white diff --git a/GPy/kern/__init__old.py b/GPy/kern/__init__old.py deleted file mode 100644 index eb4076c3..00000000 --- a/GPy/kern/__init__old.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from constructors import * -try: - from constructors import rbf_sympy, sympykern # these depend on sympy -except: - pass -from kern import * diff --git a/GPy/kern/Brownian.py b/GPy/kern/_src/Brownian.py similarity index 100% rename from GPy/kern/Brownian.py rename to GPy/kern/_src/Brownian.py diff --git a/GPy/kern/Matern32.py b/GPy/kern/_src/Matern32.py similarity index 100% rename from GPy/kern/Matern32.py rename to GPy/kern/_src/Matern32.py diff --git a/GPy/kern/Matern52.py b/GPy/kern/_src/Matern52.py similarity index 100% rename from GPy/kern/Matern52.py rename to GPy/kern/_src/Matern52.py diff --git a/GPy/kern/ODE_1.py b/GPy/kern/_src/ODE_1.py similarity index 100% rename from GPy/kern/ODE_1.py rename to GPy/kern/_src/ODE_1.py diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py new file mode 100644 index 00000000..8d916941 --- /dev/null +++ b/GPy/kern/_src/add.py @@ -0,0 +1,264 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import sys +import numpy as np +import itertools +from linear import Linear +from ..core.parameterization import Parameterized +from GPy.core.parameterization.param import Param +from kern import Kern + +class Add(Kern): + def __init__(self, subkerns, tensor): + assert all([isinstance(k, Kern) for k in subkerns]) + if tensor: + input_dim = sum([k.input_dim for k in subkerns]) + self.input_slices = [] + n = 0 + for k in subkerns: + self.input_slices.append(slice(n, n+k.input_dim)) + n += k.input_dim + else: + assert all([k.input_dim == subkerns[0].input_dim for k in subkerns]) + input_dim = subkerns[0].input_dim + self.input_slices = [slice(None) for k in subkerns] + super(Add, self).__init__(input_dim, 'add') + self.add_parameters(*subkerns) + + + def K(self, X, X2=None, which_parts='all'): + """ + Compute the kernel function. + + :param X: the first set of inputs to the kernel + :param X2: (optional) the second set of arguments to the kernel. If X2 + is None, this is passed throgh to the 'part' object, which + handles this as X2 == X. + :param which_parts: a list of booleans detailing whether to include + each of the part functions. By default, 'all' + indicates all parts + """ + if which_parts == 'all': + which_parts = [True] * self.size + assert X.shape[1] == self.input_dim + if X2 is None: + target = np.zeros((X.shape[0], X.shape[0])) + [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] + else: + target = np.zeros((X.shape[0], X2.shape[0])) + [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] + return target + + def update_gradients_full(self, dL_dK, X): + [p.update_gradients_full(dL_dK, X) for p in self._parameters_] + + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] + + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] + + def _param_grad_helper(self, dL_dK, X, X2=None): + """ + Compute the gradient of the covariance function with respect to the parameters. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. + :type dL_dK: Np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim) + + returns: dL_dtheta + """ + assert X.shape[1] == self.input_dim + target = np.zeros(self.size) + if X2 is None: + [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] + else: + [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] + + return self._transform_gradients(target) + + def gradients_X(self, dL_dK, X, X2=None): + """Compute the gradient of the objective function with respect to X. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
+ :type dL_dK: np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim)""" + + target = np.zeros_like(X) + if X2 is None: + [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + else: + [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def Kdiag(self, X, which_parts='all'): + """Compute the diagonal of the covariance function for inputs X.""" + if which_parts == 'all': + which_parts = [True] * self.size + assert X.shape[1] == self.input_dim + target = np.zeros(X.shape[0]) + [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] + return target + + def dKdiag_dtheta(self, dL_dKdiag, X): + """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" + assert X.shape[1] == self.input_dim + assert dL_dKdiag.size == X.shape[0] + target = np.zeros(self.size) + [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + return self._transform_gradients(target) + + def dKdiag_dX(self, dL_dKdiag, X): + assert X.shape[1] == self.input_dim + target = np.zeros_like(X) + [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def psi0(self, Z, mu, S): + target = np.zeros(mu.shape[0]) + [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): + target = np.zeros(self.size) + [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] + return self._transform_gradients(target) + + def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): + target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) + [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target_mu, target_S + + def psi1(self, Z, mu, S): + target = np.zeros((mu.shape[0], Z.shape[0])) + [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): + target = np.zeros((self.size)) + [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] + return self._transform_gradients(target) + + def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): + target = np.zeros_like(Z) + [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): + """return shapes are num_samples,num_inducing,input_dim""" + target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) + [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target_mu, target_S + + def psi2(self, Z, mu, S): + """ + Computer the psi2 statistics for the covariance function. 
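        (Editor's note, an assumption based on the standard GP-LVM psi-statistics
        convention: psi0[n] = E_{q(x_n)}[k(x_n, x_n)], psi1[n, m] = E_{q(x_n)}[k(x_n, Z_m)]
        and psi2[n, m, m'] = E_{q(x_n)}[k(x_n, Z_m) k(x_n, Z_m')], where
        q(x_n) = N(mu_n, diag(S_n)).)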
+ + :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) + :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) + :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) + + """ + target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) + [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + + # compute the "cross" terms + # TODO: input_slices needed + crossterms = 0 + + for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): + if i_s1 == i_s2: + # TODO psi1 this must be faster/better/precached/more nice + tmp1 = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) + tmp2 = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) + + prod = np.multiply(tmp1, tmp2) + crossterms += prod[:, :, None] + prod[:, None, :] + + target += crossterms + return target + + def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): + """Gradient of the psi2 statistics with respect to the parameters.""" + target = np.zeros(self.size) + [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + + # compute the "cross" terms + # TODO: better looping, input_slices + for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): + p1, p2 = self._parameters_[i1], self._parameters_[i2] +# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] + ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] + + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) + + return self._transform_gradients(target) + + def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): + target = np.zeros_like(Z) + [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + # target *= 2 + + # compute the "cross" terms + # TODO: we need input_slices here. + for p1, p2 in itertools.permutations(self._parameters_, 2): +# if p1.name == 'linear' and p2.name == 'linear': +# raise NotImplementedError("We don't handle linear/linear cross-terms") + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) + + return target * 2 + + def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): + target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) + [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + + # compute the "cross" terms + # TODO: we need input_slices here. + for p1, p2 in itertools.permutations(self._parameters_, 2): +# if p1.name == 'linear' and p2.name == 'linear': +# raise NotImplementedError("We don't handle linear/linear cross-terms") + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) + + return target_mu, target_S + + def plot(self, *args, **kwargs): + """ + See GPy.plotting.matplot_dep.plot + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
+ from ..plotting.matplot_dep import kernel_plots + kernel_plots.plot(self,*args) + + def _getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Parameterized._getstate(self) + [#self._parameters_, + self.input_dim, + self.input_slices, + self._param_slices_ + ] + + def _setstate(self, state): + self._param_slices_ = state.pop() + self.input_slices = state.pop() + self.input_dim = state.pop() + Parameterized._setstate(self, state) + + diff --git a/GPy/kern/bias.py b/GPy/kern/_src/bias.py similarity index 100% rename from GPy/kern/bias.py rename to GPy/kern/_src/bias.py diff --git a/GPy/kern/constructors.py b/GPy/kern/_src/constructors.py similarity index 100% rename from GPy/kern/constructors.py rename to GPy/kern/_src/constructors.py diff --git a/GPy/kern/coregionalize.py b/GPy/kern/_src/coregionalize.py similarity index 100% rename from GPy/kern/coregionalize.py rename to GPy/kern/_src/coregionalize.py diff --git a/GPy/kern/eq_ode1.py b/GPy/kern/_src/eq_ode1.py similarity index 100% rename from GPy/kern/eq_ode1.py rename to GPy/kern/_src/eq_ode1.py diff --git a/GPy/kern/exponential.py b/GPy/kern/_src/exponential.py similarity index 100% rename from GPy/kern/exponential.py rename to GPy/kern/_src/exponential.py diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/_src/finite_dimensional.py similarity index 100% rename from GPy/kern/finite_dimensional.py rename to GPy/kern/_src/finite_dimensional.py diff --git a/GPy/kern/fixed.py b/GPy/kern/_src/fixed.py similarity index 100% rename from GPy/kern/fixed.py rename to GPy/kern/_src/fixed.py diff --git a/GPy/kern/gibbs.py b/GPy/kern/_src/gibbs.py similarity index 100% rename from GPy/kern/gibbs.py rename to GPy/kern/_src/gibbs.py diff --git a/GPy/kern/hetero.py b/GPy/kern/_src/hetero.py similarity index 100% rename from GPy/kern/hetero.py rename to GPy/kern/_src/hetero.py diff --git a/GPy/kern/hierarchical.py b/GPy/kern/_src/hierarchical.py similarity index 100% rename from GPy/kern/hierarchical.py rename to GPy/kern/_src/hierarchical.py diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/_src/independent_outputs.py similarity index 100% rename from GPy/kern/independent_outputs.py rename to GPy/kern/_src/independent_outputs.py diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py new file mode 100644 index 00000000..af362498 --- /dev/null +++ b/GPy/kern/_src/kern.py @@ -0,0 +1,336 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import sys +import numpy as np +import itertools +from ..core.parameterization import Parameterized +from GPy.core.parameterization.param import Param + + +class Kern(Parameterized): + def __init__(self,input_dim,name): + """ + The base class for a kernel: a positive definite function + which forms of a covariance function (kernel). + + :param input_dim: the number of input dimensions to the function + :type input_dim: int + + Do not instantiate. + """ + super(Kern, self).__init__(name) + self.input_dim = input_dim + + def K(self,X,X2,target): + raise NotImplementedError + def Kdiag(self,X,target): + raise NotImplementedError + def _param_grad_helper(self,dL_dK,X,X2,target): + raise NotImplementedError + def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max?? + # In the base case compute this by calling _param_grad_helper. Need to + # override for stationary covariances (for example) to save + # time. 
+ for i in range(X.shape[0]): + self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) + def psi0(self,Z,mu,S,target): + raise NotImplementedError + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): + raise NotImplementedError + def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def psi1(self,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dtheta(self,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def psi2(self,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def gradients_X(self, dL_dK, X, X2, target): + raise NotImplementedError + def dKdiag_dX(self, dL_dK, X, target): + raise NotImplementedError + def update_gradients_full(self, dL_dK, X): + """Set the gradients of all parameters when doing full (N) inference.""" + raise NotImplementedError + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + """Set the gradients of all parameters when doing sparse (M) inference.""" + raise NotImplementedError + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" + raise NotImplementedError + + def plot_ARD(self, *args): + """If an ARD kernel is present, plot a bar representation using matplotlib + + See GPy.plotting.matplot_dep.plot_ARD + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import kernel_plots + return kernel_plots.plot_ARD(self,*args) + + + def __add__(self, other): + """ Overloading of the '+' operator. for more control, see self.add """ + return self.add(other) + + def add(self, other, tensor=False): + """ + Add another kernel to this one. + + If Tensor is False, both kernels are defined on the same _space_. then + the created kernel will have the same number of inputs as self and + other (which must be the same). + + If Tensor is True, then the dimensions are stacked 'horizontally', so + that the resulting kernel has self.input_dim + other.input_dim + + :param other: the other kernel to be added + :type other: GPy.kern + + """ + assert isinstance(other, Kern), "only kernels can be added to kernels..." + from add import Add + return Add([self, other], tensor) + + def __call__(self, X, X2=None): + return self.K(X, X2) + + def __mul__(self, other): + """ Here we overload the '*' operator. See self.prod for more information""" + return self.prod(other) + + def __pow__(self, other, tensor=False): + """ + Shortcut for tensor `prod`. + """ + return self.prod(other, tensor=True) + + def prod(self, other, tensor=False): + """ + Multiply two kernels (either on the same space, or on the tensor product of the input space). + + :param other: the other kernel to be added + :type other: GPy.kern + :param tensor: whether or not to use the tensor space (default is false). + :type tensor: bool + + """ + assert isinstance(other, Kern), "only kernels can be added to kernels..." 
+ from prod import Prod + return Prod(self, other, tensor) + + +from GPy.core.model import Model + +class Kern_check_model(Model): + """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Model.__init__(self, 'kernel_test_model') + num_samples = 20 + num_samples2 = 10 + if kernel==None: + kernel = GPy.kern.rbf(1) + if X==None: + X = np.random.randn(num_samples, kernel.input_dim) + if dL_dK==None: + if X2==None: + dL_dK = np.ones((X.shape[0], X.shape[0])) + else: + dL_dK = np.ones((X.shape[0], X2.shape[0])) + + self.kernel=kernel + self.add_parameter(kernel) + self.X = X + self.X2 = X2 + self.dL_dK = dL_dK + + def is_positive_definite(self): + v = np.linalg.eig(self.kernel.K(self.X))[0] + if any(v<-10*sys.float_info.epsilon): + return False + else: + return True + + def log_likelihood(self): + return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() + + def _log_likelihood_gradients(self): + raise NotImplementedError, "This needs to be implemented to use the kern_check_model class." + +class Kern_check_dK_dtheta(Kern_check_model): + """This class allows gradient checks for the gradient of a kernel with respect to parameters. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + + def _log_likelihood_gradients(self): + return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2) + + + + + +class Kern_check_dKdiag_dtheta(Kern_check_model): + """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" + def __init__(self, kernel=None, dL_dK=None, X=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) + if dL_dK==None: + self.dL_dK = np.ones((self.X.shape[0])) + def parameters_changed(self): + self.kernel.update_gradients_full(self.dL_dK, self.X) + + def log_likelihood(self): + return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() + + def _log_likelihood_gradients(self): + return self.kernel.dKdiag_dtheta(self.dL_dK, self.X) + +class Kern_check_dK_dX(Kern_check_model): + """This class allows gradient checks for the gradient of a kernel with respect to X. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.remove_parameter(kernel) + self.X = Param('X', self.X) + self.add_parameter(self.X) + def _log_likelihood_gradients(self): + return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() + +class Kern_check_dKdiag_dX(Kern_check_dK_dX): + """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) + if dL_dK==None: + self.dL_dK = np.ones((self.X.shape[0])) + + def log_likelihood(self): + return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() + + def _log_likelihood_gradients(self): + return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() + +def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): + """ + This function runs on kernels to check the correctness of their + implementation. It checks that the covariance function is positive definite + for a randomly generated data set. + + :param kern: the kernel to be tested. 
+ :type kern: GPy.kern.Kernpart + :param X: X input values to test the covariance function. + :type X: ndarray + :param X2: X2 input values to test the covariance function. + :type X2: ndarray + + """ + pass_checks = True + if X==None: + X = np.random.randn(10, kern.input_dim) + if output_ind is not None: + X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) + if X2==None: + X2 = np.random.randn(20, kern.input_dim) + if output_ind is not None: + X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) + + if verbose: + print("Checking covariance function is positive definite.") + result = Kern_check_model(kern, X=X).is_positive_definite() + if result and verbose: + print("Check passed.") + if not result: + print("Positive definite check failed for " + kern.name + " covariance function.") + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X) wrt theta.") + result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X2) wrt theta.") + result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of Kdiag(X) wrt theta.") + result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X) wrt X.") + try: + result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X2) wrt X.") + try: + result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") + Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of Kdiag(X) wrt X.") + try: + result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) + pass_checks = False + return False + + return pass_checks diff --git a/GPy/kern/_src/kernpart.py b/GPy/kern/_src/kernpart.py new file mode 100644 index 00000000..097ed741 --- /dev/null +++ b/GPy/kern/_src/kernpart.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +#from ...core.parameterized.Parameterized import set_as_parameter +from ...core.parameterization import Parameterized + +class Kernpart_stationary(Kernpart): + def __init__(self, input_dim, lengthscale=None, ARD=False): + self.input_dim = input_dim + self.ARD = ARD + if not ARD: + self.num_params = 2 + if lengthscale is not None: + self.lengthscale = np.asarray(lengthscale) + assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" + else: + self.lengthscale = np.ones(1) + else: + self.num_params = self.input_dim + 1 + if lengthscale is not None: + self.lengthscale = np.asarray(lengthscale) + assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" + else: + self.lengthscale = np.ones(self.input_dim) + + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) + + def _set_params(self, x): + self.lengthscale = x + self.lengthscale2 = np.square(self.lengthscale) + # reset cached results + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S + + + def dKdiag_dtheta(self, dL_dKdiag, X, target): + # For stationary covariances, derivative of diagonal elements + # wrt lengthscale is 0. + target[0] += np.sum(dL_dKdiag) + + def dKdiag_dX(self, dL_dK, X, target): + pass # true for all stationary kernels + + +class Kernpart_inner(Kernpart): + def __init__(self,input_dim): + """ + The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. + + :param input_dim: the number of input dimensions to the function + :type input_dim: int + + Do not instantiate. 
+ """ + Kernpart.__init__(self, input_dim) + + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/linear.py b/GPy/kern/_src/linear.py similarity index 98% rename from GPy/kern/linear.py rename to GPy/kern/_src/linear.py index 828ece11..ab77d4e6 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/_src/linear.py @@ -4,13 +4,13 @@ import numpy as np from scipy import weave -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from kern import Kern +from ..util.linalg import tdot +from ..util.misc import fast_array_equal, param_to_array +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class Linear(Kernpart): +class Linear(Kern): """ Linear kernel diff --git a/GPy/kern/mlp.py b/GPy/kern/_src/mlp.py similarity index 100% rename from GPy/kern/mlp.py rename to GPy/kern/_src/mlp.py diff --git a/GPy/kern/odekern1.c b/GPy/kern/_src/odekern1.c similarity index 100% rename from GPy/kern/odekern1.c rename to GPy/kern/_src/odekern1.c diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/_src/periodic_Matern32.py similarity index 100% rename from GPy/kern/periodic_Matern32.py rename to GPy/kern/_src/periodic_Matern32.py diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/_src/periodic_Matern52.py similarity index 100% rename from GPy/kern/periodic_Matern52.py rename to GPy/kern/_src/periodic_Matern52.py diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/_src/periodic_exponential.py similarity index 100% rename from GPy/kern/periodic_exponential.py rename to GPy/kern/_src/periodic_exponential.py diff --git a/GPy/kern/poly.py b/GPy/kern/_src/poly.py similarity index 100% rename from GPy/kern/poly.py rename to GPy/kern/_src/poly.py diff --git a/GPy/kern/prod.py b/GPy/kern/_src/prod.py similarity index 98% rename from GPy/kern/prod.py rename to GPy/kern/_src/prod.py index 364c91b3..08221de7 100644 --- a/GPy/kern/prod.py +++ b/GPy/kern/_src/prod.py @@ -1,17 +1,17 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern from coregionalize import Coregionalize import numpy as np import hashlib -class Prod(Kernpart): +class Prod(Kern): """ Computes the product of 2 kernels :param k1, k2: the kernels to multiply - :type k1, k2: Kernpart + :type k1, k2: Kern :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces :type tensor: Boolean :rtype: kernel object diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/_src/prod_orthogonal.py similarity index 100% rename from GPy/kern/prod_orthogonal.py rename to GPy/kern/_src/prod_orthogonal.py diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/_src/rational_quadratic.py similarity index 100% rename from GPy/kern/rational_quadratic.py rename to GPy/kern/_src/rational_quadratic.py diff --git a/GPy/kern/rbf.py b/GPy/kern/_src/rbf.py similarity index 92% rename from GPy/kern/rbf.py rename to GPy/kern/_src/rbf.py index 027aa382..36e454e3 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -4,13 +4,13 @@ import numpy as np from scipy import weave -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from kern import Kern +from ..util.linalg import tdot +from ..util.misc import fast_array_equal, param_to_array +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class RBF(Kernpart): +class RBF(Kern): """ Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: @@ -52,30 +52,16 @@ class RBF(Kernpart): lengthscale = np.ones(self.input_dim) self.variance = Param('variance', variance, Logexp()) - + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) self.lengthscale.add_observer(self, self.update_lengthscale) self.update_lengthscale(self.lengthscale) - + self.add_parameters(self.variance, self.lengthscale) self.parameters_changed() # initializes cache - #self.update_inv_lengthscale(self.lengthscale) - #self.parameters_changed() - # initialize cache - #self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - #self._X, self._X2, self._params_save = np.empty(shape=(3, 1)) - - # a set of optional args to pass to weave - # self.weave_options = {'headers' : [''], - # 'extra_compile_args': ['-fopenmp -O3'], # -march=native'], - # 'extra_link_args' : ['-lgomp']} self.weave_options = {} - def on_input_change(self, X): - #self._K_computations(X, None) - pass - def update_lengthscale(self, l): self.lengthscale2 = np.square(self.lengthscale) @@ -84,13 +70,16 @@ class RBF(Kernpart): self._X, self._X2 = np.empty(shape=(2, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - def K(self, X, X2, target): + def K(self, X, X2=None): self._K_computations(X, X2) - target += self.variance * self._K_dvar + return self.variance * self._K_dvar - def Kdiag(self, X, target): - np.add(target, self.variance, target) + def Kdiag(self, X): + ret = np.ones(X.shape[0]) + ret[:] = self.variance + return ret + #TODO: remove TARGET! 
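(Illustrative aside.) The RBF hunks above replace the old in-place convention, where results were accumulated into a caller-supplied `target` array, with methods that allocate and return their result. A rough sketch of the calling pattern implied by the new signatures, with constructor details assumed rather than taken from a released API:

    import numpy as np
    import GPy

    X = np.random.randn(5, 2)
    k = GPy.kern.RBF(2)        # assumed: variance and lengthscale default to 1

    K = k.K(X)                 # (5, 5) covariance matrix, returned directly
    kdiag = k.Kdiag(X)         # (5,) vector filled with the variance
    assert np.allclose(np.diag(K), kdiag)
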
def psi0(self, Z, mu, S, target): target += self.variance @@ -165,7 +154,7 @@ class RBF(Kernpart): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_X(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) if X2 is None: @@ -173,10 +162,10 @@ class RBF(Kernpart): else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) + return np.sum(gradients_X * dL_dK.T[:, :, None], 0) - def dKdiag_dX(self, dL_dKdiag, X, target): - pass + def dKdiag_dX(self, dL_dKdiag, X): + return np.zeros(X.shape[0]) #---------------------------------------# # PSI statistics # diff --git a/GPy/kern/rbf_inv.py b/GPy/kern/_src/rbf_inv.py similarity index 100% rename from GPy/kern/rbf_inv.py rename to GPy/kern/_src/rbf_inv.py diff --git a/GPy/kern/rbfcos.py b/GPy/kern/_src/rbfcos.py similarity index 100% rename from GPy/kern/rbfcos.py rename to GPy/kern/_src/rbfcos.py diff --git a/GPy/kern/spline.py b/GPy/kern/_src/spline.py similarity index 100% rename from GPy/kern/spline.py rename to GPy/kern/_src/spline.py diff --git a/GPy/kern/ss_rbf.py b/GPy/kern/_src/ss_rbf.py similarity index 100% rename from GPy/kern/ss_rbf.py rename to GPy/kern/_src/ss_rbf.py diff --git a/GPy/kern/symmetric.py b/GPy/kern/_src/symmetric.py similarity index 100% rename from GPy/kern/symmetric.py rename to GPy/kern/_src/symmetric.py diff --git a/GPy/kern/sympy_helpers.cpp b/GPy/kern/_src/sympy_helpers.cpp similarity index 100% rename from GPy/kern/sympy_helpers.cpp rename to GPy/kern/_src/sympy_helpers.cpp diff --git a/GPy/kern/sympy_helpers.h b/GPy/kern/_src/sympy_helpers.h similarity index 100% rename from GPy/kern/sympy_helpers.h rename to GPy/kern/_src/sympy_helpers.h diff --git a/GPy/kern/sympykern.py b/GPy/kern/_src/sympykern.py similarity index 100% rename from GPy/kern/sympykern.py rename to GPy/kern/_src/sympykern.py diff --git a/GPy/kern/white.py b/GPy/kern/_src/white.py similarity index 77% rename from GPy/kern/white.py rename to GPy/kern/_src/white.py index c7e4c6dd..7750267f 100644 --- a/GPy/kern/white.py +++ b/GPy/kern/_src/white.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern import numpy as np -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class White(Kernpart): +class White(Kern): """ White noise kernel. 
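(Illustrative aside.) The white-noise kernel is ported to the same return-based interface in the hunk that follows: `K(X)` becomes the variance times the identity and `Kdiag(X)` a constant vector. A small sketch of the expected behaviour, with the constructor and default variance assumed:

    import numpy as np
    import GPy

    X = np.random.randn(4, 1)
    k = GPy.kern.White(1)      # assumed constructor, default variance of 1
    assert np.allclose(k.K(X), np.eye(4) * k.variance)
    assert np.allclose(k.Kdiag(X), float(k.variance))
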
@@ -22,12 +22,14 @@ class White(Kernpart): self.add_parameters(self.variance) self._psi1 = 0 # TODO: more elegance here - def K(self,X,X2,target): + def K(self,X,X2): if X2 is None: - target += np.eye(X.shape[0])*self.variance + return np.eye(X.shape[0])*self.variance - def Kdiag(self,X,target): - target += self.variance + def Kdiag(self,X): + ret = np.ones(X.shape[0]) + ret[:] = self.variance + return ret def update_gradients_full(self, dL_dK, X): self.variance.gradient = np.trace(dL_dK) @@ -38,14 +40,8 @@ class White(Kernpart): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - target += np.sum(dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - pass - - def dKdiag_dX(self,dL_dKdiag,X,target): - pass + def gradients_X(self,dL_dK,X,X2): + return np.zeros_like(X) def psi0(self,Z,mu,S,target): pass # target += self.variance diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py deleted file mode 100644 index 53728d0d..00000000 --- a/GPy/kern/kern.py +++ /dev/null @@ -1,680 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -import sys -import numpy as np -import itertools -from parts.prod import Prod as prod -from parts.linear import Linear -from parts.kernpart import Kernpart -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param - -class kern(Parameterized): - def __init__(self, input_dim, parts=[], input_slices=None): - """ - This is the main kernel class for GPy. It handles multiple - (additive) kernel functions, and keeps track of various things - like which parameters live where. - - The technical code for kernels is divided into _parts_ (see - e.g. rbf.py). This object contains a list of parts, which are - computed additively. For multiplication, special _prod_ parts - are used. 
- - :param input_dim: The dimensionality of the kernel's input space - :type input_dim: int - :param parts: the 'parts' (PD functions) of the kernel - :type parts: list of Kernpart objects - :param input_slices: the slices on the inputs which apply to each kernel - :type input_slices: list of slice objects, or list of bools - - """ - super(kern, self).__init__('kern') - self.add_parameters(*parts) - self.input_dim = input_dim - - if input_slices is None: - self.input_slices = [slice(None) for p in self._parameters_] - else: - assert len(input_slices) == len(self._parameters_) - self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] - - for p in self._parameters_: - assert isinstance(p, Kernpart), "bad kernel part" - - def parameters_changed(self): - [p.parameters_changed() for p in self._parameters_] - - def connect_input(self, Xparam): - [p.connect_input(Xparam) for p in self._parameters_] - - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return Parameterized._getstate(self) + [#self._parameters_, - #self.num_params, - self.input_dim, - self.input_slices, - self._param_slices_ - ] - - def _setstate(self, state): - self._param_slices_ = state.pop() - self.input_slices = state.pop() - self.input_dim = state.pop() - #self.num_params = state.pop() - #self._parameters_ = state.pop() - Parameterized._setstate(self, state) - - - def plot_ARD(self, *args): - """If an ARD kernel is present, plot a bar representation using matplotlib - - See GPy.plotting.matplot_dep.plot_ARD - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." - from ..plotting.matplot_dep import kernel_plots - return kernel_plots.plot_ARD(self,*args) - -# def _transform_gradients(self, g): -# """ -# Apply the transformations of the kernel so that the returned vector -# represents the gradient in the transformed space (i.e. that given by -# get_params_transformed()) -# -# :param g: the gradient vector for the current model, usually created by _param_grad_helper -# """ -# x = self._get_params() -# [np.place(g, index, g[index] * constraint.gradfactor(x[index])) -# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__] -# # for constraint, index in self.constraints.iteritems(): -# # if constraint != __fixed__: -# # g[index] = g[index] * constraint.gradfactor(x[index]) -# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]] -# # if len(self.tied_indices) or len(self.fixed_indices): -# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# # return np.delete(g, to_remove) -# # else: -# if self._fixes_ is not None: return g[self._fixes_] -# return g -# x = self._get_params() -# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] -# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# if len(self.tied_indices) or len(self.fixed_indices): -# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# return np.delete(g, to_remove) -# else: -# return g - - def __add__(self, other): - """ Overloading of the '+' operator. for more control, see self.add """ - return self.add(other) - - def add(self, other, tensor=False): - """ - Add another kernel to this one. 
- - If Tensor is False, both kernels are defined on the same _space_. then - the created kernel will have the same number of inputs as self and - other (which must be the same). - - If Tensor is True, then the dimensions are stacked 'horizontally', so - that the resulting kernel has self.input_dim + other.input_dim - - :param other: the other kernel to be added - :type other: GPy.kern - - """ - if tensor: - D = self.input_dim + other.input_dim - self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices] - other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] - other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] - - newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices) - - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.constraints = self.constraints + other.constraints -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - else: - assert self.input_dim == other.input_dim - newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices) - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - - [newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()] - [newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()] - newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None - - return newkern - - def __call__(self, X, X2=None): - return self.K(X, X2) - - def __mul__(self, other): - """ Here we overload the '*' operator. See self.prod for more information""" - return self.prod(other) - - def __pow__(self, other, tensor=False): - """ - Shortcut for tensor `prod`. - """ - return self.prod(other, tensor=True) - - def prod(self, other, tensor=False): - """ - Multiply two kernels (either on the same space, or on the tensor product of the input space). - - :param other: the other kernel to be added - :type other: GPy.kern - :param tensor: whether or not to use the tensor space (default is false). 
- :type tensor: bool - - """ - K1 = self - K2 = other - #K1 = self.copy() - #K2 = other.copy() - - slices = [] - for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim - s1[sl1], s2[sl2] = [True], [True] - slices += [s1 + s2] - - newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)] - - if tensor: - newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) - else: - newkern = kern(K1.input_dim, newkernparts, slices) - - #newkern._follow_constrains(K1, K2) - return newkern - -# def _follow_constrains(self, K1, K2): -# -# # Build the array that allows to go from the initial indices of the param to the new ones -# K1_param = [] -# n = 0 -# for k1 in K1.parts: -# K1_param += [range(n, n + k1.num_params)] -# n += k1.num_params -# n = 0 -# K2_param = [] -# for k2 in K2.parts: -# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)] -# n += k2.num_params -# index_param = [] -# for p1 in K1_param: -# for p2 in K2_param: -# index_param += p1 + p2 -# index_param = np.array(index_param) -# -# # Get the ties and constrains of the kernels before the multiplication -# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices] -# -# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices] -# prev_constr = K1.constraints + K2.constraints -# -# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices] -# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values -# -# # follow the previous ties -# for arr in prev_ties: -# for j in arr: -# index_param[np.where(index_param == j)[0]] = arr[0] -# -# # ties and constrains -# for i in range(K1.num_params + K2.num_params): -# index = np.where(index_param == i)[0] -# if index.size > 1: -# self.tie_params(index) -# for i, t in zip(prev_constr_ind, prev_constr): -# self.constrain(np.where(index_param == i)[0], t) -# -# def _get_params(self): -# return np.hstack(self._parameters_) -# return np.hstack([p._get_params() for p in self._parameters_]) - -# def _set_params(self, x): -# import ipdb;ipdb.set_trace() -# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)] - -# def _get_param_names(self): -# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count -# part_names = np.array([k.name for k in self._parameters_], dtype=np.str) -# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] -# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] -# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)] -# -# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], []) - - def K(self, X, X2=None, which_parts='all'): - """ - Compute the kernel function. - - :param X: the first set of inputs to the kernel - :param X2: (optional) the second set of arguments to the kernel. If X2 - is None, this is passed throgh to the 'part' object, which - handles this as X2 == X. - :param which_parts: a list of booleans detailing whether to include - each of the part functions. 
By default, 'all' - indicates all parts - """ - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - if X2 is None: - target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - else: - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - return target - - def update_gradients_full(self, dL_dK, X): - [p.update_gradients_full(dL_dK, X) for p in self._parameters_] - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - - def _param_grad_helper(self, dL_dK, X, X2=None): - """ - Compute the gradient of the covariance function with respect to the parameters. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. - :type dL_dK: Np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim) - - returns: dL_dtheta - """ - assert X.shape[1] == self.input_dim - target = np.zeros(self.size) - if X2 is None: - [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - else: - [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - - return self._transform_gradients(target) - - def gradients_X(self, dL_dK, X, X2=None): - """Compute the gradient of the objective function with respect to X. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
- :type dL_dK: np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim)""" - - target = np.zeros_like(X) - if X2 is None: - [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - else: - [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def Kdiag(self, X, which_parts='all'): - """Compute the diagonal of the covariance function for inputs X.""" - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] - return target - - def dKdiag_dtheta(self, dL_dKdiag, X): - """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" - assert X.shape[1] == self.input_dim - assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.size) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - return self._transform_gradients(target) - - def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.input_dim - target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def psi0(self, Z, mu, S): - target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.size) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): - target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi1(self, Z, mu, S): - target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.size)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are num_samples,num_inducing,input_dim""" - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi2(self, Z, mu, S): - """ - Computer the psi2 statistics for the covariance function. 
- - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - - """ - target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: input_slices needed - crossterms = 0 - - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - target += crossterms - return target - - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.size) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - - # compute the "cross" terms - # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): - p1, p2 = self._parameters_[i1], self._parameters_[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] - - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) - - return self._transform_gradients(target) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - # target *= 2 - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) - - return target * 2 - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) - - return target_mu, target_S - - def plot(self, *args, **kwargs): - """ - See GPy.plotting.matplot_dep.plot - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
- from ..plotting.matplot_dep import kernel_plots - kernel_plots.plot(self,*args) - -from GPy.core.model import Model - -class Kern_check_model(Model): - """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Model.__init__(self, 'kernel_test_model') - num_samples = 20 - num_samples2 = 10 - if kernel==None: - kernel = GPy.kern.rbf(1) - if X==None: - X = np.random.randn(num_samples, kernel.input_dim) - if dL_dK==None: - if X2==None: - dL_dK = np.ones((X.shape[0], X.shape[0])) - else: - dL_dK = np.ones((X.shape[0], X2.shape[0])) - - self.kernel=kernel - self.add_parameter(kernel) - self.X = X - self.X2 = X2 - self.dL_dK = dL_dK - - def is_positive_definite(self): - v = np.linalg.eig(self.kernel.K(self.X))[0] - if any(v<-10*sys.float_info.epsilon): - return False - else: - return True - - def log_likelihood(self): - return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() - - def _log_likelihood_gradients(self): - raise NotImplementedError, "This needs to be implemented to use the kern_check_model class." - -class Kern_check_dK_dtheta(Kern_check_model): - """This class allows gradient checks for the gradient of a kernel with respect to parameters. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - - def _log_likelihood_gradients(self): - return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2) - -class Kern_check_dKdiag_dtheta(Kern_check_model): - """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" - def __init__(self, kernel=None, dL_dK=None, X=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - if dL_dK==None: - self.dL_dK = np.ones((self.X.shape[0])) - def parameters_changed(self): - self.kernel.update_gradients_full(self.dL_dK, self.X) - - def log_likelihood(self): - return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() - - def _log_likelihood_gradients(self): - return self.kernel.dKdiag_dtheta(self.dL_dK, self.X) - -class Kern_check_dK_dX(Kern_check_model): - """This class allows gradient checks for the gradient of a kernel with respect to X. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - self.remove_parameter(kernel) - self.X = Param('X', self.X) - self.add_parameter(self.X) - def _log_likelihood_gradients(self): - return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() - -class Kern_check_dKdiag_dX(Kern_check_dK_dX): - """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - if dL_dK==None: - self.dL_dK = np.ones((self.X.shape[0])) - - def log_likelihood(self): - return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() - - def _log_likelihood_gradients(self): - return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() - -def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): - """ - This function runs on kernels to check the correctness of their - implementation. It checks that the covariance function is positive definite - for a randomly generated data set. 
- - :param kern: the kernel to be tested. - :type kern: GPy.kern.Kernpart - :param X: X input values to test the covariance function. - :type X: ndarray - :param X2: X2 input values to test the covariance function. - :type X2: ndarray - - """ - pass_checks = True - if X==None: - X = np.random.randn(10, kern.input_dim) - if output_ind is not None: - X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) - if X2==None: - X2 = np.random.randn(20, kern.input_dim) - if output_ind is not None: - X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) - - if verbose: - print("Checking covariance function is positive definite.") - result = Kern_check_model(kern, X=X).is_positive_definite() - if result and verbose: - print("Check passed.") - if not result: - print("Positive definite check failed for " + kern.name + " covariance function.") - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X) wrt theta.") - result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X2) wrt theta.") - result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of Kdiag(X) wrt theta.") - result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X) wrt X.") - try: - result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X2) wrt X.") - try: - result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of Kdiag(X) wrt X.") - try: - result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) - pass_checks = False - return False - - return pass_checks diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py deleted file mode 100644 index 06f1446b..00000000 --- a/GPy/kern/kernpart.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) -#from ...core.parameterized.Parameterized import set_as_parameter -from ...core.parameterization import Parameterized - -class Kernpart(Parameterized): - def __init__(self,input_dim,name): - """ - The base class for a kernpart: a positive definite function - which forms part of a covariance function (kernel). - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. - """ - super(Kernpart, self).__init__(name) - # the input dimensionality for the covariance - self.input_dim = input_dim - # the number of optimisable parameters - # the name of the covariance function. - # link to parameterized objects - #self._X = None - - def connect_input(self, X): - X.add_observer(self, self.on_input_change) - #self._X = X - - def on_input_change(self, X): - """ - During optimization this function will be called when - the inputs X changed. Use this to update caches dependent - on the inputs X. - """ - # overwrite this to update kernel when inputs X change - pass - - -# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs): -# """ -# :param names: name of parameter to set as parameter -# :param gradient: gradient method to get the gradient of this parameter -# :param index: index of where to place parameter in printing -# :param args, kwargs: additional arguments to gradient -# -# Convenience method to connect Kernpart parameters: -# parameter with name (attribute of this Kernpart) will be set as parameter with following name: -# -# kernel_name + _ + parameter_name -# -# To add the kernels name to the parameter name use this method to -# add parameters. -# """ -# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs) -# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs): -# """ -# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter` -# -# Note: this method adds the kernels name in front of the parameter. 
-# """ -# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs) -# if index is None: -# self._parameters_.append(p) -# else: -# self._parameters_.insert(index, p) -# self.__dict__[name] = p - #set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable -# def _get_params(self): -# raise NotImplementedError -# def _set_params(self,x): -# raise NotImplementedError -# def _get_param_names(self): -# raise NotImplementedError - def K(self,X,X2,target): - raise NotImplementedError - def Kdiag(self,X,target): - raise NotImplementedError - def _param_grad_helper(self,dL_dK,X,X2,target): - raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - # In the base case compute this by calling _param_grad_helper. Need to - # override for stationary covariances (for example) to save - # time. - for i in range(X.shape[0]): - self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) - def psi0(self,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi1(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dtheta(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi2(self,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def gradients_X(self, dL_dK, X, X2, target): - raise NotImplementedError - def dKdiag_dX(self, dL_dK, X, target): - raise NotImplementedError - def update_gradients_full(self, dL_dK, X): - """Set the gradients of all parameters when doing full (N) inference.""" - raise NotImplementedError - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - """Set the gradients of all parameters when doing sparse (M) inference.""" - raise NotImplementedError - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" - raise NotImplementedError - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # 
For stationary covariances, derivative of diagonal elements - # wrt lengthscale is 0. - target[0] += np.sum(dL_dKdiag) - - def dKdiag_dX(self, dL_dK, X, target): - pass # true for all stationary kernels - - -class Kernpart_inner(Kernpart): - def __init__(self,input_dim): - """ - The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. - """ - Kernpart.__init__(self, input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 3e105785..b4f987ea 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -7,7 +7,7 @@ from GPy.util.linalg import PCA import numpy import itertools import pylab -from GPy.kern.kern import kern +from GPy.kern.kern import Kern from GPy.models.bayesian_gplvm import BayesianGPLVM class MRD(Model): @@ -48,11 +48,11 @@ class MRD(Model): # sort out the kernels if kernels is None: kernels = [None] * len(likelihood_or_Y_list) - elif isinstance(kernels, kern): + elif isinstance(kernels, Kern): kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))] else: assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output" - assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!" + assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!" assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 19c96bc0..80350475 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -7,7 +7,7 @@ import pylab as pb import Tango from matplotlib.textpath import TextPath from matplotlib.transforms import offset_copy -from ...kern.parts.linear import Linear +from ...kern.linear import Linear def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): From 493506408ca09dc62e9871b8d3c06019a046fa75 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 15:01:35 +0000 Subject: [PATCH 06/38] =?UTF-8?q?init=20for=20src=20dir=C2=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPy/kern/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 2098bd76..7760f48f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,6 +1,6 @@ -from rbf import RBF -from white import White -from kern import Kern +from _src.rbf import RBF +from _src.white import White +from _src.kern import Kern #import bias #import Brownian #import coregionalize From c4f6b0dbe7e391256a5ae7f729ea649ce48efcf1 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 19 Feb 2014 15:32:16 +0000 Subject: [PATCH 07/38] copy and missing data --- GPy/core/__init__.py | 4 +- GPy/core/model.py | 6 +- GPy/core/parameterization/param.py | 13 ++- GPy/core/parameterization/parameter_core.py | 82 +++++++++++++++++-- GPy/core/parameterization/parameterized.py | 57 ++----------- GPy/core/sparse_gp.py | 17 ++-- GPy/examples/dimensionality_reduction.py | 11 ++- .../latent_function_inference/__init__.py | 16 +++- .../latent_function_inference/var_dtc.py | 54 +++++++----- GPy/models/mrd.py 
| 16 ++++ 10 files changed, 179 insertions(+), 97 deletions(-) diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 839529d6..a42d76ed 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -2,7 +2,9 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from model import * -from parameterization.parameterized import * +from parameterization.parameterized import adjust_name_for_printing, Parameterizable +from parameterization.param import Param, ParamConcatenation + from gp import GP from sparse_gp import SparseGP from svigp import SVIGP diff --git a/GPy/core/model.py b/GPy/core/model.py index 55083aaf..c067d51d 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -4,12 +4,8 @@ from .. import likelihoods from ..inference import optimization -from ..util.linalg import jitchol from ..util.misc import opt_wrapper from parameterization import Parameterized -from parameterization.parameterized import UNFIXED -from parameterization.domains import _POSITIVE, _REAL -from parameterization.index_operations import ParameterIndexOperations import multiprocessing as mp import numpy as np from numpy.linalg.linalg import LinAlgError @@ -240,7 +236,7 @@ class Model(Parameterized): constrained positive. """ raise DeprecationWarning, 'parameters now have default constraints' - positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] + #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] # param_names = self._get_param_names() # for s in positive_strings: diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f54c0117..49d6682c 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -3,7 +3,7 @@ import itertools import numpy -from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing +from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing from array_core import ObservableArray, ParamList ###### printing @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable): +class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable): """ Parameter object for GPy models. 
@@ -114,7 +114,14 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri self._parent_index_ = state.pop() self._direct_parent_ = state.pop() self.name = state.pop() - + + def copy(self, *args): + constr = self.constraints.copy() + priors = self.priors.copy() + p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_) + p.constraints = constr + p.priors = priors + return p #=========================================================================== # get/set parameters #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..9002adc3 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -68,6 +68,10 @@ class Parentable(object): return self return self._direct_parent_._highest_parent_ + def _notify_parameters_changed(self): + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + class Nameable(Parentable): _name = None def __init__(self, name, direct_parent=None, parent_index=None): @@ -80,22 +84,47 @@ class Nameable(Parentable): @name.setter def name(self, name): from_name = self.name + assert isinstance(name, str) self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, from_name) - + self._direct_parent_._name_changed(self, from_name) class Parameterizable(Parentable): def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.array_core import ParamList _parameters_ = ParamList() + self._added_names_ = set() def parameter_names(self, add_name=False): if add_name: return [adjust_name_for_printing(self.name) + "." 
+ xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] + + def _add_parameter_name(self, param): + pname = adjust_name_for_printing(param.name) + # and makes sure to not delete programmatically added parameters + if pname in self.__dict__: + if not (param is self.__dict__[pname]): + if pname in self._added_names_: + del self.__dict__[pname] + self._add_parameter_name(param) + else: + self.__dict__[pname] = param + self._added_names_.add(pname) + + def _remove_parameter_name(self, param=None, pname=None): + assert param is None or pname is None, "can only delete either param by name, or the name of a param" + pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) + if pname in self._added_names_: + del self.__dict__[pname] + self._added_names_.remove(pname) + self._connect_parameters() + def _name_changed(self, param, old_name): + self._remove_parameter_name(None, old_name) + self._add_parameter_name(param) + def _collect_gradient(self, target): import itertools [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] @@ -113,6 +142,38 @@ class Parameterizable(Parentable): [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] self.parameters_changed() + def copy(self): + """Returns a (deep) copy of the current model""" + import copy + from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView + from .array_core import ParamList + dc = dict() + for k, v in self.__dict__.iteritems(): + if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): + if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): + dc[k] = v.copy() + else: + dc[k] = copy.deepcopy(v) + if k == '_parameters_': + params = [p.copy() for p in v] + #dc = copy.deepcopy(self.__dict__) + dc['_direct_parent_'] = None + dc['_parent_index_'] = None + dc['_parameters_'] = ParamList() + s = self.__new__(self.__class__) + s.__dict__ = dc + #import ipdb;ipdb.set_trace() + for p in params: + s.add_parameter(p) + #dc._notify_parent_change() + return s + #return copy.deepcopy(self) + + def _notify_parameters_changed(self): + self.parameters_changed() + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + def parameters_changed(self): """ This method gets called when parameters have changed. @@ -122,11 +183,6 @@ class Parameterizable(Parentable): """ pass - def _notify_parameters_changed(self): - self.parameters_changed() - if self.has_parent(): - self._direct_parent_._notify_parameters_changed() - class Gradcheckable(Parentable): #=========================================================================== @@ -157,7 +213,7 @@ class Indexable(object): """ raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" 
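(Illustrative aside.) The new `copy` implementations above, on `Param` and on `Parameterizable`, aim to return a deep copy in which parameter values, constraints and priors are duplicated and the copy is detached from the original's parent. A hypothetical check of that behaviour, with attribute names taken from the diff rather than from a released API:

    import GPy

    k = GPy.kern.RBF(1)
    k2 = k.copy()                      # independent object: parameters and constraints duplicated

    k2.lengthscale[:] = 5.0            # editing the copy ...
    assert float(k.lengthscale) != float(k2.lengthscale)  # ... leaves the original untouched
    assert k2._direct_parent_ is None  # the copy starts out without a parent
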
-class Constrainable(Nameable, Indexable, Parameterizable): +class Constrainable(Nameable, Indexable, Parentable): def __init__(self, name, default_constraint=None): super(Constrainable,self).__init__(name) self._default_constraint_ = default_constraint @@ -167,6 +223,16 @@ class Constrainable(Nameable, Indexable, Parameterizable): if self._default_constraint_ is not None: self.constrain(self._default_constraint_) + def _disconnect_parent(self, constr=None): + if constr is None: + constr = self.constraints.copy() + self.constraints.clear() + self.constraints = constr + self._direct_parent_ = None + self._parent_index_ = None + self._connect_fixes() + self._notify_parent_change() + #=========================================================================== # Fixing Parameters: #=========================================================================== diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index c8a841c0..cef1daa2 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -3,16 +3,15 @@ import numpy; np = numpy -import copy import cPickle import itertools from re import compile, _pattern_type -from param import ParamConcatenation, Param -from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable -from transformations import __fixed__, FIXED, UNFIXED +from param import ParamConcatenation +from parameter_core import Constrainable, Pickleable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable +from transformations import __fixed__ from array_core import ParamList -class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): +class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parameterizable): """ Parameterized class @@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): self._fixes_ = None self._param_slices_ = [] self._connect_parameters() - self._added_names_ = set() del self._in_init_ def add_parameter(self, param, index=None): @@ -117,17 +115,10 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()) del self._parameters_[param._parent_index_] self.size -= param.size - constr = param.constraints.copy() - param.constraints.clear() - param.constraints = constr - param._direct_parent_ = None - param._parent_index_ = None - param._connect_fixes() - param._notify_parent_change() - pname = adjust_name_for_printing(param.name) - if pname in self._added_names_: - del self.__dict__[pname] - self._connect_parameters() + + param._disconnect_parent() + self._remove_parameter_name(param) + #self._notify_parent_change() self._connect_fixes() @@ -145,19 +136,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): for i, p in enumerate(self._parameters_): p._direct_parent_ = self p._parent_index_ = i - not_unique = [] sizes.append(p.size + sizes[-1]) self._param_slices_.append(slice(sizes[-2], sizes[-1])) - pname = adjust_name_for_printing(p.name) - # and makes sure to not delete programmatically added parameters - if pname in self.__dict__: - if isinstance(self.__dict__[pname], (Parameterized, Param)): - if not p is self.__dict__[pname]: - not_unique.append(pname) - del self.__dict__[pname] - elif not (pname in not_unique): - self.__dict__[pname] = p - 
self._added_names_.add(pname) + self._add_parameter_name(p) #=========================================================================== # Pickling operations @@ -174,19 +155,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): cPickle.dump(self, f, protocol) else: cPickle.dump(self, f, protocol) - def copy(self): - """Returns a (deep) copy of the current model """ - # dc = dict() - # for k, v in self.__dict__.iteritems(): - # if k not in ['_highest_parent_', '_direct_parent_']: - # dc[k] = copy.deepcopy(v) - # dc = copy.deepcopy(self.__dict__) - # dc['_highest_parent_'] = None - # dc['_direct_parent_'] = None - # s = self.__class__.new() - # s.__dict__ = dc - return copy.deepcopy(self) def __getstate__(self): if self._has_get_set_state(): return self._getstate() @@ -265,14 +234,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] return p - def _name_changed(self, param, old_name): - if hasattr(self, old_name) and old_name in self._added_names_: - delattr(self, old_name) - self._added_names_.remove(old_name) - pname = adjust_name_for_printing(param.name) - if pname not in self.__dict__: - self._added_names_.add(pname) - self.__dict__[pname] = param #=========================================================================== # Indexable Handling #=========================================================================== diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index edb8d8f6..1d436c53 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -33,12 +33,12 @@ class SparseGP(GP): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'): - #pick a sensible inference method + # pick a sensible inference method if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian): inference_method = var_dtc.VarDTC() else: - #inference_method = ?? + # inference_method = ?? raise NotImplementedError, "what to do what to do?" 
print "defaulting to ", inference_method, "for latent function inference" @@ -54,7 +54,7 @@ class SparseGP(GP): self.parameters_changed() def _update_gradients_Z(self, add=False): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) + # The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) if not self.Z.is_fixed: if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) @@ -77,13 +77,14 @@ class SparseGP(GP): mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting + var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx), Kx, [1,0]).T else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) - var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)[:, None] + #import ipdb;ipdb.set_trace() + var = Kxx - (np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T * Kx.T[:,:,None]).sum(1) else: # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" @@ -91,7 +92,7 @@ class SparseGP(GP): Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - return mu, var[:,None] + return mu, var def _getstate(self): diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a7eb0adb..2924386f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -1,9 +1,9 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as _np -default_seed = _np.random.seed(123344) +#default_seed = _np.random.seed(123344) -def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False): +def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False): """ model for testing purposes. Samples from a GP with rbf kernel and learns the samples with a new kernel. 
Normally not for optimization, just model cheking @@ -41,7 +41,7 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, if nan: m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData() - m.Y[_np.random.binomial(1,p,size=(Y.shape))] = _np.nan + m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan m.parameters_changed() #=========================================================================== @@ -186,6 +186,8 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, return m def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): + _np.random.seed(1234) + x = _np.linspace(0, 4 * _np.pi, N)[:, None] s1 = _np.vectorize(lambda x: _np.sin(x)) s2 = _np.vectorize(lambda x: _np.cos(x)) @@ -293,10 +295,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, Y = Ylist[0] k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - inan = _np.random.binomial(1, .3, size=Y.shape) + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k) m.inference_method = VarDTCMissingData() m.Y[inan] = _np.nan + m.q.variance *= .1 m.parameters_changed() if optimize: diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 337a8477..a633c381 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference is not tractable. We then resort to a Laplace approximation (laplace.py) or expectation propagation (ep.py). -The inference methods return a "Posterior" instance, which is a simple +The inference methods return a +:class:`~GPy.inference.latent_function_inference.posterior.Posterior` +instance, which is a simple structure which contains a summary of the posterior. The model classes can then use this posterior object for making predictions, optimizing hyper-parameters, etc. @@ -29,3 +31,15 @@ expectation_propagation = 'foo' # TODO from GPy.inference.latent_function_inference.var_dtc import VarDTC from dtc import DTC from fitc import FITC + +# class FullLatentFunctionData(object): +# +# +# class LatentFunctionInference(object): +# def inference(self, kern, X, likelihood, Y, Y_metadata=None): +# """ +# Do inference on the latent functions given a covariance function `kern`, +# inputs and outputs `X` and `Y`, and a likelihood `likelihood`. +# Additional metadata for the outputs `Y` can be given in `Y_metadata`. 
+# """ +# raise NotImplementedError, "Abstract base class for full inference" \ No newline at end of file diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 264f7fc3..2f11cb08 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -139,7 +139,8 @@ class VarDTCMissingData(object): dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing)) partial_for_likelihood = 0 - LB_all = Cpsi1Vf_all = 0 + woodbury_vector = np.zeros((num_inducing, Y.shape[1])) + woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1])) dL_dKmm = 0 log_marginal = 0 @@ -153,6 +154,8 @@ class VarDTCMissingData(object): VVT_factor_all = np.empty(Y.shape) full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] + if not full_VVT_factor: + psi1V = np.dot(Y.T*beta_all, psi1_all).T for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): if het_noise: beta = beta_all[ind] @@ -185,8 +188,7 @@ class VarDTCMissingData(object): psi1Vf = psi1.T.dot(VVT_factor) _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) - if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf - LB_all += LB + #LB_all[ind, :,:] = LB # data fit and derivative of L w.r.t. Kmm delit = tdot(_LBi_Lmi_psi1Vf) @@ -219,6 +221,21 @@ class VarDTCMissingData(object): psi0, psi1, beta, data_fit, num_data, output_dim, trYYT) + if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf + else: + print 'foobar' + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] + + #import ipdb;ipdb.set_trace() + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + from ...util import diag + diag.add(Bi, 1) + woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] + # gradients: likelihood.update_gradients(partial_for_likelihood) @@ -231,23 +248,22 @@ class VarDTCMissingData(object): #get sufficient things for posterior prediction #TODO: do we really want to do this in the loop? 
- if full_VVT_factor: - woodbury_vector = Cpsi1Vf_all # == Cpsi1V - else: - print 'foobar' - psi1V = np.dot(Y.T*beta_all, psi1_all).T - tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) - tmp, _ = dpotrs(LB_all, tmp, lower=1) - woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - - Bi, _ = dpotri(LB_all, lower=1) - symmetrify(Bi) - Bi = -dpotri(LB_all, lower=1)[0] - from ...util import diag - diag.add(Bi, 1) + #if not full_VVT_factor: + # print 'foobar' + # psi1V = np.dot(Y.T*beta_all, psi1_all).T + # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + # tmp, _ = dpotrs(LB_all, tmp, lower=1) + # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + #import ipdb;ipdb.set_trace() + #Bi, _ = dpotri(LB_all, lower=1) + #symmetrify(Bi) + #Bi = -dpotri(LB_all, lower=1)[0] + #from ...util import diag + #diag.add(Bi, 1) - woodbury_inv = backsub_both_sides(Lm, Bi) - post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + #woodbury_inv = backsub_both_sides(Lm, Bi) + + post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) return post, log_marginal, grad_dict diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 3e105785..511ce5aa 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -10,6 +10,22 @@ import pylab from GPy.kern.kern import kern from GPy.models.bayesian_gplvm import BayesianGPLVM +class MRD2(Model): + """ + Apply MRD to all given datasets Y in Ylist. + + Y_i in [n x p_i] + + The samples n in the datasets need + to match up, whereas the dimensionality p_d can differ. + + :param [array-like] Ylist: List of datasets to apply MRD on + :param array-like q_mean: mean of starting latent space q in [n x q] + :param array-like q_variance: variance of starting latent space q in [n x q] + :param :class:`~GPy.inference.latent_function_inference + """ + + class MRD(Model): """ Do MRD on given Datasets in Ylist. 
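The VarDTCMissingData and SparseGP changes above replace the single shared Woodbury matrix with one Woodbury inverse per output column, so the predictive variance is computed for all outputs at once. A minimal NumPy sketch of that per-output computation follows (illustrative only, not part of the patches; the names M, N, P, woodbury_inv, Kx and Kxx_diag are assumptions standing in for num_inducing, num_test, output_dim, posterior.woodbury_inv, kern.K(Z, Xnew) and kern.Kdiag(Xnew)):

    import numpy as np

    M, N, P = 5, 7, 3                                # inducing points, test points, outputs
    rng = np.random.RandomState(0)
    A = rng.randn(P, M, M)
    woodbury_inv = np.einsum('pij,pkj->ikp', A, A)   # (M, M, P): one SPD matrix per output
    Kx = rng.randn(M, N)                             # stands in for kern.K(Z, Xnew)
    Kxx_diag = rng.rand(N) + 1.0                     # stands in for kern.Kdiag(Xnew)

    # per-output loop: var[:, p] = Kxx - diag(Kx.T  W_p  Kx)
    var_loop = np.array([Kxx_diag - np.sum(Kx * woodbury_inv[:, :, p].dot(Kx), 0)
                         for p in range(P)]).T

    # batched form matching SparseGP._raw_predict after the patch above
    var_batch = Kxx_diag[:, None] - (np.dot(np.atleast_3d(woodbury_inv).T, Kx).T
                                     * Kx.T[:, :, None]).sum(1)

    assert np.allclose(var_loop, var_batch)          # both give an (N, P) variance array

Both forms give the same (N, P) array; the batched version simply avoids looping over the output dimension inside _raw_predict.
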
From fd0dd8df85a3045bba76b8567ecdcd96d2b583c7 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 19 Feb 2014 15:50:13 +0000 Subject: [PATCH 08/38] updated naming to be consistent --- GPy/core/parameterization/param.py | 7 +------ GPy/core/parameterization/parameter_core.py | 4 ++++ GPy/core/parameterization/parameterized.py | 20 ++++++-------------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 49d6682c..e49dbe2e 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -238,11 +238,6 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable @property def _ties_str(self): return [t._short() for t in self._tied_to_] or [''] - @property - def name_hirarchical(self): - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) - return adjust_name_for_printing(self.name) def __repr__(self, *args, **kwargs): name = "\033[1m{x:s}\033[0;0m:\n".format( x=self.name_hirarchical) @@ -284,7 +279,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__)) def _short(self): # short string to print - name = self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + name = self.hirarchy_name() if self._realsize_ < 2: return name ind = self._indices() diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 9002adc3..2b2283c2 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -88,6 +88,10 @@ class Nameable(Parentable): self._name = name if self.has_parent(): self._direct_parent_._name_changed(self, from_name) + def hirarchy_name(self): + if self.has_parent(): + return self._direct_parent_.hirarchy_name() + "." 
+ adjust_name_for_printing(self.name) + return adjust_name_for_printing(self.name) class Parameterizable(Parentable): def __init__(self, *args, **kwargs): diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index cef1daa2..f510d330 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -7,11 +7,11 @@ import cPickle import itertools from re import compile, _pattern_type from param import ParamConcatenation -from parameter_core import Constrainable, Pickleable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable +from parameter_core import Constrainable, Pickleable, Observable, Parameterizable, Parentable, adjust_name_for_printing, Gradcheckable from transformations import __fixed__ from array_core import ParamList -class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parameterizable): +class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parameterizable, Parentable): """ Parameterized class @@ -212,7 +212,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame # Optimization handles: #=========================================================================== def _get_param_names(self): - n = numpy.array([p.name_hirarchical + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) + n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) return n def _get_param_names_transformed(self): n = self._get_param_names() @@ -296,10 +296,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame # you can retrieve the original param through this method, by passing # the copy here return self._parameters_[param._parent_index_] - def hirarchy_name(self): - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "." 
- return '' #=========================================================================== # Get/set parameters: #=========================================================================== @@ -309,8 +305,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame """ if not isinstance(regexp, _pattern_type): regexp = compile(regexp) found_params = [] - for p in self._parameters_: - if regexp.match(p.name) is not None: + for p in self.flattened_parameters: + if regexp.match(p.hirarchy_name()) is not None: found_params.append(p) if isinstance(p, Parameterized): found_params.extend(p.grep_param_names(regexp)) @@ -352,11 +348,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame # Printing: #=========================================================================== def _short(self): - # short string to print - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) - else: - return adjust_name_for_printing(self.name) + return self.hirarchy_name() @property def flattened_parameters(self): return [xi for x in self._parameters_ for xi in x.flattened_parameters] From 1c3fe0c51e9a6741f3ca25496cf24471aaf37686 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 19 Feb 2014 16:54:25 +0000 Subject: [PATCH 09/38] regexp now on all parameters --- GPy/core/parameterization/param.py | 12 +++++---- GPy/core/parameterization/parameter_core.py | 19 ++++++++------ GPy/core/parameterization/parameterized.py | 28 ++++++--------------- 3 files changed, 27 insertions(+), 32 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index e49dbe2e..75d9faf2 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -221,7 +221,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable def _description_str(self): if self.size <= 1: return ["%f" % self] else: return [str(self.shape)] - def parameter_names(self, add_name=False): + def parameter_names(self, add_self=False, adjust_for_printing=False): + if adjust_for_printing: + return [adjust_name_for_printing(self.name)] return [self.name] @property def flattened_parameters(self): @@ -240,7 +242,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable return [t._short() for t in self._tied_to_] or [''] def __repr__(self, *args, **kwargs): name = "\033[1m{x:s}\033[0;0m:\n".format( - x=self.name_hirarchical) + x=self.hirarchy_name()) return name + super(Param, self).__repr__(*args, **kwargs) def _ties_for(self, rav_index): # size = sum(p.size for p in self._tied_to_) @@ -274,7 +276,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable gen = map(lambda x: " ".join(map(str, x)), gen) return reduce(lambda a, b:max(a, len(b)), gen, len(header)) def _max_len_values(self): - return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical)) + return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name())) def _max_len_index(self, ind): return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__)) def _short(self): @@ -302,8 +304,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable if lp is None: lp = self._max_len_names(prirs, __tie_name__) sep = '-' header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}" - if 
only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing - else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing + if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing + else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing if not ties: ties = itertools.cycle(['']) return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices # except: return super(Param, self).__str__() diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 2b2283c2..9a10f317 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -88,10 +88,12 @@ class Nameable(Parentable): self._name = name if self.has_parent(): self._direct_parent_._name_changed(self, from_name) - def hirarchy_name(self): + def hirarchy_name(self, adjust_for_printing=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x if self.has_parent(): - return self._direct_parent_.hirarchy_name() + "." + adjust_name_for_printing(self.name) - return adjust_name_for_printing(self.name) + return self._direct_parent_.hirarchy_name() + "." + adjust(self.name) + return adjust(self.name) class Parameterizable(Parentable): def __init__(self, *args, **kwargs): @@ -100,10 +102,13 @@ class Parameterizable(Parentable): _parameters_ = ParamList() self._added_names_ = set() - def parameter_names(self, add_name=False): - if add_name: - return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] - return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self._parameters_] + if add_self: names = map(lambda x: adjust(self.name) + "." 
+ x, names) + return names def _add_parameter_name(self, param): pname = adjust_name_for_printing(param.name) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index f510d330..12bf936c 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -305,13 +305,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame """ if not isinstance(regexp, _pattern_type): regexp = compile(regexp) found_params = [] - for p in self.flattened_parameters: - if regexp.match(p.hirarchy_name()) is not None: + for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters): + if regexp.match(n) is not None: found_params.append(p) - if isinstance(p, Parameterized): - found_params.extend(p.grep_param_names(regexp)) return found_params - return [param for param in self._parameters_ if regexp.match(param.name) is not None] + def __getitem__(self, name, paramlist=None): if paramlist is None: paramlist = self.grep_param_names(name) @@ -323,26 +321,16 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame return ParamConcatenation(paramlist) return paramlist[-1] return ParamConcatenation(paramlist) + def __setitem__(self, name, value, paramlist=None): try: param = self.__getitem__(name, paramlist) except AttributeError as a: raise a param[:] = value -# def __getattr__(self, name): -# return self.__getitem__(name) -# def __getattribute__(self, name): -# #try: -# return object.__getattribute__(self, name) - # except AttributeError: - # _, a, tb = sys.exc_info() - # try: - # return self.__getitem__(name) - # except AttributeError: - # raise AttributeError, a.message, tb def __setattr__(self, name, val): - # override the default behaviour, if setting a param, so broadcasting can by used - if hasattr(self, "_parameters_"): - paramlist = self.grep_param_names(name) - if len(paramlist) == 1: self.__setitem__(name, val, paramlist); return + # override the default behaviour, if setting a param, so broadcasting can by used + if hasattr(self, '_parameters_'): + pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) + if name in pnames: self._parameters_[pnames.index(name)][:] = val; return object.__setattr__(self, name, val); #=========================================================================== # Printing: From 92d71384b77aca1a5a2190ce2062624670fea9a8 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 17:37:18 +0000 Subject: [PATCH 10/38] deleted kernpart, prod and add seem to work okay. 
--- GPy/core/gp.py | 26 ++++----- GPy/examples/regression.py | 38 ++++++------ GPy/kern/__init__.py | 1 + GPy/kern/_src/add.py | 70 ++++------------------ GPy/kern/_src/coregionalize.py | 12 ++-- GPy/kern/_src/kern.py | 4 +- GPy/kern/_src/kernpart.py | 60 ------------------- GPy/kern/_src/linear.py | 8 +-- GPy/kern/_src/prod.py | 74 +++++++----------------- GPy/kern/_src/rbf.py | 8 +-- GPy/kern/_src/white.py | 6 +- GPy/models/gp_regression.py | 2 +- GPy/models/mrd.py | 2 +- GPy/plotting/matplot_dep/kernel_plots.py | 2 +- GPy/plotting/matplot_dep/models_plots.py | 16 +++-- GPy/util/datasets.py | 4 +- 16 files changed, 95 insertions(+), 238 deletions(-) delete mode 100644 GPy/kern/_src/kernpart.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 10ba8e6b..2dcf0e14 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -70,7 +70,7 @@ class GP(Model): def log_likelihood(self): return self._log_marginal_likelihood - def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): + def _raw_predict(self, _Xnew, full_cov=False): """ Internal helper function for making predictions, does not account for normalization or likelihood @@ -80,29 +80,27 @@ class GP(Model): diagonal of the covariance is returned. """ - Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T + Kx = self.kern.K(_Xnew, self.X).T #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1) WiKx = np.dot(self.posterior.woodbury_inv, Kx) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: - Kxx = self.kern.K(_Xnew, which_parts=which_parts) + Kxx = self.kern.K(_Xnew) #var = Kxx - tdot(LiKx.T) var = np.dot(Kx.T, WiKx) else: - Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) + Kxx = self.kern.Kdiag(_Xnew) #var = Kxx - np.sum(LiKx*LiKx, 0) var = Kxx - np.sum(WiKx*Kx, 0) var = var.reshape(-1, 1) return mu, var - def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args): + def predict(self, Xnew, full_cov=False, **likelihood_args): """ Predict the function(s) at the new point(s) Xnew. :param Xnew: The points at which to make a prediction :type Xnew: np.ndarray, Nnew x self.input_dim - :param which_parts: specifies which outputs kernel(s) to use in prediction - :type which_parts: ('all', list of bools) :param full_cov: whether to return the full covariance matrix, or just the diagonal :type full_cov: bool @@ -118,13 +116,13 @@ class GP(Model): """ #predict the latent function values - mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) + mu, var = self._raw_predict(Xnew, full_cov=full_cov) # now push through likelihood mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args) return mean, var, _025pm, _975pm - def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True): + def posterior_samples_f(self,X,size=10, full_cov=True): """ Samples the posterior GP at the points X. @@ -132,13 +130,11 @@ class GP(Model): :type X: np.ndarray, Nnew x self.input_dim. :param size: the number of a posteriori samples. :type size: int. - :param which_parts: which of the kernel functions to use (additively). - :type which_parts: 'all', or list of bools. :param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. :returns: Ysim: set of simulations, a Numpy array (N x samples). 
""" - m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov) + m, v = self._raw_predict(X, full_cov=full_cov) v = v.reshape(m.size,-1) if len(v.shape)==3 else v if not full_cov: Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T @@ -147,7 +143,7 @@ class GP(Model): return Ysim - def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None): + def posterior_samples(self,X,size=10, full_cov=True,noise_model=None): """ Samples the posterior GP at the points X. @@ -155,15 +151,13 @@ class GP(Model): :type X: np.ndarray, Nnew x self.input_dim. :param size: the number of a posteriori samples. :type size: int. - :param which_parts: which of the kernel functions to use (additively). - :type which_parts: 'all', or list of bools. :param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. :param noise_model: for mixed noise likelihood, the noise model to use in the samples. :type noise_model: integer. :returns: Ysim: set of simulations, a Numpy array (N x samples). """ - Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov) + Ysim = self.posterior_samples_f(X, size, full_cov=full_cov) if isinstance(self.likelihood, Gaussian): noise_std = np.sqrt(self.likelihood._get_params()) Ysim += np.random.normal(0,noise_std,Ysim.shape) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 55567051..5cac1857 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True): Y = np.vstack((Y1, Y2)) #build the kernel - k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) + k1 = GPy.kern.RBF(1) + GPy.kern.bias(1) k2 = GPy.kern.coregionalize(2,1) k = k1**k2 m = GPy.models.GPRegression(X, Y, kernel=k) @@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True): # Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 # Y = np.vstack((Y1, Y2)) # -# k1 = GPy.kern.rbf(1) +# k1 = GPy.kern.RBF(1) # m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) # m.constrain_fixed('.*rbf_var', 1.) 
# m.optimize(max_iters=100) @@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True): Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], np.random.randint(0, 4, num_inducing)[:, None])) - k1 = GPy.kern.rbf(1) + k1 = GPy.kern.RBF(1) k2 = GPy.kern.coregionalize(output_dim=5, rank=5) k = k1**k2 @@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 data['Y'] = data['Y'] - np.mean(data['Y']) - lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) + lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF) if plot: pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) ax = pb.gca() @@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 optim_point_y = np.empty(2) np.random.seed(seed=seed) for i in range(0, model_restarts): - # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) - kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) + # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m['noise_variance'] = np.random.uniform(1e-3, 1) @@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 ax.set_ylim(ylim) return m # (models, lls) -def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): +def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF): """ Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. 
@@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True): optimizer='scg' x_len = 30 X = np.linspace(0, 10, x_len)[:, None] - f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) + f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X)) Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] - kern = GPy.kern.rbf(1) + kern = GPy.kern.RBF(1) poisson_lik = GPy.likelihoods.Poisson() laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() @@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize if kernel_type == 'linear': kernel = GPy.kern.linear(X.shape[1], ARD=1) elif kernel_type == 'rbf_inv': - kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) + kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) else: - kernel = GPy.kern.rbf(X.shape[1], ARD=1) - kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + kernel = GPy.kern.RBF(X.shape[1], ARD=1) + kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1]) m = GPy.models.GPRegression(X, Y, kernel) # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 # m.set_prior('.*lengthscale',len_prior) @@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o if kernel_type == 'linear': kernel = GPy.kern.linear(X.shape[1], ARD=1) elif kernel_type == 'rbf_inv': - kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) + kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) else: - kernel = GPy.kern.rbf(X.shape[1], ARD=1) + kernel = GPy.kern.RBF(X.shape[1], ARD=1) #kernel += GPy.kern.bias(X.shape[1]) X_variance = np.ones(X.shape) * 0.5 m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance) @@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti X = np.random.uniform(-3., 3., (num_samples, 1)) Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05 # construct kernel - rbf = GPy.kern.rbf(1) + rbf = GPy.kern.RBF(1) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m.checkgrad(verbose=1) @@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt Y[inan] = np.nan # construct kernel - rbf = GPy.kern.rbf(2) + rbf = GPy.kern.RBF(2) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) @@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): # likelihood = GPy.likelihoods.Gaussian(Y) Z = np.random.uniform(-3., 3., (7, 1)) - k = GPy.kern.rbf(1) + k = GPy.kern.RBF(1) # create simple GP Model - no input uncertainty on this one - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z) + m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) @@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): print m # the same Model with uncertainty - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S) + m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) if plot: diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 7760f48f..214e230f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,6 +1,7 @@ from _src.rbf import RBF from _src.white import White from _src.kern import Kern +Linear = 
'foo' #import bias #import Brownian #import coregionalize diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 8d916941..8d81674b 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -5,8 +5,8 @@ import sys import numpy as np import itertools from linear import Linear -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param +from ...core.parameterization import Parameterized +from ...core.parameterization.param import Param from kern import Kern class Add(Kern): @@ -27,7 +27,7 @@ class Add(Kern): self.add_parameters(*subkerns) - def K(self, X, X2=None, which_parts='all'): + def K(self, X, X2=None): """ Compute the kernel function. @@ -35,52 +35,22 @@ class Add(Kern): :param X2: (optional) the second set of arguments to the kernel. If X2 is None, this is passed throgh to the 'part' object, which handles this as X2 == X. - :param which_parts: a list of booleans detailing whether to include - each of the part functions. By default, 'all' - indicates all parts """ - if which_parts == 'all': - which_parts = [True] * self.size assert X.shape[1] == self.input_dim if X2 is None: - target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] + return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)]) else: - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - return target + return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) def update_gradients_full(self, dL_dK, X): - [p.update_gradients_full(dL_dK, X) for p in self._parameters_] + [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)] def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - def _param_grad_helper(self, dL_dK, X, X2=None): - """ - Compute the gradient of the covariance function with respect to the parameters. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. - :type dL_dK: Np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim) - - returns: dL_dtheta - """ - assert X.shape[1] == self.input_dim - target = np.zeros(self.size) - if X2 is None: - [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - else: - [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - - return self._transform_gradients(target) - def gradients_X(self, dL_dK, X, X2=None): """Compute the gradient of the objective function with respect to X. 
@@ -93,33 +63,15 @@ class Add(Kern): target = np.zeros_like(X) if X2 is None: - [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] else: - [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target - def Kdiag(self, X, which_parts='all'): + def Kdiag(self, X): """Compute the diagonal of the covariance function for inputs X.""" - if which_parts == 'all': - which_parts = [True] * self.size assert X.shape[1] == self.input_dim - target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] - return target - - def dKdiag_dtheta(self, dL_dKdiag, X): - """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" - assert X.shape[1] == self.input_dim - assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.size) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - return self._transform_gradients(target) - - def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.input_dim - target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target + return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) def psi0(self, Z, mu, S): target = np.zeros(mu.shape[0]) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 8b2f17e8..69fc27ef 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, James Hensman and Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern import numpy as np from scipy import weave from ...core.parameterization import Param -class Coregionalize(Kernpart): +class Coregionalize(Kern): """ Covariance function for intrinsic/linear coregionalization models @@ -133,6 +133,8 @@ class Coregionalize(Kernpart): #dkappa = dL_dKdiag_small #target += np.hstack([dW.flatten(),dkappa]) - def gradients_X(self,dL_dK,X,X2,target): - #NOTE In this case, pass is equivalent to returning zero. - pass + def gradients_X(self,dL_dK,X,X2): + if X2 is None: + return np.zeros((X.shape[0], X.shape[0])) + else: + return np.zeros((X.shape[0], X2.shape[0])) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index af362498..b5b84305 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -4,8 +4,8 @@ import sys import numpy as np import itertools -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param +from ...core.parameterization import Parameterized +from ...core.parameterization.param import Param class Kern(Parameterized): diff --git a/GPy/kern/_src/kernpart.py b/GPy/kern/_src/kernpart.py deleted file mode 100644 index 097ed741..00000000 --- a/GPy/kern/_src/kernpart.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) -#from ...core.parameterized.Parameterized import set_as_parameter -from ...core.parameterization import Parameterized - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # For stationary covariances, derivative of diagonal elements - # wrt lengthscale is 0. - target[0] += np.sum(dL_dKdiag) - - def dKdiag_dX(self, dL_dK, X, target): - pass # true for all stationary kernels - - -class Kernpart_inner(Kernpart): - def __init__(self,input_dim): - """ - The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. 
- """ - Kernpart.__init__(self, input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index ab77d4e6..5083c8de 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -5,10 +5,10 @@ import numpy as np from scipy import weave from kern import Kern -from ..util.linalg import tdot -from ..util.misc import fast_array_equal, param_to_array -from ..core.parameterization import Param -from ..core.parameterization.transformations import Logexp +from ...util.linalg import tdot +from ...util.misc import fast_array_equal, param_to_array +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class Linear(Kern): """ diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 08221de7..e0d069b2 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -35,64 +35,36 @@ class Prod(Kern): self._X, self._X2 = np.empty(shape=(2,1)) self._params = None - def K(self,X,X2,target): + def K(self, X, X2=None): self._K_computations(X,X2) - target += self._K1 * self._K2 - - def K1(self,X, X2): - """Compute the part of the kernel associated with k1.""" - self._K_computations(X, X2) - return self._K1 - - def K2(self, X, X2): - """Compute the part of the kernel associated with k2.""" - self._K_computations(X, X2) - return self._K2 + return self._K1 * self._K2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) - def _param_grad_helper(self,dL_dK,X,X2,target): - """Derivative of the covariance matrix with respect to the parameters.""" - self._K_computations(X,X2) - if X2 is None: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) - else: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) - - def Kdiag(self,X,target): + def Kdiag(self, X): """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros(X.shape[0]) - target2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],target1) - self.k2.Kdiag(X[:,self.slice2],target2) - target += target1 * target2 + return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) + def update_gradients_sparse(self): + pass + #wtf goes here?? 
+ #def dKdiag_dtheta(self,dL_dKdiag,X,target): + #K1 = np.zeros(X.shape[0]) + #K2 = np.zeros(X.shape[0]) + #self.k1.Kdiag(X[:,self.slice1],K1) + #self.k2.Kdiag(X[:,self.slice2],K2) + #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) + #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - def dKdiag_dtheta(self,dL_dKdiag,X,target): - K1 = np.zeros(X.shape[0]) - K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],K1) - self.k2.Kdiag(X[:,self.slice2],K2) - self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - - def gradients_X(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2): """derivative of the covariance matrix with respect to X.""" self._K_computations(X,X2) if X2 is None: - if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize): - self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) - self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) - else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize): - #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei] - X2 = X - self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) + self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) + self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) else: self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) @@ -112,14 +84,10 @@ class Prod(Kern): self._params == self._get_params().copy() if X2 is None: self._X2 = None - self._K1 = np.zeros((X.shape[0],X.shape[0])) - self._K2 = np.zeros((X.shape[0],X.shape[0])) - self.k1.K(X[:,self.slice1],None,self._K1) - self.k2.K(X[:,self.slice2],None,self._K2) + self._K1 = self.k1.K(X[:,self.slice1],None) + self._K2 = self.k2.K(X[:,self.slice2],None) else: self._X2 = X2.copy() - self._K1 = np.zeros((X.shape[0],X2.shape[0])) - self._K2 = np.zeros((X.shape[0],X2.shape[0])) - self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) - self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) + self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1]) + self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2]) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 36e454e3..eb713433 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -5,10 +5,10 @@ import numpy as np from scipy import weave from kern import Kern -from ..util.linalg import tdot -from ..util.misc import fast_array_equal, param_to_array -from ..core.parameterization import Param -from ..core.parameterization.transformations import Logexp +from ...util.linalg import tdot +from ...util.misc import fast_array_equal, param_to_array +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class RBF(Kern): """ diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py index 7750267f..2be73389 100644 --- a/GPy/kern/_src/white.py +++ b/GPy/kern/_src/white.py @@ -3,8 +3,8 @@ from kern import Kern import numpy as np -from ..core.parameterization import Param -from 
..core.parameterization.transformations import Logexp +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class White(Kern): """ @@ -25,6 +25,8 @@ class White(Kern): def K(self,X,X2): if X2 is None: return np.eye(X.shape[0])*self.variance + else: + return np.zeros((X.shape[0], X2.shape[0])) def Kdiag(self,X): ret = np.ones(X.shape[0]) diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index a72acc1a..f8957906 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -23,7 +23,7 @@ class GPRegression(GP): def __init__(self, X, Y, kernel=None): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kernel = kern.RBF(X.shape[1]) likelihood = likelihoods.Gaussian() diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index b4f987ea..acc6e11a 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -7,7 +7,7 @@ from GPy.util.linalg import PCA import numpy import itertools import pylab -from GPy.kern.kern import Kern +from GPy.kern import Kern from GPy.models.bayesian_gplvm import BayesianGPLVM class MRD(Model): diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 80350475..30157294 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -7,7 +7,7 @@ import pylab as pb import Tango from matplotlib.textpath import TextPath from matplotlib.transforms import offset_copy -from ...kern.linear import Linear +from ...kern import Linear def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index c9896116..75ba39d9 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -9,7 +9,7 @@ from ...util.misc import param_to_array def plot_fit(model, plot_limits=None, which_data_rows='all', - which_data_ycols='all', which_parts='all', fixed_inputs=[], + which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False, linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): @@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed. Can plot only part of the data and part of the posterior functions - using which_data_rowsm which_data_ycols and which_parts + using which_data_rowsm which_data_ycols. :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits :type plot_limits: np.array @@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type which_data_rows: 'all' or a slice object to slice model.X, model.Y :param which_data_ycols: when the data has several columns (independant outputs), only plot these :type which_data_rows: 'all' or a list of integers - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. :type fixed_inputs: a list of tuples :param resolution: the number of intervals to sample the GP on. 
Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D @@ -76,12 +74,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #make a prediction on the frame and plot it if plot_raw: - m, v = model._raw_predict(Xgrid, which_parts=which_parts) + m, v = model._raw_predict(Xgrid) lower = m - 2*np.sqrt(v) upper = m + 2*np.sqrt(v) Y = model.Y else: - m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts) + m, v, lower, upper = model.predict(Xgrid) Y = model.Y for d in which_data_ycols: gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) @@ -89,7 +87,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #optionally plot some samples if samples: #NOTE not tested with fixed_inputs - Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts) + Ysim = model.posterior_samples(Xgrid, samples) for yi in Ysim.T: ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. @@ -131,10 +129,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #predict on the frame and plot if plot_raw: - m, _ = model._raw_predict(Xgrid, which_parts=which_parts) + m, _ = model._raw_predict(Xgrid) Y = model.Y else: - m, _, _, _ = model.predict(Xgrid, which_parts=which_parts) + m, _, _, _ = model.predict(Xgrid) Y = model.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 059a39c3..23f5d0c8 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500): num_in = 1 X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in)) X.sort(axis=0) - rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,))) - white = GPy.kern.white(num_in, variance=1e-2) + rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,))) + white = GPy.kern.White(num_in, variance=1e-2) kernel = rbf + white K = kernel.K(X) y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1)) From de51ad638a0ea12469a881d32f7524eeb4ac3082 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 22:23:07 +0000 Subject: [PATCH 11/38] prod now seems to work for sparse --- GPy/core/sparse_gp.py | 13 ++++++------- GPy/kern/_src/add.py | 2 +- GPy/kern/_src/linear.py | 16 ++++++++-------- GPy/kern/_src/prod.py | 39 +++++++++++++++++---------------------- GPy/kern/_src/rbf.py | 2 +- GPy/kern/_src/white.py | 3 +-- 6 files changed, 34 insertions(+), 41 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index edb8d8f6..128dfca3 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -68,22 +68,21 @@ class SparseGP(GP): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) self._update_gradients_Z(add=False) - def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): + def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ Make a prediction for the latent function values """ if X_variance_new is None: - Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) + Kx = self.kern.K(self.Z, Xnew) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: - Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, 
self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting + Kxx = self.kern.K(Xnew) + var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) + Kxx = self.kern.Kdiag(Xnew) var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) else: - # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 8d81674b..edb82ef0 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -46,7 +46,7 @@ class Add(Kern): [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)] + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 5083c8de..b3765774 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -43,16 +43,16 @@ class Linear(Kern): assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim" else: variances = np.ones(self.input_dim) - + self.variances = Param('variances', variances, Logexp()) - self.variances.gradient = np.zeros(self.variances.shape) + #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape) self.add_parameter(self.variances) self.variances.add_observer(self, self.update_variance) # initialize cache self._Z, self._mu, self._S = np.empty(shape=(3, 1)) self._X, self._X2 = np.empty(shape=(2, 1)) - + def update_variance(self, v): self.variances2 = np.square(self.variances) @@ -62,7 +62,7 @@ class Linear(Kern): def update_gradients_full(self, dL_dK, X): self.variances.gradient[:] = 0 self._param_grad_helper(dL_dK, X, None, self.variances.gradient) - + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): tmp = dL_dKdiag[:, None] * X ** 2 if self.ARD: @@ -71,7 +71,7 @@ class Linear(Kern): self.variances.gradient = tmp.sum() self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) - + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): self._psi_computations(Z, mu, S) # psi0: @@ -87,7 +87,7 @@ class Linear(Kern): #from Kmm self._K_computations(Z, None) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - + def K(self, X, X2, target): if self.ARD: XX = X * np.sqrt(self.variances) @@ -224,7 +224,7 @@ class Linear(Kern): weave_options = {'headers' : [''], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_link_args' : ['-lgomp']} - + N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], @@ -281,7 +281,7 
@@ class Linear(Kern): self._X2 = None else: self._X2 = X2.copy() - self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) + self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) def _psi_computations(self, Z, mu, S): # here are the "statistics" for psi1 and psi2 diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index e0d069b2..67637770 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -36,38 +36,33 @@ class Prod(Kern): self._params = None def K(self, X, X2=None): - self._K_computations(X,X2) + self._K_computations(X, X2) return self._K1 * self._K2 + def Kdiag(self, X): + return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) + def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) - def Kdiag(self, X): - """Compute the diagonal of the covariance matrix associated to X.""" - return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] ) + self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] ) - def update_gradients_sparse(self): - pass - #wtf goes here?? - #def dKdiag_dtheta(self,dL_dKdiag,X,target): - #K1 = np.zeros(X.shape[0]) - #K2 = np.zeros(X.shape[0]) - #self.k1.Kdiag(X[:,self.slice1],K1) - #self.k2.Kdiag(X[:,self.slice2],K2) - #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) - #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - - def gradients_X(self,dL_dK,X,X2): + def gradients_X(self, dL_dK, X, X2=None): """derivative of the covariance matrix with respect to X.""" - self._K_computations(X,X2) + self._K_computations(X, X2) + target = np.zeros(X.shape) if X2 is None: - self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) - self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None) else: - self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2]) + + return target def dKdiag_dX(self, dL_dKdiag, X, target): K1 = np.zeros(X.shape[0]) @@ -78,7 +73,7 @@ class Prod(Kern): self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) - def _K_computations(self,X,X2): + def _K_computations(self, X, X2): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): self._X = X.copy() self._params == self._get_params().copy() diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py 
index eb713433..02640fdc 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -154,7 +154,7 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_X(self, dL_dK, X, X2): + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) if X2 is None: diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py index 2be73389..d20e2fe1 100644 --- a/GPy/kern/_src/white.py +++ b/GPy/kern/_src/white.py @@ -20,9 +20,8 @@ class White(Kern): self.input_dim = input_dim self.variance = Param('variance', variance, Logexp()) self.add_parameters(self.variance) - self._psi1 = 0 # TODO: more elegance here - def K(self,X,X2): + def K(self, X, X2=None): if X2 is None: return np.eye(X.shape[0])*self.variance else: From 5214c3c1ac46d60b1818e614394e10106e117bc8 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Wed, 19 Feb 2014 19:39:24 -0500 Subject: [PATCH 12/38] Adding update_gradients to sympy.py. --- GPy/kern/parts/rbf.py | 2 +- GPy/kern/parts/sympykern.py | 199 +++++++++++++++++++++--------------- 2 files changed, 115 insertions(+), 86 deletions(-) diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 027aa382..8811b74a 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -109,7 +109,7 @@ class RBF(Kernpart): self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None) else: self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - +b def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): #contributions from Kdiag self.variance.gradient = np.sum(dL_dKdiag) diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index a5bb7b1d..52813ecd 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -26,6 +26,8 @@ import ast from kernpart import Kernpart from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp +# TODO have this set up in a set up file! +user_code_storage = tempfile.gettempdir() class spkern(Kernpart): """ @@ -61,13 +63,12 @@ class spkern(Kernpart): assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert len(self._sp_x)==len(self._sp_z) - assert len(self._sp_x)==input_dim + x_dim=len(self._sp_x) # If it is a multi-output covariance, add an input for indexing the outputs. 
- self._real_input_dim = self.input_dim - if output_dim > 1: - self.input_dim += 1 - assert self.input_dim == input_dim + self._real_input_dim = x_dim + # Check input dim is number of xs + 1 if output_dim is >1 + assert self.input_dim == x_dim + int(output_dim > 1) self.output_dim = output_dim # extract parameter names from the covariance @@ -113,7 +114,6 @@ class spkern(Kernpart): #setattr(self, theta.name, val) setattr(self, theta.name, Param(theta.name, val, None)) self.add_parameters(getattr(self, theta.name)) - self.parameters_changed() # initializes cache #deal with param #self._set_params(self._get_params()) @@ -139,13 +139,15 @@ class spkern(Kernpart): extra_compile_args = [] self.weave_kwargs = { - 'support_code':self._function_code, - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], - 'headers':['"sympy_helpers.h"'], - 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], + 'support_code': None, #self._function_code, + 'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')], + 'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')], 'extra_compile_args':extra_compile_args, 'extra_link_args':['-lgomp'], 'verbose':True} + self.parameters_changed() # initializes caches + def __add__(self,other): return spkern(self._sp_k+other._sp_k) @@ -177,31 +179,39 @@ class spkern(Kernpart): # Use weave to compute the underlying functions. if weave_available: # put the header file where we can find it - f = file(os.path.join(tempfile.gettempdir(), self.name + '.h'),'w') + f = file(os.path.join(user_code_storage, self.name + '.h'),'w') f.write(self._function_header) f.close() - - # Substitute any known derivatives which sympy doesn't compute - self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) + + if weave_available: + # Substitute any known derivatives which sympy doesn't compute + self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) + # put the cpp file in user code storage (defaults to temp file location) + f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w') + else: + # put the python file in user code storage + f = file(os.path.join(user_code_storage, self.name + '.py'),'w') + f.write(self._function_code) + f.close() if weave_available: # arg_list will store the arguments required for the C code. - arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) # for multiple outputs reverse argument list is also required if self.output_dim>1: - reverse_arg_list = list(arg_list) - reverse_arg_list.reverse() + reverse_input_arg_list = list(input_arg_list) + reverse_input_arg_list.reverse() # This gives the parameters for the arg list. param_arg_list = [shared_params.name for shared_params in self._sp_theta] - arg_list += param_arg_list + arg_list = input_arg_list + param_arg_list precompute_list=[] if self.output_dim > 1: - reverse_arg_list+=list(param_arg_list) + reverse_arg_list= reverse_input_arg_list + list(param_arg_list) # For multiple outputs, also need the split parameters. 
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] @@ -218,9 +228,9 @@ class spkern(Kernpart): # Any precomputations will be done here eventually. self._precompute = \ - """ - // Precompute code would go here. It will be called when parameters are updated. - """ + """ + // Precompute code would go here. It will be called when parameters are updated. + """ # Here's the code to do the looping for K self._K_code =\ @@ -229,11 +239,11 @@ class spkern(Kernpart): // Code for computing the covariance function. int i; int j; - int N = target_array->dimensions[0]; + int n = target_array->dimensions[0]; int num_inducing = target_array->dimensions[1]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; + int n = target_array->dimensions[0]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for - for (i=0;i1: grad_func_list += c_define_output_indices - grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) + grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) grad_func_string = '\n'.join(grad_func_list) self._dK_dtheta_code =\ @@ -285,17 +296,18 @@ class spkern(Kernpart): // Code for computing gradient of covariance with respect to parameters. 
int i; int j; - int N = partial_array->dimensions[0]; + int n = partial_array->dimensions[0]; int num_inducing = partial_array->dimensions[1]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; + int n = partial_array->dimensions[0]; int input_dim = X_array->dimensions[1]; - for (i=0;idimensions[0]; + int n = partial_array->dimensions[0]; int num_inducing = partial_array->dimensions[1]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; + int n = partial_array->dimensions[0]; int input_dim = X_array->dimensions[1]; - for (int i=0;i1: - # for i, split_params in enumerate(self._split_theta_names): - # start = self.num_shared_params + i*self.output_dim - # end = self.num_shared_params + (i+1)*self.output_dim - # setattr(self, split_params, param[start:end]) + #---------------------------------------# + # Precomputations # + #---------------------------------------# - - # def _get_params(self): - # params = np.zeros(0) - # for shared_params in self._sp_theta: - # params = np.hstack((params, getattr(self, shared_params.name))) - # if self.output_dim>1: - # for split_params in self._split_theta_names: - # params = np.hstack((params, getattr(self, split_params).flatten())) - # return params - - # def _get_param_names(self): - # if self.output_dim>1: - # return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)] - # else: - # return [x.name for x in self._sp_theta] + def _K_computations(self, X, Z): + if Z is None: + self._generate_inline(self._precompute, X) + else: + self._generate_inline(self._precompute, X, Z=Z) From 46f59f9f6427af69db0097957e1374c1a03f27d6 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 20 Feb 2014 08:38:14 +0000 Subject: [PATCH 13/38] gradients now lazy instantiated --- GPy/core/parameterization/param.py | 11 +++++++++-- GPy/examples/dimensionality_reduction.py | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 75d9faf2..c052099d 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -54,7 +54,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable obj._tied_to_me_ = SetDict() obj._tied_to_ = [] obj._original_ = True - obj.gradient = None + obj._gradient_ = None return obj def __init__(self, name, input_array, default_constraint=None): @@ -76,10 +76,17 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable self._updated_ = getattr(obj, '_updated_', None) self._original_ = getattr(obj, '_original_', None) self._name = getattr(obj, 'name', None) - self.gradient = getattr(obj, 'gradient', None) + self._gradient_ = getattr(obj, '_gradient_', None) self.constraints = getattr(obj, 'constraints', None) self.priors = getattr(obj, 'priors', None) + + @property + def gradient(self): + if self._gradient_ is None: + self._gradient_ = numpy.zeros(self._realshape_) + return self._gradient_ + #=========================================================================== # Pickling operations #=========================================================================== diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 2924386f..4d42026d 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -296,11 +296,12 @@ def 
bgplvm_simulation_missing_data(optimize=True, verbose=1, k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) - m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k) + m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m.inference_method = VarDTCMissingData() m.Y[inan] = _np.nan m.q.variance *= .1 m.parameters_changed() + m.Yreal = Y if optimize: print "Optimizing model:" From d636c8c30ce696ad27360b4f31a439263b98c2b5 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 20 Feb 2014 14:04:16 +0000 Subject: [PATCH 14/38] everything is broken --- GPy/core/gp.py | 1 + GPy/core/parameterization/param.py | 10 +- GPy/core/parameterization/parameter_core.py | 46 ++--- GPy/core/sparse_gp.py | 15 +- GPy/examples/dimensionality_reduction.py | 26 +-- GPy/kern/__init__.py | 2 +- GPy/kern/_src/add.py | 203 ++++++++++---------- GPy/kern/_src/kern.py | 16 +- GPy/kern/_src/linear.py | 85 ++++---- GPy/kern/_src/prod.py | 59 ++---- GPy/kern/_src/rbf.py | 78 ++++---- GPy/models/bayesian_gplvm.py | 14 +- GPy/util/caching.py | 93 ++++++--- 13 files changed, 325 insertions(+), 323 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 2dcf0e14..13336ef5 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -44,6 +44,7 @@ class GP(Model): self.Y_metadata = None assert isinstance(kernel, kern.Kern) + assert self.input_dim == kernel.input_dim self.kern = kernel assert isinstance(likelihood, likelihoods.Likelihood) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f54c0117..016ecbf6 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri :param input_array: array which this parameter handles :type input_array: numpy.ndarray :param default_constraint: The default constraint for this parameter - :type default_constraint: + :type default_constraint: You can add/remove constraints by calling constrain on the parameter itself, e.g: @@ -59,7 +59,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri def __init__(self, name, input_array, default_constraint=None): super(Param, self).__init__(name=name, default_constraint=default_constraint) - + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return @@ -192,7 +192,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri return numpy.r_[a] return numpy.r_[:b] return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size))) - + #=========================================================================== # Convenience #=========================================================================== @@ -260,7 +260,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + clean_curr_slice += range(self._realshape_[i]) if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), @@ -426,4 +426,4 @@ class ParamConcatenation(object): start = False return 
"\n".join(strings) def __repr__(self): - return "\n".join(map(repr,self.params)) \ No newline at end of file + return "\n".join(map(repr,self.params)) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..5acdec58 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -18,8 +18,8 @@ class Observable(object): def remove_observer(self, observer): del self._observers_[observer] def _notify_observers(self): - [callble(self) for callble in self._observers_.itervalues()] - + [callble(self) for callble in self._observers_.values()] + class Pickleable(object): def _getstate(self): """ @@ -51,7 +51,7 @@ class Parentable(object): super(Parentable,self).__init__() self._direct_parent_ = direct_parent self._parent_index_ = parent_index - + def has_parent(self): return self._direct_parent_ is not None @@ -82,7 +82,7 @@ class Nameable(Parentable): from_name = self.name self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, from_name) + self._direct_parent_._name_changed(self, from_name) class Parameterizable(Parentable): @@ -90,7 +90,7 @@ class Parameterizable(Parentable): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.array_core import ParamList _parameters_ = ParamList() - + def parameter_names(self, add_name=False): if add_name: return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] @@ -142,21 +142,21 @@ class Gradcheckable(Parentable): class Indexable(object): def _raveled_index(self): raise NotImplementedError, "Need to be able to get the raveled Index" - + def _internal_offset(self): return 0 - + def _offset_for(self, param): raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?" - + def _raveled_index_for(self, param): """ get the raveled index for a param that is an int array, containing the indexes for the flattened param inside this parameterized logic. """ - raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" - + raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" + class Constrainable(Nameable, Indexable, Parameterizable): def __init__(self, name, default_constraint=None): super(Constrainable,self).__init__(name) @@ -166,7 +166,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: self.constrain(self._default_constraint_) - + #=========================================================================== # Fixing Parameters: #=========================================================================== @@ -182,21 +182,21 @@ class Constrainable(Nameable, Indexable, Parameterizable): rav_i = self._highest_parent_._raveled_index_for(self) self._highest_parent_._set_fixed(rav_i) fix = constrain_fixed - + def unconstrain_fixed(self): """ This parameter will no longer be fixed. 
""" unconstrained = self.unconstrain(__fixed__) - self._highest_parent_._set_unfixed(unconstrained) + self._highest_parent_._set_unfixed(unconstrained) unfix = unconstrain_fixed - + def _set_fixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) self._fixes_[index] = FIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED - + def _set_unfixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) @@ -212,7 +212,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): self._fixes_[fixed_indices] = FIXED else: self._fixes_ = None - + def _has_fixes(self): return hasattr(self, "_fixes_") and self._fixes_ is not None @@ -222,17 +222,17 @@ class Constrainable(Nameable, Indexable, Parameterizable): def set_prior(self, prior, warning=True, update=True): repriorized = self.unset_priors() self._add_to_index_operations(self.priors, repriorized, prior, warning, update) - + def unset_priors(self, *priors): return self._remove_from_index_operations(self.priors, priors) - + def log_prior(self): """evaluate the prior""" if self.priors.size > 0: x = self._get_params() return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) return 0. - + def _log_prior_gradients(self): """evaluate the gradients of the priors""" import numpy as np @@ -242,7 +242,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] return ret return 0. - + #=========================================================================== # Constrain operations -> done #=========================================================================== @@ -269,7 +269,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): transformats of this parameter object. """ return self._remove_from_index_operations(self.constraints, transforms) - + def constrain_positive(self, warning=True, update=True): """ :param warning: print a warning if re-constraining parameters. 
@@ -314,7 +314,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): Remove (lower, upper) bounded constrain from this parameter/ """ self.unconstrain(Logistic(lower, upper)) - + def _parent_changed(self, parent): from index_operations import ParameterIndexOperationsView self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size) @@ -340,7 +340,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): removed = np.union1d(removed, unconstrained) if t is __fixed__: self._highest_parent_._set_unfixed(unconstrained) - + return removed diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 128dfca3..c72de182 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -53,20 +53,19 @@ class SparseGP(GP): self.add_parameter(self.Z, index=0) self.parameters_changed() - def _update_gradients_Z(self, add=False): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) + def _gradients_Z(self): + #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) if not self.Z.is_fixed: - if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) - else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) if self.X_variance is None: - self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) + self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) else: - self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) + self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) + print self.Z.gradient + print id(self.Z) def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) + self.Z.gradient = self._gradients_Z() def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a7eb0adb..a5e8615d 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -22,18 +22,18 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, # generate GPLVM-like data X = _np.random.rand(num_inputs, input_dim) lengthscales = _np.random.rand(input_dim) - k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) + k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) #+ GPy.kern.white(input_dim, 0.01) ) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T - # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) - k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) - # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) - # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) - # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) + # k = 
GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) p = .3 @@ -73,7 +73,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True): data = GPy.util.datasets.oil_100() Y = data['X'] # create simple GP model - kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) + kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6) m = GPy.models.GPLVM(Y, 6, kernel=kernel) m.data_labels = data['Y'].argmax(axis=1) if optimize: m.optimize('scg', messages=verbose) @@ -88,7 +88,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci Y = Y - Y.mean(0) Y /= Y.std(0) # Create the model - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -138,7 +138,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 (1 - var))) + .001 Z = _np.random.permutation(X)[:num_inducing] - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c @@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) Y = data['X'][:N] Yn = Gaussian(Y, normalize=True) m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) @@ -435,7 +435,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() # optimize - back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) + back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) 
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) @@ -470,7 +470,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() Q = 6 - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) # optimize m.ensure_default_constraints() diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 214e230f..630d74da 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,7 +1,7 @@ from _src.rbf import RBF from _src.white import White from _src.kern import Kern -Linear = 'foo' +from _src.linear import Linear #import bias #import Brownian #import coregionalize diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index edb82ef0..acc69fd4 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -34,7 +34,7 @@ class Add(Kern): :param X: the first set of inputs to the kernel :param X2: (optional) the second set of arguments to the kernel. If X2 is None, this is passed throgh to the 'part' object, which - handles this as X2 == X. + handLes this as X2 == X. """ assert X.shape[1] == self.input_dim if X2 is None: @@ -48,9 +48,6 @@ class Add(Kern): def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - def gradients_X(self, dL_dK, X, X2=None): """Compute the gradient of the objective function with respect to X. 
@@ -69,123 +66,125 @@ class Add(Kern): return target def Kdiag(self, X): - """Compute the diagonal of the covariance function for inputs X.""" assert X.shape[1] == self.input_dim return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) + def psi0(self, Z, mu, S): - target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.size) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): - target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S + return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) def psi1(self, Z, mu, S): - target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.size)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are num_samples,num_inducing,input_dim""" - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S + return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) def psi2(self, Z, mu, S): - """ - Computer the psi2 statistics for the covariance function.
- - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - - """ - target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) # compute the "cross" terms - # TODO: input_slices needed - crossterms = 0 + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #from fixed import Fixed - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) + for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2): + # white doesn't combine with anything + if isinstance(p1, White) or isinstance(p2, White): + pass + # rbf X bias + #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): + elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): + tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2]) + psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) + #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): + elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): + tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1]) + psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" + return psi2 - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #from fixed import Fixed - target += crossterms + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. + + + p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) + + + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import white + from rbf import rbf + #from rbf_inv import rbfinv + #from bias import bias + from linear import linear + #from fixed import fixed + + target = np.zeros(Z.shape) + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, white): + continue + elif isinstance(p2, bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+ else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. + + + target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) return target - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.size) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import white + from rbf import rbf + #from rbf_inv import rbfinv + #from bias import bias + from linear import linear + #from fixed import fixed - # compute the "cross" terms - # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): - p1, p2 = self._parameters_[i1], self._parameters_[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] + target_mu = np.zeros(mu.shape) + target_S = np.zeros(S.shape) + for p1, is1 in zip(self._parameters_, self.input_slices): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) + #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, white): + continue + elif isinstance(p2, bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
- for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) + a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) + target_mu += a + target_S += b return target_mu, target_S def plot(self, *args, **kwargs): diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index b5b84305..dd87200e 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -9,7 +9,7 @@ from ...core.parameterization.param import Param class Kern(Parameterized): - def __init__(self,input_dim,name): + def __init__(self, input_dim, name): """ The base class for a kernel: a positive definite function which forms of a covariance function (kernel). @@ -22,21 +22,15 @@ class Kern(Parameterized): super(Kern, self).__init__(name) self.input_dim = input_dim - def K(self,X,X2,target): + def K(self, X, X2, target): raise NotImplementedError - def Kdiag(self,X,target): + def Kdiag(self, Xa ,target): raise NotImplementedError - def _param_grad_helper(self,dL_dK,X,X2,target): + def _param_grad_helper(self, dL_dK,X, X2, target): raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max?? - # In the base case compute this by calling _param_grad_helper. Need to - # override for stationary covariances (for example) to save - # time. - for i in range(X.shape[0]): - self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) def psi0(self,Z,mu,S,target): raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): + def dpsi0_dtheta(self,dL_dpsi0, Z,mu,S,target): raise NotImplementedError def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): raise NotImplementedError diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index b3765774..7f5d43d3 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -9,6 +9,7 @@ from ...util.linalg import tdot from ...util.misc import fast_array_equal, param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp +from ...util.caching import Cacher, cache_this class Linear(Kern): """ @@ -45,22 +46,35 @@ class Linear(Kern): variances = np.ones(self.input_dim) self.variances = Param('variances', variances, Logexp()) - #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape) self.add_parameter(self.variances) - self.variances.add_observer(self, self.update_variance) + self.variances.add_observer(self, self._on_changed) - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2 = np.empty(shape=(2, 1)) + def _on_changed(self, obj): + self._notify_observers() - def update_variance(self, v): - self.variances2 = np.square(self.variances) + @cache_this(limit=3, reset_on_self=True) + def K(self, X, X2=None): + if self.ARD: + if X2 is None: + return tdot(X*np.sqrt(self.variances)) + else: + rv = np.sqrt(self.variances) + return np.dot(X*rv, (X2*rv).T) + else: + return self._dot_product(X, X2) * self.variances - def on_input_change(self, X): - self._K_computations(X, None) + @cache_this(limit=3, reset_on_self=False) + def _dot_product(self, X, X2=None): + if X2 is None: + return tdot(X) + else: + return np.dot(X, X2.T) + + def Kdiag(self, X): + return np.sum(self.variances * 
np.square(X), -1) def update_gradients_full(self, dL_dK, X): - self.variances.gradient[:] = 0 + self.variances.gradient = np.zeros(self.variances.size) self._param_grad_helper(dL_dK, X, None, self.variances.gradient) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): @@ -68,7 +82,7 @@ class Linear(Kern): if self.ARD: self.variances.gradient = tmp.sum(0) else: - self.variances.gradient = tmp.sum() + self.variances.gradient = np.atleast_1d(tmp.sum()) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) @@ -85,25 +99,8 @@ class Linear(Kern): if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) else: self.variances.gradient += tmp.sum() #from Kmm - self._K_computations(Z, None) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - def K(self, X, X2, target): - if self.ARD: - XX = X * np.sqrt(self.variances) - if X2 is None: - target += tdot(XX) - else: - XX2 = X2 * np.sqrt(self.variances) - target += np.dot(XX, XX2.T) - else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += self.variances * self._dot_product - - def Kdiag(self, X, target): - np.add(target, np.sum(self.variances * np.square(X), -1), target) - def _param_grad_helper(self, dL_dK, X, X2, target): if self.ARD: if X2 is None: @@ -112,18 +109,16 @@ class Linear(Kern): product = X[:, None, :] * X2[None, :, :] target += (dL_dK[:, :, None] * product).sum(0).sum(0) else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += np.sum(self._dot_product * dL_dK) + target += np.sum(self._dot_product(X, X2) * dL_dK) - def gradients_X(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2=None): if X2 is None: - target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) + return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) else: - target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) + return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) - def dKdiag_dX(self,dL_dKdiag,X,target): - target += 2.*self.variances*dL_dKdiag[:,None]*X + def gradients_X_diag(self, dL_dKdiag, X): + return 2.*self.variances*dL_dKdiag[:,None]*X #---------------------------------------# # PSI statistics # @@ -273,15 +268,15 @@ class Linear(Kern): # Precomputations # #---------------------------------------# - def _K_computations(self, X, X2): - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): - self._X = X.copy() - if X2 is None: - self._dot_product = tdot(param_to_array(X)) - self._X2 = None - else: - self._X2 = X2.copy() - self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) + #def _K_computations(self, X, X2): + #if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): + #self._X = X.copy() + #if X2 is None: + ##self._dot_product = tdot(param_to_array(X)) + #self._X2 = None + #else: + #self._X2 = X2.copy() + #self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) def _psi_computations(self, Z, mu, S): # here are the "statistics" for psi1 and psi2 diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 67637770..1d033f70 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -2,9 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from kern import Kern -from coregionalize import Coregionalize import numpy as np -import hashlib class Prod(Kern): """ @@ -17,7 +15,7 @@ class Prod(Kern): :rtype: kernel 
object """ - def __init__(self,k1,k2,tensor=False): + def __init__(self, k1, k2, tensor=False): if tensor: super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name) self.slice1 = slice(0,k1.input_dim) @@ -25,64 +23,43 @@ class Prod(Kern): else: assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension." super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name) - self.slice1 = slice(0,self.input_dim) - self.slice2 = slice(0,self.input_dim) + self.slice1 = slice(0, self.input_dim) + self.slice2 = slice(0, self.input_dim) self.k1 = k1 self.k2 = k2 self.add_parameters(self.k1, self.k2) - #initialize cache - self._X, self._X2 = np.empty(shape=(2,1)) - self._params = None - def K(self, X, X2=None): - self._K_computations(X, X2) - return self._K1 * self._K2 + if X2 is None: + return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None) + else: + return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2]) def Kdiag(self, X): return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) def update_gradients_full(self, dL_dK, X): - self._K_computations(X, None) - self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) - self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) + self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1]) + self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2]) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] ) self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] ) def gradients_X(self, dL_dK, X, X2=None): - """derivative of the covariance matrix with respect to X.""" - self._K_computations(X, X2) target = np.zeros(X.shape) if X2 is None: - target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None) - target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None) + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None) else: - target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1]) - target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2]) - + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2]) return target - def dKdiag_dX(self, dL_dKdiag, X, target): - K1 = np.zeros(X.shape[0]) - K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],K1) - self.k2.Kdiag(X[:,self.slice2],K2) + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2]) + return target - 
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) - - def _K_computations(self, X, X2): - if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): - self._X = X.copy() - self._params == self._get_params().copy() - if X2 is None: - self._X2 = None - self._K1 = self.k1.K(X[:,self.slice1],None) - self._K2 = self.k2.K(X[:,self.slice2],None) - else: - self._X2 = X2.copy() - self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1]) - self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2]) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 02640fdc..0508436f 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -79,17 +79,18 @@ class RBF(Kern): ret[:] = self.variance return ret - #TODO: remove TARGET! - def psi0(self, Z, mu, S, target): - target += self.variance + def psi0(self, Z, mu, S): + ret = np.empty(mu.shape[0], dtype=np.float64) + ret[:] = self.variance + return ret - def psi1(self, Z, mu, S, target): + def psi1(self, Z, mu, S): self._psi_computations(Z, mu, S) - target += self._psi1 + return self._psi1 - def psi2(self, Z, mu, S, target): + def psi2(self, Z, mu, S): self._psi_computations(Z, mu, S) - target += self._psi2 + return self._psi2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) @@ -154,6 +155,37 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + + #psi1 + denominator = (self.lengthscale2 * (self._psi1_denom)) + dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) + grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) + + #psi2 + term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim + term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim + dZ = self._psi2[:, :, :, None] * (term1[None] + term2) + grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) + + return grad + + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + #psi1 + tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom + grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) + grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) + + tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom + grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) + grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) + + return grad_mu, grad_S + + + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) @@ -171,36 +203,7 @@ class RBF(Kern): # PSI statistics # #---------------------------------------# - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self._psi_computations(Z, mu, S) - denominator = (self.lengthscale2 * (self._psi1_denom)) - dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - self._psi_computations(Z, mu, S) - tmp = 
self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim - dZ = self._psi2[:, :, :, None] * (term1[None] + term2) - target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom - target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) - target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - #---------------------------------------# + #---------------------------------------# # Precomputations # #---------------------------------------# @@ -362,6 +365,7 @@ class RBF(Kern): #include #include """ + mu = param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 914ca4ae..5fb1ca59 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = state.pop() SparseGP._setstate(self, state) - def dL_dmuS(self): - dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance) - dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) - dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 - dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - - return dL_dmu, dL_dS - def KL_divergence(self): var_mean = np.square(self.X).sum() var_S = np.sum(self.X_variance - np.log(self.X_variance)) return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) + super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.KL_divergence() - dL_dmu, dL_dS = self.dL_dmuS() + dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) # dL: self.q.mean.gradient = dL_dmu diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 51ba56f3..1f10cd64 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -1,46 +1,89 @@ -from ..core.parameterization.array_core import ObservableArray, ParamList +from ..core.parameterization.parameter_core import Observable +from ..core.parameterization.array_core import ParamList + class Cacher(object): - def __init__(self, operation, limit=5): + def __init__(self, operation, limit=5, reset_on_first=False): self.limit = 
int(limit) + self._reset_on_first = reset_on_first self.operation=operation - self.cached_inputs = ParamList([]) + self.cached_inputs = [] self.cached_outputs = [] self.inputs_changed = [] - def __call__(self, X): - assert isinstance(X, ObservableArray) - if X in self.cached_inputs: - i = self.cached_inputs.index(X) + def __call__(self, *args): + if self._reset_on_first: + assert isinstance(args[0], Observable) + args[0].add_observer(args[0], self.reset) + cached_args = args + else: + cached_args = args[1:] + + + if not all([isinstance(arg, Observable) for arg in cached_args]): + return self.operation(*args) + if cached_args in self.cached_inputs: + i = self.cached_inputs.index(cached_args) if self.inputs_changed[i]: - self.cached_outputs[i] = self.operation(X) + self.cached_outputs[i] = self.operation(*args) self.inputs_changed[i] = False return self.cached_outputs[i] else: if len(self.cached_inputs) == self.limit: - X_ = self.cached_inputs.pop(0) - X_.remove_observer(self) + args_ = self.cached_inputs.pop(0) + [a.remove_observer(self) for a in args_] self.inputs_changed.pop(0) self.cached_outputs.pop(0) - self.cached_inputs.append(X) - self.cached_outputs.append(self.operation(X)) + self.cached_inputs.append(cached_args) + self.cached_outputs.append(self.operation(*args)) self.inputs_changed.append(False) - X.add_observer(self, self.on_cache_changed) + [a.add_observer(self, self.on_cache_changed) for a in args] return self.cached_outputs[-1] - def on_cache_changed(self, X): - #print id(X) - Xbase = X - while Xbase is not None: - try: - i = self.cached_inputs.index(X) - break - except ValueError: - Xbase = X.base - continue - self.inputs_changed[i] = True + def on_cache_changed(self, arg): + self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] + + def reset(self, obj): + [[a.remove_observer(self) for a in args] for args in self.cached_inputs] + self.cached_inputs = [] + self.cached_outputs = [] + self.inputs_changed = [] + + + + +def cache_this(limit=5, reset_on_self=False): + def limited_cache(f): + c = Cacher(f, limit, reset_on_first=reset_on_self) + def f_wrap(*args): + return c(*args) + f_wrap._cacher = c + return f_wrap + return limited_cache + + + + + + + + + + + + + #Xbase = X + #while Xbase is not None: + #try: + #i = self.cached_inputs.index(X) + #break + #except ValueError: + #Xbase = X.base + #continue + #self.inputs_changed[i] = True + + - From 52ab456bfe9ffea60f8509826f6edeb2366c9337 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 20 Feb 2014 14:09:20 +0000 Subject: [PATCH 15/38] posterior with one covariance per dimension and param gradient fix --- GPy/core/parameterization/param.py | 3 +++ GPy/inference/latent_function_inference/posterior.py | 7 +++++-- GPy/util/warping_functions.py | 3 +-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index c052099d..7ab7e2b4 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -86,6 +86,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable if self._gradient_ is None: self._gradient_ = numpy.zeros(self._realshape_) return self._gradient_ + @gradient.setter + def gradient(self, val): + self.gradient[:] = val #=========================================================================== # Pickling operations diff --git a/GPy/inference/latent_function_inference/posterior.py 
b/GPy/inference/latent_function_inference/posterior.py index f28bf9d1..73741a13 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -81,13 +81,16 @@ class Posterior(object): def covariance(self): if self._covariance is None: #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) - self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) + self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T + #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) return self._covariance @property def precision(self): if self._precision is None: - self._precision, _, _, _ = pdinv(self.covariance) + self._precision = np.zeros(np.atleast_3d(self.covariance).shape) # if one covariance per dimension + for p in xrange(self.covariance.shape[-1]): + self._precision[:,:,p] = pdinv(self.covariance[:,:,p])[0] return self._precision @property diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index 35ad3b80..a0a385e0 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -3,8 +3,6 @@ import numpy as np -import scipy as sp -import pylab as plt class WarpingFunction(object): """ @@ -39,6 +37,7 @@ class WarpingFunction(object): def plot(self, psi, xmin, xmax): y = np.arange(xmin, xmax, 0.01) f_y = self.f(y, psi) + from matplotlib import pyplot as plt plt.figure() plt.plot(y, f_y) plt.xlabel('y') From 41b8b7edd814f191fadaf96af3f3c9e7f7f182fb Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 20 Feb 2014 14:10:36 +0000 Subject: [PATCH 16/38] empty init file --- GPy/kern/_src/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 GPy/kern/_src/__init__.py diff --git a/GPy/kern/_src/__init__.py b/GPy/kern/_src/__init__.py new file mode 100644 index 00000000..e69de29b From 87ce8fea0b192510045f12fe221b796de2315a97 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 20 Feb 2014 14:24:41 +0000 Subject: [PATCH 17/38] weird Max related stuff is happening --- GPy/core/sparse_gp.py | 1 + GPy/kern/_src/add.py | 4 ++-- GPy/kern/_src/kern.py | 26 ++++++-------------------- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index c72de182..1ae72556 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -58,6 +58,7 @@ class SparseGP(GP): if not self.Z.is_fixed: if self.X_variance is None: self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) + print self.Z.gradient else: self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) print self.Z.gradient diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index acc69fd4..d5515d98 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -71,7 +71,7 @@ class Add(Kern): def psi0(self, Z, mu, S): - return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices))],0) + return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0) def psi1(self, Z, mu, S): return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) @@ -93,7 +93,7 @@ class Add(Kern): pass # rbf X bias #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): - elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear))): + elif isinstance(p1, Bias) and 
isinstance(p2, (RBF, Linear)): tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2]) psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index dd87200e..63c5b458 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -26,33 +26,15 @@ class Kern(Parameterized): raise NotImplementedError def Kdiag(self, Xa ,target): raise NotImplementedError - def _param_grad_helper(self, dL_dK,X, X2, target): - raise NotImplementedError def psi0(self,Z,mu,S,target): raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0, Z,mu,S,target): - raise NotImplementedError - def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): - raise NotImplementedError def psi1(self,Z,mu,S,target): raise NotImplementedError - def dpsi1_dtheta(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): - raise NotImplementedError def psi2(self,Z,mu,S,target): raise NotImplementedError - def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): + def gradients_X(self, dL_dK, X, X2): raise NotImplementedError - def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def gradients_X(self, dL_dK, X, X2, target): - raise NotImplementedError - def dKdiag_dX(self, dL_dK, X, target): + def gradients_X_diag(self, dL_dK, X): raise NotImplementedError def update_gradients_full(self, dL_dK, X): """Set the gradients of all parameters when doing full (N) inference.""" @@ -63,6 +45,10 @@ class Kern(Parameterized): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" raise NotImplementedError + def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + grad = self.gradients_X(dL_dKmm, Z) + grad += self.gradients_X(dL_dKnm.T, Z, X) + return grad def plot_ARD(self, *args): """If an ARD kernel is present, plot a bar representation using matplotlib From e03b8284666ab466fbea5726869087a5c6fb88fe Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 20 Feb 2014 14:34:14 +0000 Subject: [PATCH 18/38] foo --- GPy/core/sparse_gp.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 1ae72556..e619ad4d 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -53,20 +53,17 @@ class SparseGP(GP): self.add_parameter(self.Z, index=0) self.parameters_changed() - def _gradients_Z(self): + def update_gradients_Z(self): #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) if not self.Z.is_fixed: if self.X_variance is None: self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) - print self.Z.gradient else: self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) - print self.Z.gradient - print id(self.Z) def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self.Z.gradient = self._gradients_Z() + self.update_gradients_Z() def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ From 4fb4a38cd11f9f6532e2dd55223566994c928323 Mon 
Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 20 Feb 2014 17:11:44 +0000 Subject: [PATCH 19/38] spellings --- GPy/kern/_src/rbf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0508436f..78b9ffc4 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -169,6 +169,8 @@ class RBF(Kern): dZ = self._psi2[:, :, :, None] * (term1[None] + term2) grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) + grad += self.gradients_X(dL_dKmm, Z, None) + return grad def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): @@ -184,8 +186,6 @@ class RBF(Kern): return grad_mu, grad_S - - def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) @@ -203,7 +203,7 @@ class RBF(Kern): # PSI statistics # #---------------------------------------# - #---------------------------------------# + #---------------------------------------# # Precomputations # #---------------------------------------# From 8ea40a4a1354098cf4d720a585bff65c1d62c646 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 21 Feb 2014 08:03:44 +0000 Subject: [PATCH 20/38] rbf psi 2 --- GPy/core/parameterization/param.py | 2 +- GPy/kern/_src/rbf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 15b077a9..4c2cb469 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -85,7 +85,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable def gradient(self): if self._gradient_ is None: self._gradient_ = numpy.zeros(self._realshape_) - return self._gradient_ + return self._gradient_[self._current_slice_] @gradient.setter def gradient(self, val): self.gradient[:] = val diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0508436f..65b65120 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -177,7 +177,7 @@ class RBF(Kern): tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - + #psi2 tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) From 0c92fca31abf7a35d992502235bd571d26377904 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 21 Feb 2014 09:14:31 +0000 Subject: [PATCH 21/38] linear without caching, derivatives done --- GPy/core/parameterization/array_core.py | 4 +- GPy/core/parameterization/parameter_core.py | 12 ++-- GPy/core/sparse_gp.py | 26 ++++----- .../latent_function_inference/var_dtc.py | 2 - GPy/kern/_src/kern.py | 16 ++++-- GPy/kern/_src/linear.py | 56 ++++++++++++------- GPy/util/caching.py | 9 ++- 7 files changed, 71 insertions(+), 54 deletions(-) diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 7892e94a..b12ca59b 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -30,12 +30,12 @@ class ObservableArray(np.ndarray, Observable): def __new__(cls, input_array): obj = np.atleast_1d(input_array).view(cls) cls.__name__ = "ObservableArray\n " - obj._observers_ = {} + obj._observer_callables_ = {} return obj def 
__array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return - self._observers_ = getattr(obj, '_observers_', None) + self._observer_callables_ = getattr(obj, '_observer_callables_', None) def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 9a10f317..f8d83edd 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -11,14 +11,14 @@ def adjust_name_for_printing(name): return '' class Observable(object): - _observers_ = {} - def add_observer(self, observer, callble): - self._observers_[observer] = callble + _observer_callables_ = {} + def add_observer(self, callble): + self._observer_callables_.append(callble) #callble(self) - def remove_observer(self, observer): - del self._observers_[observer] + def remove_observer(self, callble): + del self._observer_callables_[callble] def _notify_observers(self): - [callble(self) for callble in self._observers_.itervalues()] + [callble(self) for callble in self._observer_callables_] class Pickleable(object): def _getstate(self): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index e619ad4d..5ab13251 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -44,26 +44,26 @@ class SparseGP(GP): self.Z = Param('inducing inputs', Z) self.num_inducing = Z.shape[0] - - if not (X_variance is None): - assert X_variance.shape == X.shape + self.X_variance = X_variance - + if self.has_uncertain_inputs(): + assert X_variance.shape == X.shape + GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name) self.add_parameter(self.Z, index=0) self.parameters_changed() - def update_gradients_Z(self): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) - if not self.Z.is_fixed: - if self.X_variance is None: - self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) - else: - self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) + def has_uncertain_inputs(self): + return not (self.X_variance is None) def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self.update_gradients_Z() + if self.has_uncertain_inputs(): + self.kern.update_gradients_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) + self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) + else: + self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict) + self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ @@ -97,12 +97,10 @@ class SparseGP(GP): """ return GP._getstate(self) + [self.Z, self.num_inducing, - self.has_uncertain_inputs, self.X_variance] def _setstate(self, state): self.X_variance = state.pop() - self.has_uncertain_inputs = state.pop() self.num_inducing = state.pop() self.Z = state.pop() GP._setstate(self, state) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 2f11cb08..24f4a5b6 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -70,10 
+70,8 @@ class VarDTC(object): if uncertain_inputs: grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2} - kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) else: grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1} - kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) #get sufficient things for posterior prediction #TODO: do we really want to do this in the loop? diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 63c5b458..6e9199dd 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -22,15 +22,15 @@ class Kern(Parameterized): super(Kern, self).__init__(name) self.input_dim = input_dim - def K(self, X, X2, target): + def K(self, X, X2): raise NotImplementedError - def Kdiag(self, Xa ,target): + def Kdiag(self, Xa): raise NotImplementedError - def psi0(self,Z,mu,S,target): + def psi0(self,Z,mu,S): raise NotImplementedError - def psi1(self,Z,mu,S,target): + def psi1(self,Z,mu,S): raise NotImplementedError - def psi2(self,Z,mu,S,target): + def psi2(self,Z,mu,S): raise NotImplementedError def gradients_X(self, dL_dK, X, X2): raise NotImplementedError @@ -49,7 +49,11 @@ class Kern(Parameterized): grad = self.gradients_X(dL_dKmm, Z) grad += self.gradients_X(dL_dKnm.T, Z, X) return grad - + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + raise NotImplementedError + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + raise NotImplementedError + def plot_ARD(self, *args): """If an ARD kernel is present, plot a bar representation using matplotlib diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 7f5d43d3..e8cf2e87 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -119,34 +119,55 @@ class Linear(Kern): def gradients_X_diag(self, dL_dKdiag, X): return 2.*self.variances*dL_dKdiag[:,None]*X - + #---------------------------------------# # PSI statistics # + # variational # #---------------------------------------# - def psi0(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += np.sum(self.variances * self.mu2_S, 1) + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + # Kmm + grad = self.gradients_X(dL_dKmm, Z, None) + #psi1 + grad += self.gradients_X(dL_dpsi1.T, Z, mu) + #psi2 + self._weave_dpsi2_dZ(dL_dpsi2, Z, mu, S, grad) + return grad + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + target_mu, target_S = np.zeros(mu.shape), np.zeros(mu.shape) + # psi0 + target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) + target_S += dL_dpsi0[:, None] * self.variances + # psi1 + target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) + # psi2 + self._weave_dpsi2_dmuS(dL_dpsi2, Z, mu, S, target_mu, target_S) + + return target_mu, target_S + + def psi0(self, Z, mu, S): + self._psi_computations(Z, mu, S) + return np.sum(self.variances * self.mu2_S, 1) + + def psi1(self, Z, mu, S): + """the variance, it does nothing""" + self._psi1 = self.K(mu, Z) + return self._psi1 + + def psi2(self, Z, mu, S): + self._psi_computations(Z, mu, S) + return self._psi2 + def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) target_S += dL_dpsi0[:, None] * self.variances - def psi1(self, Z, mu, S, target): - """the variance, it does nothing""" - self._psi1 = self.K(mu, Z, target) - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, 
target_S): """Do nothing for S, it does not affect psi1""" self._psi_computations(Z, mu, S) target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self.gradients_X(dL_dpsi1.T, Z, mu, target) - - def psi2(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi2 def psi2_new(self,Z,mu,S,target): tmp = np.zeros((mu.shape[0], Z.shape[0])) @@ -172,7 +193,7 @@ class Linear(Kern): Zs_sq = Zs[:,None,:]*Zs[None,:,:] target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1) - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): + def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): """Think N,num_inducing,num_inducing,input_dim """ self._psi_computations(Z, mu, S) AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] @@ -226,7 +247,7 @@ class Linear(Kern): type_converters=weave.converters.blitz,**weave_options) - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): + def _weave_dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): self._psi_computations(Z, mu, S) #psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :] #dummy_target = np.zeros_like(target) @@ -261,9 +282,6 @@ class Linear(Kern): type_converters=weave.converters.blitz,**weave_options) - - - #---------------------------------------# # Precomputations # #---------------------------------------# diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 1f10cd64..6bf9aab1 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -1,5 +1,4 @@ from ..core.parameterization.parameter_core import Observable -from ..core.parameterization.array_core import ParamList class Cacher(object): def __init__(self, operation, limit=5, reset_on_first=False): @@ -13,7 +12,7 @@ class Cacher(object): def __call__(self, *args): if self._reset_on_first: assert isinstance(args[0], Observable) - args[0].add_observer(args[0], self.reset) + args[0].add_observer(self.reset) cached_args = args else: cached_args = args[1:] @@ -30,21 +29,21 @@ class Cacher(object): else: if len(self.cached_inputs) == self.limit: args_ = self.cached_inputs.pop(0) - [a.remove_observer(self) for a in args_] + [a.remove_observer(self.on_cache_changed) for a in args_] self.inputs_changed.pop(0) self.cached_outputs.pop(0) self.cached_inputs.append(cached_args) self.cached_outputs.append(self.operation(*args)) self.inputs_changed.append(False) - [a.add_observer(self, self.on_cache_changed) for a in args] + [a.add_observer(self.on_cache_changed) for a in args] return self.cached_outputs[-1] def on_cache_changed(self, arg): self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] def reset(self, obj): - [[a.remove_observer(self) for a in args] for args in self.cached_inputs] + [[a.remove_observer(self.reset) for a in args] for args in self.cached_inputs] self.cached_inputs = [] self.cached_outputs = [] self.inputs_changed = [] From b19f9b9f33b671ff0a95e111f0fed6318d8d4663 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 21 Feb 2014 10:38:11 +0000 Subject: [PATCH 22/38] gradient operations and cachong --- GPy/core/parameterization/array_core.py | 6 +- GPy/core/parameterization/param.py | 11 +- GPy/core/parameterization/parameter_core.py | 248 ++++++++++---------- GPy/core/parameterization/parameterized.py | 8 +- GPy/kern/_src/kern.py | 4 +- GPy/kern/_src/linear.py | 2 +- GPy/models/bayesian_gplvm.py | 2 +- GPy/util/caching.py | 8 +- 8 files changed, 151 insertions(+), 
138 deletions(-) diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index b12ca59b..dffe2ed1 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -30,12 +30,16 @@ class ObservableArray(np.ndarray, Observable): def __new__(cls, input_array): obj = np.atleast_1d(input_array).view(cls) cls.__name__ = "ObservableArray\n " - obj._observer_callables_ = {} return obj + + def __init__(self, *a, **kw): + super(ObservableArray, self).__init__(*a, **kw) + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return self._observer_callables_ = getattr(obj, '_observer_callables_', None) + def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 4c2cb469..c2c70f5c 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable): +class Param(Constrainable, ObservableArray, Gradcheckable, Indexable): """ Parameter object for GPy models. @@ -57,8 +57,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable obj._gradient_ = None return obj - def __init__(self, name, input_array, default_constraint=None): - super(Param, self).__init__(name=name, default_constraint=default_constraint) + def __init__(self, name, input_array, default_constraint=None, *a, **kw): + super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw) def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments @@ -144,7 +144,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable return self.flat def _collect_gradient(self, target): - target[:] = self.gradient.flat + target += self.gradient.flat + + def _set_gradient(self, g): + self.gradient = g #=========================================================================== # Array operations -> done diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index f8d83edd..5e5e5432 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -7,18 +7,24 @@ __updated__ = '2013-12-16' def adjust_name_for_printing(name): if name is not None: - return name.replace(" ", "_").replace(".", "_").replace("-","").replace("+","").replace("!","").replace("*","").replace("/","") + return name.replace(" ", "_").replace(".", "_").replace("-", "").replace("+", "").replace("!", "").replace("*", "").replace("/", "") return '' class Observable(object): - _observer_callables_ = {} - def add_observer(self, callble): - self._observer_callables_.append(callble) - #callble(self) - def remove_observer(self, callble): - del self._observer_callables_[callble] + def __init__(self, *args, **kwargs): + from collections import defaultdict + self._observer_callables_ = defaultdict(list) + + def add_observer(self, observer, callble): + self._observer_callables_[observer].append(callble) + # callble(self) + + def remove_observer(self, observer, callble): + del self._observer_callables_[observer][callble] + def _notify_observers(self): - [callble(self) for callble in self._observer_callables_] + [[callble(self) for callble in callables] + for 
callables in self._observer_callables_.itervalues()] class Pickleable(object): def _getstate(self): @@ -47,10 +53,8 @@ class Pickleable(object): #=============================================================================== class Parentable(object): - def __init__(self, direct_parent=None, parent_index=None): - super(Parentable,self).__init__() - self._direct_parent_ = direct_parent - self._parent_index_ = parent_index + _direct_parent_ = None + _parent_index_ = None def has_parent(self): return self._direct_parent_ is not None @@ -73,9 +77,8 @@ class Parentable(object): self._direct_parent_._notify_parameters_changed() class Nameable(Parentable): - _name = None - def __init__(self, name, direct_parent=None, parent_index=None): - super(Nameable,self).__init__(direct_parent, parent_index) + def __init__(self, name, *a, **kw): + super(Nameable, self).__init__(*a, **kw) self._name = name or self.__class__.__name__ @property @@ -95,108 +98,10 @@ class Nameable(Parentable): return self._direct_parent_.hirarchy_name() + "." + adjust(self.name) return adjust(self.name) -class Parameterizable(Parentable): - def __init__(self, *args, **kwargs): - super(Parameterizable, self).__init__(*args, **kwargs) - from GPy.core.parameterization.array_core import ParamList - _parameters_ = ParamList() - self._added_names_ = set() - - def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): - if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) - else: adjust = lambda x: x - if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] - else: names = [adjust(x.name) for x in self._parameters_] - if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) - return names - - def _add_parameter_name(self, param): - pname = adjust_name_for_printing(param.name) - # and makes sure to not delete programmatically added parameters - if pname in self.__dict__: - if not (param is self.__dict__[pname]): - if pname in self._added_names_: - del self.__dict__[pname] - self._add_parameter_name(param) - else: - self.__dict__[pname] = param - self._added_names_.add(pname) - - def _remove_parameter_name(self, param=None, pname=None): - assert param is None or pname is None, "can only delete either param by name, or the name of a param" - pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) - if pname in self._added_names_: - del self.__dict__[pname] - self._added_names_.remove(pname) - self._connect_parameters() - - def _name_changed(self, param, old_name): - self._remove_parameter_name(None, old_name) - self._add_parameter_name(param) - - def _collect_gradient(self, target): - import itertools - [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - - def _get_params(self): - import numpy as np - # don't overwrite this anymore! - if not self.size: - return np.empty(shape=(0,), dtype=np.float64) - return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0]) - - def _set_params(self, params, update=True): - # don't overwrite this anymore! 
- import itertools - [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - self.parameters_changed() - - def copy(self): - """Returns a (deep) copy of the current model""" - import copy - from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView - from .array_core import ParamList - dc = dict() - for k, v in self.__dict__.iteritems(): - if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): - if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): - dc[k] = v.copy() - else: - dc[k] = copy.deepcopy(v) - if k == '_parameters_': - params = [p.copy() for p in v] - #dc = copy.deepcopy(self.__dict__) - dc['_direct_parent_'] = None - dc['_parent_index_'] = None - dc['_parameters_'] = ParamList() - s = self.__new__(self.__class__) - s.__dict__ = dc - #import ipdb;ipdb.set_trace() - for p in params: - s.add_parameter(p) - #dc._notify_parent_change() - return s - #return copy.deepcopy(self) - - def _notify_parameters_changed(self): - self.parameters_changed() - if self.has_parent(): - self._direct_parent_._notify_parameters_changed() - - def parameters_changed(self): - """ - This method gets called when parameters have changed. - Another way of listening to param changes is to - add self as a listener to the param, such that - updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer`` - """ - pass - class Gradcheckable(Parentable): - #=========================================================================== - # Gradchecking - #=========================================================================== + def __init__(self, *a, **kw): + super(Gradcheckable, self).__init__(*a, **kw) def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3): if self.has_parent(): return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance) @@ -204,6 +109,7 @@ class Gradcheckable(Parentable): def _checkgrad(self, param): raise NotImplementedError, "Need log likelihood to check gradient against" + class Indexable(object): def _raveled_index(self): raise NotImplementedError, "Need to be able to get the raveled Index" @@ -222,9 +128,10 @@ class Indexable(object): """ raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" 
-class Constrainable(Nameable, Indexable, Parentable): - def __init__(self, name, default_constraint=None): - super(Constrainable,self).__init__(name) + +class Constrainable(Nameable, Indexable): + def __init__(self, name, default_constraint=None, *a, **kw): + super(Constrainable, self).__init__(name=name, *a, **kw) self._default_constraint_ = default_constraint from index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() @@ -275,7 +182,7 @@ class Constrainable(Nameable, Indexable, Parentable): def _set_unfixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) - #rav_i = self._raveled_index_for(param)[index] + # rav_i = self._raveled_index_for(param)[index] self._fixes_[index] = UNFIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED @@ -305,7 +212,7 @@ class Constrainable(Nameable, Indexable, Parentable): """evaluate the prior""" if self.priors.size > 0: x = self._get_params() - return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) + return reduce(lambda a, b: a + b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) return 0. def _log_prior_gradients(self): @@ -409,7 +316,7 @@ class Constrainable(Nameable, Indexable, Parentable): if len(transforms) == 0: transforms = which.properties() import numpy as np - removed = np.empty((0, ), dtype=int) + removed = np.empty((0,), dtype=int) for t in transforms: unconstrained = which.remove(t, self._raveled_index()) removed = np.union1d(removed, unconstrained) @@ -419,5 +326,104 @@ class Constrainable(Nameable, Indexable, Parentable): return removed +class Parameterizable(Constrainable): + def __init__(self, *args, **kwargs): + super(Parameterizable, self).__init__(*args, **kwargs) + from GPy.core.parameterization.array_core import ParamList + _parameters_ = ParamList() + self._added_names_ = set() + + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self._parameters_] + if add_self: names = map(lambda x: adjust(self.name) + "." 
+ x, names) + return names + + def _add_parameter_name(self, param): + pname = adjust_name_for_printing(param.name) + # and makes sure to not delete programmatically added parameters + if pname in self.__dict__: + if not (param is self.__dict__[pname]): + if pname in self._added_names_: + del self.__dict__[pname] + self._add_parameter_name(param) + else: + self.__dict__[pname] = param + self._added_names_.add(pname) + + def _remove_parameter_name(self, param=None, pname=None): + assert param is None or pname is None, "can only delete either param by name, or the name of a param" + pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) + if pname in self._added_names_: + del self.__dict__[pname] + self._added_names_.remove(pname) + self._connect_parameters() + def _name_changed(self, param, old_name): + self._remove_parameter_name(None, old_name) + self._add_parameter_name(param) + + def _collect_gradient(self, target): + import itertools + [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] + + def _set_gradient(self, g): + import itertools + [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] + + def _get_params(self): + import numpy as np + # don't overwrite this anymore! + if not self.size: + return np.empty(shape=(0,), dtype=np.float64) + return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0]) + + def _set_params(self, params, update=True): + # don't overwrite this anymore! + import itertools + [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] + self.parameters_changed() + + def copy(self): + """Returns a (deep) copy of the current model""" + import copy + from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView + from .array_core import ParamList + dc = dict() + for k, v in self.__dict__.iteritems(): + if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): + if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): + dc[k] = v.copy() + else: + dc[k] = copy.deepcopy(v) + if k == '_parameters_': + params = [p.copy() for p in v] + # dc = copy.deepcopy(self.__dict__) + dc['_direct_parent_'] = None + dc['_parent_index_'] = None + dc['_parameters_'] = ParamList() + s = self.__new__(self.__class__) + s.__dict__ = dc + # import ipdb;ipdb.set_trace() + for p in params: + s.add_parameter(p) + # dc._notify_parent_change() + return s + # return copy.deepcopy(self) + + def _notify_parameters_changed(self): + self.parameters_changed() + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + + def parameters_changed(self): + """ + This method gets called when parameters have changed. + Another way of listening to param changes is to + add self as a listener to the param, such that + updates get passed through. 
See :py:function:``GPy.core.param.Observable.add_observer`` + """ + pass diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 12bf936c..177cc217 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -7,11 +7,11 @@ import cPickle import itertools from re import compile, _pattern_type from param import ParamConcatenation -from parameter_core import Constrainable, Pickleable, Observable, Parameterizable, Parentable, adjust_name_for_printing, Gradcheckable +from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable from transformations import __fixed__ from array_core import ParamList -class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parameterizable, Parentable): +class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable): """ Parameterized class @@ -53,8 +53,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parame If you want to operate on all parameters use m[''] to wildcard select all paramters and concatenate them. Printing m[''] will result in printing of all parameters in detail. """ - def __init__(self, name=None): - super(Parameterized, self).__init__(name=name) + def __init__(self, name=None, *a, **kw): + super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw) self._in_init_ = True self._parameters_ = ParamList() self.size = sum(p.size for p in self._parameters_) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 6e9199dd..9e98b97b 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -9,7 +9,7 @@ from ...core.parameterization.param import Param class Kern(Parameterized): - def __init__(self, input_dim, name): + def __init__(self, input_dim, name, *a, **kw): """ The base class for a kernel: a positive definite function which forms of a covariance function (kernel). @@ -19,7 +19,7 @@ class Kern(Parameterized): Do not instantiate. 
""" - super(Kern, self).__init__(name) + super(Kern, self).__init__(name=name, *a, **kw) self.input_dim = input_dim def K(self, X, X2): diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index e8cf2e87..7822a1f6 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -9,7 +9,7 @@ from ...util.linalg import tdot from ...util.misc import fast_array_equal, param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp -from ...util.caching import Cacher, cache_this +from ...util.caching import cache_this class Linear(Kern): """ diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 5fb1ca59..8aa378ce 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -36,7 +36,7 @@ class BayesianGPLVM(SparseGP, GPLVM): assert Z.shape[1] == X.shape[1] if kernel is None: - kernel = kern.rbf(input_dim) # + kern.white(input_dim) + kernel = kern.RBF(input_dim) # + kern.white(input_dim) if likelihood is None: likelihood = Gaussian() diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 6bf9aab1..55e546df 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -12,7 +12,7 @@ class Cacher(object): def __call__(self, *args): if self._reset_on_first: assert isinstance(args[0], Observable) - args[0].add_observer(self.reset) + args[0].add_observer(self, self.reset) cached_args = args else: cached_args = args[1:] @@ -29,21 +29,21 @@ class Cacher(object): else: if len(self.cached_inputs) == self.limit: args_ = self.cached_inputs.pop(0) - [a.remove_observer(self.on_cache_changed) for a in args_] + [a.remove_observer(self, self.on_cache_changed) for a in args_] self.inputs_changed.pop(0) self.cached_outputs.pop(0) self.cached_inputs.append(cached_args) self.cached_outputs.append(self.operation(*args)) self.inputs_changed.append(False) - [a.add_observer(self.on_cache_changed) for a in args] + [a.add_observer(self, self.on_cache_changed) for a in args] return self.cached_outputs[-1] def on_cache_changed(self, arg): self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] def reset(self, obj): - [[a.remove_observer(self.reset) for a in args] for args in self.cached_inputs] + [[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs] self.cached_inputs = [] self.cached_outputs = [] self.inputs_changed = [] From 8b2f39450bffa5f6701924f0161a496829c46b65 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 21 Feb 2014 10:38:47 +0000 Subject: [PATCH 23/38] workin gon linear kernel --- GPy/core/parameterization/array_core.py | 2 +- GPy/core/parameterization/param.py | 2 +- GPy/core/parameterization/parameter_core.py | 2 +- GPy/examples/dimensionality_reduction.py | 11 +- GPy/kern/_src/linear.py | 204 +++++++------------- GPy/kern/_src/rbf.py | 2 +- 6 files changed, 83 insertions(+), 140 deletions(-) diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index b12ca59b..642ea823 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -30,7 +30,7 @@ class ObservableArray(np.ndarray, Observable): def __new__(cls, input_array): obj = np.atleast_1d(input_array).view(cls) cls.__name__ = "ObservableArray\n " - obj._observer_callables_ = {} + obj._observer_callables_ = [] return obj def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments diff --git a/GPy/core/parameterization/param.py 
b/GPy/core/parameterization/param.py index 4c2cb469..44a27bdf 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -144,7 +144,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable return self.flat def _collect_gradient(self, target): - target[:] = self.gradient.flat + target += self.gradient.flat #=========================================================================== # Array operations -> done diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index f8d83edd..d9f7c616 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -11,7 +11,7 @@ def adjust_name_for_printing(name): return '' class Observable(object): - _observer_callables_ = {} + _observer_callables_ = [] def add_observer(self, callble): self._observer_callables_.append(callble) #callble(self) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 80e77c57..3b5dcbf0 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -21,10 +21,11 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan # generate GPLVM-like data X = _np.random.rand(num_inputs, input_dim) - lengthscales = _np.random.rand(input_dim) - k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) - #+ GPy.kern.white(input_dim, 0.01) - ) + #lengthscales = _np.random.rand(input_dim) + #k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) + ##+ GPy.kern.white(input_dim, 0.01) + #) + k = GPy.kern.Linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T @@ -48,7 +49,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan # randomly obstruct data with percentage p #=========================================================================== #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) - m.lengthscales = lengthscales + #m.lengthscales = lengthscales if plot: import matplotlib.pyplot as pb diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index e8cf2e87..1454e684 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -47,12 +47,13 @@ class Linear(Kern): self.variances = Param('variances', variances, Logexp()) self.add_parameter(self.variances) - self.variances.add_observer(self, self._on_changed) + self.variances.add_observer(self._on_changed) def _on_changed(self, obj): + #TODO: move this to base class? isnt it jst for the caching? 
self._notify_observers() - @cache_this(limit=3, reset_on_self=True) + #@cache_this(limit=3, reset_on_self=True) def K(self, X, X2=None): if self.ARD: if X2 is None: @@ -63,7 +64,7 @@ class Linear(Kern): else: return self._dot_product(X, X2) * self.variances - @cache_this(limit=3, reset_on_self=False) + #@cache_this(limit=3, reset_on_self=False) def _dot_product(self, X, X2=None): if X2 is None: return tdot(X) @@ -73,43 +74,33 @@ class Linear(Kern): def Kdiag(self, X): return np.sum(self.variances * np.square(X), -1) - def update_gradients_full(self, dL_dK, X): - self.variances.gradient = np.zeros(self.variances.size) - self._param_grad_helper(dL_dK, X, None, self.variances.gradient) - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + target = np.zeros(self.size) + self.update_gradients_diag(dL_dKdiag, X) + self._collect_gradient(target) + self.update_gradients_full(dL_dKnm, X, Z) + self._collect_gradient(target) + self.update_gradients_full(dL_dKmm, Z, None) + self._collect_gradient(target) + return target + + def update_gradients_full(self, dL_dK, X): + if self.ARD: + if X2 is None: + self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)]) + else: + product = X[:, None, :] * X2[None, :, :] + self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0) + else: + self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK) + + def update_gradients_diag(self, dL_dKdiag, X): tmp = dL_dKdiag[:, None] * X ** 2 if self.ARD: self.variances.gradient = tmp.sum(0) else: self.variances.gradient = np.atleast_1d(tmp.sum()) - self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - self._psi_computations(Z, mu, S) - # psi0: - tmp = dL_dpsi0[:, None] * self.mu2_S - if self.ARD: self.variances.gradient[:] = tmp.sum(0) - else: self.variances.gradient[:] = tmp.sum() - #psi1 - self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient) - #psi2 - tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :]) - if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) - else: self.variances.gradient += tmp.sum() - #from Kmm - self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - - def _param_grad_helper(self, dL_dK, X, X2, target): - if self.ARD: - if X2 is None: - [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)] - else: - product = X[:, None, :] * X2[None, :, :] - target += (dL_dK[:, :, None] * product).sum(0).sum(0) - else: - target += np.sum(self._dot_product(X, X2) * dL_dK) def gradients_X(self, dL_dK, X, X2=None): if X2 is None: @@ -119,12 +110,37 @@ class Linear(Kern): def gradients_X_diag(self, dL_dKdiag, X): return 2.*self.variances*dL_dKdiag[:,None]*X - + #---------------------------------------# # PSI statistics # # variational # #---------------------------------------# + def psi0(self, Z, mu, S): + return np.sum(self.variances * self._mu2S(mu, S), 1) + + def psi1(self, Z, mu, S): + return self.K(mu, Z) #the variance, it does nothing + + def psi2(self, Z, mu, S): + ZA = Z * self.variances + ZAinner = self._ZAinner(mu, S, Z) + return np.dot(ZAinner, ZA.T) + + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + # psi0: + tmp = dL_dpsi0[:, None] * self._mu2S(mu, S) + if self.ARD: 
self.variances.gradient[:] = tmp.sum(0) + else: self.variances.gradient[:] = tmp.sum() + #psi1 + self.variances.gradient += self._param_grad_helper(dL_dpsi1, mu, Z) + #psi2 + tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(mu, S, Z)[:, :, None, :] * (2. * Z)[None, None, :, :]) + if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) + else: self.variances.gradient += tmp.sum() + #from Kmm + self.variances.gradient += self._param_grad_helper(dL_dKmm, Z, None) + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): # Kmm grad = self.gradients_X(dL_dKmm, Z, None) @@ -135,76 +151,30 @@ class Linear(Kern): return grad def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - target_mu, target_S = np.zeros(mu.shape), np.zeros(mu.shape) + grad_mu, grad_S = np.zeros(mu.shape), np.zeros(mu.shape) # psi0 - target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) - target_S += dL_dpsi0[:, None] * self.variances + grad_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) + grad_S += dL_dpsi0[:, None] * self.variances # psi1 - target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) + grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) # psi2 - self._weave_dpsi2_dmuS(dL_dpsi2, Z, mu, S, target_mu, target_S) - - return target_mu, target_S - - def psi0(self, Z, mu, S): - self._psi_computations(Z, mu, S) - return np.sum(self.variances * self.mu2_S, 1) + self._weave_dpsi2_dmuS(dL_dpsi2, Z, mu, S, grad_mu, grad_S) - def psi1(self, Z, mu, S): - """the variance, it does nothing""" - self._psi1 = self.K(mu, Z) - return self._psi1 + return grad_mu, grad_S - def psi2(self, Z, mu, S): - self._psi_computations(Z, mu, S) - return self._psi2 - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) - target_S += dL_dpsi0[:, None] * self.variances + #--------------------------------------------------# + # Helpers for psi statistics # + #--------------------------------------------------# - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - """Do nothing for S, it does not affect psi1""" - self._psi_computations(Z, mu, S) - target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) - - - def psi2_new(self,Z,mu,S,target): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - self.K(mu,Z,tmp) - target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1) - - def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - self.K(mu,Z,tmp) - self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target) - result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0) - if self.ARD: - target += result.sum(0).sum(0).sum(0) - else: - target += result.sum() - - def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - self.K(mu,Z,tmp) - self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu) - - Zs = Z*self.variances - Zs_sq = Zs[:,None,:]*Zs[None,:,:] - target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1) def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] + # Think N,num_inducing,num_inducing,input_dim + ZA = Z * self.variances + AZZA = 
ZA.T[:, None, :, None] * ZA[None, :, None, :] AZZA = AZZA + AZZA.swapaxes(1, 2) AZZA_2 = AZZA/2. - #muAZZA = np.tensordot(mu,AZZA,(-1,0)) - #target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S) - #target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1) - #target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1) - #Using weave, we can exploiut the symmetry of this problem: + #Using weave, we can exploit the symmetry of this problem: code = """ int n, m, mm,q,qq; double factor,tmp; @@ -248,12 +218,8 @@ class Linear(Kern): def _weave_dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - #psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :] - #dummy_target = np.zeros_like(target) - #dummy_target += psi2_dZ.sum(0).sum(0) - AZA = self.variances*self.ZAinner + AZA = self.variances*self._ZAinner(mu, S, Z) code=""" int n,m,mm,q; #pragma omp parallel for private(n,mm,q) @@ -282,38 +248,14 @@ class Linear(Kern): type_converters=weave.converters.blitz,**weave_options) - #---------------------------------------# - # Precomputations # - #---------------------------------------# + def _mu2S(self, mu, S): + return np.square(mu) + S - #def _K_computations(self, X, X2): - #if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): - #self._X = X.copy() - #if X2 is None: - ##self._dot_product = tdot(param_to_array(X)) - #self._X2 = None - #else: - #self._X2 = X2.copy() - #self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) + def _ZAinner(self, mu, S, Z): + ZA = Z*self.variances + inner = (mu[:, None, :] * mu[:, :, None]) + diag_indices = np.diag_indices(mu.shape[1], 2) + inner[:, diag_indices[0], diag_indices[1]] += S + + return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! - def _psi_computations(self, Z, mu, S): - # here are the "statistics" for psi1 and psi2 - Zv_changed = not (fast_array_equal(Z, self._Z) and fast_array_equal(self.variances, self._variances)) - muS_changed = not (fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S)) - if Zv_changed: - # Z has changed, compute Z specific stuff - # self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim -# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F') -# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])] - self.ZA = Z * self.variances - self._Z = Z.copy() - self._variances = self.variances.copy() - if muS_changed: - self.mu2_S = np.square(mu) + S - self.inner = (mu[:, None, :] * mu[:, :, None]) - diag_indices = np.diag_indices(mu.shape[1], 2) - self.inner[:, diag_indices[0], diag_indices[1]] += S - self._mu, self._S = mu.copy(), S.copy() - if Zv_changed or muS_changed: - self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! 
- self._psi2 = np.dot(self.ZAinner, self.ZA.T) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 4fc2b591..807cac32 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -54,7 +54,7 @@ class RBF(Kern): self.variance = Param('variance', variance, Logexp()) self.lengthscale = Param('lengthscale', lengthscale, Logexp()) - self.lengthscale.add_observer(self, self.update_lengthscale) + self.lengthscale.add_observer(self.update_lengthscale) self.update_lengthscale(self.lengthscale) self.add_parameters(self.variance, self.lengthscale) From 0dc9a32ba3d1b7034978930af228adb63c04d72b Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 21 Feb 2014 11:25:33 +0000 Subject: [PATCH 24/38] non-working grads in linear --- GPy/core/parameterization/param.py | 2 +- GPy/examples/dimensionality_reduction.py | 2 +- GPy/kern/_src/linear.py | 21 ++++++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 6fc58bff..ccbc76d5 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -147,7 +147,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable): target += self.gradient.flat def _set_gradient(self, g): - self.gradient = g + self.gradient = g.reshape(self._realshape_) #=========================================================================== # Array operations -> done diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 3b5dcbf0..c8e79e6c 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -25,7 +25,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan #k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) ##+ GPy.kern.white(input_dim, 0.01) #) - k = GPy.kern.Linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 2e568d81..049b26f1 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -47,7 +47,7 @@ class Linear(Kern): self.variances = Param('variances', variances, Logexp()) self.add_parameter(self.variances) - self.variances.add_observer(self._on_changed) + self.variances.add_observer(self, self._on_changed) def _on_changed(self, obj): #TODO: move this to base class? isnt it jst for the caching? 
@@ -82,9 +82,9 @@ class Linear(Kern): self._collect_gradient(target) self.update_gradients_full(dL_dKmm, Z, None) self._collect_gradient(target) - return target + self._set_gradient(target) - def update_gradients_full(self, dL_dK, X): + def update_gradients_full(self, dL_dK, X, X2=None): if self.ARD: if X2 is None: self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)]) @@ -130,16 +130,19 @@ class Linear(Kern): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): # psi0: tmp = dL_dpsi0[:, None] * self._mu2S(mu, S) - if self.ARD: self.variances.gradient[:] = tmp.sum(0) - else: self.variances.gradient[:] = tmp.sum() + if self.ARD: grad = tmp.sum(0) + else: grad = np.atleast_1d(tmp.sum()) #psi1 - self.variances.gradient += self._param_grad_helper(dL_dpsi1, mu, Z) + self.update_gradients_full(dL_dpsi1, mu, Z) + grad += self.variances.gradient #psi2 tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(mu, S, Z)[:, :, None, :] * (2. * Z)[None, None, :, :]) - if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) - else: self.variances.gradient += tmp.sum() + if self.ARD: grad += tmp.sum(0).sum(0).sum(0) + else: grad += tmp.sum() #from Kmm - self.variances.gradient += self._param_grad_helper(dL_dKmm, Z, None) + self.update_gradients_full(dL_dpsi1, mu, Z) + grad += self.variances.gradient + self._set_gradient(grad) def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): # Kmm From 365bc4214010bbce65c25c3023074903859f0d61 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 21 Feb 2014 12:25:36 +0000 Subject: [PATCH 25/38] added Brownian motion --- GPy/kern/__init__.py | 4 +- GPy/kern/_src/Brownian.py | 65 ------------------------ GPy/kern/_src/brownian.py | 50 ++++++++++++++++++ GPy/kern/_src/kern.py | 11 +++- GPy/kern/_src/linear.py | 10 ---- GPy/plotting/matplot_dep/models_plots.py | 8 +-- 6 files changed, 65 insertions(+), 83 deletions(-) delete mode 100644 GPy/kern/_src/Brownian.py create mode 100644 GPy/kern/_src/brownian.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 630d74da..16c13066 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,8 +2,8 @@ from _src.rbf import RBF from _src.white import White from _src.kern import Kern from _src.linear import Linear -#import bias -#import Brownian +from _src.brownian import Brownian +#from _src.bias import Bias #import coregionalize #import exponential #import eq_ode1 diff --git a/GPy/kern/_src/Brownian.py b/GPy/kern/_src/Brownian.py deleted file mode 100644 index 488e9b7a..00000000 --- a/GPy/kern/_src/Brownian.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np - -def theta(x): - """Heavisdie step function""" - return np.where(x>=0.,1.,0.) - -class Brownian(Kernpart): - """ - Brownian Motion kernel. 
- - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: - :type variance: float - """ - def __init__(self,input_dim,variance=1.): - self.input_dim = input_dim - assert self.input_dim==1, "Brownian motion in 1D only" - self.num_params = 1 - self.name = 'Brownian' - self._set_params(np.array([variance]).flatten()) - - def _get_params(self): - return self.variance - - def _set_params(self,x): - assert x.shape==(1,) - self.variance = x - - def _get_param_names(self): - return ['variance'] - - def K(self,X,X2,target): - if X2 is None: - X2 = X - target += self.variance*np.fmin(X,X2.T) - - def Kdiag(self,X,target): - target += self.variance*X.flatten() - - def _param_grad_helper(self,dL_dK,X,X2,target): - if X2 is None: - X2 = X - target += np.sum(np.fmin(X,X2.T)*dL_dK) - - def dKdiag_dtheta(self,dL_dKdiag,X,target): - target += np.dot(X.flatten(), dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - raise NotImplementedError, "TODO" - #target += self.variance - #target -= self.variance*theta(X-X2.T) - #if X.shape==X2.shape: - #if np.all(X==X2): - #np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0]) - - - def dKdiag_dX(self,dL_dKdiag,X,target): - target += self.variance*dL_dKdiag[:,None] - diff --git a/GPy/kern/_src/brownian.py b/GPy/kern/_src/brownian.py new file mode 100644 index 00000000..81b57a25 --- /dev/null +++ b/GPy/kern/_src/brownian.py @@ -0,0 +1,50 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from kern import Kern +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +import numpy as np + +class Brownian(Kern): + """ + Brownian motion in 1D only. + + Negative times are treated as a separate (backwards!) Brownian motion. + + :param input_dim: the number of input dimensions + :type input_dim: int + :param variance: + :type variance: float + """ + def __init__(self, input_dim=1, variance=1., name='Brownian'): + assert input_dim==1, "Brownian motion in 1D only" + super(Brownian, self).__init__(input_dim, name) + + self.variance = Param('variance', variance, Logexp()) + self.add_parameters(self.variance) + + def K(self,X,X2=None): + if X2 is None: + X2 = X + return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.) 
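
Aside (illustrative sketch, not part of the patch): the K method above encodes cov(B_s, B_t) = variance * min(|s|, |t|) when s and t share a sign, and zero otherwise, so negative times form an independent backwards motion. A standalone NumPy version with a hypothetical brownian_K name, plus a quick symmetry and positive-semi-definiteness check:

import numpy as np

def brownian_K(X, X2=None, variance=1.):
    # X, X2: column vectors of times; zero covariance across the sign change
    if X2 is None:
        X2 = X
    same_sign = np.sign(X) == np.sign(X2.T)
    return variance * np.where(same_sign, np.fmin(np.abs(X), np.abs(X2.T)), 0.)

times = np.linspace(-1., 1., 9)[:, None]
K = brownian_K(times, variance=2.)
assert np.allclose(K, K.T)                        # symmetric
assert np.all(np.linalg.eigvalsh(K) >= -1e-10)    # positive semi-definite
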
+ + def Kdiag(self,X): + return self.variance*np.abs(X.flatten()) + + def update_gradients_full(self, dL_dK, X, X2=None): + if X2 is None: + X2 = X + self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)) + + #def update_gradients_diag(self, dL_dKdiag, X): + #self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag) + + #def gradients_X(self, dL_dK, X, X2=None): + #if X2 is None: + #return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None] + #else: + #return np.sum(np.where(np.logical_and(np.abs(X) Date: Fri, 21 Feb 2014 12:29:28 +0000 Subject: [PATCH 26/38] linear and rbf fix for variational gradients in Z --- GPy/core/sparse_gp.py | 1 + .../latent_function_inference/var_dtc.py | 194 +++++++++--------- GPy/kern/_src/linear.py | 10 +- GPy/kern/_src/rbf.py | 4 +- GPy/plotting/matplot_dep/models_plots.py | 6 +- 5 files changed, 102 insertions(+), 113 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 5ab13251..61a664fe 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -58,6 +58,7 @@ class SparseGP(GP): def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) + self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood')) if self.has_uncertain_inputs(): self.kern.update_gradients_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 24f4a5b6..a81bb711 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -60,18 +60,88 @@ class VarDTC(object): trYYT = self.get_trYYT(Y) # do the inference: - dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \ - psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = _do_inference_on( - kern, X, X_variance, Z, likelihood, - uncertain_inputs, output_dim, - beta, VVT_factor, trYYT) + het_noise = beta.size < 1 + num_inducing = Z.shape[0] + num_data = X.shape[0] + # kernel computations, using BGPLVM notation + Kmm = kern.K(Z) + psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + + Lm = jitchol(Kmm) + + # The rather complex computations of A + if uncertain_inputs: + if het_noise: + psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) + else: + psi2_beta = psi2.sum(0) * beta + #if 0: + # evals, evecs = linalg.eigh(psi2_beta) + # clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable + # if not np.array_equal(evals, clipped_evals): + # pass # print evals + # tmp = evecs * np.sqrt(clipped_evals) + # tmp = tmp.T + # no backsubstitution because of bound explosion on tr(A) if not... 
+ LmInv = dtrtri(Lm) + A = LmInv.dot(psi2_beta.dot(LmInv.T)) + else: + if het_noise: + tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) + else: + tmp = psi1 * (np.sqrt(beta)) + tmp, _ = dtrtrs(Lm, tmp.T, lower=1) + A = tdot(tmp) #print A.sum() - likelihood.update_gradients(partial_for_likelihood) + # factor B + B = np.eye(num_inducing) + A + LB = jitchol(B) + psi1Vf = np.dot(psi1.T, VVT_factor) + # back substutue C into psi1Vf + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + + # data fit and derivative of L w.r.t. Kmm + delit = tdot(_LBi_Lmi_psi1Vf) + data_fit = np.trace(delit) + DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) + delit = -0.5 * DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + # Compute dL_dKmm + dL_dKmm = backsub_both_sides(Lm, delit) + + # derivatives of L w.r.t. psi + dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, + VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, + psi1, het_noise, uncertain_inputs) + + # log marginal likelihood + log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, + psi0, A, LB, trYYT, data_fit) + + #put the gradients in the right places + partial_for_likelihood = _compute_partial_for_likelihood(likelihood, + het_noise, uncertain_inputs, LB, + _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, + psi0, psi1, beta, + data_fit, num_data, output_dim, trYYT) + + #likelihood.update_gradients(partial_for_likelihood) if uncertain_inputs: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2} + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dpsi0':dL_dpsi0, + 'dL_dpsi1':dL_dpsi1, + 'dL_dpsi2':dL_dpsi2, + 'partial_for_likelihood':partial_for_likelihood} else: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1} + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dKdiag':dL_dpsi0, + 'dL_dKnm':dL_dpsi1, + 'partial_for_likelihood':partial_for_likelihood} #get sufficient things for posterior prediction #TODO: do we really want to do this in the loop? @@ -184,9 +254,10 @@ class VarDTCMissingData(object): LB = jitchol(B) psi1Vf = psi1.T.dot(VVT_factor) - _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) - - #LB_all[ind, :,:] = LB + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) # data fit and derivative of L w.r.t. 
Kmm delit = tdot(_LBi_Lmi_psi1Vf) @@ -233,16 +304,19 @@ class VarDTCMissingData(object): from ...util import diag diag.add(Bi, 1) woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] - - # gradients: - likelihood.update_gradients(partial_for_likelihood) + # gradients: if uncertain_inputs: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all} - kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dpsi0':dL_dpsi0, + 'dL_dpsi1':dL_dpsi1, + 'dL_dpsi2':dL_dpsi2, + 'partial_for_likelihood':partial_for_likelihood} else: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all} - kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dKdiag':dL_dpsi0, + 'dL_dKnm':dL_dpsi1, + 'partial_for_likelihood':partial_for_likelihood} #get sufficient things for posterior prediction #TODO: do we really want to do this in the loop? @@ -266,33 +340,6 @@ class VarDTCMissingData(object): return post, log_marginal, grad_dict -def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm): -# The rather complex computations of A - if uncertain_inputs: - if het_noise: - psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) - else: - psi2_beta = psi2.sum(0) * beta - #if 0: - # evals, evecs = linalg.eigh(psi2_beta) - # clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable - # if not np.array_equal(evals, clipped_evals): - # pass # print evals - # tmp = evecs * np.sqrt(clipped_evals) - # tmp = tmp.T - # no backsubstitution because of bound explosion on tr(A) if not... - LmInv = dtrtri(Lm) - A = LmInv.dot(psi2_beta.dot(LmInv.T)) - else: - if het_noise: - tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) - else: - tmp = psi1 * (np.sqrt(beta)) - tmp, _ = dtrtrs(Lm, tmp.T, lower=1) - A = tdot(tmp) #print A.sum() - return A - - def _compute_psi(kern, X, X_variance, Z, uncertain_inputs): if uncertain_inputs: psi0 = kern.psi0(Z, X, X_variance) @@ -304,22 +351,6 @@ def _compute_psi(kern, X, X_variance, Z, uncertain_inputs): psi2 = None return psi0, psi1, psi2 -def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs): - Kmm = kern.K(Z) - psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) - return Kmm, psi0, psi1, psi2 - -def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf): - # Compute dL_dKmm - delit = tdot(_LBi_Lmi_psi1Vf) - data_fit = np.trace(delit) - DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) - delit = -0.5 * DBi_plus_BiPBi - delit += -0.5 * B * output_dim - delit += output_dim * np.eye(num_inducing) - dL_dKmm = backsub_both_sides(Lm, delit) - return DBi_plus_BiPBi, data_fit, dL_dKmm - def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten() dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) @@ -343,15 +374,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C return dL_dpsi0, dL_dpsi1, dL_dpsi2 -def _compute_psi1Vf(Lm, LB, psi1Vf): - # back substutue C into psi1Vf - tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) - _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) - tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) - Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - return _LBi_Lmi_psi1Vf, Cpsi1Vf - - def 
_compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT): # the partial derivative vector for the likelihood if likelihood.size == 0: @@ -393,35 +415,3 @@ def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het lik_4 = 0.5 * data_fit log_marginal = lik_1 + lik_2 + lik_3 + lik_4 return log_marginal - -def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT): - het_noise = beta.size < 1 - num_inducing = Z.shape[0] - num_data = X.shape[0] - # kernel computations, using BGPLVM notation - Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs) - #factor Kmm # TODO: cache? - Lm = jitchol(Kmm) - A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm) - # factor B - B = np.eye(num_inducing) + A - LB = jitchol(B) - psi1Vf = np.dot(psi1.T, VVT_factor) - _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) - # data fit and derivative of L w.r.t. Kmm - DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim, - Lm, B, LB, _LBi_Lmi_psi1Vf) - # derivatives of L w.r.t. psi - dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, - VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, - psi1, het_noise, uncertain_inputs) - # log marginal likelihood - log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, - psi0, A, LB, trYYT, data_fit) - #put the gradients in the right places - partial_for_likelihood = _compute_partial_for_likelihood(likelihood, - het_noise, uncertain_inputs, LB, - _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, - psi0, psi1, beta, - data_fit, num_data, output_dim, trYYT) - return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 049b26f1..312440b8 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -140,9 +140,8 @@ class Linear(Kern): if self.ARD: grad += tmp.sum(0).sum(0).sum(0) else: grad += tmp.sum() #from Kmm - self.update_gradients_full(dL_dpsi1, mu, Z) - grad += self.variances.gradient - self._set_gradient(grad) + self.update_gradients_full(dL_dKmm, Z, None) + self.variances.gradient += grad def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): # Kmm @@ -221,7 +220,6 @@ class Linear(Kern): def _weave_dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - AZA = self.variances*self._ZAinner(mu, S, Z) code=""" int n,m,mm,q; @@ -230,7 +228,7 @@ class Linear(Kern): for(q=0;q Date: Fri, 21 Feb 2014 17:06:06 +0000 Subject: [PATCH 27/38] removed materns --- GPy/kern/__init__.py | 1 + GPy/kern/_src/Matern32.py | 139 ---------------------- GPy/kern/_src/Matern52.py | 145 ----------------------- GPy/kern/_src/exponential.py | 129 -------------------- GPy/kern/_src/stationary.py | 221 +++++++++++++++++++++++++++++++++++ GPy/util/__init__.py | 1 + GPy/util/diag.py | 40 ++++--- 7 files changed, 246 insertions(+), 430 deletions(-) delete mode 100644 GPy/kern/_src/Matern32.py delete mode 100644 GPy/kern/_src/Matern52.py delete mode 100644 GPy/kern/_src/exponential.py create mode 100644 GPy/kern/_src/stationary.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 16c13066..e5dc6d35 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -3,6 +3,7 @@ from _src.white import White from _src.kern import 
Kern from _src.linear import Linear from _src.brownian import Brownian +from _src.stationary import Exponential, Matern32, Matern52, ExpQuad #from _src.bias import Bias #import coregionalize #import exponential diff --git a/GPy/kern/_src/Matern32.py b/GPy/kern/_src/Matern32.py deleted file mode 100644 index 08fa452c..00000000 --- a/GPy/kern/_src/Matern32.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np -from scipy import integrate - -class Matern32(Kernpart): - """ - Matern 3/2 kernel: - - .. math:: - - k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. - :type ARD: Boolean - :rtype: kernel object - - """ - - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if ARD == False: - self.num_params = 2 - self.name = 'Mat32' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - self.name = 'Mat32' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - self._set_params(np.hstack((variance, lengthscale.flatten()))) - - def _get_params(self): - """return the value of the parameters.""" - return np.hstack((self.variance, self.lengthscale)) - - def _set_params(self, x): - """set the value of the parameters.""" - assert x.size == self.num_params - self.variance = x[0] - self.lengthscale = x[1:] - - def _get_param_names(self): - """return parameter names.""" - if self.num_params == 2: - return ['variance', 'lengthscale'] - else: - return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - - def K(self, X, X2, target): - """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target) - - def Kdiag(self, X, target): - """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target, self.variance, target) - - def _param_grad_helper(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist) - invdist = 1. 
/ np.where(dist != 0., dist, np.inf) - dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3 - # dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar * dL_dK) - if self.ARD == True: - dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis] - # dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] - target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0) - else: - dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist - # dl = self.variance*dvar*dist2M.sum(-1)*invdist - target[1] += np.sum(dl * dL_dK) - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - target[0] += np.sum(dL_dKdiag) - - def gradients_X(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to X.""" - if X2 is None: - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None] - ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - - else: - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] - ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X, target): - pass - - def Gram_matrix(self, F, F1, F2, lower, upper): - """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. - - :param F: vector of functions - :type F: np.array - :param F1: vector of derivatives of F - :type F1: np.array - :param F2: vector of second derivatives of F - :type F2: np.array - :param lower,upper: boundaries of the input domain - :type lower,upper: floats - """ - assert self.input_dim == 1 - def L(x, i): - return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x)) - n = F.shape[0] - G = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] - Flower = np.array([f(lower) for f in F])[:, None] - F1lower = np.array([f(lower) for f in F1])[:, None] - # print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" - # return(G) - return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) diff --git a/GPy/kern/_src/Matern52.py b/GPy/kern/_src/Matern52.py deleted file mode 100644 index 7d36254c..00000000 --- a/GPy/kern/_src/Matern52.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np -import hashlib -from scipy import integrate - -class Matern52(Kernpart): - """ - Matern 5/2 kernel: - - .. 
math:: - - k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. - :type ARD: Boolean - :rtype: kernel object - - """ - def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if ARD == False: - self.num_params = 2 - self.name = 'Mat52' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - self.name = 'Mat52' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - self._set_params(np.hstack((variance,lengthscale.flatten()))) - - def _get_params(self): - """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscale)) - - def _set_params(self,x): - """set the value of the parameters.""" - assert x.size == self.num_params - self.variance = x[0] - self.lengthscale = x[1:] - - def _get_param_names(self): - """return parameter names.""" - if self.num_params == 2: - return ['variance','lengthscale'] - else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] - - def K(self,X,X2,target): - """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target) - - def Kdiag(self,X,target): - """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target,self.variance,target) - - def _param_grad_helper(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 - dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar*dL_dK) - if self.ARD: - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) - else: - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist - #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist - target[1] += np.sum(dl*dL_dK) - - def 
dKdiag_dtheta(self,dL_dKdiag,X,target): - """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - target[0] += np.sum(dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to X.""" - if X2 is None: - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None] - ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - else: - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) - target += np.sum(gradients_X*dL_dK.T[:,:,None],0) - - def dKdiag_dX(self,dL_dKdiag,X,target): - pass - - def Gram_matrix(self,F,F1,F2,F3,lower,upper): - """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. - - :param F: vector of functions - :type F: np.array - :param F1: vector of derivatives of F - :type F1: np.array - :param F2: vector of second derivatives of F - :type F2: np.array - :param F3: vector of third derivatives of F - :type F3: np.array - :param lower,upper: boundaries of the input domain - :type lower,upper: floats - """ - assert self.input_dim == 1 - def L(x,i): - return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x)) - n = F.shape[0] - G = np.zeros((n,n)) - for i in range(n): - for j in range(i,n): - G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] - G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5)) - Flower = np.array([f(lower) for f in F])[:,None] - F1lower = np.array([f(lower) for f in F1])[:,None] - F2lower = np.array([f(lower) for f in F2])[:,None] - orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T) - orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) - return(1./self.variance* (G_coef*G + orig + orig2)) - - - diff --git a/GPy/kern/_src/exponential.py b/GPy/kern/_src/exponential.py deleted file mode 100644 index 372d4d9b..00000000 --- a/GPy/kern/_src/exponential.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np -from scipy import integrate - -class Exponential(Kernpart): - """ - Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2) - - .. math:: - - k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
- :type ARD: Boolean - :param name: the name of the kernel - :rtype: kernel object - - """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='exp'): - self.input_dim = input_dim - self.ARD = ARD - self.variance = variance - self.name = name - if ARD == False: - self.num_params = 2 - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - #self._set_params(np.hstack((variance, lengthscale.flatten()))) - self.set_as_parameter('variance', 'lengthscale') - -# def _get_params(self): -# """return the value of the parameters.""" -# return np.hstack((self.variance, self.lengthscale)) -# -# def _set_params(self, x): -# """set the value of the parameters.""" -# assert x.size == self.num_params -# self.variance = x[0] -# self.lengthscale = x[1:] -# -# def _get_param_names(self): -# """return parameter names.""" -# if self.num_params == 2: -# return ['variance', 'lengthscale'] -# else: -# return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - - def K(self, X, X2, target): - """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - np.add(self.variance * np.exp(-dist), target, target) - - def Kdiag(self, X, target): - """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target, self.variance, target) - - def _param_grad_helper(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - invdist = 1. / np.where(dist != 0., dist, np.inf) - dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3 - dvar = np.exp(-dist) - target[0] += np.sum(dvar * dL_dK) - if self.ARD == True: - dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None] - target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0) - else: - dl = self.variance * dvar * dist2M.sum(-1) * invdist - target[1] += np.sum(dl * dL_dK) - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - # NB: derivative of diagonal elements wrt lengthscale is 0 - target[0] += np.sum(dL_dKdiag) - - def gradients_X(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to X.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] - ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X, target): - pass - - def Gram_matrix(self, F, F1, lower, upper): - """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. 
- - :param F: vector of functions - :type F: np.array - :param F1: vector of derivatives of F - :type F1: np.array - :param lower,upper: boundaries of the input domain - :type lower,upper: floats - """ - assert self.input_dim == 1 - def L(x, i): - return(1. / self.lengthscale * F[i](x) + F1[i](x)) - n = F.shape[0] - G = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] - Flower = np.array([f(lower) for f in F])[:, None] - return(self.lengthscale / 2. / self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T)) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py new file mode 100644 index 00000000..aaa534ac --- /dev/null +++ b/GPy/kern/_src/stationary.py @@ -0,0 +1,221 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +from kern import Kern +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +from ... import util +import numpy as np +from scipy import integrate + +class Stationary(Kern): + def __init__(self, input_dim, variance, lengthscale, ARD, name): + super(Stationary, self).__init__(input_dim, name) + self.ARD = ARD + if not ARD: + if lengthscale is None: + lengthscale = np.ones(1) + else: + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel" + else: + if lengthscale is not None: + lengthscale = np.asarray(lengthscale) + assert lengthscale.size in [1, input_dim], "Bad lengthscales" + if lengthscale.size != input_dim: + lengthscale = np.ones(input_dim)*lengthscale + else: + lengthscale = np.ones(self.input_dim) + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) + self.variance = Param('variance', variance, Logexp()) + assert self.variance.size==1 + self.add_parameters(self.variance, self.lengthscale) + + def _dist(self, X, X2): + if X2 is None: + X2 = X + return X[:, None, :] - X2[None, :, :] + + def _scaled_dist(self, X, X2=None): + return np.sqrt(np.sum(np.square(self._dist(X, X2) / self.lengthscale), -1)) + + def Kdiag(self, X): + ret = np.empty(X.shape[0]) + ret[:] = self.variance + return ret + + def update_gradients_diag(self, dL_dKdiag, X): + self.variance.gradient = np.sum(dL_dKdiag) + self.lengthscale.gradient = 0. + + def gradients_X_diag(self, dL_dKdiag, X): + return np.zeros(X.shape) + + def update_gradients_full(self, dL_dK, X, X2=None): + K = self.K(X, X2) + self.variance.gradient = np.sum(K * dL_dK)/self.variance + + rinv = self._inv_dist(X, X2) + dL_dr = self.dK_dr(X, X2) * dL_dK + x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 + + if self.ARD: + self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0) + else: + self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum() + + def _inv_dist(self, X, X2=None): + dist = self._scaled_dist(X, X2) + if X2 is None: + nondiag = util.diag.offdiag_view(dist) + nondiag[:] = 1./nondiag + return dist + else: + return 1./np.where(dist != 0., dist, np.inf) + + def gradients_X(self, dL_dK, X, X2=None): + dL_dr = self.dK_dr(X, X2) * dL_dK + invdist = self._inv_dist(X, X2) + ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2 + if X2 is None: + ret *= 2. 
+ return ret + + + + +class Exponential(Stationary): + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'): + super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name) + + def K(self, X, X2=None): + dist = self._scaled_dist(X, X2) + return self.variance * np.exp(-0.5 * dist) + + def dK_dr(self, X, X2): + return -0.5*self.K(X, X2) + +class Matern32(Stationary): + """ + Matern 3/2 kernel: + + .. math:: + + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } + + :param input_dim: the number of input dimensions + :type input_dim: int + :param variance: the variance :math:`\sigma^2` + :type variance: float + :param lengthscale: the vector of lengthscale :math:`\ell_i` + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) + :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. + :type ARD: Boolean + :rtype: kernel object + + """ + + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'): + super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name) + + def K(self, X, X2=None): + dist = self._scaled_dist(X, X2) + return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist) + + def dK_dr(self, X, X2): + dist = self._scaled_dist(X, X2) + return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist) + + def Gram_matrix(self, F, F1, F2, lower, upper): + """ + Return the Gram matrix of the vector of functions F with respect to the + RKHS norm. The use of this function is limited to input_dim=1. + + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param F2: vector of second derivatives of F + :type F2: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.input_dim == 1 + def L(x, i): + return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x)) + n = F.shape[0] + G = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] + Flower = np.array([f(lower) for f in F])[:, None] + F1lower = np.array([f(lower) for f in F1])[:, None] + return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) + + +class Matern52(Stationary): + """ + Matern 5/2 kernel: + + .. math:: + + k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } + """ + + def K(self, X, X2=None): + r = self._scaled_dist(X, X2) + return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r) + + def dK_dr(self, X, X2): + r = self._scaled_dist(X, X2) + return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r) + + def Gram_matrix(self,F,F1,F2,F3,lower,upper): + """ + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. 
+ + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param F2: vector of second derivatives of F + :type F2: np.array + :param F3: vector of third derivatives of F + :type F3: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.input_dim == 1 + def L(x,i): + return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x)) + n = F.shape[0] + G = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] + G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5)) + Flower = np.array([f(lower) for f in F])[:,None] + F1lower = np.array([f(lower) for f in F1])[:,None] + F2lower = np.array([f(lower) for f in F2])[:,None] + orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T) + orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) + return(1./self.variance* (G_coef*G + orig + orig2)) + + + + +class ExpQuad(Stationary): + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'): + super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name) + + def K(self, X, X2=None): + r = self._scaled_dist(X, X2) + return self.variance * np.exp(-0.5 * r**2) + + def dK_dr(self, X, X2): + dist = self._scaled_dist(X, X2) + return -dist*self.K(X, X2) + + + diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index c10fea4c..f93bb0ec 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -12,6 +12,7 @@ import decorators import classification import subarray_and_sorting import caching +import diag try: import sympy diff --git a/GPy/util/diag.py b/GPy/util/diag.py index 3d6b4dc9..3044ed54 100644 --- a/GPy/util/diag.py +++ b/GPy/util/diag.py @@ -11,14 +11,14 @@ import numpy as np def view(A, offset=0): """ Get a view on the diagonal elements of a 2D array. - - This is actually a view (!) on the diagonal of the array, so you can + + This is actually a view (!) on the diagonal of the array, so you can in-place adjust the view. - + :param :class:`ndarray` A: 2 dimensional numpy array :param int offset: view offset to give back (negative entries allowed) :rtype: :class:`ndarray` view of diag(A) - + >>> import numpy as np >>> X = np.arange(9).reshape(3,3) >>> view(X) @@ -36,7 +36,7 @@ def view(A, offset=0): """ from numpy.lib.stride_tricks import as_strided assert A.ndim == 2, "only implemented for 2 dimensions" - assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!" + assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!" 
if offset > 0: return as_strided(A[0, offset:], shape=(A.shape[0] - offset, ), strides=((A.shape[0]+1)*A.itemsize, )) elif offset < 0: @@ -44,6 +44,12 @@ def view(A, offset=0): else: return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, )) +def offdiag_view(A, offset=0): + from numpy.lib.stride_tricks import as_strided + assert A.ndim == 2, "only implemented for 2 dimensions" + Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,)) + return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1])) + def _diag_ufunc(A,b,offset,func): dA = view(A, offset); func(dA,b,dA) return A @@ -51,11 +57,11 @@ def _diag_ufunc(A,b,offset,func): def times(A, b, offset=0): """ Times the view of A with b in place (!). - Returns modified A + Returns modified A Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. @@ -67,11 +73,11 @@ multiply = times def divide(A, b, offset=0): """ Divide the view of A by b in place (!). - Returns modified A + Returns modified A Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. @@ -84,9 +90,9 @@ def add(A, b, offset=0): Add b to the view of A in place (!). Returns modified A. Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. @@ -99,16 +105,16 @@ def subtract(A, b, offset=0): Subtract b from the view of A in place (!). Returns modified A. Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. :rtype: view of A, which is adjusted inplace """ return _diag_ufunc(A, b, offset, np.subtract) - + if __name__ == '__main__': import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() From fddc663f286e94feef218c74a4f555903e097bee Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 21 Feb 2014 17:32:40 +0000 Subject: [PATCH 28/38] working on coregionalize --- GPy/kern/_src/coregionalize.py | 91 ++++++++++++++++------------------ 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 69fc27ef..0d99ce21 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -5,6 +5,7 @@ from kern import Kern import numpy as np from scipy import weave from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class Coregionalize(Kern): """ @@ -20,7 +21,7 @@ class Coregionalize(Kern): k_2(x, y)=\mathbf{B} k(x, y) it is obtained as the tensor product between a covariance function - k(x,y) and B. + k(x, y) and B. 
:param output_dim: number of outputs to coregionalize :type output_dim: int @@ -29,7 +30,7 @@ class Coregionalize(Kern): :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B :type W: numpy array of dimensionality (num_outpus, W_columns) :param kappa: a vector which allows the outputs to behave independently - :type kappa: numpy array of dimensionality (output_dim,) + :type kappa: numpy array of dimensionality (output_dim, ) .. note: see coregionalization examples in GPy.examples.regression for some usage. """ @@ -37,18 +38,18 @@ class Coregionalize(Kern): super(Coregionalize, self).__init__(input_dim=1, name=name) self.output_dim = output_dim self.rank = rank - if self.rank>output_dim-1: + if self.rank>output_dim: print("Warning: Unusual choice of rank, it should normally be less than the output_dim.") if W is None: - W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank) + W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) else: - assert W.shape==(self.output_dim,self.rank) - self.W = Param('W',W) + assert W.shape==(self.output_dim, self.rank) + self.W = Param('W', W) if kappa is None: kappa = 0.5*np.ones(self.output_dim) else: - assert kappa.shape==(self.output_dim,) - self.kappa = Param('kappa', kappa) + assert kappa.shape==(self.output_dim, ) + self.kappa = Param('kappa', kappa, Logexp()) self.add_parameters(self.W, self.kappa) self.parameters_changed() @@ -56,54 +57,58 @@ class Coregionalize(Kern): def parameters_changed(self): self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) - def K(self,index,index2,target): - index = np.asarray(index,dtype=np.int) + def K(self, X, X2=None): + index = np.asarray(X, dtype=np.int) #here's the old code (numpy) #if index2 is None: #index2 = index #else: - #index2 = np.asarray(index2,dtype=np.int) + #index2 = np.asarray(index2, dtype=np.int) #false_target = target.copy() - #ii,jj = np.meshgrid(index,index2) - #ii,jj = ii.T, jj.T - #false_target += self.B[ii,jj] + #ii, jj = np.meshgrid(index, index2) + #ii, jj = ii.T, jj.T + #false_target += self.B[ii, jj] - if index2 is None: + + if X2 is None: + target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64) code=""" for(int i=0;i Date: Fri, 21 Feb 2014 17:39:02 +0000 Subject: [PATCH 29/38] tidying --- GPy/kern/_src/coregionalize.py | 4 ++++ GPy/kern/_src/stationary.py | 16 +++------------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 0d99ce21..74cd2a1d 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -135,3 +135,7 @@ class Coregionalize(Kern): def gradients_X(self, dL_dK, X, X2=None): return np.zeros(X.shape) + + def gradients_X_diag(self, dL_dKdiag, X): + return np.zeros(X.shape) + diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index aaa534ac..7cc2e695 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -49,9 +49,6 @@ class Stationary(Kern): self.variance.gradient = np.sum(dL_dKdiag) self.lengthscale.gradient = 0. - def gradients_X_diag(self, dL_dKdiag, X): - return np.zeros(X.shape) - def update_gradients_full(self, dL_dK, X, X2=None): K = self.K(X, X2) self.variance.gradient = np.sum(K * dL_dK)/self.variance @@ -82,6 +79,9 @@ class Stationary(Kern): ret *= 2. 
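        # Note: the factor of two above is presumably the usual symmetric
        # correction when X2 is None, where X appears in both arguments of
        # K(X, X); the two equal contributions are computed once and doubled.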
return ret + def gradients_X_diag(self, dL_dKdiag, X): + return np.zeros(X.shape) + @@ -104,16 +104,6 @@ class Matern32(Stationary): k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. - :type ARD: Boolean - :rtype: kernel object - """ def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'): From 659643038fe0c6937c69e48cf12c4efd32e41edf Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 21 Feb 2014 17:53:44 +0000 Subject: [PATCH 30/38] parameterized now supports deleting of parameters --- GPy/core/model.py | 17 ++++------ GPy/core/parameterization/index_operations.py | 18 +++++++--- GPy/core/parameterization/parameter_core.py | 12 ++++--- GPy/core/parameterization/parameterized.py | 18 ++++++---- GPy/examples/dimensionality_reduction.py | 19 +++++++---- .../latent_function_inference/var_dtc.py | 10 +++--- GPy/kern/_src/kern.py | 2 +- .../matplot_dep/dim_reduction_plots.py | 20 +++++------ GPy/plotting/matplot_dep/kernel_plots.py | 34 +++++++++---------- GPy/plotting/matplot_dep/models_plots.py | 23 +++++++------ GPy/testing/index_operations_tests.py | 7 ++++ GPy/testing/parameterized_tests.py | 16 ++++----- 12 files changed, 113 insertions(+), 83 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index c067d51d..21bcf0c7 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -485,20 +485,17 @@ class Model(Parameterized): if not hasattr(self, 'kern'): raise ValueError, "this model has no kernel" - k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD] - if (not len(k) == 1): - raise ValueError, "cannot determine sensitivity for this kernel" - k = k[0] - from ..kern.parts.rbf import RBF - from ..kern.parts.rbf_inv import RBFInv - from ..kern.parts.linear import Linear + k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD] + from ..kern import RBF, Linear#, RBFInv + if isinstance(k, RBF): return 1. 
/ k.lengthscale - elif isinstance(k, RBFInv): - return k.inv_lengthscale + #elif isinstance(k, RBFInv): + # return k.inv_lengthscale elif isinstance(k, Linear): return k.variances - + else: + raise ValueError, "cannot determine sensitivity for this kernel" def pseudo_EM(self, stop_crit=.1, **kwargs): """ diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index bfd0bf21..b5399741 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -83,11 +83,21 @@ class ParameterIndexOperations(object): def iterproperties(self): return self._properties.iterkeys() - def shift(self, start, size): + def shift_right(self, start, size): for ind in self.iterindices(): toshift = ind>=start - if toshift.size > 0: - ind[toshift] += size + ind[toshift] += size + + def shift_left(self, start, size): + for v, ind in self.items(): + todelete = (ind>=start) * (ind=start + if toshift.size != 0: + ind[toshift] -= size + if ind.size != 0: self._properties[v] = ind + else: del self._properties[v] def clear(self): self._properties.clear() @@ -183,7 +193,7 @@ class ParameterIndexOperationsView(object): yield i - def shift(self, start, size): + def shift_right(self, start, size): raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations' diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 45b57eab..c2c8a05a 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -390,6 +390,7 @@ class Parameterizable(Constrainable): import copy from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView from .array_core import ParamList + dc = dict() for k, v in self.__dict__.iteritems(): if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): @@ -399,18 +400,21 @@ class Parameterizable(Constrainable): dc[k] = copy.deepcopy(v) if k == '_parameters_': params = [p.copy() for p in v] - # dc = copy.deepcopy(self.__dict__) + dc['_direct_parent_'] = None dc['_parent_index_'] = None dc['_parameters_'] = ParamList() + dc['constraints'].clear() + dc['priors'].clear() + dc['size'] = 0 + s = self.__new__(self.__class__) s.__dict__ = dc - # import ipdb;ipdb.set_trace() + for p in params: s.add_parameter(p) - # dc._notify_parent_change() + return s - # return copy.deepcopy(self) def _notify_parameters_changed(self): self.parameters_changed() diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 177cc217..d463ed43 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -87,8 +87,8 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable): self._parameters_.append(param) else: start = sum(p.size for p in self._parameters_[:index]) - self.constraints.shift(start, param.size) - self.priors.shift(start, param.size) + self.constraints.shift_right(start, param.size) + self.priors.shift_right(start, param.size) self.constraints.update(param.constraints, start) self.priors.update(param.priors, start) self._parameters_.insert(index, param) @@ -113,15 +113,19 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable): """ if not param in self._parameters_: raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()) - del 
self._parameters_[param._parent_index_] + + start = sum([p.size for p in self._parameters_[:param._parent_index_]]) + self._remove_parameter_name(param) self.size -= param.size + del self._parameters_[param._parent_index_] param._disconnect_parent() - self._remove_parameter_name(param) - - #self._notify_parent_change() + self.constraints.shift_left(start, param.size) self._connect_fixes() - + self._connect_parameters() + self._notify_parent_change() + + def _connect_parameters(self): # connect parameterlist to this parameterized object # This just sets up the right connection for the params objects diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index c8e79e6c..3ba54d34 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -74,7 +74,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True): data = GPy.util.datasets.oil_100() Y = data['X'] # create simple GP model - kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6) + kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6) m = GPy.models.GPLVM(Y, 6, kernel=kernel) m.data_labels = data['Y'].argmax(axis=1) if optimize: m.optimize('scg', messages=verbose) @@ -190,17 +190,22 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): _np.random.seed(1234) x = _np.linspace(0, 4 * _np.pi, N)[:, None] - s1 = _np.vectorize(lambda x: _np.sin(x)) + s1 = _np.vectorize(lambda x: -_np.sin(x)) s2 = _np.vectorize(lambda x: _np.cos(x)) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) - sS = _np.vectorize(lambda x: _np.sin(2 * x)) + sS = _np.vectorize(lambda x: x*_np.sin(x)) s1 = s1(x) s2 = s2(x) s3 = s3(x) sS = sS(x) - S1 = _np.hstack([s1, sS]) + s1 -= s1.mean(); s1 /= s1.std(0) + s2 -= s2.mean(); s2 /= s2.std(0) + s3 -= s3.mean(); s3 /= s3.std(0) + sS -= sS.mean(); sS /= sS.std(0) + + S1 = _np.hstack([s1, s2, sS]) S2 = _np.hstack([s2, s3, sS]) S3 = _np.hstack([s3, sS]) @@ -271,7 +276,7 @@ def bgplvm_simulation(optimize=True, verbose=1, D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] - k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) if optimize: @@ -291,10 +296,10 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, from GPy.models import BayesianGPLVM from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData - D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 + D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] - k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index a81bb711..5e88569c 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -308,14 +308,14 @@ class VarDTCMissingData(object): # gradients: if uncertain_inputs: grad_dict = {'dL_dKmm': dL_dKmm, - 'dL_dpsi0':dL_dpsi0, - 'dL_dpsi1':dL_dpsi1, - 'dL_dpsi2':dL_dpsi2, + 
'dL_dpsi0':dL_dpsi0_all, + 'dL_dpsi1':dL_dpsi1_all, + 'dL_dpsi2':dL_dpsi2_all, 'partial_for_likelihood':partial_for_likelihood} else: grad_dict = {'dL_dKmm': dL_dKmm, - 'dL_dKdiag':dL_dpsi0, - 'dL_dKnm':dL_dpsi1, + 'dL_dKdiag':dL_dpsi0_all, + 'dL_dKnm':dL_dpsi1_all, 'partial_for_likelihood':partial_for_likelihood} #get sufficient things for posterior prediction diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index b3ee57cd..f436d322 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -67,7 +67,7 @@ class Kern(Parameterized): See GPy.plotting.matplot_dep.plot_ARD """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." - from ..plotting.matplot_dep import kernel_plots + from ...plotting.matplot_dep import kernel_plots return kernel_plots.plot_ARD(self,*args) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 74292c05..3f4ea9b0 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -1,8 +1,8 @@ import pylab as pb import numpy as np -from ... import util from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController -from GPy.util.misc import param_to_array +from ...util.misc import param_to_array +from .base_plots import x_frame2D import itertools import Tango from matplotlib.cm import get_cmap @@ -37,7 +37,7 @@ def plot_latent(model, labels=None, which_indices=None, if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - util.plot.Tango.reset() + Tango.reset() if labels is None: labels = np.ones(model.num_data) @@ -46,7 +46,7 @@ def plot_latent(model, labels=None, which_indices=None, X = param_to_array(model.X) # first, plot the output variance as a function of the latent space - Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(X[:, [input_1, input_2]], resolution=resolution) + Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) def plot_function(x): @@ -87,7 +87,7 @@ def plot_latent(model, labels=None, which_indices=None, else: x = X[index, input_1] y = X[index, input_2] - ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) + ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) ax.set_xlabel('latent dimension %i' % input_1) ax.set_ylabel('latent dimension %i' % input_2) @@ -120,7 +120,7 @@ def plot_magnification(model, labels=None, which_indices=None, if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - util.plot.Tango.reset() + Tango.reset() if labels is None: labels = np.ones(model.num_data) @@ -128,7 +128,7 @@ def plot_magnification(model, labels=None, which_indices=None, input_1, input_2 = most_significant_input_dimensions(model, which_indices) # first, plot the output variance as a function of the latent space - Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) + Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) def plot_function(x): @@ -165,7 +165,7 @@ def plot_magnification(model, labels=None, which_indices=None, else: x = model.X[index, input_1] y = model.X[index, input_2] - ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) + ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), 
label=this_label) ax.set_xlabel('latent dimension %i' % input_1) ax.set_ylabel('latent dimension %i' % input_2) @@ -205,7 +205,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None, return dmu_dX[indices, argmax], np.array(labels)[argmax] if ax is None: - fig = pyplot.figure(num=fignum) + fig = pb.figure(num=fignum) ax = fig.add_subplot(111) if data_labels is None: @@ -241,7 +241,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None, ax.legend() ax.figure.tight_layout() if updates: - pyplot.show() + pb.show() clear = raw_input('Enter to continue') if clear.lower() in 'yes' or clear == '': controller.deactivate() diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 30157294..3436c4ff 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -1,7 +1,6 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -import sys import numpy as np import pylab as pb import Tango @@ -29,22 +28,23 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): xticklabels = [] bars = [] x0 = 0 - for p in kernel._parameters_: - c = Tango.nextMedium() - if hasattr(p, 'ARD') and p.ARD: - if title is None: - ax.set_title('ARD parameters, %s kernel' % p.name) - else: - ax.set_title(title) - if isinstance(p, Linear): - ard_params = p.variances - else: - ard_params = 1. / p.lengthscale - - x = np.arange(x0, x0 + len(ard_params)) - bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) - xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) - x0 += len(ard_params) + #for p in kernel._parameters_: + p = kernel + c = Tango.nextMedium() + if hasattr(p, 'ARD') and p.ARD: + if title is None: + ax.set_title('ARD parameters, %s kernel' % p.name) + else: + ax.set_title(title) + if isinstance(p, Linear): + ard_params = p.variances + else: + ard_params = 1. 
/ p.lengthscale + x = np.arange(x0, x0 + len(ard_params)) + from ...util.misc import param_to_array + bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) + xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) + x0 += len(ard_params) x = np.arange(x0) transOffset = offset_copy(ax.transData, fig=fig, x=0., y= -2., units='points') diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 47c8642e..59c32775 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -56,7 +56,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - + + X, Y = param_to_array(model.X, model.Y) + if model.has_uncertain_inputs(): X_variance = model.X_variance + #work out what the inputs are for plotting (1D or 2D) fixed_dims = np.array([i for i,v in fixed_inputs]) free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) @@ -66,7 +69,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #define the frame on which to plot resolution = resolution or 200 - Xnew, xmin, xmax = x_frame1D(model.X[:,free_dims], plot_limits=plot_limits) + Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits) Xgrid = np.empty((Xnew.shape[0],model.input_dim)) Xgrid[:,free_dims] = Xnew for i,v in fixed_inputs: @@ -77,13 +80,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', m, v = model._raw_predict(Xgrid) lower = m - 2*np.sqrt(v) upper = m + 2*np.sqrt(v) - Y = model.Y + Y = Y else: m, v, lower, upper = model.predict(Xgrid) - Y = model.Y + Y = Y for d in which_data_ycols: gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) - ax.plot(model.X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) + ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) #optionally plot some samples if samples: #NOTE not tested with fixed_inputs @@ -95,8 +98,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add error bars for uncertain (if input uncertainty is being modelled) if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): - ax.errorbar(model.X[which_data_rows, free_dims], model.Y[which_data_rows, which_data_ycols], - xerr=2 * np.sqrt(model.X_variance[which_data_rows, free_dims]), + ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), + xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), ecolor='k', fmt=None, elinewidth=.5, alpha=.5) @@ -120,7 +123,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #define the frame for plotting on resolution = resolution or 50 - Xnew, _, _, xmin, xmax = x_frame2D(model.X[:,free_dims], plot_limits, resolution) + Xnew, _, _, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution) Xgrid = np.empty((Xnew.shape[0],model.input_dim)) Xgrid[:,free_dims] = Xnew for i,v in fixed_inputs: @@ -130,14 +133,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #predict on the frame and plot if plot_raw: m, _ = model._raw_predict(Xgrid) - Y = model.Y + Y = Y else: m, _, _, _ = model.predict(Xgrid) Y = model.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - 
ax.scatter(model.X[which_data_rows, free_dims[0]], model.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index d5ef7007..171db5cc 100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -24,6 +24,13 @@ class Test(unittest.TestCase): self.param_index.remove(one, [1]) self.assertListEqual(self.param_index[one].tolist(), [3]) + def test_shift_left(self): + self.param_index.shift_left(1, 2) + self.assertListEqual(self.param_index[three].tolist(), [2,5]) + self.assertListEqual(self.param_index[two].tolist(), [0,3]) + self.assertListEqual(self.param_index[one].tolist(), [1]) + + def test_index_view(self): #======================================================================= # 0 1 2 3 4 5 6 7 8 9 diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index ff57606a..6f13d294 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -10,8 +10,8 @@ import numpy as np class Test(unittest.TestCase): def setUp(self): - self.rbf = GPy.kern.rbf(1) - self.white = GPy.kern.white(1) + self.rbf = GPy.kern.RBF(1) + self.white = GPy.kern.White(1) from GPy.core.parameterization import Param from GPy.core.parameterization.transformations import Logistic self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) @@ -39,14 +39,13 @@ class Test(unittest.TestCase): def test_remove_parameter(self): - from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__ + from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp self.white.fix() self.test1.remove_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) - self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops) - self.assertEquals(self.white.white.constraints._offset, 0) + self.assertEquals(self.white.constraints._offset, 0) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) @@ -57,18 +56,19 @@ class Test(unittest.TestCase): self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0]) self.assertIs(self.white._fixes_,None) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52) + self.test1.remove_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) - self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) - self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) + self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) + self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1]) def test_add_parameter_already_in_hirarchy(self): self.test1.add_parameter(self.white._parameters_[0]) def test_default_constraints(self): - self.assertIs(self.rbf.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) + 
self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) from GPy.core.parameterization.transformations import Logexp From 99c6a2095fa603f7842efa2a1f0e6e8d25354dd0 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Fri, 21 Feb 2014 17:56:37 +0000 Subject: [PATCH 31/38] adapt the new interface of the variational posterior distribution. --- GPy/core/parameterization/variational.py | 26 ++ GPy/core/sparse_gp.py | 9 +- .../latent_function_inference/var_dtc.py | 61 +-- GPy/kern/_src/kern.py | 12 +- GPy/kern/_src/rbf.py | 23 +- GPy/kern/_src/ss_rbf.py | 352 ------------------ GPy/models/bayesian_gplvm.py | 2 +- 7 files changed, 96 insertions(+), 389 deletions(-) delete mode 100644 GPy/kern/_src/ss_rbf.py diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index a7b26a80..5fe63052 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -29,3 +29,29 @@ class Normal(Parameterized): assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ...plotting.matplot_dep import variational_plots return variational_plots.plot(self,*args) + + +class SpikeAndSlab(Parameterized): + ''' + The SpikeAndSlab distribution for variational approximations. + ''' + def __init__(self, means, variances, binary_prob, name='latent space'): + """ + binary_prob : the probability of the distribution on the slab part. + """ + Parameterized.__init__(self, name=name) + self.mean = Param("mean", means) + self.variance = Param('variance', variances, Logexp()) + self.gamma = Param("binary_prob",binary_prob,) + self.add_parameters(self.mean, self.variance, self.gamma) + + def plot(self, *args): + """ + Plot latent space X in 1D: + + See GPy.plotting.matplot_dep.variational_plots + """ + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
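The SpikeAndSlab distribution defined above is parameterised by a mean, a variance and the slab probability gamma. A minimal, purely illustrative construction (the shapes below are assumptions, they are not fixed by this patch) could look like:

    q = SpikeAndSlab(means=np.random.randn(100, 3),
                     variances=np.ones((100, 3)),
                     binary_prob=0.5 * np.ones((100, 3)))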
+ from ...plotting.matplot_dep import variational_plots + return variational_plots.plot(self,*args) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 61a664fe..71053867 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -57,11 +57,14 @@ class SparseGP(GP): return not (self.X_variance is None) def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) + if self.has_uncertain_inputs(): + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference_latent(self.kern, self.q, self.Z, self.likelihood, self.Y) + else: + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood')) if self.has_uncertain_inputs(): - self.kern.update_gradients_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) - self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) + self.kern.update_gradients_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) else: self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict) self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index a81bb711..c2f179ac 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -43,9 +43,20 @@ class VarDTC(object): return Y * prec # TODO chache this, and make it effective def inference(self, kern, X, X_variance, Z, likelihood, Y): + """Inference for normal sparseGP""" + uncertain_inputs = False + psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def inference_latent(self, kern, posterior_variational, Z, likelihood, Y): + """Inference for GPLVM with uncertain inputs""" + uncertain_inputs = True + psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def _inference(self, kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs): #see whether we're using variational uncertain inputs - uncertain_inputs = not (X_variance is None) _, output_dim = Y.shape @@ -62,10 +73,9 @@ class VarDTC(object): # do the inference: het_noise = beta.size < 1 num_inducing = Z.shape[0] - num_data = X.shape[0] + num_data = Y.shape[0] # kernel computations, using BGPLVM notation - Kmm = kern.K(Z) - psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + Kmm = kern.K(Z) Lm = jitchol(Kmm) @@ -191,20 +201,31 @@ class VarDTCMissingData(object): else: self._subarray_indices = [[slice(None),slice(None)]] return [Y], [(Y**2).sum()] - + def inference(self, kern, X, X_variance, Z, likelihood, Y): + """Inference for normal sparseGP""" + uncertain_inputs = False + psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def inference_latent(self, 
kern, posterior_variational, Z, likelihood, Y): + """Inference for GPLVM with uncertain inputs""" + uncertain_inputs = True + psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def _inference(self, kern, psi0_all, psi1_all, psi2_all, Z, likelihood, Y, uncertain_inputs): Ys, traces = self._Y(Y) beta_all = 1./likelihood.variance - uncertain_inputs = not (X_variance is None) het_noise = beta_all.size != 1 import itertools num_inducing = Z.shape[0] - dL_dpsi0_all = np.zeros(X.shape[0]) - dL_dpsi1_all = np.zeros((X.shape[0], num_inducing)) + dL_dpsi0_all = np.zeros(Y.shape[0]) + dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing)) if uncertain_inputs: - dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing)) + dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing)) partial_for_likelihood = 0 woodbury_vector = np.zeros((num_inducing, Y.shape[1])) @@ -217,9 +238,6 @@ class VarDTCMissingData(object): Lm = jitchol(Kmm) if uncertain_inputs: LmInv = dtrtri(Lm) - # kernel computations, using BGPLVM notation - psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) - VVT_factor_all = np.empty(Y.shape) full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] if not full_VVT_factor: @@ -340,15 +358,16 @@ class VarDTCMissingData(object): return post, log_marginal, grad_dict -def _compute_psi(kern, X, X_variance, Z, uncertain_inputs): - if uncertain_inputs: - psi0 = kern.psi0(Z, X, X_variance) - psi1 = kern.psi1(Z, X, X_variance) - psi2 = kern.psi2(Z, X, X_variance) - else: - psi0 = kern.Kdiag(X) - psi1 = kern.K(X, Z) - psi2 = None +def _compute_psi(kern, X, X_variance, Z): + psi0 = kern.Kdiag(X) + psi1 = kern.K(X, Z) + psi2 = None + return psi0, psi1, psi2 + +def _compute_psi_latent(kern, posterior_variational, Z): + psi0 = kern.psi0(Z, posterior_variational) + psi1 = kern.psi1(Z, posterior_variational) + psi2 = kern.psi2(Z, posterior_variational) return psi0, psi1, psi2 def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index b3ee57cd..5fe29d51 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -26,11 +26,11 @@ class Kern(Parameterized): raise NotImplementedError def Kdiag(self, Xa): raise NotImplementedError - def psi0(self,Z,mu,S): + def psi0(self,Z,posterior_variational): raise NotImplementedError - def psi1(self,Z,mu,S): + def psi1(self,Z,posterior_variational): raise NotImplementedError - def psi2(self,Z,mu,S): + def psi2(self,Z,posterior_variational): raise NotImplementedError def gradients_X(self, dL_dK, X, X2): raise NotImplementedError @@ -49,16 +49,16 @@ class Kern(Parameterized): self._collect_gradient(target) self._set_gradient(target) - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" raise NotImplementedError def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): grad = self.gradients_X(dL_dKmm, Z) grad += self.gradients_X(dL_dKnm.T, Z, X) return grad - def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, 
posterior_variational): raise NotImplementedError - def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): raise NotImplementedError def plot_ARD(self, *args): diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index c4d595d0..0c8588a2 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -79,16 +79,21 @@ class RBF(Kern): ret[:] = self.variance return ret - def psi0(self, Z, mu, S): + def psi0(self, Z, posterior_variational): + mu = posterior_variational.mean ret = np.empty(mu.shape[0], dtype=np.float64) ret[:] = self.variance return ret - def psi1(self, Z, mu, S): + def psi1(self, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) return self._psi1 - def psi2(self, Z, mu, S): + def psi2(self, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) return self._psi2 @@ -121,7 +126,9 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) #contributions from psi0: @@ -155,7 +162,9 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) #psi1 @@ -173,7 +182,9 @@ class RBF(Kern): return grad - def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) #psi1 tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom diff --git a/GPy/kern/_src/ss_rbf.py b/GPy/kern/_src/ss_rbf.py deleted file mode 100644 index cab8fd11..00000000 --- a/GPy/kern/_src/ss_rbf.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param - -class SS_RBF(Kernpart): - """ - The RBF kernel for Spike-and-Slab GPLVM - Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: - - .. math:: - - k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2} - - where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. 
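With this patch the psi statistics of the remaining kernels are computed from a variational posterior object instead of separate mu and S arrays (see psi0/psi1/psi2 above and _compute_psi_latent in var_dtc.py). A rough sketch of the calling convention; the construction of the posterior is schematic, since its exact constructor signature is not shown in this hunk:

    q = Normal(mu, S)        # variational posterior, GPy.core.parameterization.variational
    psi0 = kern.psi0(Z, q)   # uses q.mean / q.variance internally
    psi1 = kern.psi1(Z, q)
    psi2 = kern.psi2(Z, q)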
- - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance of the kernel - :type variance: float - :param lengthscale: the vector of lengthscale of the kernel - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :rtype: kernel object - """ - - def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'): - super(RBF, self).__init__(input_dim, name) - self.input_dim = input_dim - - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - - self.variance = Param('variance', variance) - self.lengthscale = Param('lengthscale', lengthscale) - self.lengthscale.add_observer(self, self.update_lengthscale) - self.add_parameters(self.variance, self.lengthscale) - self.parameters_changed() # initializes cache - - def on_input_change(self, X): - #self._K_computations(X, None) - pass - - def update_lengthscale(self, l): - self.lengthscale2 = np.square(self.lengthscale) - - def parameters_changed(self): - # reset cached results - self._X, self._X2 = np.empty(shape=(2, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - def K(self, X, X2, target): - self._K_computations(X, X2) - target += self.variance * self._K_dvar - - def Kdiag(self, X, target): - np.add(target, self.variance, target) - - def psi0(self, Z, mu, S, target): - target += self.variance - - def psi1(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi1 - - def psi2(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi2 - - def update_gradients_full(self, dL_dK, X): - self._K_computations(X, None) - self.variance.gradient = np.sum(self._K_dvar * dL_dK) - if self.ARD: - self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None) - else: - self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - #contributions from Kdiag - self.variance.gradient = np.sum(dL_dKdiag) - - #from Knm - self._K_computations(X, Z) - self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) - if self.ARD: - self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) - - else: - self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - - #from Kmm - self._K_computations(Z, None) - self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) - if self.ARD: - self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) - else: - self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - self._psi_computations(Z, mu, S) - - #contributions from psi0: - self.variance.gradient = np.sum(dL_dpsi0) - - #from psi1 - self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance) - d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) - dpsi1_dlength = d_length * dL_dpsi1[:, :, None] - if not self.ARD: - self.lengthscale.gradeint = dpsi1_dlength.sum() - else: - self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0) - - #from psi2 - d_var = 2.*self._psi2 / self.variance - d_length = 
2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom) - - self.variance.gradient += np.sum(dL_dpsi2 * d_var) - dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None] - if not self.ARD: - self.lengthscale.gradient += dpsi2_dlength.sum() - else: - self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0) - - #from Kmm - self._K_computations(Z, None) - self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) - if self.ARD: - self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) - else: - self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - - def gradients_X(self, dL_dK, X, X2, target): - #if self._X is None or X.base is not self._X.base or X2 is not None: - self._K_computations(X, X2) - if X2 is None: - _K_dist = 2*(X[:, None, :] - X[None, :, :]) - else: - _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. - gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X, target): - pass - - #---------------------------------------# - # PSI statistics # - #---------------------------------------# - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self._psi_computations(Z, mu, S) - denominator = (self.lengthscale2 * (self._psi1_denom)) - dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - self._psi_computations(Z, mu, S) - tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim - dZ = self._psi2[:, :, :, None] * (term1[None] + term2) - target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom - target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) - target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - #---------------------------------------# - # Precomputations # - #---------------------------------------# - - def _K_computations(self, X, X2): - #params = self._get_params() - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)): - #self._X = X.copy() - #self._params_save = params.copy() - if X2 is None: - self._X2 = None - X = X / self.lengthscale - Xsquare = np.sum(np.square(X), 1) - self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :]) - else: - self._X2 = X2.copy() - X = X 
/ self.lengthscale - X2 = X2 / self.lengthscale - self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :]) - self._K_dvar = np.exp(-0.5 * self._K_dist2) - - def _dL_dlengthscales_via_K(self, dL_dK, X, X2): - """ - A helper function for update_gradients_* methods - - Computes the derivative of the objective L wrt the lengthscales via - - dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl) - - assumes self._K_computations has just been called. - - This is only valid if self.ARD=True - """ - target = np.zeros(self.input_dim) - dvardLdK = self._K_dvar * dL_dK - var_len3 = self.variance / np.power(self.lengthscale, 3) - if X2 is None: - # save computation for the symmetrical case - dvardLdK = dvardLdK + dvardLdK.T - code = """ - int q,i,j; - double tmp; - for(q=0; q - #include - """ - weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], - type_converters=weave.converters.blitz, **self.weave_options) - - return mudist, mudist_sq, psi2_exponent, psi2 diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 8aa378ce..cc68de68 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -66,7 +66,7 @@ class BayesianGPLVM(SparseGP, GPLVM): super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.KL_divergence() - dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) + dL_dmu, dL_dS = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) # dL: self.q.mean.gradient = dL_dmu From ea5d19bb4ede5bd952a63f776e2df9c2678843c1 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 21 Feb 2014 18:09:05 +0000 Subject: [PATCH 32/38] bias now looks in shape --- GPy/kern/_src/bias.py | 82 ++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/bias.py index d2301bcd..d45561f8 100644 --- a/GPy/kern/_src/bias.py +++ b/GPy/kern/_src/bias.py @@ -2,80 +2,60 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp -class Bias(Kernpart): +class Bias(Kern): def __init__(self,input_dim,variance=1.,name=None): - """ - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance of the kernel - :type variance: float - """ super(Bias, self).__init__(input_dim, name) - from ...core.parameterization.transformations import Logexp self.variance = Param("variance", variance, Logexp()) self.add_parameter(self.variance) - def K(self,X,X2,target): - target += self.variance + def K(self, X, X2=None): + shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) + ret = np.empty(shape, dtype=np.float64) + ret[:] = self.variance + return ret - def Kdiag(self,X,target): - target += self.variance + def Kdiag(self,X): + ret = np.empty((X.shape[0],), dtype=np.float64) + ret[:] = self.variance + return ret - #def dK_dtheta(self,dL_dKdiag,X,X2,target): - #target += dL_dKdiag.sum() - def update_gradients_full(self, dL_dK, X): + def update_gradients_full(self, dL_dK, X, X2=None): self.variance.gradient = dL_dK.sum() - def 
dKdiag_dtheta(self,dL_dKdiag,X,target): - target += dL_dKdiag.sum() + def update_gradients_diag(self, dL_dKdiag, X): + self.variance.gradient = dL_dK.sum() def gradients_X(self, dL_dK,X, X2, target): - pass + return np.zeros(X.shape) - def dKdiag_dX(self,dL_dKdiag,X,target): - pass + def gradients_X_diag(self,dL_dKdiag,X,target): + return np.zeros(X.shape) #---------------------------------------# # PSI statistics # #---------------------------------------# - def psi0(self, Z, mu, S, target): - target += self.variance + def psi0(self, Z, mu, S): + return self.Kdiag(mu) def psi1(self, Z, mu, S, target): - self._psi1 = self.variance - target += self._psi1 - + return self.K(mu, S) + def psi2(self, Z, mu, S, target): - target += self.variance**2 + ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) + ret[:] = self.variance**2 + return ret - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target): - target += dL_dpsi0.sum() + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target): - target += dL_dpsi1.sum() + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + return np.zeros(Z.shape) - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): - target += 2.*self.variance*dL_dpsi2.sum() - - def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target): - pass - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - pass - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - pass - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - pass - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - pass + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + return np.zeros(mu.shape), np.zeros(S.shape) From 61a101ed0547b158388abddc029edf62370b7182 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Sun, 23 Feb 2014 11:02:20 -0500 Subject: [PATCH 33/38] Changes to sympykern.py --- GPy/kern/parts/rbf.py | 2 +- GPy/kern/parts/sympykern.py | 204 +++++++++++++++++++++--------------- 2 files changed, 123 insertions(+), 83 deletions(-) diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 8811b74a..027aa382 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -109,7 +109,7 @@ class RBF(Kernpart): self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None) else: self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) -b + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): #contributions from Kdiag self.variance.gradient = np.sum(dL_dKdiag) diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index 52813ecd..3d6517a8 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -1,3 +1,4 @@ +# Check Matthew Rocklin's blog post. try: import sympy as sp sympy_available=True @@ -129,6 +130,8 @@ class spkern(Kernpart): if False: self.compute_psi_stats() + self._code = {} + # generate the code for the covariance functions self._gen_code() @@ -169,6 +172,7 @@ class spkern(Kernpart): code_type = "C" else: code_type = "PYTHON" + # Need to add the sympy_helpers header in here. 
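sympy's codegen takes a list of (name, expression) pairs and returns the generated source and header as (filename, contents) tuples, which is how it is unpacked just below. A stand-alone sketch of that call pattern (the symbols and the expression here are made up):

    import sympy as sp
    from sympy.utilities.codegen import codegen
    x, z = sp.symbols('x z')
    (c_name, c_source), (h_name, c_header) = codegen([('k', sp.exp(-(x - z)**2))], 'C', 'k')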
(foo_c,self._function_code), (foo_h,self._function_header) = \ codegen(code_list, code_type, @@ -233,7 +237,7 @@ class spkern(Kernpart): """ # Here's the code to do the looping for K - self._K_code =\ + self._code['K'] =\ """ // _K_code // Code for computing the covariance function. @@ -254,7 +258,7 @@ class spkern(Kernpart): """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") # adding a string representation of the function in the # comment forces recompile when needed - self._K_code_X = self._K_code.replace('Z2(', 'X2(') + self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(') # Code to compute diagonal of covariance. @@ -265,9 +269,9 @@ class spkern(Kernpart): diag_precompute_string = re.sub('Z','X',diag_precompute_string) diag_precompute_string = re.sub('j','i',diag_precompute_string) # Code to do the looping for Kdiag - self._Kdiag_code =\ + self._code['Kdiag'] =\ """ - // _Kdiag_code + // _code['Kdiag'] // Code for computing diagonal of covariance function. int i; int n = target_array->dimensions[0]; @@ -282,51 +286,88 @@ class spkern(Kernpart): """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed # Code to compute gradients - grad_func_list = [] if self.output_dim>1: - grad_func_list += c_define_output_indices - grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) - grad_func_string = '\n'.join(grad_func_list) + for i, theta in enumerate(self._sp_theta_i): + grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)] + grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)] + grad_func_list = c_define_output_indices+grad_func_list - self._dK_dtheta_code =\ - """ - // _dK_dtheta_code - // Code for computing gradient of covariance with respect to parameters. 
- int i; - int j; - int n = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;idimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (i=0;i Date: Mon, 24 Feb 2014 08:22:06 +0000 Subject: [PATCH 34/38] minor fixes in kerns --- GPy/kern/__init__.py | 2 +- GPy/kern/_src/bias.py | 1 + GPy/kern/_src/stationary.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index e5dc6d35..594ff6d3 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,9 +2,9 @@ from _src.rbf import RBF from _src.white import White from _src.kern import Kern from _src.linear import Linear +from _src.bias import Bias from _src.brownian import Brownian from _src.stationary import Exponential, Matern32, Matern52, ExpQuad -#from _src.bias import Bias #import coregionalize #import exponential #import eq_ode1 diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/bias.py index d45561f8..e1938c95 100644 --- a/GPy/kern/_src/bias.py +++ b/GPy/kern/_src/bias.py @@ -5,6 +5,7 @@ from kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp +import numpy as np class Bias(Kern): def __init__(self,input_dim,variance=1.,name=None): diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 7cc2e695..a6ff9424 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -18,7 +18,7 @@ class Stationary(Kern): lengthscale = np.ones(1) else: lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel" + assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel" else: if lengthscale is not None: lengthscale = np.asarray(lengthscale) From b20beaa8630034adfefaf3561f3cad6ec88d323e Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 24 Feb 2014 08:55:18 +0000 Subject: [PATCH 35/38] some work pon EP (uninished) --- GPy/inference/latent_function_inference/ep.py | 421 +++--------------- 1 file changed, 61 insertions(+), 360 deletions(-) diff --git a/GPy/inference/latent_function_inference/ep.py b/GPy/inference/latent_function_inference/ep.py index aa106067..87c08221 100644 --- a/GPy/inference/latent_function_inference/ep.py +++ b/GPy/inference/latent_function_inference/ep.py @@ -3,390 +3,91 @@ from scipy import stats from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs from likelihood import likelihood -class EP(likelihood): - def __init__(self,data,noise_model): - """ - Expectation Propagation - - :param data: data to model - :type data: numpy array - :param noise_model: noise distribution - :type noise_model: A GPy noise model - - """ - self.noise_model = noise_model - self.data = data - self.num_data, self.output_dim = self.data.shape - self.is_heteroscedastic = True - self.num_params = 0 - - #Initial values - Likelihood approximation parameters: - #p(y|f) = t(f|tau_tilde,v_tilde) - self.tau_tilde = 
np.zeros(self.num_data) - self.v_tilde = np.zeros(self.num_data) - - #initial values for the GP variables - self.Y = np.zeros((self.num_data,1)) - self.covariance_matrix = np.eye(self.num_data) - self.precision = np.ones(self.num_data)[:,None] - self.Z = 0 - self.YYT = None - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = 0. - - super(EP, self).__init__() - - def restart(self): - self.tau_tilde = np.zeros(self.num_data) - self.v_tilde = np.zeros(self.num_data) - self.Y = np.zeros((self.num_data,1)) - self.covariance_matrix = np.eye(self.num_data) - self.precision = np.ones(self.num_data)[:,None] - self.Z = 0 - self.YYT = None - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = 0. - - def predictive_values(self,mu,var,full_cov,**noise_args): - if full_cov: - raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" - return self.noise_model.predictive_values(mu,var,**noise_args) - - def log_predictive_density(self, y_test, mu_star, var_star): - """ - Calculation of the log predictive density - - .. math: - p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*}) - - :param y_test: test observations (y_{*}) - :type y_test: (Nx1) array - :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*}) - :type mu_star: (Nx1) array - :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*}) - :type var_star: (Nx1) array - """ - return self.noise_model.log_predictive_density(y_test, mu_star, var_star) - - def _get_params(self): - #return np.zeros(0) - return self.noise_model._get_params() - - def _get_param_names(self): - #return [] - return self.noise_model._get_param_names() - - def _set_params(self,p): - #pass # TODO: the EP likelihood might want to take some parameters... - self.noise_model._set_params(p) - - def _gradients(self,partial): - #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... - return self.noise_model._gradients(partial) - - def _compute_GP_variables(self): - #Variables to be called from GP - mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model - sigma_sum = 1./self.tau_ + 1./self.tau_tilde - mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 - self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep - self.Z += 0.5*self.num_data*np.log(2*np.pi) - - self.Y = mu_tilde[:,None] - self.YYT = np.dot(self.Y,self.Y.T) - self.covariance_matrix = np.diag(1./self.tau_tilde) - self.precision = self.tau_tilde[:,None] - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = np.trace(self.YYT) - - def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]): +class EP(object): + def __init__(self, epsilon=1e-6, eta=1., delta=1.): """ The expectation-propagation algorithm. For nomenclature see Rasmussen & Williams 2006. :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :type epsilon: float - :param power_ep: Power EP parameters - :type power_ep: list of floats - + :param eta: Power EP thing TODO: Ricardo: what, exactly? + :type eta: float64 + :param delta: Power EP thing TODO: Ricardo: what, exactly? 
+ :type delta: float64 """ - self.epsilon = epsilon - self.eta, self.delta = power_ep + self.epsilon, self.eta, self.delta = epsilon, eta, delta + self.reset() + + def reset(self): + self.old_mutilde, self.old_vtilde = None, None + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + K = kern.K(X) + + mu, Sigma, mu_tilde, tau_tilde = self.expectation_propagation(K, Y, Y_metadata, likelihood) + + + def expectation_propagation(self, K, Y, Y_metadata, likelihood): + + num_data, data_dim = Y.shape + assert data_dim == 1, "This EP method only works for 1D outputs" + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) mu = np.zeros(self.num_data) Sigma = K.copy() - """ - Initial values - Cavity distribution parameters: - q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.num_data,dtype=float) - self.v_ = np.empty(self.num_data,dtype=float) - #Initial values - Marginal moments - z = np.empty(self.num_data,dtype=float) - self.Z_hat = np.empty(self.num_data,dtype=float) - phi = np.empty(self.num_data,dtype=float) - mu_hat = np.empty(self.num_data,dtype=float) - sigma2_hat = np.empty(self.num_data,dtype=float) + Z_hat = np.empty(num_data,dtype=np.float64) + mu_hat = np.empty(num_data,dtype=np.float64) + sigma2_hat = np.empty(num_data,dtype=np.float64) + + #initial values - Gaussian factors + if self.old_mutilde is None: + tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data)) + else: + assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!" + mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde + tau_tilde = v_tilde/mu_tilde #Approximation epsilon_np1 = self.epsilon + 1. epsilon_np2 = self.epsilon + 1. - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.num_data) + iterations = 0 + while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon): + update_order = np.random.permutation(num_data) for i in update_order: #Cavity distribution parameters - self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] + tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i] + v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) + Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i])) #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v + delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + tau_tilde[i] += delta_tau + v_tilde[i] += delta_v #Posterior distribution parameters update - DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) - mu = np.dot(Sigma,self.v_tilde) - self.iterations += 1 + DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i])) + mu = np.dot(Sigma, v_tilde) + iterations += 1 + +
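# A minimal sketch, not from the patch: one single-site power-EP update, with a Gaussian
# likelihood standing in for likelihood.moments_match so the matched moments are analytic.
# Assumption (hedged): eta is the fraction of the site removed to form the cavity and
# delta damps the site update, matching how self.eta and self.delta are used above.
import numpy as np

def ep_site_update(y, sigma2_lik, Sigma_ii, mu_i, tau_tilde_i, v_tilde_i, eta=1., delta=1.):
    # cavity distribution: remove a fraction eta of the current site from the marginal
    tau_cav = 1. / Sigma_ii - eta * tau_tilde_i
    v_cav = mu_i / Sigma_ii - eta * v_tilde_i
    sigma2_cav, mu_cav = 1. / tau_cav, v_cav / tau_cav
    # moment matching against N(y | f, sigma2_lik): a product of two Gaussians
    sigma2_hat = 1. / (1. / sigma2_cav + 1. / sigma2_lik)
    mu_hat = sigma2_hat * (mu_cav / sigma2_cav + y / sigma2_lik)
    # damped update of the site natural parameters
    delta_tau = delta / eta * (1. / sigma2_hat - 1. / Sigma_ii)
    delta_v = delta / eta * (mu_hat / sigma2_hat - mu_i / Sigma_ii)
    return tau_tilde_i + delta_tau, v_tilde_i + delta_v

tau_new, v_new = ep_site_update(y=0.3, sigma2_lik=0.1, Sigma_ii=1., mu_i=0.,
                                tau_tilde_i=0., v_tilde_i=0.)
print(tau_new, v_new)   # for a Gaussian likelihood these equal 1/sigma2_lik and y/sigma2_lik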
#(re) compute Sigma and mu using full Cholesky decompy + tau_tilde_root = np.sqrt(tau_tilde) + Sroot_tilde_K = tau_tilde_root[:,None] * K + B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:] L = jitchol(B) - V,info = dtrtrs(L,Sroot_tilde_K,lower=1) + V, _ = dtrtrs(L, Sroot_tilde_K, lower=1) Sigma = K - np.dot(V.T,V) - mu = np.dot(Sigma,self.v_tilde) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) + mu = np.dot(Sigma,v_tilde) - return self._compute_GP_variables() + #monitor convergence + epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old)) + epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old)) + tau_tilde_old = tau_tilde.copy() + v_tilde_old = v_tilde.copy() - def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see ... 2013. + return mu, Sigma, mu_tilde, tau_tilde - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :type epsilon: float - :param power_ep: Power EP parameters - :type power_ep: list of floats - - """ - self.epsilon = epsilon - self.eta, self.delta = power_ep - - num_inducing = Kmm.shape[0] - - #TODO: this doesn't work with uncertain inputs! - - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = Qnn = Knm*Kmmi*Kmn - """ - KmnKnm = np.dot(Kmn,Kmn.T) - Lm = jitchol(Kmm) - Lmi = chol_inv(Lm) - Kmmi = np.dot(Lmi.T,Lmi) - KmmiKmn = np.dot(Kmmi,Kmn) - Qnn_diag = np.sum(Kmn*KmmiKmn,-2) - LLT0 = Kmm.copy() - - #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) - #KmnKnm = np.dot(Kmn, Kmn.T) - #KmmiKmn = np.dot(Kmmi,Kmn) - #Qnn_diag = np.sum(Kmn*KmmiKmn,-2) - #LLT0 = Kmm.copy() - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*Gamma - """ - mu = np.zeros(self.num_data) - LLT = Kmm.copy() - Sigma_diag = Qnn_diag.copy() - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.num_data,dtype=float) - self.v_ = np.empty(self.num_data,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.num_data,dtype=float) - self.Z_hat = np.empty(self.num_data,dtype=float) - phi = np.empty(self.num_data,dtype=float) - mu_hat = np.empty(self.num_data,dtype=float) - sigma2_hat = np.empty(self.num_data,dtype=float) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - np1 = [self.tau_tilde.copy()] - np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.num_data) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = 
LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau - L = jitchol(LLT) - #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau)) - V,info = dtrtrs(L,Kmn,lower=1) - Sigma_diag = np.sum(V*V,-2) - si = np.sum(V.T*V[:,i],-1) - mu += (Delta_v-Delta_tau*mu[i])*si - self.iterations += 1 - #Sigma recomputation with Cholesky decompositon - LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T) - L = jitchol(LLT) - V,info = dtrtrs(L,Kmn,lower=1) - V2,info = dtrtrs(L.T,V,lower=0) - Sigma_diag = np.sum(V*V,-2) - Knmv_tilde = np.dot(Kmn,self.v_tilde) - mu = np.dot(V2.T,Knmv_tilde) - epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data - epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data - np1.append(self.tau_tilde.copy()) - np2.append(self.v_tilde.copy()) - - self._compute_GP_variables() - - def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see Naish-Guzman and Holden, 2008. - - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :type epsilon: float - :param power_ep: Power EP parameters - :type power_ep: list of floats - """ - self.epsilon = epsilon - self.eta, self.delta = power_ep - - num_inducing = Kmm.shape[0] - - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn - """ - Lm = jitchol(Kmm) - Lmi = chol_inv(Lm) - Kmmi = np.dot(Lmi.T,Lmi) - P0 = Kmn.T - KmnKnm = np.dot(P0.T, P0) - KmmiKmn = np.dot(Kmmi,P0.T) - Qnn_diag = np.sum(P0.T*KmmiKmn,-2) - Diag0 = Knn_diag - Qnn_diag - R0 = jitchol(Kmmi).T - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*Gamma - """ - self.w = np.zeros(self.num_data) - self.Gamma = np.zeros(num_inducing) - mu = np.zeros(self.num_data) - P = P0.copy() - R = R0.copy() - Diag = Diag0.copy() - Sigma_diag = Knn_diag - RPT0 = np.dot(R0,P0.T) - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.num_data,dtype=float) - self.v_ = np.empty(self.num_data,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.num_data,dtype=float) - self.Z_hat = np.empty(self.num_data,dtype=float) - phi = np.empty(self.num_data,dtype=float) - mu_hat = np.empty(self.num_data,dtype=float) - sigma2_hat = np.empty(self.num_data,dtype=float) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.num_data) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - dtd1 = Delta_tau*Diag[i] + 1. 
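# A minimal sketch, not part of the patch: both sparse fits above start from the low-rank
# prior term Qnn = Knm Kmm^{-1} Kmn (fit_FITC additionally keeps Diag0 = Knn_diag - Qnn_diag).
# The toy RBF covariance and the jitter value below are assumptions for illustration only.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

def rbf(A, B, variance=1.0, lengthscale=1.0):
    sq = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2. * np.dot(A, B.T)
    return variance * np.exp(-0.5 * sq / lengthscale**2)

X = np.random.randn(50, 1)                    # data inputs
Z = np.random.randn(8, 1)                     # inducing inputs
Kmm = rbf(Z, Z) + 1e-8 * np.eye(8)            # jitter keeps the Cholesky well conditioned
Kmn = rbf(Z, X)

Lm = cholesky(Kmm, lower=True)
V = solve_triangular(Lm, Kmn, lower=True)     # V = Lm^{-1} Kmn
Qnn_diag = np.sum(V * V, 0)                   # diag(Knm Kmm^{-1} Kmn), one entry per data point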
- dii = Diag[i] - Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 - pi_ = P[i,:].reshape(1,num_inducing) - P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ - Rp_i = np.dot(R,pi_.T) - RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) - R = jitchol(RTR).T - self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1 - self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) - RPT = np.dot(R,P.T) - Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) - mu = self.w + np.dot(P,self.Gamma) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) - Diag = Diag0 * Iplus_Dprod_i - P = Iplus_Dprod_i[:,None] * P0 - safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0) - L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) - R,info = dtrtrs(L,R0,lower=1) - RPT = np.dot(R,P.T) - Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) - self.w = Diag * self.v_tilde - self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) - mu = self.w + np.dot(P,self.Gamma) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) - - return self._compute_GP_variables() From 1eb8cc5eab01b9a0448f0bd46e5c1e1ab767e633 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 09:49:29 +0000 Subject: [PATCH 36/38] variational posterior and prior added, linear updated --- GPy/core/gp.py | 5 +- GPy/core/parameterization/array_core.py | 4 +- GPy/core/parameterization/variational.py | 56 +++++++++++++----- GPy/core/sparse_gp.py | 16 ++--- .../latent_function_inference/posterior.py | 18 +++--- GPy/kern/_src/kern.py | 3 +- GPy/kern/_src/linear.py | 58 ++++++++++--------- GPy/kern/_src/stationary.py | 2 +- GPy/models/bayesian_gplvm.py | 34 +++++------ 9 files changed, 118 insertions(+), 78 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 13336ef5..d8d1a87a 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -30,7 +30,10 @@ class GP(Model): super(GP, self).__init__(name) assert X.ndim == 2 - self.X = ObservableArray(X) + if isinstance(X, ObservableArray): + self.X = self.X = X + else: self.X = ObservableArray(X) + self.num_data, self.input_dim = self.X.shape assert Y.ndim == 2 diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index dffe2ed1..e8be0f77 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -28,7 +28,9 @@ class ObservableArray(np.ndarray, Observable): """ __array_priority__ = -1 # Never give back ObservableArray def __new__(cls, input_array): - obj = np.atleast_1d(input_array).view(cls) + if not isinstance(input_array, ObservableArray): + obj = np.atleast_1d(input_array).view(cls) + else: obj = input_array cls.__name__ = "ObservableArray\n " return obj diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 5fe63052..d1c0faf8 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -3,21 +3,54 @@ Created on 6 Nov 2013 @author: maxz ''' + +import numpy as np from parameterized import Parameterized from param import Param from transformations import Logexp -class Normal(Parameterized): +class VariationalPrior(object): + def KL_divergence(self, variational_posterior): + raise 
NotImplementedError, "override this for variational inference of latent space" + + def update_gradients_KL(self, variational_posterior): + """ + updates the gradients for mean and variance **in place** + """ + raise NotImplementedError, "override this for variational inference of latent space" + +class NormalPrior(VariationalPrior): + def KL_divergence(self, variational_posterior): + var_mean = np.square(variational_posterior.mean).sum() + var_S = (variational_posterior.variance - np.log(variational_posterior.variance)).sum() + return 0.5 * (var_mean + var_S) - 0.5 * variational_posterior.input_dim * variational_posterior.num_data + + def update_gradients_KL(self, variational_posterior): + # dL: + variational_posterior.mean.gradient -= variational_posterior.mean + variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5 + + +class VariationalPosterior(Parameterized): + def __init__(self, means=None, variances=None, name=None, **kw): + super(VariationalPosterior, self).__init__(name=name, **kw) + self.mean = Param("mean", means) + self.variance = Param("variance", variances, Logexp()) + self.add_parameters(self.mean, self.variance) + self.num_data, self.input_dim = self.mean.shape + if self.has_uncertain_inputs(): + assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion" + + def has_uncertain_inputs(self): + return not self.variance is None + + +class NormalPosterior(VariationalPosterior): ''' - Normal distribution for variational approximations. + NormalPosterior distribution for variational approximations. holds the means and variances for a factorizing multivariate normal distribution ''' - def __init__(self, means, variances, name='latent space'): - Parameterized.__init__(self, name=name) - self.mean = Param("mean", means) - self.variance = Param('variance', variances, Logexp()) - self.add_parameters(self.mean, self.variance) def plot(self, *args): """ @@ -30,8 +63,7 @@ class Normal(Parameterized): from ...plotting.matplot_dep import variational_plots return variational_plots.plot(self,*args) - -class SpikeAndSlab(Parameterized): +class SpikeAndSlab(VariationalPosterior): ''' The SpikeAndSlab distribution for variational approximations. ''' @@ -39,11 +71,9 @@ class SpikeAndSlab(Parameterized): """ binary_prob : the probability of the distribution on the slab part. """ - Parameterized.__init__(self, name=name) - self.mean = Param("mean", means) - self.variance = Param('variance', variances, Logexp()) + super(SpikeAndSlab, self).__init__(means, variances, name) self.gamma = Param("binary_prob",binary_prob,) - self.add_parameters(self.mean, self.variance, self.gamma) + self.add_parameter(self.gamma) def plot(self, *args): """ diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 71053867..37f2baf8 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -5,8 +5,9 @@ import numpy as np from ..util.linalg import mdot from gp import GP from parameterization.param import Param -from GPy.inference.latent_function_inference import var_dtc +from ..inference.latent_function_inference import var_dtc from .. 
import likelihoods +from parameterization.variational import NormalPosterior class SparseGP(GP): """ @@ -45,16 +46,14 @@ class SparseGP(GP): self.Z = Param('inducing inputs', Z) self.num_inducing = Z.shape[0] - self.X_variance = X_variance - if self.has_uncertain_inputs(): - assert X_variance.shape == X.shape + self.q = NormalPosterior(X, X_variance) - GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name) + GP.__init__(self, self.q.mean, Y, kernel, likelihood, inference_method=inference_method, name=name) self.add_parameter(self.Z, index=0) self.parameters_changed() def has_uncertain_inputs(self): - return not (self.X_variance is None) + return self.q.has_uncertain_inputs() def parameters_changed(self): if self.has_uncertain_inputs(): @@ -81,7 +80,10 @@ class SparseGP(GP): var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) else: Kxx = self.kern.Kdiag(Xnew) - var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) + WKx_old = np.dot(np.atleast_3d(self.posterior.woodbury_inv)[:,:,0], Kx) + WKx = np.tensordot(np.atleast_3d(self.posterior.woodbury_inv), Kx, [0,0]) + import ipdb;ipdb.set_trace() + var = Kxx - np.sum(Kx * WKx, 0) else: Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) mu = np.dot(Kx, self.Cpsi1V) diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 73741a13..a996e1df 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify, jitchol, dtrtri +from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol class Posterior(object): """ @@ -83,14 +83,15 @@ class Posterior(object): #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) - return self._covariance + return self._covariance.squeeze() @property def precision(self): if self._precision is None: - self._precision = np.zeros(np.atleast_3d(self.covariance).shape) # if one covariance per dimension - for p in xrange(self.covariance.shape[-1]): - self._precision[:,:,p] = pdinv(self.covariance[:,:,p])[0] + cov = np.atleast_3d(self.covariance) + self._precision = np.zeros(cov.shape) # if one covariance per dimension + for p in xrange(cov.shape[-1]): + self._precision[:,:,p] = pdinv(cov[:,:,p])[0] return self._precision @property @@ -98,7 +99,10 @@ class Posterior(object): if self._woodbury_chol is None: #compute woodbury chol from if self._woodbury_inv is not None: - _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv) + winv = np.atleast_3d(self._woodbury_inv) + self._woodbury_chol = np.zeros(winv.shape) + for p in xrange(winv.shape[-1]): + self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2] #Li = jitchol(self._woodbury_inv) #self._woodbury_chol, _ = dtrtri(Li) #W, _, _, _, = pdinv(self._woodbury_inv) @@ -132,7 +136,7 @@ class Posterior(object): @property def K_chol(self): if self._K_chol is None: - self._K_chol = dportf(self._K) + self._K_chol = jitchol(self._K) return self._K_chol diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 3ef231b3..8bd9b6d1 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -127,11 +127,12 @@ from GPy.core.model import Model class 
Kern_check_model(Model): """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + from GPy.kern import RBF Model.__init__(self, 'kernel_test_model') num_samples = 20 num_samples2 = 10 if kernel==None: - kernel = GPy.kern.rbf(1) + kernel = RBF(1) if X==None: X = np.random.randn(num_samples, kernel.input_dim) if dL_dK==None: diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 61a1dbd3..a66b3705 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -106,51 +106,52 @@ class Linear(Kern): # variational # #---------------------------------------# - def psi0(self, Z, mu, S): - return np.sum(self.variances * self._mu2S(mu, S), 1) + def psi0(self, Z, posterior_variational): + return np.sum(self.variances * self._mu2S(posterior_variational), 1) - def psi1(self, Z, mu, S): - return self.K(mu, Z) #the variance, it does nothing + def psi1(self, Z, posterior_variational): + return self.K(posterior_variational.mean, Z) #the variance, it does nothing - def psi2(self, Z, mu, S): + def psi2(self, Z, posterior_variational): ZA = Z * self.variances - ZAinner = self._ZAinner(mu, S, Z) + ZAinner = self._ZAinner(posterior_variational, Z) return np.dot(ZAinner, ZA.T) - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z): + mu, S = posterior_variational.mean, posterior_variational.variance # psi0: - tmp = dL_dpsi0[:, None] * self._mu2S(mu, S) + tmp = dL_dpsi0[:, None] * self._mu2S(posterior_variational) if self.ARD: grad = tmp.sum(0) else: grad = np.atleast_1d(tmp.sum()) #psi1 self.update_gradients_full(dL_dpsi1, mu, Z) grad += self.variances.gradient #psi2 - tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(mu, S, Z)[:, :, None, :] * (2. * Z)[None, None, :, :]) + tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(posterior_variational, Z)[:, :, None, :] * (2. 
* Z)[None, None, :, :]) if self.ARD: grad += tmp.sum(0).sum(0).sum(0) else: grad += tmp.sum() #from Kmm self.update_gradients_full(dL_dKmm, Z, None) self.variances.gradient += grad - def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z): # Kmm grad = self.gradients_X(dL_dKmm, Z, None) #psi1 - grad += self.gradients_X(dL_dpsi1.T, Z, mu) + grad += self.gradients_X(dL_dpsi1.T, Z, posterior_variational.mean) #psi2 - self._weave_dpsi2_dZ(dL_dpsi2, Z, mu, S, grad) + self._weave_dpsi2_dZ(dL_dpsi2, Z, posterior_variational, grad) return grad - def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - grad_mu, grad_S = np.zeros(mu.shape), np.zeros(mu.shape) + def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z): + grad_mu, grad_S = np.zeros(posterior_variational.mean.shape), np.zeros(posterior_variational.mean.shape) # psi0 - grad_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) + grad_mu += dL_dpsi0[:, None] * (2.0 * posterior_variational.mean * self.variances) grad_S += dL_dpsi0[:, None] * self.variances # psi1 grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) # psi2 - self._weave_dpsi2_dmuS(dL_dpsi2, Z, mu, S, grad_mu, grad_S) + self._weave_dpsi2_dmuS(dL_dpsi2, Z, posterior_variational, grad_mu, grad_S) return grad_mu, grad_S @@ -159,7 +160,7 @@ class Linear(Kern): #--------------------------------------------------# - def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): + def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S): # Think N,num_inducing,num_inducing,input_dim ZA = Z * self.variances AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :] @@ -202,15 +203,16 @@ class Linear(Kern): weave_options = {'headers' : [''], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_link_args' : ['-lgomp']} - + + mu = pv.mean N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], type_converters=weave.converters.blitz,**weave_options) - def _weave_dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - AZA = self.variances*self._ZAinner(mu, S, Z) + def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target): + AZA = self.variances*self._ZAinner(pv, Z) code=""" int n,m,mm,q; #pragma omp parallel for private(n,mm,q) @@ -232,21 +234,21 @@ class Linear(Kern): 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_link_args' : ['-lgomp']} - N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1] - mu = param_to_array(mu) + N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1] + mu = param_to_array(pv.mean) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], type_converters=weave.converters.blitz,**weave_options) - def _mu2S(self, mu, S): - return np.square(mu) + S + def _mu2S(self, pv): + return np.square(pv.mean) + pv.variance - def _ZAinner(self, mu, S, Z): + def _ZAinner(self, pv, Z): ZA = Z*self.variances - inner = (mu[:, None, :] * mu[:, :, None]) - diag_indices = np.diag_indices(mu.shape[1], 2) - inner[:, diag_indices[0], diag_indices[1]] += S + inner = (pv.mean[:, None, :] * pv.mean[:, :, None]) + diag_indices = np.diag_indices(pv.mean.shape[1], 2) + 
inner[:, diag_indices[0], diag_indices[1]] += pv.variance return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 7cc2e695..a6ff9424 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -18,7 +18,7 @@ class Stationary(Kern): lengthscale = np.ones(1) else: lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel" + assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel" else: if lengthscale is not None: lengthscale = np.asarray(lengthscale) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index cc68de68..7b09e0b1 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -8,7 +8,7 @@ from ..core import SparseGP from ..likelihoods import Gaussian from ..inference.optimization import SCG from ..util import linalg -from ..core.parameterization.variational import Normal +from ..core.parameterization.variational import NormalPosterior, NormalPrior class BayesianGPLVM(SparseGP, GPLVM): """ @@ -29,7 +29,7 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = init if X_variance is None: - X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) + X_variance = np.random.uniform(0,.1,X.shape) if Z is None: Z = np.random.permutation(X.copy())[:num_inducing] @@ -40,7 +40,9 @@ class BayesianGPLVM(SparseGP, GPLVM): if likelihood is None: likelihood = Gaussian() - self.q = Normal(X, X_variance) + self.q = NormalPosterior(X, X_variance) + self.variational_prior = NormalPrior() + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs) self.add_parameter(self.q, index=0) #self.ensure_default_constraints() @@ -57,24 +59,17 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = state.pop() SparseGP._setstate(self, state) - def KL_divergence(self): - var_mean = np.square(self.X).sum() - var_S = np.sum(self.X_variance - np.log(self.X_variance)) - return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data - def parameters_changed(self): super(BayesianGPLVM, self).parameters_changed() - - self._log_marginal_likelihood -= self.KL_divergence() - dL_dmu, dL_dS = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) - - # dL: - self.q.mean.gradient = dL_dmu - self.q.variance.gradient = dL_dS - - # dKL: - self.q.mean.gradient -= self.X - self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5 + self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q) + + # TODO: This has to go into kern + # maybe a update_gradients_q_variational? + self.q.mean.gradient, self.q.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.q) + def plot_latent(self, plot_inducing=True, *args, **kwargs): """ @@ -147,6 +142,7 @@ class BayesianGPLVM(SparseGP, GPLVM): """ See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map """ + import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
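# A minimal sketch, not part of the patch, of the linear-kernel psi statistics defined in
# linear.py above, written as plain numpy expectations under a factorised Gaussian q(X)
# with mean mu (N x Q) and variance S (N x Q); a stands for the vector of kernel variances.
import numpy as np

N, M, Q = 5, 4, 3
mu = np.random.randn(N, Q)
S = np.random.rand(N, Q)
Z = np.random.randn(M, Q)
a = np.random.rand(Q)

psi0 = np.sum(a * (mu**2 + S), axis=1)            # E_q[k(x_n, x_n)],             shape (N,)
psi1 = (mu * a).dot(Z.T)                          # E_q[k(x_n, z_m)],             shape (N, M)

ZA = Z * a                                        # (M, Q)
xxT = mu[:, :, None] * mu[:, None, :]             # (N, Q, Q) second moments, mean part
xxT[:, np.arange(Q), np.arange(Q)] += S           # add the variances on the diagonal
psi2 = np.einsum('mq,nqr,kr->nmk', ZA, xxT, ZA)   # E_q[k(z_m, x_n) k(x_n, z_k)], shape (N, M, M)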
from ..plotting.matplot_dep import dim_reduction_plots From f311bfdf17c78bc4f56f03514d4e28b26e2e5057 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 24 Feb 2014 11:33:58 +0000 Subject: [PATCH 37/38] changed to 'update_gradients_q_variational' --- GPy/core/parameterization/variational.py | 4 ++-- GPy/kern/_src/rbf.py | 7 ++++--- GPy/models/bayesian_gplvm.py | 4 +--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index d1c0faf8..05ce2109 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -63,7 +63,7 @@ class NormalPosterior(VariationalPosterior): from ...plotting.matplot_dep import variational_plots return variational_plots.plot(self,*args) -class SpikeAndSlab(VariationalPosterior): +class SpikeAndSlabPosterior(VariationalPosterior): ''' The SpikeAndSlab distribution for variational approximations. ''' @@ -71,7 +71,7 @@ class SpikeAndSlab(VariationalPosterior): """ binary_prob : the probability of the distribution on the slab part. """ - super(SpikeAndSlab, self).__init__(means, variances, name) + super(SpikeAndSlabPosterior, self).__init__(means, variances, name) self.gamma = Param("binary_prob",binary_prob,) self.add_parameter(self.gamma) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0c8588a2..e23e9e2c 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -182,7 +182,7 @@ class RBF(Kern): return grad - def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + def update_gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): mu = posterior_variational.mean S = posterior_variational.variance self._psi_computations(Z, mu, S) @@ -194,8 +194,9 @@ class RBF(Kern): tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - return grad_mu, grad_S + + posterior_variational.mean.gradient = grad_mu + posterior_variational.variance.gradient = grad_S def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 7b09e0b1..a8d643b9 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -63,9 +63,7 @@ class BayesianGPLVM(SparseGP, GPLVM): super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q) - # TODO: This has to go into kern - # maybe a update_gradients_q_variational? 
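# A minimal sketch, not part of the patch: parameters_changed above subtracts the KL term
# computed by NormalPrior.KL_divergence, which is the closed-form KL( N(mu, S) || N(0, I) )
# summed over all N x Q entries of the factorised posterior. The check below mirrors that
# formula; the variable names are illustrative only.
import numpy as np

N, Q = 10, 3
mean = np.random.randn(N, Q)
variance = np.random.rand(N, Q) + 0.1

kl = 0.5 * (np.square(mean).sum() + (variance - np.log(variance)).sum()) - 0.5 * N * Q
kl_check = 0.5 * np.sum(np.square(mean) + variance - np.log(variance) - 1.)   # elementwise form
assert np.allclose(kl, kl_check)

# gradients of the KL term, as applied (with a minus sign) by update_gradients_KL above
dKL_dmean = mean
dKL_dvariance = 0.5 * (1. - 1. / variance)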
- self.q.mean.gradient, self.q.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + self.kern.update_gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) # update for the KL divergence self.variational_prior.update_gradients_KL(self.q) From 8dbb65ab504fc6cd2c8743646e5c3e1ca30d571c Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 11:34:22 +0000 Subject: [PATCH 38/38] 2d plotting --- GPy/core/sparse_gp.py | 10 ++-- GPy/examples/dimensionality_reduction.py | 66 ++++++++++++------------ GPy/plotting/matplot_dep/models_plots.py | 18 +++---- GPy/testing/index_operations_tests.py | 5 ++ 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 37f2baf8..bb3116ba 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -77,13 +77,11 @@ class SparseGP(GP): mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = self.kern.K(Xnew) - var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) + #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) + var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) else: Kxx = self.kern.Kdiag(Xnew) - WKx_old = np.dot(np.atleast_3d(self.posterior.woodbury_inv)[:,:,0], Kx) - WKx = np.tensordot(np.atleast_3d(self.posterior.woodbury_inv), Kx, [0,0]) - import ipdb;ipdb.set_trace() - var = Kxx - np.sum(Kx * WKx, 0) + var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T else: Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) mu = np.dot(Kx, self.Cpsi1V) @@ -93,7 +91,7 @@ class SparseGP(GP): Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - return mu, var[:,None] + return mu, var def _getstate(self): diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 3ba54d34..b6030eb7 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -89,7 +89,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci Y = Y - Y.mean(0) Y /= Y.std(0) # Create the model - kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -139,7 +139,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 (1 - var))) + .001 Z = _np.random.permutation(X)[:num_inducing] - kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2)) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c @@ -159,28 +159,26 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy - from GPy.likelihoods import Gaussian from matplotlib import pyplot as plt _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, 1., [.1] * Q, ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) Y = 
data['X'][:N] - Yn = Gaussian(Y, normalize=True) - m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) + m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) - m['noise'] = Yn.Y.var() / 100. + m['.*noise.var'] = Y.var() / 100. if optimize: m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) if plot: - y = m.likelihood.Y[0, :] + y = m.Y[0, :] fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes) - data_show = GPy.util.visualize.vector_show(y) - lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable + data_show = GPy.plotting.matplot_dep.visualize.vector_show(y) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) raw_input('Press enter to finish') plt.close(fig) @@ -190,8 +188,8 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): _np.random.seed(1234) x = _np.linspace(0, 4 * _np.pi, N)[:, None] - s1 = _np.vectorize(lambda x: -_np.sin(x)) - s2 = _np.vectorize(lambda x: _np.cos(x)) + s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x))) + s2 = _np.vectorize(lambda x: _np.cos(x)**2) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) sS = _np.vectorize(lambda x: x*_np.sin(x)) @@ -328,7 +326,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) likelihood_list = [Gaussian(x, normalize=True) for x in Ylist] - k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) + k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2)) m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) m.ensure_default_constraints() @@ -355,15 +353,15 @@ def brendan_faces(optimize=True, verbose=True, plot=True): m = GPy.models.GPLVM(Yn, Q) # optimize - m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) + m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped()) if optimize: m.optimize('scg', messages=verbose, max_iters=1000) if plot: ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -382,8 +380,8 @@ def olivetti_faces(optimize=True, verbose=True, plot=True): if plot: ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -398,8 +396,8 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru Y = data['Y'][range[0]:range[1], :].copy() if 
plot: y = Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.data_play(Y, data_show, frame_rate) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.data_play(Y, data_show, frame_rate) return Y def stick(kernel=None, optimize=True, verbose=True, plot=True): @@ -410,12 +408,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True): # optimize m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.util.visualize.visual_available: + if plot and GPy.plotting.matplot_dep.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -429,12 +427,12 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): mapping = GPy.mappings.Linear(data['Y'].shape[1], 2) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.util.visualize.visual_available: + if plot and GPy.plotting.matplot_dep.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -449,12 +447,12 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.util.visualize.visual_available: + if plot and GPy.plotting.matplot_dep.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -480,7 +478,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() Q = 6 - kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2)) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) # optimize m.ensure_default_constraints() @@ -491,8 +489,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): plt.sca(latent_axes) m.plot_latent() y = m.likelihood.Y[0, :].copy() - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, 
latent_axes=latent_axes, sense_axes=sense_axes) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) raw_input('Press enter to finish') return m @@ -511,8 +509,8 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose if plot: ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel']) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') lvm_visualizer.close() diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 59c32775..3d019bfd 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -57,8 +57,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - X, Y = param_to_array(model.X, model.Y) - if model.has_uncertain_inputs(): X_variance = model.X_variance + X, Y, Z = param_to_array(model.X, model.Y, model.Z) + if model.has_uncertain_inputs(): X_variance = param_to_array(model.q.variance) #work out what the inputs are for plotting (1D or 2D) fixed_dims = np.array([i for i,v in fixed_inputs]) @@ -97,10 +97,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add error bars for uncertain (if input uncertainty is being modelled) - if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): - ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), - xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), - ecolor='k', fmt=None, elinewidth=.5, alpha=.5) + #if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): + # ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), + # xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), + # ecolor='k', fmt=None, elinewidth=.5, alpha=.5) #set the limits of the plot to some sensible values @@ -112,7 +112,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add inducing inputs (if a sparse model is used) if hasattr(model,"Z"): #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] - Zu = param_to_array(model.Z[:,free_dims]) + Zu = Z[:,free_dims] z_height = ax.get_ylim()[0] ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12) @@ -136,7 +136,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', Y = Y else: m, _, _, _ = model.predict(Xgrid) - Y = model.data + Y = Y for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) @@ -152,7 +152,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add inducing inputs (if a sparse model is used) if hasattr(model,"Z"): #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] - Zu = model.Z[:,free_dims] + Zu = Z[:,free_dims] ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo') else: diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index 171db5cc..64b0c908 
100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -30,6 +30,11 @@ class Test(unittest.TestCase): self.assertListEqual(self.param_index[two].tolist(), [0,3]) self.assertListEqual(self.param_index[one].tolist(), [1]) + def test_shift_right(self): + self.param_index.shift_right(5, 2) + self.assertListEqual(self.param_index[three].tolist(), [2,4,9]) + self.assertListEqual(self.param_index[two].tolist(), [0,7]) + self.assertListEqual(self.param_index[one].tolist(), [3]) def test_index_view(self): #=======================================================================
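# A minimal sketch, not part of the patch: the diagonal predictive variance used for the
# sparse GP above is diag(Kxx) - diag(Kx^T W Kx), with W the Woodbury inverse and Kx the
# cross-covariance K(Z, Xnew). The einsum form below is equivalent to the broadcasting
# expression in sparse_gp.py for a single output; all names here are illustrative only.
import numpy as np

M, Nnew = 6, 4
A = np.random.randn(M, M)
W = A.dot(A.T)                   # stand-in for the symmetric Woodbury inverse
Kx = np.random.randn(M, Nnew)    # stand-in for K(Z, Xnew)
Kxx_diag = np.ones(Nnew)         # stand-in for Kdiag(Xnew)

var = Kxx_diag - np.einsum('mn,mk,kn->n', Kx, W, Kx)

var_loop = np.array([Kxx_diag[n] - Kx[:, n].dot(W).dot(Kx[:, n]) for n in range(Nnew)])
assert np.allclose(var, var_loop)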