Updated sympy code, multioutput grad checks pass apart from wrt X. Similar problems with prediction as to sinc covariance, needs investigation.

This commit is contained in:
Neil Lawrence 2013-10-14 09:37:35 +01:00
parent 66daf2ad45
commit fe30db1331
4 changed files with 124 additions and 52 deletions

View file

@ -327,8 +327,6 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
m.plot_scales("MRD Scales") m.plot_scales("MRD Scales")
return m return m
def brendan_faces(): def brendan_faces():
from GPy import kern from GPy import kern
data = GPy.util.datasets.brendan_faces() data = GPy.util.datasets.brendan_faces()
@ -342,7 +340,7 @@ def brendan_faces():
# optimize # optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
m.optimize('scg', messages=1, max_iters=10) m.optimize('scg', messages=1, max_iters=1000)
ax = m.plot_latent(which_indices=(0, 1)) ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]

View file

@ -322,17 +322,19 @@ if sympy_available:
real_input_dim -= 1 real_input_dim -= 1
X = sp.symbols('x_:' + str(real_input_dim)) X = sp.symbols('x_:' + str(real_input_dim))
Z = sp.symbols('z_:' + str(real_input_dim)) Z = sp.symbols('z_:' + str(real_input_dim))
variance = sp.var('variance',positive=True) scale = sp.var('scale_i scale_j',positive=True)
if ARD: if ARD:
lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)] lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)]
dist_string = ' + '.join(['(x_%i-z_%i)**2/(lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)]) shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)]
dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)])
dist = parse_expr(dist_string) dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/2.) f = variance*sp.exp(-dist/2.)
else: else:
lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True) lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True)
shared_lengthscale = sp.var('shared_lengthscale',positive=True)
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)]) dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)])
dist = parse_expr(dist_string) dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/(2*lengthscale_i*lengthscale_j)) f = scale_i*scale_j*sp.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')]) return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
def sinc(input_dim, ARD=False, variance=1., lengthscale=1.): def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):

View file

@ -43,9 +43,9 @@ class spkern(Kernpart):
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z) assert len(self._sp_x)==len(self._sp_z)
self.input_dim = len(self._sp_x) self.input_dim = len(self._sp_x)
self._real_input_dim = self.input_dim
if output_dim > 1: if output_dim > 1:
self.input_dim += 1 self.input_dim += 1
assert self.input_dim == input_dim assert self.input_dim == input_dim
self.output_dim = output_dim self.output_dim = output_dim
# extract parameter names # extract parameter names
@ -139,8 +139,10 @@ class spkern(Kernpart):
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
# This is the basic argument construction for the C code. # This is the basic argument construction for the C code.
arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] #arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x]
+ ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) # + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z])
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
if self.output_dim>1: if self.output_dim>1:
reverse_arg_list = list(arg_list) reverse_arg_list = list(arg_list)
reverse_arg_list.reverse() reverse_arg_list.reverse()
@ -151,17 +153,21 @@ class spkern(Kernpart):
precompute_list=[] precompute_list=[]
if self.output_dim > 1: if self.output_dim > 1:
reverse_arg_list+=list(param_arg_list) reverse_arg_list+=list(param_arg_list)
split_param_arg_list = ["%s[%s]"%(theta.name[:-2],index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s[%s]"%(theta.name[:-2],index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list reverse_arg_list += split_param_reverse_arg_list
precompute_list += [' '*16+"int %s=(int)%s[%s*input_dim+output_dim];"%(index, var, index2) for index, var, index2 in zip(['ii', 'jj'], ['X', 'Z'], ['i', 'j'])] # Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list) reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list) arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list) precompute_string = "\n".join(precompute_list)
# Here's the code to do the looping for K # Here's the code to do the looping for K
self._K_code =\ self._K_code =\
""" """
// _K_code
// Code for computing the covariance function.
int i; int i;
int j; int j;
int N = target_array->dimensions[0]; int N = target_array->dimensions[0];
@ -171,7 +177,8 @@ class spkern(Kernpart):
for (i=0;i<N;i++){ for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){ for (j=0;j<num_inducing;j++){
%s %s
target[i*num_inducing+j] = k(%s); //target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
} }
} }
%s %s
@ -188,28 +195,33 @@ class spkern(Kernpart):
# Code to do the looping for Kdiag # Code to do the looping for Kdiag
self._Kdiag_code =\ self._Kdiag_code =\
""" """
// _Kdiag_code
// Code for computing diagonal of covariance function.
int i; int i;
int N = target_array->dimensions[0]; int N = target_array->dimensions[0];
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
//#pragma omp parallel for //#pragma omp parallel for
for (i=0;i<N;i++){ for (i=0;i<N;i++){
%s %s
target[i] = k(%s); //target[i] =
TARGET1(i)=k(%s);
} }
%s %s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients # Code to compute gradients
func_list = [] grad_func_list = []
if self.output_dim>1: if self.output_dim>1:
func_list += [' '*16 + "int %s=(int)%s[%s*input_dim+output_dim];"%(index, var, index2) for index, var, index2 in zip(['ii', 'jj'], ['X', 'Z'], ['i', 'j'])] grad_func_list += c_define_output_indices
func_list += [' '*16 + 'target[%i+ii] += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
func_list += [' '*16 + 'target[%i+jj] += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
func_list += ([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
func_string = '\n'.join(func_list) grad_func_string = '\n'.join(grad_func_list)
self._dK_dtheta_code =\ self._dK_dtheta_code =\
""" """
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i; int i;
int j; int j;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
@ -222,16 +234,18 @@ class spkern(Kernpart):
} }
} }
%s %s
"""%(func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
# Code to compute gradients for Kdiag TODO: needs clean up # Code to compute gradients for Kdiag TODO: needs clean up
diag_func_string = re.sub('Z','X',func_string,count=0) diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_func_string = re.sub('int jj','//int jj',diag_func_string) diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_func_string = re.sub('j','i',diag_func_string) diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_func_string) diag_grad_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_grad_func_string)
self._dKdiag_dtheta_code =\ self._dKdiag_dtheta_code =\
""" """
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i; int i;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
@ -239,13 +253,19 @@ class spkern(Kernpart):
%s %s
} }
%s %s
"""%(diag_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arg_string) for q in range(self.input_dim)]) gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._dK_dX_code = \ self._dK_dX_code = \
""" """
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i; int i;
int j; int j;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
@ -258,24 +278,26 @@ class spkern(Kernpart):
} }
} }
%s %s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0) diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradient_funcs = re.sub('int jj','//int jj',diag_gradient_funcs) diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs) diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs) diag_gradX_func_string = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X # Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \ self._dKdiag_dX_code= \
""" """
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
for (int i=0;i<N; i++){ for (int i=0;i<N; i++){
%s %s
} }
%s %s
"""%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a """%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid # string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't # of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that. # diag_func_string called here? Need to check that.
@ -285,6 +307,9 @@ class spkern(Kernpart):
self._K_code_X = self._K_code.replace('Z[', 'X[') self._K_code_X = self._K_code.replace('Z[', 'X[')
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[') self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[') self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
self._K_code_X = self._K_code.replace('Z2(', 'X2(')
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
#TODO: insert multiple functions here via string manipulation #TODO: insert multiple functions here via string manipulation

View file

@ -609,16 +609,18 @@ def olivetti_faces(data_set='olivetti_faces'):
lbls = np.asarray(lbls)[:, None] lbls = np.asarray(lbls)[:, None]
return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set) return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set)
def olympic_100m_men(data_set='rogers_girolami_data'): def download_rogers_girolami_data():
if not data_available(data_set): if not data_available('rogers_girolami_data'):
download_data(data_set) download_data(data_set)
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz') tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
tar = tarfile.open(tar_file) tar = tarfile.open(tar_file)
print('Extracting file.') print('Extracting file.')
tar.extractall(path=path) tar.extractall(path=path)
tar.close() tar.close()
def olympic_100m_men(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100'] olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100']
X = olympic_data[:, 0][:, None] X = olympic_data[:, 0][:, None]
@ -626,20 +628,45 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set) return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_100m_women(data_set='rogers_girolami_data'): def olympic_100m_women(data_set='rogers_girolami_data'):
if not data_available(data_set): download_rogers_girolami_data()
download_data(data_set)
path = os.path.join(data_path, data_set)
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
tar = tarfile.open(tar_file)
print('Extracting file.')
tar.extractall(path=path)
tar.close()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100'] olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100']
X = olympic_data[:, 0][:, None] X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None] Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set) return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_200m_women(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female200']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_200m_men(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male200']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Male 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_400m_women(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female400']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_400m_men(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male400']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Male 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_marathon_men(data_set='olympic_marathon_men'): def olympic_marathon_men(data_set='olympic_marathon_men'):
if not data_available(data_set): if not data_available(data_set):
download_data(data_set) download_data(data_set)
@ -648,6 +675,26 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
Y = olympics[:, 1:2] Y = olympics[:, 1:2]
return data_details_return({'X': X, 'Y': Y}, data_set) return data_details_return({'X': X, 'Y': Y}, data_set)
def olympics():
"""All olympics sprint winning times for multiple output prediction."""
X = np.zeros((0, 2))
Y = np.zeros((0, 1))
for i, dataset in enumerate([olympic_100m_men,
olympic_100m_women,
olympic_200m_men,
olympic_200m_women,
olympic_400m_men,
olympic_400m_women]):
data = dataset()
year = data['X']
time = data['Y']
X = np.vstack((X, np.hstack((year, np.ones_like(year)*i))))
Y = np.vstack((Y, time))
data['X'] = X
data['Y'] = Y
data['info'] = "Olympics sprint event winning for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
return data
# def movielens_small(partNo=1,seed=default_seed): # def movielens_small(partNo=1,seed=default_seed):
# np.random.seed(seed=seed) # np.random.seed(seed=seed)