mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-08 19:42:39 +02:00
Part working version of sympy covariance with new params version.
This commit is contained in:
parent
3968d48ba5
commit
339b5caa76
6 changed files with 182 additions and 479 deletions
|
|
@ -4,7 +4,9 @@ from _src.kern import Kern
|
||||||
from _src.linear import Linear
|
from _src.linear import Linear
|
||||||
from _src.brownian import Brownian
|
from _src.brownian import Brownian
|
||||||
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
|
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
|
||||||
#from _src.bias import Bias
|
from _src.sympykern import Sympykern
|
||||||
|
#from _src.kern import kern_test
|
||||||
|
from _src.bias import Bias
|
||||||
#import coregionalize
|
#import coregionalize
|
||||||
#import exponential
|
#import exponential
|
||||||
#import eq_ode1
|
#import eq_ode1
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
|
|
||||||
from kern import Kern
|
from kern import Kern
|
||||||
|
import numpy as np
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from ...core.parameterization.transformations import Logexp
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ class Kern(Parameterized):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def gradients_X_diag(self, dL_dK, X):
|
def gradients_X_diag(self, dL_dK, X):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def update_gradients_full(self, dL_dK, X):
|
def update_gradients_full(self, dL_dK, X, X2):
|
||||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||||
|
|
@ -125,7 +125,7 @@ class Kern(Parameterized):
|
||||||
from GPy.core.model import Model
|
from GPy.core.model import Model
|
||||||
|
|
||||||
class Kern_check_model(Model):
|
class Kern_check_model(Model):
|
||||||
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
|
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||||
Model.__init__(self, 'kernel_test_model')
|
Model.__init__(self, 'kernel_test_model')
|
||||||
num_samples = 20
|
num_samples = 20
|
||||||
|
|
@ -165,9 +165,10 @@ class Kern_check_dK_dtheta(Kern_check_model):
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
|
|
||||||
|
target = np.zeros_like(self._get_params())
|
||||||
|
self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2, target)
|
||||||
|
return target
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ class Stationary(Kern):
|
||||||
lengthscale = np.ones(1)
|
lengthscale = np.ones(1)
|
||||||
else:
|
else:
|
||||||
lengthscale = np.asarray(lengthscale)
|
lengthscale = np.asarray(lengthscale)
|
||||||
assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel"
|
assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel"
|
||||||
else:
|
else:
|
||||||
if lengthscale is not None:
|
if lengthscale is not None:
|
||||||
lengthscale = np.asarray(lengthscale)
|
lengthscale = np.asarray(lengthscale)
|
||||||
|
|
|
||||||
|
|
@ -2,35 +2,17 @@
|
||||||
try:
|
try:
|
||||||
import sympy as sp
|
import sympy as sp
|
||||||
sympy_available=True
|
sympy_available=True
|
||||||
|
from sympy.utilities.lambdify import lambdify
|
||||||
except ImportError:
|
except ImportError:
|
||||||
sympy_available=False
|
sympy_available=False
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
from sympy.core.cache import clear_cache
|
|
||||||
from sympy.utilities.codegen import codegen
|
|
||||||
|
|
||||||
try:
|
|
||||||
from scipy import weave
|
|
||||||
weave_available = True
|
|
||||||
except ImportError:
|
|
||||||
weave_available = False
|
|
||||||
|
|
||||||
import os
|
|
||||||
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import re
|
from kern import Kern
|
||||||
import tempfile
|
|
||||||
import pdb
|
|
||||||
import ast
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from ...core.parameterization.transformations import Logexp
|
from ...core.parameterization.transformations import Logexp
|
||||||
# TODO have this set up in a set up file!
|
|
||||||
user_code_storage = tempfile.gettempdir()
|
|
||||||
|
|
||||||
class spkern(Kernpart):
|
class Sympykern(Kern):
|
||||||
"""
|
"""
|
||||||
A kernel object, where all the hard work in done by sympy.
|
A kernel object, where all the hard work in done by sympy.
|
||||||
|
|
||||||
|
|
@ -51,10 +33,10 @@ class spkern(Kernpart):
|
||||||
name='sympykern'
|
name='sympykern'
|
||||||
if k is None:
|
if k is None:
|
||||||
raise ValueError, "You must provide an argument for the covariance function."
|
raise ValueError, "You must provide an argument for the covariance function."
|
||||||
super(spkern, self).__init__(input_dim, name)
|
super(Sympykern, self).__init__(input_dim, name)
|
||||||
|
|
||||||
self._sp_k = k
|
self._sp_k = k
|
||||||
|
|
||||||
# pull the variable names out of the symbolic covariance function.
|
# pull the variable names out of the symbolic covariance function.
|
||||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||||
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||||
|
|
@ -66,6 +48,10 @@ class spkern(Kernpart):
|
||||||
assert len(self._sp_x)==len(self._sp_z)
|
assert len(self._sp_x)==len(self._sp_z)
|
||||||
x_dim=len(self._sp_x)
|
x_dim=len(self._sp_x)
|
||||||
|
|
||||||
|
self._sp_kdiag = k
|
||||||
|
for x, z in zip(self._sp_x, self._sp_z):
|
||||||
|
self._sp_kdiag = self._sp_kdiag.subs(z, x)
|
||||||
|
|
||||||
# If it is a multi-output covariance, add an input for indexing the outputs.
|
# If it is a multi-output covariance, add an input for indexing the outputs.
|
||||||
self._real_input_dim = x_dim
|
self._real_input_dim = x_dim
|
||||||
# Check input dim is number of xs + 1 if output_dim is >1
|
# Check input dim is number of xs + 1 if output_dim is >1
|
||||||
|
|
@ -97,6 +83,8 @@ class spkern(Kernpart):
|
||||||
#setattr(self, theta, np.ones(self.output_dim))
|
#setattr(self, theta, np.ones(self.output_dim))
|
||||||
|
|
||||||
self.num_shared_params = len(self._sp_theta)
|
self.num_shared_params = len(self._sp_theta)
|
||||||
|
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
|
||||||
|
self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
|
||||||
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -112,43 +100,33 @@ class spkern(Kernpart):
|
||||||
if param is not None:
|
if param is not None:
|
||||||
if param.has_key(theta):
|
if param.has_key(theta):
|
||||||
val = param[theta]
|
val = param[theta]
|
||||||
#setattr(self, theta.name, val)
|
|
||||||
setattr(self, theta.name, Param(theta.name, val, None))
|
setattr(self, theta.name, Param(theta.name, val, None))
|
||||||
self.add_parameters(getattr(self, theta.name))
|
self.add_parameters(getattr(self, theta.name))
|
||||||
#deal with param
|
#deal with param
|
||||||
#self._set_params(self._get_params())
|
#self._set_params(self._get_params())
|
||||||
|
|
||||||
# Differentiate with respect to parameters.
|
# Differentiate with respect to parameters.
|
||||||
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
derivative_arguments = self._sp_x + self._sp_theta
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
|
derivative_arguments += self._sp_theta_i
|
||||||
|
|
||||||
# differentiate with respect to input variables.
|
self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
|
||||||
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
|
||||||
|
|
||||||
|
|
||||||
|
# This gives the parameters for the arg list.
|
||||||
|
self.arg_list = self._sp_x + self._sp_z + self._sp_theta
|
||||||
|
self.diag_arg_list = self._sp_x + self._sp_theta
|
||||||
|
if self.output_dim > 1:
|
||||||
|
self.arg_list += self._sp_theta_i + self._sp_theta_j
|
||||||
|
self.diag_arg_list += self._sp_theta_i
|
||||||
# psi_stats aren't yet implemented.
|
# psi_stats aren't yet implemented.
|
||||||
if False:
|
if False:
|
||||||
self.compute_psi_stats()
|
self.compute_psi_stats()
|
||||||
|
|
||||||
self._code = {}
|
|
||||||
|
|
||||||
# generate the code for the covariance functions
|
# generate the code for the covariance functions
|
||||||
self._gen_code()
|
self._gen_code()
|
||||||
|
|
||||||
if weave_available:
|
|
||||||
if False:
|
|
||||||
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
|
|
||||||
else:
|
|
||||||
extra_compile_args = []
|
|
||||||
|
|
||||||
self.weave_kwargs = {
|
|
||||||
'support_code': None, #self._function_code,
|
|
||||||
'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
|
|
||||||
'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
|
|
||||||
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
|
|
||||||
'extra_compile_args':extra_compile_args,
|
|
||||||
'extra_link_args':['-lgomp'],
|
|
||||||
'verbose':True}
|
|
||||||
self.parameters_changed() # initializes caches
|
self.parameters_changed() # initializes caches
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -157,407 +135,128 @@ class spkern(Kernpart):
|
||||||
|
|
||||||
def _gen_code(self):
|
def _gen_code(self):
|
||||||
|
|
||||||
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
|
self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
|
||||||
code_list = [('k',self._sp_k)]
|
for key in self.derivatives.keys():
|
||||||
# gradients with respect to covariance input
|
setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
|
||||||
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
|
|
||||||
# gradient with respect to parameters
|
self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy')
|
||||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
|
for key in self.derivatives.keys():
|
||||||
# gradient with respect to multiple output parameters
|
setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
|
||||||
if self.output_dim > 1:
|
|
||||||
argument_sequence += self._sp_theta_i + self._sp_theta_j
|
def K(self,X,X2=None):
|
||||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
|
self._K_computations(X, X2)
|
||||||
# generate c functions from sympy objects
|
return self._K_function(**self._arguments)
|
||||||
if weave_available:
|
|
||||||
code_type = "C"
|
|
||||||
else:
|
|
||||||
code_type = "PYTHON"
|
|
||||||
# Need to add the sympy_helpers header in here.
|
|
||||||
(foo_c,self._function_code), (foo_h,self._function_header) = \
|
|
||||||
codegen(code_list,
|
|
||||||
code_type,
|
|
||||||
self.name,
|
|
||||||
argument_sequence=argument_sequence)
|
|
||||||
|
|
||||||
|
|
||||||
# Use weave to compute the underlying functions.
|
def Kdiag(self,X):
|
||||||
if weave_available:
|
self._K_computations(X)
|
||||||
# put the header file where we can find it
|
return self._Kdiag_function(**self._diag_arguments)
|
||||||
f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
|
|
||||||
f.write(self._function_header)
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
|
|
||||||
if weave_available:
|
|
||||||
# Substitute any known derivatives which sympy doesn't compute
|
|
||||||
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
|
||||||
# put the cpp file in user code storage (defaults to temp file location)
|
|
||||||
f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
|
|
||||||
else:
|
|
||||||
# put the python file in user code storage
|
|
||||||
f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
|
|
||||||
f.write(self._function_code)
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
if weave_available:
|
|
||||||
# arg_list will store the arguments required for the C code.
|
|
||||||
input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
|
|
||||||
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
|
|
||||||
|
|
||||||
# for multiple outputs reverse argument list is also required
|
|
||||||
if self.output_dim>1:
|
|
||||||
reverse_input_arg_list = list(input_arg_list)
|
|
||||||
reverse_input_arg_list.reverse()
|
|
||||||
|
|
||||||
# This gives the parameters for the arg list.
|
|
||||||
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
|
|
||||||
arg_list = input_arg_list + param_arg_list
|
|
||||||
|
|
||||||
precompute_list=[]
|
|
||||||
if self.output_dim > 1:
|
|
||||||
reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
|
|
||||||
# For multiple outputs, also need the split parameters.
|
|
||||||
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
|
|
||||||
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
|
|
||||||
arg_list += split_param_arg_list
|
|
||||||
reverse_arg_list += split_param_reverse_arg_list
|
|
||||||
# Extract the right output indices from the inputs.
|
|
||||||
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
|
|
||||||
precompute_list += c_define_output_indices
|
|
||||||
reverse_arg_string = ", ".join(reverse_arg_list)
|
|
||||||
arg_string = ", ".join(arg_list)
|
|
||||||
precompute_string = "\n".join(precompute_list)
|
|
||||||
|
|
||||||
# Now we use the arguments in code that computes the separate parts.
|
|
||||||
|
|
||||||
# Any precomputations will be done here eventually.
|
|
||||||
self._precompute = \
|
|
||||||
"""
|
|
||||||
// Precompute code would go here. It will be called when parameters are updated.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Here's the code to do the looping for K
|
|
||||||
self._code['K'] =\
|
|
||||||
"""
|
|
||||||
// _K_code
|
|
||||||
// Code for computing the covariance function.
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int n = target_array->dimensions[0];
|
|
||||||
int num_inducing = target_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<n;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
%s
|
|
||||||
//target[i*num_inducing+j] =
|
|
||||||
TARGET2(i, j) += k(%s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
|
|
||||||
# adding a string representation of the function in the
|
|
||||||
# comment forces recompile when needed
|
|
||||||
self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
|
|
||||||
|
|
||||||
|
|
||||||
# Code to compute diagonal of covariance.
|
|
||||||
diag_arg_string = re.sub('Z','X',arg_string)
|
|
||||||
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
|
|
||||||
diag_arg_string = re.sub('j','i',diag_arg_string)
|
|
||||||
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
|
|
||||||
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
|
|
||||||
diag_precompute_string = re.sub('j','i',diag_precompute_string)
|
|
||||||
# Code to do the looping for Kdiag
|
|
||||||
self._code['Kdiag'] =\
|
|
||||||
"""
|
|
||||||
// _code['Kdiag']
|
|
||||||
// Code for computing diagonal of covariance function.
|
|
||||||
int i;
|
|
||||||
int n = target_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for
|
|
||||||
for (i=0;i<n;i++){
|
|
||||||
%s
|
|
||||||
//target[i] =
|
|
||||||
TARGET1(i)=k(%s);
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Code to compute gradients
|
|
||||||
if self.output_dim>1:
|
|
||||||
for i, theta in enumerate(self._sp_theta_i):
|
|
||||||
grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
|
|
||||||
grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
|
|
||||||
grad_func_list = c_define_output_indices+grad_func_list
|
|
||||||
|
|
||||||
grad_func_string = '\n'.join(grad_func_list)
|
|
||||||
self._code['dK_d' + theta.name] =\
|
|
||||||
"""
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int n = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<n;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
|
||||||
self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
|
|
||||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
|
||||||
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
|
||||||
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
|
||||||
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
|
||||||
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
|
|
||||||
self._code['dKdiag_d' + theta.name] =\
|
|
||||||
"""
|
|
||||||
// _dKdiag_dtheta_code
|
|
||||||
// Code for computing gradient of diagonal with respect to parameters.
|
|
||||||
int i;
|
|
||||||
int n = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (i=0;i<n;i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
for i, theta in enumerate(self._sp_theta):
|
|
||||||
grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
|
|
||||||
grad_func_string = '\n'.join(grad_func_list)
|
|
||||||
|
|
||||||
self._code['dK_d' + theta.name] =\
|
|
||||||
"""
|
|
||||||
// _dK_dtheta_code
|
|
||||||
// Code for computing gradient of covariance with respect to parameters.
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int n = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<n;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
|
||||||
self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
|
|
||||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
|
||||||
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
|
||||||
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
|
||||||
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
|
||||||
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
|
|
||||||
self._code['dKdiag_d' + theta.name] =\
|
|
||||||
"""
|
|
||||||
// _dKdiag_dtheta_code
|
|
||||||
// Code for computing gradient of diagonal with respect to parameters.
|
|
||||||
int i;
|
|
||||||
int n = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (i=0;i<n;i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
|
|
||||||
gradX_func_list = []
|
|
||||||
if self.output_dim>1:
|
|
||||||
gradX_func_list += c_define_output_indices
|
|
||||||
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
|
|
||||||
gradX_func_string = "\n".join(gradX_func_list)
|
|
||||||
|
|
||||||
self._code['dK_dX'] = \
|
|
||||||
"""
|
|
||||||
// _dK_dX_code
|
|
||||||
// Code for computing gradient of covariance with respect to inputs.
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int n = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<n; i++){
|
|
||||||
for (j=0; j<num_inducing; j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
|
|
||||||
|
|
||||||
|
|
||||||
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
|
|
||||||
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
|
|
||||||
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
|
|
||||||
diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
|
|
||||||
|
|
||||||
# Code for gradients of Kdiag wrt X
|
|
||||||
self._code['dKdiag_dX'] = \
|
|
||||||
"""
|
|
||||||
// _dKdiag_dX_code
|
|
||||||
// Code for computing gradient of diagonal with respect to inputs.
|
|
||||||
int n = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (int i=0;i<n; i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
|
|
||||||
# string representation forces recompile when needed Get rid
|
|
||||||
# of Zs in argument for diagonal. TODO: Why wasn't
|
|
||||||
# diag_func_string called here? Need to check that.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#TODO: insert multiple functions here via string manipulation
|
|
||||||
#TODO: similar functions for psi_stats
|
|
||||||
#TODO: similar functions when cython available.
|
|
||||||
#TODO: similar functions when only python available.
|
|
||||||
|
|
||||||
def _get_arg_names(self, target=None, Z=None, partial=None):
|
|
||||||
arg_names = ['X']
|
|
||||||
if target is not None:
|
|
||||||
arg_names += ['target']
|
|
||||||
for shared_params in self._sp_theta:
|
|
||||||
arg_names += [shared_params.name]
|
|
||||||
if Z is not None:
|
|
||||||
arg_names += ['Z']
|
|
||||||
if partial is not None:
|
|
||||||
arg_names += ['partial']
|
|
||||||
if self.output_dim>1:
|
|
||||||
arg_names += self._split_theta_names
|
|
||||||
arg_names += ['output_dim']
|
|
||||||
return arg_names
|
|
||||||
|
|
||||||
def _generate_inline(self, code, X, target=None, Z=None, partial=None):
|
|
||||||
output_dim = self.output_dim
|
|
||||||
# Need to extract parameters to local variables first
|
|
||||||
for shared_params in self._sp_theta:
|
|
||||||
locals()[shared_params.name] = getattr(self, shared_params.name)
|
|
||||||
|
|
||||||
for split_params in self._split_theta_names:
|
|
||||||
locals()[split_params] = np.asarray(getattr(self, split_params))
|
|
||||||
arg_names = self._get_arg_names(target, Z, partial)
|
|
||||||
|
|
||||||
if weave_available:
|
|
||||||
return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
|
|
||||||
else:
|
|
||||||
raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
|
|
||||||
|
|
||||||
def K(self,X,Z,target):
|
|
||||||
if Z is None:
|
|
||||||
self._generate_inline(self._code['K_X'], X, target)
|
|
||||||
else:
|
|
||||||
self._generate_inline(self._code['K'], X, target, Z)
|
|
||||||
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
self._generate_inline(self._code['Kdiag'], X, target)
|
|
||||||
|
|
||||||
def _param_grad_helper(self,partial,X,Z,target):
|
def _param_grad_helper(self,partial,X,Z,target):
|
||||||
if Z is None:
|
pass
|
||||||
self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
|
|
||||||
else:
|
|
||||||
self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,partial,X,target):
|
|
||||||
self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial).namelocals()[shared_params.name] = getattr(self, shared_params.name)
|
|
||||||
|
|
||||||
def gradients_X(self,partial,X,Z,target):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
if Z is None:
|
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||||
self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
|
self._K_computations(X, X2)
|
||||||
else:
|
gradients_X = np.zeros((X.shape[0], X.shape[1]))
|
||||||
self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
|
for i, x in enumerate(self._sp_x):
|
||||||
|
gf = getattr(self, '_K_diff_' + x.name)
|
||||||
|
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
|
||||||
|
if X2 is None:
|
||||||
|
gradients_X *= 2
|
||||||
|
return gradients_X
|
||||||
|
|
||||||
def dKdiag_dX(self,partial,X,target):
|
def gradients_X_diag(self, dL_dK, X):
|
||||||
self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial)
|
self._K_computations(X)
|
||||||
|
dX = np.zeros_like(X)
|
||||||
def compute_psi_stats(self):
|
for i, x in enumerate(self._sp_x):
|
||||||
#define some normal distributions
|
gf = getattr(self, '_Kdiag_diff_' + x.name)
|
||||||
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
|
dX[:, i] = gf(**self._diag_arguments)*dL_dK
|
||||||
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
|
return dX
|
||||||
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
|
||||||
|
|
||||||
#do some integration!
|
|
||||||
#self._sp_psi0 = ??
|
|
||||||
self._sp_psi1 = self._sp_k
|
|
||||||
for i in range(self.input_dim):
|
|
||||||
print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
|
|
||||||
sys.stdout.flush()
|
|
||||||
self._sp_psi1 *= normals[i]
|
|
||||||
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
|
|
||||||
clear_cache()
|
|
||||||
self._sp_psi1 = self._sp_psi1.simplify()
|
|
||||||
|
|
||||||
#and here's psi2 (eek!)
|
|
||||||
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
|
|
||||||
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
|
|
||||||
for i in range(self.input_dim):
|
|
||||||
print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
|
|
||||||
sys.stdout.flush()
|
|
||||||
self._sp_psi2 *= normals[i]
|
|
||||||
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
|
|
||||||
clear_cache()
|
|
||||||
self._sp_psi2 = self._sp_psi2.simplify()
|
|
||||||
|
|
||||||
def parameters_changed(self):
|
|
||||||
# Reset the caches
|
|
||||||
self._cache, self._cache2 = np.empty(shape=(2, 1))
|
|
||||||
self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
|
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
|
||||||
# Need to extract parameters to local variables first
|
|
||||||
self._K_computations(X, None)
|
|
||||||
for shared_params in self._sp_theta:
|
|
||||||
parameter = getattr(self, shared_params.name)
|
|
||||||
code = self._code['dK_d' + shared_params.name]
|
|
||||||
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
|
|
||||||
|
|
||||||
for split_params in self._split_theta_names:
|
|
||||||
parameter = getattr(self, split_params.name)
|
|
||||||
code = self._code['dK_d' + split_params.name]
|
|
||||||
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
|
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
# Need to extract parameters to local variables first
|
||||||
|
self._K_computations(X, X2)
|
||||||
|
for theta in self._sp_theta:
|
||||||
|
parameter = getattr(self, theta.name)
|
||||||
|
gf = getattr(self, '_K_diff_' + theta.name)
|
||||||
|
gradient = (gf(**self._arguments)*dL_dK).sum()
|
||||||
|
if X2 is not None:
|
||||||
|
gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
|
||||||
|
setattr(parameter, 'gradient', gradient)
|
||||||
|
if self.output_dim>1:
|
||||||
|
for theta in self._sp_theta_i:
|
||||||
|
parameter = getattr(self, theta.name[:-2])
|
||||||
|
gf = getattr(self, '_K_diff_' + theta.name)
|
||||||
|
A = gf(**self._arguments)*dL_dK
|
||||||
|
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
|
||||||
|
for i in np.arange(self.output_dim)])
|
||||||
|
if X2 is None:
|
||||||
|
gradient *= 2
|
||||||
|
else:
|
||||||
|
A = gf(**self._reverse_arguments)*dL_dK.T
|
||||||
|
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
|
||||||
|
for i in np.arange(self.output_dim)])
|
||||||
|
setattr(parameter, 'gradient', gradient)
|
||||||
|
|
||||||
|
|
||||||
# def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
# #contributions from Kdiag
|
self._K_computations(X)
|
||||||
# self.variance.gradient = np.sum(dL_dKdiag)
|
for theta in self._sp_theta:
|
||||||
|
parameter = getattr(self, theta.name)
|
||||||
# #from Knm
|
gf = getattr(self, '_Kdiag_diff_' + theta.name)
|
||||||
# self._K_computations(X, Z)
|
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
|
||||||
# self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
|
if self.output_dim>1:
|
||||||
# if self.ARD:
|
for theta in self._sp_theta_i:
|
||||||
# self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
|
parameter = getattr(self, theta.name[:-2])
|
||||||
|
gf = getattr(self, '_Kdiag_diff_' + theta.name)
|
||||||
# else:
|
a = gf(**self._diag_arguments)*dL_dKdiag
|
||||||
# self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
|
setattr(parameter, 'gradient',
|
||||||
|
np.asarray([a[np.where(self._output_ind==i)].sum()
|
||||||
# #from Kmm
|
for i in np.arange(self.output_dim)]))
|
||||||
# self._K_computations(Z, None)
|
|
||||||
# self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
def _K_computations(self, X, X2=None):
|
||||||
# if self.ARD:
|
"""Set up argument lists for the derivatives."""
|
||||||
# self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
# Could check if this needs doing or not, there could
|
||||||
# else:
|
# definitely be some computational savings by checking for
|
||||||
# self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
# parameter updates here.
|
||||||
|
self._arguments = {}
|
||||||
|
self._diag_arguments = {}
|
||||||
#---------------------------------------#
|
for i, x in enumerate(self._sp_x):
|
||||||
# Precomputations #
|
self._arguments[x.name] = X[:, i][:, None]
|
||||||
#---------------------------------------#
|
self._diag_arguments[x.name] = X[:, i][:, None]
|
||||||
|
if self.output_dim > 1:
|
||||||
|
self._output_ind = np.asarray(X[:, -1], dtype='int')
|
||||||
|
for i, theta in enumerate(self._sp_theta_i):
|
||||||
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
|
||||||
|
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
||||||
|
for theta in self._sp_theta:
|
||||||
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
||||||
|
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
||||||
|
|
||||||
def _K_computations(self, X, Z):
|
if X2 is not None:
|
||||||
if Z is None:
|
for i, z in enumerate(self._sp_z):
|
||||||
self._generate_inline(self._precompute, X)
|
self._arguments[z.name] = X2[:, i][None, :]
|
||||||
|
if self.output_dim > 1:
|
||||||
|
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
|
||||||
|
for i, theta in enumerate(self._sp_theta_j):
|
||||||
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
|
||||||
else:
|
else:
|
||||||
self._generate_inline(self._precompute, X, Z=Z)
|
for z in self._sp_z:
|
||||||
|
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
|
||||||
|
if self.output_dim > 1:
|
||||||
|
self._output_ind2 = self._output_ind
|
||||||
|
for theta in self._sp_theta_j:
|
||||||
|
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
|
||||||
|
if X2 is not None:
|
||||||
|
# These arguments are needed in gradients when X2 is not equal to X.
|
||||||
|
self._reverse_arguments = self._arguments
|
||||||
|
for x, z in zip(self._sp_x, self._sp_z):
|
||||||
|
self._reverse_arguments[x.name] = self._arguments[z.name].T
|
||||||
|
self._reverse_arguments[z.name] = self._arguments[x.name].T
|
||||||
|
if self.output_dim > 1:
|
||||||
|
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
|
||||||
|
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
||||||
|
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -62,41 +62,41 @@ def force_F_ordered(A):
|
||||||
print "why are your arrays not F order?"
|
print "why are your arrays not F order?"
|
||||||
return np.asfortranarray(A)
|
return np.asfortranarray(A)
|
||||||
|
|
||||||
|
# def jitchol(A, maxtries=5):
|
||||||
|
# A = force_F_ordered_symmetric(A)
|
||||||
|
# L, info = lapack.dpotrf(A, lower=1)
|
||||||
|
# if info == 0:
|
||||||
|
# return L
|
||||||
|
# else:
|
||||||
|
# if maxtries==0:
|
||||||
|
# raise linalg.LinAlgError, "not positive definite, even with jitter."
|
||||||
|
# diagA = np.diag(A)
|
||||||
|
# if np.any(diagA <= 0.):
|
||||||
|
# raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
|
||||||
|
# jitter = diagA.mean() * 1e-6
|
||||||
|
|
||||||
|
# return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
|
||||||
|
|
||||||
def jitchol(A, maxtries=5):
|
def jitchol(A, maxtries=5):
|
||||||
A = force_F_ordered_symmetric(A)
|
A = np.ascontiguousarray(A)
|
||||||
L, info = lapack.dpotrf(A, lower=1)
|
L, info = lapack.dpotrf(A, lower=1)
|
||||||
if info == 0:
|
if info == 0:
|
||||||
return L
|
return L
|
||||||
else:
|
else:
|
||||||
if maxtries==0:
|
diagA = np.diag(A)
|
||||||
raise linalg.LinAlgError, "not positive definite, even with jitter."
|
if np.any(diagA <= 0.):
|
||||||
diagA = np.diag(A)
|
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
|
||||||
if np.any(diagA <= 0.):
|
jitter = diagA.mean() * 1e-6
|
||||||
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
|
while maxtries > 0 and np.isfinite(jitter):
|
||||||
jitter = diagA.mean() * 1e-6
|
print 'Warning: adding jitter of {:.10e}'.format(jitter)
|
||||||
|
try:
|
||||||
|
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
|
||||||
|
except:
|
||||||
|
jitter *= 10
|
||||||
|
finally:
|
||||||
|
maxtries -= 1
|
||||||
|
raise linalg.LinAlgError, "not positive definite, even with jitter."
|
||||||
|
|
||||||
return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
|
|
||||||
|
|
||||||
#def jitchol(A, maxtries=5):
|
|
||||||
# A = np.ascontiguousarray(A)
|
|
||||||
# L, info = lapack.dpotrf(A, lower=1)
|
|
||||||
# if info == 0:
|
|
||||||
# return L
|
|
||||||
# else:
|
|
||||||
# diagA = np.diag(A)
|
|
||||||
# if np.any(diagA <= 0.):
|
|
||||||
# raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
|
|
||||||
# jitter = diagA.mean() * 1e-6
|
|
||||||
# while maxtries > 0 and np.isfinite(jitter):
|
|
||||||
# print 'Warning: adding jitter of {:.10e}'.format(jitter)
|
|
||||||
# try:
|
|
||||||
# return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
|
|
||||||
# except:
|
|
||||||
# jitter *= 10
|
|
||||||
# finally:
|
|
||||||
# maxtries -= 1
|
|
||||||
# raise linalg.LinAlgError, "not positive definite, even with jitter."
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue