Partially working version of the sympy covariance under the new params interface.

Neil Lawrence 2014-02-24 21:16:26 +00:00
parent 3968d48ba5
commit 339b5caa76
6 changed files with 182 additions and 479 deletions


@@ -4,7 +4,9 @@ from _src.kern import Kern
 from _src.linear import Linear
 from _src.brownian import Brownian
 from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
-#from _src.bias import Bias
+from _src.sympykern import Sympykern
+#from _src.kern import kern_test
+from _src.bias import Bias
 #import coregionalize
 #import exponential
 #import eq_ode1


@@ -3,6 +3,7 @@
 from kern import Kern
+import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp


@@ -36,7 +36,7 @@ class Kern(Parameterized):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dK, X):
         raise NotImplementedError
-    def update_gradients_full(self, dL_dK, X):
+    def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
@@ -125,7 +125,7 @@ class Kern(Parameterized):
 from GPy.core.model import Model
 class Kern_check_model(Model):
-    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
+    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
     def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
         Model.__init__(self, 'kernel_test_model')
         num_samples = 20
@@ -165,9 +165,10 @@ class Kern_check_dK_dtheta(Kern_check_model):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
     def _log_likelihood_gradients(self):
-        target = np.zeros_like(self._get_params())
-        self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2, target)
-        return target
+        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
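
Aside: the dummy models above exist so that checkgrad() can compare a kernel's analytic gradients against finite differences. A minimal standalone sketch of that comparison, independent of GPy (the function names here are illustrative, not part of the library):

    import numpy as np

    def finite_difference_check(f, grad_f, theta, eps=1e-6, tol=1e-4):
        # Central finite differences, one parameter at a time.
        theta = np.asarray(theta, dtype=float)
        numeric = np.zeros_like(theta)
        for i in range(theta.size):
            step = np.zeros_like(theta)
            step[i] = eps
            numeric[i] = (f(theta + step) - f(theta - step)) / (2 * eps)
        return np.allclose(numeric, grad_f(theta), rtol=tol, atol=tol)

    # Example: L(theta) = sum(theta**2) has gradient 2*theta.
    assert finite_difference_check(lambda t: np.sum(t**2), lambda t: 2 * t,
                                   [0.3, -1.2, 2.0])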


@@ -18,7 +18,7 @@ class Stationary(Kern):
                 lengthscale = np.ones(1)
             else:
                 lengthscale = np.asarray(lengthscale)
-                assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel"
+                assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel"
         else:
             if lengthscale is not None:
                 lengthscale = np.asarray(lengthscale)
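
Aside: the original line here was not merely a failing check but a SyntaxError, since Python's assert takes its message after a comma. A minimal standalone illustration (not GPy code):

    import numpy as np

    lengthscale = np.ones(1)
    # assert lengthscale.size == 1 "message"   <- SyntaxError: comma missing
    assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel"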


@@ -2,35 +2,17 @@
 try:
     import sympy as sp
     sympy_available=True
+    from sympy.utilities.lambdify import lambdify
 except ImportError:
     sympy_available=False
     exit()
-from sympy.core.cache import clear_cache
-from sympy.utilities.codegen import codegen
-try:
-    from scipy import weave
-    weave_available = True
-except ImportError:
-    weave_available = False
-import os
-current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-import sys
 import numpy as np
-import re
-import tempfile
-import pdb
-import ast
-from kernpart import Kernpart
+from kern import Kern
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
-# TODO have this set up in a set up file!
-user_code_storage = tempfile.gettempdir()
-class spkern(Kernpart):
+class Sympykern(Kern):
     """
     A kernel object, where all the hard work in done by sympy.
@@ -51,7 +33,7 @@ class spkern(Kernpart):
         name='sympykern'
         if k is None:
             raise ValueError, "You must provide an argument for the covariance function."
-        super(spkern, self).__init__(input_dim, name)
+        super(Sympykern, self).__init__(input_dim, name)
         self._sp_k = k
@@ -66,6 +48,10 @@
         assert len(self._sp_x)==len(self._sp_z)
         x_dim=len(self._sp_x)
+        self._sp_kdiag = k
+        for x, z in zip(self._sp_x, self._sp_z):
+            self._sp_kdiag = self._sp_kdiag.subs(z, x)
         # If it is a multi-output covariance, add an input for indexing the outputs.
         self._real_input_dim = x_dim
         # Check input dim is number of xs + 1 if output_dim is >1
@@ -97,6 +83,8 @@
             #setattr(self, theta, np.ones(self.output_dim))
             self.num_shared_params = len(self._sp_theta)
+            for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
+                self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
             #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
         else:
@@ -112,43 +100,33 @@
             if param is not None:
                 if param.has_key(theta):
                     val = param[theta]
-            #setattr(self, theta.name, val)
             setattr(self, theta.name, Param(theta.name, val, None))
             self.add_parameters(getattr(self, theta.name))
         #deal with param
         #self._set_params(self._get_params())
         # Differentiate with respect to parameters.
-        self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
+        derivative_arguments = self._sp_x + self._sp_theta
         if self.output_dim > 1:
-            self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
-        # differentiate with respect to input variables.
-        self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
+            derivative_arguments += self._sp_theta_i
+        self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
+        self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
+        # This gives the parameters for the arg list.
+        self.arg_list = self._sp_x + self._sp_z + self._sp_theta
+        self.diag_arg_list = self._sp_x + self._sp_theta
+        if self.output_dim > 1:
+            self.arg_list += self._sp_theta_i + self._sp_theta_j
+            self.diag_arg_list += self._sp_theta_i
         # psi_stats aren't yet implemented.
         if False:
             self.compute_psi_stats()
-        self._code = {}
         # generate the code for the covariance functions
         self._gen_code()
-        if weave_available:
-            if False:
-                extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
-            else:
-                extra_compile_args = []
-            self.weave_kwargs = {
-                'support_code': None, #self._function_code,
-                'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
-                'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
-                'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
-                'extra_compile_args':extra_compile_args,
-                'extra_link_args':['-lgomp'],
-                'verbose':True}
         self.parameters_changed() # initializes caches
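
Aside: the rewritten __init__ above now does all differentiation symbolically up front: the diagonal expression self._sp_kdiag comes from substituting each z symbol with its x counterpart, and the simplified derivatives are stored in dictionaries keyed by symbol name. A standalone sketch of the same idea on a toy RBF expression (symbol names are illustrative, not the generated ones):

    import sympy as sp

    x_0, z_0, variance, lengthscale = sp.symbols('x_0 z_0 variance lengthscale',
                                                 positive=True)
    k = variance * sp.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))

    # Diagonal of the covariance: substitute z -> x, as done for self._sp_kdiag.
    kdiag = k.subs(z_0, x_0)
    assert sp.simplify(kdiag - variance) == 0   # the RBF diagonal is just the variance

    # Simplified derivatives keyed by name, as in self.derivatives.
    derivative_arguments = [x_0, variance, lengthscale]
    derivatives = {a.name: sp.diff(k, a).simplify() for a in derivative_arguments}
    diag_derivatives = {a.name: sp.diff(kdiag, a).simplify() for a in derivative_arguments}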
@@ -157,407 +135,128 @@
     def _gen_code(self):
-        argument_sequence = self._sp_x+self._sp_z+self._sp_theta
-        code_list = [('k',self._sp_k)]
-        # gradients with respect to covariance input
-        code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
-        # gradient with respect to parameters
-        code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
-        # gradient with respect to multiple output parameters
-        if self.output_dim > 1:
-            argument_sequence += self._sp_theta_i + self._sp_theta_j
-            code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
-        # generate c functions from sympy objects
-        if weave_available:
-            code_type = "C"
-        else:
-            code_type = "PYTHON"
-        # Need to add the sympy_helpers header in here.
-        (foo_c,self._function_code), (foo_h,self._function_header) = \
-                                     codegen(code_list,
-                                             code_type,
-                                             self.name,
-                                             argument_sequence=argument_sequence)
-        # Use weave to compute the underlying functions.
-        if weave_available:
-            # put the header file where we can find it
-            f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
-            f.write(self._function_header)
-            f.close()
-        if weave_available:
-            # Substitute any known derivatives which sympy doesn't compute
-            self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
-            # put the cpp file in user code storage (defaults to temp file location)
-            f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
-        else:
-            # put the python file in user code storage
-            f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
-        f.write(self._function_code)
-        f.close()
-        if weave_available:
-            # arg_list will store the arguments required for the C code.
-            input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
-                              + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
-            # for multiple outputs reverse argument list is also required
-            if self.output_dim>1:
-                reverse_input_arg_list = list(input_arg_list)
-                reverse_input_arg_list.reverse()
-            # This gives the parameters for the arg list.
-            param_arg_list = [shared_params.name for shared_params in self._sp_theta]
-            arg_list = input_arg_list + param_arg_list
-            precompute_list=[]
-            if self.output_dim > 1:
-                reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
-                # For multiple outputs, also need the split parameters.
-                split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
-                split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
-                arg_list += split_param_arg_list
-                reverse_arg_list += split_param_reverse_arg_list
-                # Extract the right output indices from the inputs.
-                c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
-                precompute_list += c_define_output_indices
-                reverse_arg_string = ", ".join(reverse_arg_list)
-            arg_string = ", ".join(arg_list)
-            precompute_string = "\n".join(precompute_list)
-            # Now we use the arguments in code that computes the separate parts.
-            # Any precomputations will be done here eventually.
-            self._precompute = \
-            """
-            // Precompute code would go here. It will be called when parameters are updated.
-            """
-            # Here's the code to do the looping for K
-            self._code['K'] =\
-            """
-            // _K_code
-            // Code for computing the covariance function.
-            int i;
-            int j;
-            int n = target_array->dimensions[0];
-            int num_inducing = target_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n;i++){
-                for (j=0;j<num_inducing;j++){
-                    %s
-                    //target[i*num_inducing+j] =
-                    TARGET2(i, j) += k(%s);
-                }
-            }
-            %s
-            """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
-            # adding a string representation of the function in the
-            # comment forces recompile when needed
-            self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
-            # Code to compute diagonal of covariance.
-            diag_arg_string = re.sub('Z','X',arg_string)
-            diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
-            diag_arg_string = re.sub('j','i',diag_arg_string)
-            diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
-            diag_precompute_string = re.sub('Z','X',diag_precompute_string)
-            diag_precompute_string = re.sub('j','i',diag_precompute_string)
-            # Code to do the looping for Kdiag
-            self._code['Kdiag'] =\
-            """
-            // _code['Kdiag']
-            // Code for computing diagonal of covariance function.
-            int i;
-            int n = target_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for
-            for (i=0;i<n;i++){
-                %s
-                //target[i] =
-                TARGET1(i)=k(%s);
-            }
-            %s
-            """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-            # Code to compute gradients
-            if self.output_dim>1:
-                for i, theta in enumerate(self._sp_theta_i):
-                    grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
-                    grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
-                    grad_func_list = c_define_output_indices+grad_func_list
-                    grad_func_string = '\n'.join(grad_func_list)
-                    self._code['dK_d' + theta.name] =\
-                    """
-                    int i;
-                    int j;
-                    int n = partial_array->dimensions[0];
-                    int num_inducing = partial_array->dimensions[1];
-                    int input_dim = X_array->dimensions[1];
-                    //#pragma omp parallel for private(j)
-                    for (i=0;i<n;i++){
-                        for (j=0;j<num_inducing;j++){
-                            %s
-                        }
-                    }
-                    %s
-                    """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
-                    self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
-                    # Code to compute gradients for Kdiag TODO: needs clean up
-                    diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
-                    diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
-                    diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
-                    diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
-                    self._code['dKdiag_d' + theta.name] =\
-                    """
-                    // _dKdiag_dtheta_code
-                    // Code for computing gradient of diagonal with respect to parameters.
-                    int i;
-                    int n = partial_array->dimensions[0];
-                    int input_dim = X_array->dimensions[1];
-                    for (i=0;i<n;i++){
-                        %s
-                    }
-                    %s
-                    """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-            for i, theta in enumerate(self._sp_theta):
-                grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
-                grad_func_string = '\n'.join(grad_func_list)
-                self._code['dK_d' + theta.name] =\
-                """
-                // _dK_dtheta_code
-                // Code for computing gradient of covariance with respect to parameters.
-                int i;
-                int j;
-                int n = partial_array->dimensions[0];
-                int num_inducing = partial_array->dimensions[1];
-                int input_dim = X_array->dimensions[1];
-                //#pragma omp parallel for private(j)
-                for (i=0;i<n;i++){
-                    for (j=0;j<num_inducing;j++){
-                        %s
-                    }
-                }
-                %s
-                """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
-                self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
-                # Code to compute gradients for Kdiag TODO: needs clean up
-                diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
-                diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
-                diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
-                diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
-                self._code['dKdiag_d' + theta.name] =\
-                """
-                // _dKdiag_dtheta_code
-                // Code for computing gradient of diagonal with respect to parameters.
-                int i;
-                int n = partial_array->dimensions[0];
-                int input_dim = X_array->dimensions[1];
-                for (i=0;i<n;i++){
-                    %s
-                }
-                %s
-                """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-            # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
-            gradX_func_list = []
-            if self.output_dim>1:
-                gradX_func_list += c_define_output_indices
-            gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
-            gradX_func_string = "\n".join(gradX_func_list)
-            self._code['dK_dX'] = \
-            """
-            // _dK_dX_code
-            // Code for computing gradient of covariance with respect to inputs.
-            int i;
-            int j;
-            int n = partial_array->dimensions[0];
-            int num_inducing = partial_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n; i++){
-                for (j=0; j<num_inducing; j++){
-                    %s
-                }
-            }
-            %s
-            """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
-            self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
-            diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
-            diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
-            diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
-            diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
-            # Code for gradients of Kdiag wrt X
-            self._code['dKdiag_dX'] = \
-            """
-            // _dKdiag_dX_code
-            // Code for computing gradient of diagonal with respect to inputs.
-            int n = partial_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            for (int i=0;i<n; i++){
-                %s
-            }
-            %s
-            """%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
-            # string representation forces recompile when needed. Get rid
-            # of Zs in argument for diagonal. TODO: Why wasn't
-            # diag_func_string called here? Need to check that.
-        #TODO: insert multiple functions here via string manipulation
-        #TODO: similar functions for psi_stats
-        #TODO: similar functions when cython available.
-        #TODO: similar functions when only python available.
-    def _get_arg_names(self, target=None, Z=None, partial=None):
-        arg_names = ['X']
-        if target is not None:
-            arg_names += ['target']
-        for shared_params in self._sp_theta:
-            arg_names += [shared_params.name]
-        if Z is not None:
-            arg_names += ['Z']
-        if partial is not None:
-            arg_names += ['partial']
-        if self.output_dim>1:
-            arg_names += self._split_theta_names
-            arg_names += ['output_dim']
-        return arg_names
-    def _generate_inline(self, code, X, target=None, Z=None, partial=None):
-        output_dim = self.output_dim
-        # Need to extract parameters to local variables first
-        for shared_params in self._sp_theta:
-            locals()[shared_params.name] = getattr(self, shared_params.name)
-        for split_params in self._split_theta_names:
-            locals()[split_params] = np.asarray(getattr(self, split_params))
-        arg_names = self._get_arg_names(target, Z, partial)
-        if weave_available:
-            return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
-        else:
-            raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
-    def K(self,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['K_X'], X, target)
-        else:
-            self._generate_inline(self._code['K'], X, target, Z)
-    def Kdiag(self,X,target):
-        self._generate_inline(self._code['Kdiag'], X, target)
+        self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
+        for key in self.derivatives.keys():
+            setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
+        self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy')
+        for key in self.derivatives.keys():
+            setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
+    def K(self,X,X2=None):
+        self._K_computations(X, X2)
+        return self._K_function(**self._arguments)
+    def Kdiag(self,X):
+        self._K_computations(X)
+        return self._Kdiag_function(**self._diag_arguments)
     def _param_grad_helper(self,partial,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
-        else:
-            self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
-    def dKdiag_dtheta(self,partial,X,target):
-        self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial)
+        pass
-    def gradients_X(self,partial,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
-        else:
-            self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
+    def gradients_X(self, dL_dK, X, X2=None):
+        #if self._X is None or X.base is not self._X.base or X2 is not None:
+        self._K_computations(X, X2)
+        gradients_X = np.zeros((X.shape[0], X.shape[1]))
+        for i, x in enumerate(self._sp_x):
+            gf = getattr(self, '_K_diff_' + x.name)
+            gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
+        if X2 is None:
+            gradients_X *= 2
+        return gradients_X
-    def dKdiag_dX(self,partial,X,target):
-        self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial)
+    def gradients_X_diag(self, dL_dK, X):
+        self._K_computations(X)
+        dX = np.zeros_like(X)
+        for i, x in enumerate(self._sp_x):
+            gf = getattr(self, '_Kdiag_diff_' + x.name)
+            dX[:, i] = gf(**self._diag_arguments)*dL_dK
+        return dX
-    def compute_psi_stats(self):
-        #define some normal distributions
-        mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
-        Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
-        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
-        #do some integration!
-        #self._sp_psi0 = ??
-        self._sp_psi1 = self._sp_k
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi1 *= normals[i]
-            self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi1 = self._sp_psi1.simplify()
-        #and here's psi2 (eek!)
-        zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
-        self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi2 *= normals[i]
-            self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi2 = self._sp_psi2.simplify()
-    def parameters_changed(self):
-        # Reset the caches
-        self._cache, self._cache2 = np.empty(shape=(2, 1))
-        self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
-    def update_gradients_full(self, dL_dK, X):
+    def update_gradients_full(self, dL_dK, X, X2=None):
         # Need to extract parameters to local variables first
-        self._K_computations(X, None)
-        for shared_params in self._sp_theta:
-            parameter = getattr(self, shared_params.name)
-            code = self._code['dK_d' + shared_params.name]
-            setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
-        for split_params in self._split_theta_names:
-            parameter = getattr(self, split_params.name)
-            code = self._code['dK_d' + split_params.name]
-            setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
-    # def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-    #     #contributions from Kdiag
-    #     self.variance.gradient = np.sum(dL_dKdiag)
-    #     #from Knm
-    #     self._K_computations(X, Z)
-    #     self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
-    #     if self.ARD:
-    #         self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
-    #     else:
-    #         self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
-    #     #from Kmm
-    #     self._K_computations(Z, None)
-    #     self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
-    #     if self.ARD:
-    #         self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
-    #     else:
-    #         self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
-    #---------------------------------------#
-    #             Precomputations            #
-    #---------------------------------------#
-    def _K_computations(self, X, Z):
-        if Z is None:
-            self._generate_inline(self._precompute, X)
-        else:
-            self._generate_inline(self._precompute, X, Z=Z)
+        self._K_computations(X, X2)
+        for theta in self._sp_theta:
+            parameter = getattr(self, theta.name)
+            gf = getattr(self, '_K_diff_' + theta.name)
+            gradient = (gf(**self._arguments)*dL_dK).sum()
+            if X2 is not None:
+                gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
+            setattr(parameter, 'gradient', gradient)
+        if self.output_dim>1:
+            for theta in self._sp_theta_i:
+                parameter = getattr(self, theta.name[:-2])
+                gf = getattr(self, '_K_diff_' + theta.name)
+                A = gf(**self._arguments)*dL_dK
+                gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
+                                       for i in np.arange(self.output_dim)])
+                if X2 is None:
+                    gradient *= 2
+                else:
+                    A = gf(**self._reverse_arguments)*dL_dK.T
+                    gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
+                                            for i in np.arange(self.output_dim)])
+                setattr(parameter, 'gradient', gradient)
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self._K_computations(X)
+        for theta in self._sp_theta:
+            parameter = getattr(self, theta.name)
+            gf = getattr(self, '_Kdiag_diff_' + theta.name)
+            setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
+        if self.output_dim>1:
+            for theta in self._sp_theta_i:
+                parameter = getattr(self, theta.name[:-2])
+                gf = getattr(self, '_Kdiag_diff_' + theta.name)
+                a = gf(**self._diag_arguments)*dL_dKdiag
+                setattr(parameter, 'gradient',
+                        np.asarray([a[np.where(self._output_ind==i)].sum()
+                                    for i in np.arange(self.output_dim)]))
+    def _K_computations(self, X, X2=None):
+        """Set up argument lists for the derivatives."""
+        # Could check if this needs doing or not, there could
+        # definitely be some computational savings by checking for
+        # parameter updates here.
+        self._arguments = {}
+        self._diag_arguments = {}
+        for i, x in enumerate(self._sp_x):
+            self._arguments[x.name] = X[:, i][:, None]
+            self._diag_arguments[x.name] = X[:, i][:, None]
+        if self.output_dim > 1:
+            self._output_ind = np.asarray(X[:, -1], dtype='int')
+            for i, theta in enumerate(self._sp_theta_i):
+                self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
+                self._diag_arguments[theta.name] = self._arguments[theta.name]
+        for theta in self._sp_theta:
+            self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
+            self._diag_arguments[theta.name] = self._arguments[theta.name]
+        if X2 is not None:
+            for i, z in enumerate(self._sp_z):
+                self._arguments[z.name] = X2[:, i][None, :]
+            if self.output_dim > 1:
+                self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
+                for i, theta in enumerate(self._sp_theta_j):
+                    self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
+        else:
+            for z in self._sp_z:
+                self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
+            if self.output_dim > 1:
+                self._output_ind2 = self._output_ind
+                for theta in self._sp_theta_j:
+                    self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
+        if X2 is not None:
+            # These arguments are needed in gradients when X2 is not equal to X.
+            self._reverse_arguments = self._arguments
+            for x, z in zip(self._sp_x, self._sp_z):
+                self._reverse_arguments[x.name] = self._arguments[z.name].T
+                self._reverse_arguments[z.name] = self._arguments[x.name].T
+            if self.output_dim > 1:
+                for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
+                    self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
+                    self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
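
Aside: the hunk above replaces the weave/codegen machinery with lambdify plus numpy broadcasting: x symbols are bound to (n, 1) columns and z symbols to (1, m) rows, so evaluating the lambdified kernel yields the full n x m covariance, and parameter gradients follow from the chain rule dL/dtheta = sum(dL_dK * dK/dtheta). A self-contained sketch of that pattern on a toy kernel (illustrative names, not the generated ones):

    import numpy as np
    import sympy as sp
    from sympy.utilities.lambdify import lambdify

    x_0, z_0, variance, lengthscale = sp.symbols('x_0 z_0 variance lengthscale',
                                                 positive=True)
    k = variance * sp.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))

    arg_list = [x_0, z_0, variance, lengthscale]
    K_function = lambdify(arg_list, k, 'numpy')
    dk_dvariance = lambdify(arg_list, sp.diff(k, variance).simplify(), 'numpy')

    X, X2 = np.random.randn(5, 1), np.random.randn(3, 1)
    arguments = {'x_0': X[:, 0][:, None],    # (5, 1): varies down rows
                 'z_0': X2[:, 0][None, :],   # (1, 3): varies across columns
                 'variance': 2.0,
                 'lengthscale': 0.5}

    K = K_function(**arguments)              # broadcasts to the full (5, 3) matrix
    assert K.shape == (5, 3)

    # Chain rule, as in update_gradients_full.
    dL_dK = np.ones((5, 3))                  # stand-in for a real likelihood gradient
    variance_gradient = (dk_dvariance(**arguments) * dL_dK).sum()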


@@ -62,41 +62,41 @@ def force_F_ordered(A):
     print "why are your arrays not F order?"
     return np.asfortranarray(A)
 
-def jitchol(A, maxtries=5):
-    A = force_F_ordered_symmetric(A)
-    L, info = lapack.dpotrf(A, lower=1)
-    if info == 0:
-        return L
-    else:
-        if maxtries==0:
-            raise linalg.LinAlgError, "not positive definite, even with jitter."
-        diagA = np.diag(A)
-        if np.any(diagA <= 0.):
-            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-        jitter = diagA.mean() * 1e-6
-        return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
-
 # def jitchol(A, maxtries=5):
-#     A = np.ascontiguousarray(A)
+#     A = force_F_ordered_symmetric(A)
 #     L, info = lapack.dpotrf(A, lower=1)
 #     if info == 0:
 #         return L
 #     else:
+#         if maxtries==0:
+#             raise linalg.LinAlgError, "not positive definite, even with jitter."
 #         diagA = np.diag(A)
 #         if np.any(diagA <= 0.):
 #             raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
 #         jitter = diagA.mean() * 1e-6
-#         while maxtries > 0 and np.isfinite(jitter):
-#             print 'Warning: adding jitter of {:.10e}'.format(jitter)
-#             try:
-#                 return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-#             except:
-#                 jitter *= 10
-#             finally:
-#                 maxtries -= 1
-#     raise linalg.LinAlgError, "not positive definite, even with jitter."
+#         return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
+
+def jitchol(A, maxtries=5):
+    A = np.ascontiguousarray(A)
+    L, info = lapack.dpotrf(A, lower=1)
+    if info == 0:
+        return L
+    else:
+        diagA = np.diag(A)
+        if np.any(diagA <= 0.):
+            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
+        jitter = diagA.mean() * 1e-6
+        while maxtries > 0 and np.isfinite(jitter):
+            print 'Warning: adding jitter of {:.10e}'.format(jitter)
+            try:
+                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
+            except:
+                jitter *= 10
+            finally:
+                maxtries -= 1
+    raise linalg.LinAlgError, "not positive definite, even with jitter."
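
Aside: the reinstated jitchol falls back to adding "jitter" when the plain Cholesky fails. Roughly how that behaves, sketched with scipy directly rather than the GPy helper (the matrix here is illustrative):

    import numpy as np
    from scipy import linalg

    A = np.ones((3, 3))                      # rank-1 PSD matrix: Cholesky fails
    try:
        linalg.cholesky(A, lower=True)
    except linalg.LinAlgError:
        pass                                 # expected: A is singular

    # A small multiple of the mean diagonal restores positive definiteness,
    # at the price of factorizing a slightly perturbed matrix.
    jitter = np.diag(A).mean() * 1e-6
    L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
    assert np.allclose(L.dot(L.T), A + np.eye(3) * jitter)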