diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index e5dc6d35..35655196 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -4,7 +4,9 @@ from _src.kern import Kern
 from _src.linear import Linear
 from _src.brownian import Brownian
 from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
-#from _src.bias import Bias
+from _src.sympykern import Sympykern
+#from _src.kern import kern_test
+from _src.bias import Bias
 #import coregionalize
 #import exponential
 #import eq_ode1
diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/bias.py
index d45561f8..845f121f 100644
--- a/GPy/kern/_src/bias.py
+++ b/GPy/kern/_src/bias.py
@@ -3,6 +3,7 @@
 from kern import Kern
+import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 3ef231b3..5a2238d0 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -36,7 +36,7 @@ class Kern(Parameterized):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dK, X):
         raise NotImplementedError
-    def update_gradients_full(self, dL_dK, X):
+    def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
@@ -125,7 +125,7 @@ class Kern(Parameterized):
 from GPy.core.model import Model
 class Kern_check_model(Model):
-    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
+    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
     def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
         Model.__init__(self, 'kernel_test_model')
         num_samples = 20
@@ -165,9 +165,10 @@ class Kern_check_dK_dtheta(Kern_check_model):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
     def _log_likelihood_gradients(self):
-        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
-
-
+
+        target = np.zeros_like(self._get_params())
+        self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2, target)
+        return target
diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index 7cc2e695..a6ff9424 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -18,7 +18,7 @@ class Stationary(Kern):
                 lengthscale = np.ones(1)
             else:
                 lengthscale = np.asarray(lengthscale)
-                assert lengthscale.size == 1 "Only lengthscale needed for non-ARD kernel"
+                assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel"
         else:
             if lengthscale is not None:
                 lengthscale = np.asarray(lengthscale)
diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/sympykern.py
index 3d6517a8..cf6838c4 100644
--- a/GPy/kern/_src/sympykern.py
+++ b/GPy/kern/_src/sympykern.py
@@ -2,35 +2,17 @@
 try:
     import sympy as sp
     sympy_available=True
+    from sympy.utilities.lambdify import lambdify
 except ImportError:
     sympy_available=False
     exit()
-from sympy.core.cache import clear_cache
-from sympy.utilities.codegen import codegen
-
-try:
-    from scipy import weave
-    weave_available = True
-except ImportError:
-    weave_available = False
-
-import os
-current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-import sys
 import numpy as np
-import re
-import tempfile
-import pdb
-import ast
-
-from kernpart import Kernpart
+from kern import Kern
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
-# TODO have this set up in a set up file!
-user_code_storage = tempfile.gettempdir()
-class spkern(Kernpart):
+class Sympykern(Kern):
     """
     A kernel object, where all the hard work is done by sympy.
@@ -51,10 +33,10 @@ class spkern(Kernpart):
         name='sympykern'
         if k is None:
             raise ValueError, "You must provide an argument for the covariance function."
-        super(spkern, self).__init__(input_dim, name)
+        super(Sympykern, self).__init__(input_dim, name)
         self._sp_k = k
-
+
         # pull the variable names out of the symbolic covariance function.
         sp_vars = [e for e in k.atoms() if e.is_Symbol]
         self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
@@ -66,6 +48,10 @@ class spkern(Kernpart):
         assert len(self._sp_x)==len(self._sp_z)
         x_dim=len(self._sp_x)
 
+        self._sp_kdiag = k
+        for x, z in zip(self._sp_x, self._sp_z):
+            self._sp_kdiag = self._sp_kdiag.subs(z, x)
+
         # If it is a multi-output covariance, add an input for indexing the outputs.
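+        # (When output_dim > 1, the last column of X carries an integer output
+        # index; _K_computations below uses it to select per-output parameters.)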
         self._real_input_dim = x_dim
         # Check input dim is number of xs + 1 if output_dim is >1
@@ -97,6 +83,8 @@ class spkern(Kernpart):
             #setattr(self, theta, np.ones(self.output_dim))
             self.num_shared_params = len(self._sp_theta)
+            for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
+                self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
             #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
         else:
@@ -112,43 +100,33 @@ class spkern(Kernpart):
             if param is not None:
                 if param.has_key(theta):
                     val = param[theta]
-            #setattr(self, theta.name, val)
             setattr(self, theta.name, Param(theta.name, val, None))
             self.add_parameters(getattr(self, theta.name))
         #deal with param
         #self._set_params(self._get_params())
 
         # Differentiate with respect to parameters.
-        self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
+        derivative_arguments = self._sp_x + self._sp_theta
         if self.output_dim > 1:
-            self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
-
-        # differentiate with respect to input variables.
-        self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
-
+            derivative_arguments += self._sp_theta_i
+
+        self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
+        self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
+
+        # This gives the parameters for the arg list.
+        self.arg_list = self._sp_x + self._sp_z + self._sp_theta
+        self.diag_arg_list = self._sp_x + self._sp_theta
+        if self.output_dim > 1:
+            self.arg_list += self._sp_theta_i + self._sp_theta_j
+            self.diag_arg_list += self._sp_theta_i
 
         # psi_stats aren't yet implemented.
         if False:
             self.compute_psi_stats()
 
-        self._code = {}
-
         # generate the code for the covariance functions
         self._gen_code()
 
-        if weave_available:
-            if False:
-                extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
-            else:
-                extra_compile_args = []
-
-            self.weave_kwargs = {
-                'support_code': None, #self._function_code,
-                'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
-                'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
-                'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
-                'extra_compile_args':extra_compile_args,
-                'extra_link_args':['-lgomp'],
-                'verbose':True}
 
         self.parameters_changed() # initializes caches
@@ -157,407 +135,128 @@ class spkern(Kernpart):
 
     def _gen_code(self):
-        argument_sequence = self._sp_x+self._sp_z+self._sp_theta
-        code_list = [('k',self._sp_k)]
-        # gradients with respect to covariance input
-        code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
-        # gradient with respect to parameters
-        code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
-        # gradient with respect to multiple output parameters
-        if self.output_dim > 1:
-            argument_sequence += self._sp_theta_i + self._sp_theta_j
-            code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
-        # generate c functions from sympy objects
-        if weave_available:
-            code_type = "C"
-        else:
-            code_type = "PYTHON"
-        # Need to add the sympy_helpers header in here.
-        (foo_c,self._function_code), (foo_h,self._function_header) = \
-            codegen(code_list,
-                    code_type,
-                    self.name,
-                    argument_sequence=argument_sequence)
+        self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
+        for key in self.derivatives.keys():
+            setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
+
+        self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy')
+        for key in self.derivatives.keys():
+            setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
+
+    def K(self,X,X2=None):
+        self._K_computations(X, X2)
+        return self._K_function(**self._arguments)
 
-        # Use weave to compute the underlying functions.
-        if weave_available:
-            # put the header file where we can find it
-            f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
-            f.write(self._function_header)
-            f.close()
+    def Kdiag(self,X):
+        self._K_computations(X)
+        return self._Kdiag_function(**self._diag_arguments)
 
-
-        if weave_available:
-            # Substitute any known derivatives which sympy doesn't compute
-            self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
-            # put the cpp file in user code storage (defaults to temp file location)
-            f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
-        else:
-            # put the python file in user code storage
-            f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
-        f.write(self._function_code)
-        f.close()
-
-        if weave_available:
-            # arg_list will store the arguments required for the C code.
-            input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
-                              + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
-
-            # for multiple outputs reverse argument list is also required
-            if self.output_dim>1:
-                reverse_input_arg_list = list(input_arg_list)
-                reverse_input_arg_list.reverse()
-
-            # This gives the parameters for the arg list.
-            param_arg_list = [shared_params.name for shared_params in self._sp_theta]
-            arg_list = input_arg_list + param_arg_list
-
-            precompute_list=[]
-            if self.output_dim > 1:
-                reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
-                # For multiple outputs, also need the split parameters.
-                split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
-                split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
-                arg_list += split_param_arg_list
-                reverse_arg_list += split_param_reverse_arg_list
-                # Extract the right output indices from the inputs.
-                c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
-                precompute_list += c_define_output_indices
-            reverse_arg_string = ", ".join(reverse_arg_list)
-            arg_string = ", ".join(arg_list)
-            precompute_string = "\n".join(precompute_list)
-
-            # Now we use the arguments in code that computes the separate parts.
-
-            # Any precomputations will be done here eventually.
-            self._precompute = \
-            """
-            // Precompute code would go here. It will be called when parameters are updated.
-            """
-
-            # Here's the code to do the looping for K
-            self._code['K'] =\
-            """
-            // _K_code
-            // Code for computing the covariance function.
-            int i;
-            int j;
-            int n = target_array->dimensions[0];
-            int num_inducing = target_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n;i++){
-                for (j=0;j<num_inducing;j++){
-                    TARGET2(i, j) = k(%s);
-                }
-            }
-            """%(arg_string)
-
-            # Here's the code for the diagonal of the covariance.
-            self._code['Kdiag'] =\
-            """
-            // _Kdiag_code
-            // Code for computing the diagonal of the covariance function.
-            int i;
-            int n = X_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for
-            for (i=0;i<n;i++){
-                TARGET1(i) = k(%s);
-            }
-            """
-
-            # Gradient code for the split (per-output) parameters.
-            if self.output_dim>1:
-                for i, theta in enumerate(self._sp_theta_i):
-                    grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
-                    grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
-                    grad_func_list = c_define_output_indices+grad_func_list
-
-                    grad_func_string = '\n'.join(grad_func_list)
-                    self._code['dK_d' + theta.name] =\
-                    """
-                    int i;
-                    int j;
-                    int n = partial_array->dimensions[0];
-                    int num_inducing = partial_array->dimensions[1];
-                    int input_dim = X_array->dimensions[1];
-                    //#pragma omp parallel for private(j)
-                    for (i=0;i<n;i++){
-                        for (j=0;j<num_inducing;j++){
-                            %s
-                        }
-                    }
-                    """%(grad_func_string)
-
-                    self._code['dKdiag_d' + theta.name] =\
-                    """
-                    int i;
-                    int n = X_array->dimensions[0];
-                    int input_dim = X_array->dimensions[1];
-                    for (i=0;i<n;i++){
-                        %s
-                    }
-                    """%(grad_func_string)
-
-            # Gradient code for the shared parameters.
-            grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i, theta.name, arg_string) for i, theta in enumerate(self._sp_theta)]
-            grad_func_string = '\n'.join(grad_func_list)
-            self._code['dK_dtheta'] =\
-            """
-            int i;
-            int j;
-            int n = partial_array->dimensions[0];
-            int num_inducing = partial_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n;i++){
-                for (j=0;j<num_inducing;j++){
-                    %s
-                }
-            }
-            """%(grad_func_string)
-
-            self._code['dKdiag_dtheta'] =\
-            """
-            int i;
-            int n = X_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            for (i=0;i<n;i++){
-                %s
-            }
-            """%(grad_func_string)
-
-            # Gradient code with respect to the inputs.
-            gradX_func_list = []
-            if self.output_dim>1:
-                gradX_func_list += c_define_output_indices
-            gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
-            gradX_func_string = "\n".join(gradX_func_list)
-
-            self._code['dK_dX'] = \
-            """
-            // _dK_dX_code
-            // Code for computing gradient of covariance with respect to inputs.
-            int i;
-            int j;
-            int n = partial_array->dimensions[0];
-            int num_inducing = partial_array->dimensions[1];
-            int input_dim = X_array->dimensions[1];
-            //#pragma omp parallel for private(j)
-            for (i=0;i<n;i++){
-                for (j=0;j<num_inducing;j++){
-                    %s
-                }
-            }
-            """%(gradX_func_string)
-
-            self._code['dKdiag_dX'] =\
-            """
-            int n = X_array->dimensions[0];
-            int input_dim = X_array->dimensions[1];
-            for (int i=0;i<n;i++){
-                %s
-            }
-            """%(gradX_func_string)
-
-    def _get_arg_names(self, target=None, Z=None, partial=None):
-        arg_names = ['X']
-        if target is not None:
-            arg_names += ['target']
-        if Z is not None:
-            arg_names += ['Z']
-        if partial is not None:
-            arg_names += ['partial']
-        arg_names += [theta.name for theta in self._sp_theta]
-        if self.output_dim>1:
-            arg_names += self._split_theta_names
-            arg_names += ['output_dim']
-        return arg_names
-
-    def _generate_inline(self, code, X, target=None, Z=None, partial=None):
-        output_dim = self.output_dim
-        # Need to extract parameters to local variables first
-        for shared_params in self._sp_theta:
-            locals()[shared_params.name] = getattr(self, shared_params.name)
-
-        for split_params in self._split_theta_names:
-            locals()[split_params] = np.asarray(getattr(self, split_params))
-        arg_names = self._get_arg_names(target, Z, partial)
-
-        if weave_available:
-            return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
-        else:
-            raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
-
-    def K(self,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['K_X'], X, target)
-        else:
-            self._generate_inline(self._code['K'], X, target, Z)
-
-
-    def Kdiag(self,X,target):
-        self._generate_inline(self._code['Kdiag'], X, target)
 
     def _param_grad_helper(self,partial,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
-        else:
-            self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
+        pass
 
-    def dKdiag_dtheta(self,partial,X,target):
-        self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial)
 
-    def gradients_X(self,partial,X,Z,target):
-        if Z is None:
-            self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
-        else:
-            self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
+    def gradients_X(self, dL_dK, X, X2=None):
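+        # Chain rule: dL/dX[n, q] = sum_m dL_dK[n, m] * dk(x_n, x_m)/dx_q. When
+        # X2 is None, x_n also enters through column n of the symmetric K, so
+        # the row sums below are doubled.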
+        #if self._X is None or X.base is not self._X.base or X2 is not None:
+        self._K_computations(X, X2)
+        gradients_X = np.zeros((X.shape[0], X.shape[1]))
+        for i, x in enumerate(self._sp_x):
+            gf = getattr(self, '_K_diff_' + x.name)
+            gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
+        if X2 is None:
+            gradients_X *= 2
+        return gradients_X
 
-    def dKdiag_dX(self,partial,X,target):
-        self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial)
-
-    def compute_psi_stats(self):
-        #define some normal distributions
-        mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
-        Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
-        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
-
-        #do some integration!
-        #self._sp_psi0 = ??
-        self._sp_psi1 = self._sp_k
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi1 *= normals[i]
-            self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi1 = self._sp_psi1.simplify()
-
-        #and here's psi2 (eek!)
-        zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
-        self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
-        for i in range(self.input_dim):
-            print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
-            sys.stdout.flush()
-            self._sp_psi2 *= normals[i]
-            self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
-            clear_cache()
-        self._sp_psi2 = self._sp_psi2.simplify()
-
-    def parameters_changed(self):
-        # Reset the caches
-        self._cache, self._cache2 = np.empty(shape=(2, 1))
-        self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
-
-    def update_gradients_full(self, dL_dK, X):
-        # Need to extract parameters to local variables first
-        self._K_computations(X, None)
-        for shared_params in self._sp_theta:
-            parameter = getattr(self, shared_params.name)
-            code = self._code['dK_d' + shared_params.name]
-            setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
-
-        for split_params in self._split_theta_names:
-            parameter = getattr(self, split_params.name)
-            code = self._code['dK_d' + split_params.name]
-            setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
+    def gradients_X_diag(self, dL_dK, X):
+        self._K_computations(X)
+        dX = np.zeros_like(X)
+        for i, x in enumerate(self._sp_x):
+            gf = getattr(self, '_Kdiag_diff_' + x.name)
+            dX[:, i] = gf(**self._diag_arguments)*dL_dK
+        return dX
 
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        # Need to extract parameters to local variables first
+        self._K_computations(X, X2)
+        for theta in self._sp_theta:
+            parameter = getattr(self, theta.name)
+            gf = getattr(self, '_K_diff_' + theta.name)
+            gradient = (gf(**self._arguments)*dL_dK).sum()
+            if X2 is not None:
+                gradient += (gf(**self._reverse_arguments)*dL_dK.T).sum()
+            setattr(parameter, 'gradient', gradient)
+        if self.output_dim>1:
+            for theta in self._sp_theta_i:
+                parameter = getattr(self, theta.name[:-2])
+                gf = getattr(self, '_K_diff_' + theta.name)
+                A = gf(**self._arguments)*dL_dK
+                gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
+                                       for i in np.arange(self.output_dim)])
+                if X2 is None:
+                    gradient *= 2
+                else:
+                    A = gf(**self._reverse_arguments)*dL_dK.T
+                    gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
+                                            for i in np.arange(self.output_dim)])
+                setattr(parameter, 'gradient', gradient)
+
-    # def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-    #     #contributions from Kdiag
-    #     self.variance.gradient = np.sum(dL_dKdiag)

-    #     #from Knm
-    #     self._K_computations(X, Z)
-    #     self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
-    #     if self.ARD:
-    #         self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
-    #     else:
-    #         self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)

-    #     #from Kmm
-    #     self._K_computations(Z, None)
-    #     self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
-    #     if self.ARD:
-    #         self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
-    #     else:
-    #         self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
-
-
-    #---------------------------------------#
-    #            Precomputations            #
-    #---------------------------------------#
 
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self._K_computations(X)
+        for theta in self._sp_theta:
+            parameter = getattr(self, theta.name)
+            gf = getattr(self, '_Kdiag_diff_' + theta.name)
+            setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
+        if self.output_dim>1:
+            for theta in self._sp_theta_i:
+                parameter = getattr(self, theta.name[:-2])
+                gf = getattr(self, '_Kdiag_diff_' + theta.name)
+                a = gf(**self._diag_arguments)*dL_dKdiag
+                setattr(parameter, 'gradient',
+                        np.asarray([a[np.where(self._output_ind==i)].sum()
                                     for i in np.arange(self.output_dim)]))
+
+    def _K_computations(self, X, X2=None):
+        """Set up argument lists for the derivatives."""
+        # Could check if this needs doing or not, there could
+        # definitely be some computational savings by checking for
+        # parameter updates here.
+        self._arguments = {}
+        self._diag_arguments = {}
+        for i, x in enumerate(self._sp_x):
+            self._arguments[x.name] = X[:, i][:, None]
+            self._diag_arguments[x.name] = X[:, i][:, None]
+        if self.output_dim > 1:
+            self._output_ind = np.asarray(X[:, -1], dtype='int')
+            for i, theta in enumerate(self._sp_theta_i):
+                self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
+                self._diag_arguments[theta.name] = self._arguments[theta.name]
+        for theta in self._sp_theta:
+            self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
+            self._diag_arguments[theta.name] = self._arguments[theta.name]
-    def _K_computations(self, X, Z):
-        if Z is None:
-            self._generate_inline(self._precompute, X)
+        if X2 is not None:
+            for i, z in enumerate(self._sp_z):
+                self._arguments[z.name] = X2[:, i][None, :]
+            if self.output_dim > 1:
+                self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
+                for i, theta in enumerate(self._sp_theta_j):
+                    self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
         else:
-            self._generate_inline(self._precompute, X, Z=Z)
+            for z in self._sp_z:
+                self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
+            if self.output_dim > 1:
+                self._output_ind2 = self._output_ind
+                for theta in self._sp_theta_j:
+                    self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
+        if X2 is not None:
+            # These arguments are needed in gradients when X2 is not equal to X.
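+            # Swapping x with z (and theta_i with theta_j) lets the same
+            # lambdified derivatives be evaluated for the transposed
+            # covariance K(X2, X).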
+            self._reverse_arguments = dict(self._arguments)
+            for x, z in zip(self._sp_x, self._sp_z):
+                self._reverse_arguments[x.name] = self._arguments[z.name].T
+                self._reverse_arguments[z.name] = self._arguments[x.name].T
+            if self.output_dim > 1:
+                for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
+                    self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
+                    self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 22b4f86c..f2b372be 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -62,41 +62,41 @@ def force_F_ordered(A):
     print "why are your arrays not F order?"
     return np.asfortranarray(A)
 
+# def jitchol(A, maxtries=5):
+#     A = force_F_ordered_symmetric(A)
+#     L, info = lapack.dpotrf(A, lower=1)
+#     if info == 0:
+#         return L
+#     else:
+#         if maxtries==0:
+#             raise linalg.LinAlgError, "not positive definite, even with jitter."
+#         diagA = np.diag(A)
+#         if np.any(diagA <= 0.):
+#             raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
+#         jitter = diagA.mean() * 1e-6

+#         return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
+
 def jitchol(A, maxtries=5):
-    A = force_F_ordered_symmetric(A)
-    L, info = lapack.dpotrf(A, lower=1)
-    if info == 0:
-        return L
-    else:
-        if maxtries==0:
-            raise linalg.LinAlgError, "not positive definite, even with jitter."
-        diagA = np.diag(A)
-        if np.any(diagA <= 0.):
-            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-        jitter = diagA.mean() * 1e-6
+    A = np.ascontiguousarray(A)
+    L, info = lapack.dpotrf(A, lower=1)
+    if info == 0:
+        return L
+    else:
+        diagA = np.diag(A)
+        if np.any(diagA <= 0.):
+            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
+        jitter = diagA.mean() * 1e-6
+        while maxtries > 0 and np.isfinite(jitter):
+            print 'Warning: adding jitter of {:.10e}'.format(jitter)
+            try:
+                return linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
+            except linalg.LinAlgError:
+                jitter *= 10
+            finally:
+                maxtries -= 1
+        raise linalg.LinAlgError, "not positive definite, even with jitter."
 
-    return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
-
-#def jitchol(A, maxtries=5):
-#    A = np.ascontiguousarray(A)
-#    L, info = lapack.dpotrf(A, lower=1)
-#    if info == 0:
-#        return L
-#    else:
-#        diagA = np.diag(A)
-#        if np.any(diagA <= 0.):
-#            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-#        jitter = diagA.mean() * 1e-6
-#        while maxtries > 0 and np.isfinite(jitter):
-#            print 'Warning: adding jitter of {:.10e}'.format(jitter)
-#            try:
-#                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-#            except:
-#                jitter *= 10
-#            finally:
-#                maxtries -= 1
-#        raise linalg.LinAlgError, "not positive definite, even with jitter."
-#