Make the RBF kernel's psi-statistics usable on GPU

This commit is contained in:
Zhenwen Dai 2014-06-20 18:02:35 +01:00
parent e486f3fd99
commit ca1edecce4
5 changed files with 140 additions and 146 deletions

View file

@ -66,7 +66,6 @@ class VarDTC_GPU(LatentFunctionInference):
'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'), 'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'),
'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN 'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN
'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN 'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN
'psi2_t_gpu' :gpuarray.empty((num_inducing*num_inducing*self.batchsize),np.float64,order='F'),
# inference_minibatch # inference_minibatch
'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), 'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
@ -122,6 +121,89 @@ class VarDTC_GPU(LatentFunctionInference):
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise):
    """
    Accumulate the psi-statistics of the kernel over mini-batches of the data.

    Returns (psi0_full, YRY_full) where
      psi0_full -- beta-weighted sum of psi0 over all data points (scalar)
      YRY_full  -- beta-weighted squared Frobenius norm of Y, i.e. tr(Y^T R Y)

    As a side effect the beta-weighted statistics psi1^T Y and psi2 are
    accumulated into the GPU caches 'psi1Y_gpu' and 'psi2_gpu'.

    :param kern: kernel; if kern.useGPU the statistics are computed on the
        device, otherwise on the host and uploaded at the end.
    :param X: inputs (NxQ)
    :param Z: inducing inputs (MxQ)
    :param Y: outputs (NxD)
    :param beta: noise precision; a scalar, or one value per data point when
        het_noise is True.
    :param uncertain_inputs: True if X carries a variational distribution
        (use psi-statistics), False for deterministic inputs (use K/Kdiag).
    :param het_noise: heteroscedastic-noise flag.

    NOTE(review): the preamble below and the kern.useGPU branch treat beta
    as a scalar (beta_gpu.fill, trYYT*beta, Dscal); heteroscedastic noise is
    only honoured in the CPU branch — confirm the GPU path is never reached
    with het_noise=True.
    """
    num_inducing = Z.shape[0]
    num_data, output_dim = Y.shape
    trYYT = self._trYYT

    psi1Y_gpu = self.gpuCache['psi1Y_gpu']
    psi2_gpu = self.gpuCache['psi2_gpu']
    beta_gpu = self.gpuCache['beta_gpu']
    YT_gpu = self.gpuCache['YT_gpu']
    betaYT_gpu = self.gpuCache['betaYT_gpu']

    # betaYT = beta * Y^T on the device (assumes scalar beta, see NOTE above).
    beta_gpu.fill(beta)
    betaYT_gpu.fill(0.)
    cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
    YRY_full = trYYT*beta

    if kern.useGPU:
        # Device-side accumulation: the kernel returns psi1/psi2 as gpuarrays.
        psi1Y_gpu.fill(0.)
        psi2_gpu.fill(0.)
        psi0_full = 0
        for n_start in xrange(0,num_data,self.batchsize):
            n_end = min(self.batchsize+n_start, num_data)
            ndata = n_end - n_start
            X_slice = X[n_start:n_end]
            betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
            if uncertain_inputs:
                psi0 = kern.psi0(Z, X_slice)
                psi1p_gpu = kern.psi1(Z, X_slice)
                psi2p_gpu = kern.psi2(Z, X_slice)
            else:
                psi0 = kern.Kdiag(X_slice)
                psi1p_gpu = kern.K(X_slice, Z)
            # psi1Y += psi1^T (beta Y^T)^T  -> (M x D) accumulated in F-order.
            cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
            psi0_full += psi0.sum()
            if uncertain_inputs:
                # Reduce the per-point psi2 (N x M x M) into psi2_gpu (M x M).
                sum_axis(psi2_gpu,psi2p_gpu,1,1)
            else:
                # Deterministic inputs: psi2 = beta * psi1^T psi1.
                cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
        psi0_full *= beta
        if uncertain_inputs:
            # The beta weighting was deferred for psi2; apply it once here.
            cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
    else:
        # Host-side accumulation; the final arrays are uploaded at the end.
        psi2_full = np.zeros((num_inducing,num_inducing))
        psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
        psi0_full = 0.
        YRY_full = 0.
        for n_start in xrange(0,num_data,self.batchsize):
            n_end = min(self.batchsize+n_start, num_data)
            Y_slice = Y[n_start:n_end]
            X_slice = X[n_start:n_end]
            if het_noise:
                b = beta[n_start]
                # Trace contribution b * ||Y_slice||_F^2.
                # BUGFIX: the previous np.inner(Y_slice, Y_slice)*b built an
                # (n x n) matrix and silently turned YRY_full into an array.
                YRY_full += np.square(Y_slice).sum()*b
            else:
                b = beta
            if uncertain_inputs:
                psi0 = kern.psi0(Z, X_slice)
                psi1 = kern.psi1(Z, X_slice)
                psi2_full += kern.psi2(Z, X_slice)*b
            else:
                psi0 = kern.Kdiag(X_slice)
                psi1 = kern.K(X_slice, Z)
                psi2_full += np.dot(psi1.T,psi1)*b
            psi0_full += psi0.sum()*b
            psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
        if not het_noise:
            YRY_full = trYYT*beta
        # NOTE(review): psi1Y_full is built DxM here while the GPU branch
        # accumulates MxD in F-order — verify psi1Y_gpu's declared shape
        # matches what downstream code expects.
        psi1Y_gpu.set(psi1Y_full)
        psi2_gpu.set(psi2_full)
    return psi0_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y): def inference_likelihood(self, kern, X, Z, likelihood, Y):
""" """
The first phase of inference: The first phase of inference:
@ -146,118 +228,10 @@ class VarDTC_GPU(LatentFunctionInference):
else: else:
uncertain_inputs = False uncertain_inputs = False
trYYT = self._trYYT
psi1Y_gpu = self.gpuCache['psi1Y_gpu'] psi1Y_gpu = self.gpuCache['psi1Y_gpu']
psi2_gpu = self.gpuCache['psi2_gpu'] psi2_gpu = self.gpuCache['psi2_gpu']
beta_gpu = self.gpuCache['beta_gpu']
YT_gpu = self.gpuCache['YT_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu']
psi2_t_gpu = self.gpuCache['psi2_t_gpu']
if het_noise: psi0_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise)
beta_gpu.set(np.asfortranarray(beta))
mul_bcast(betaYT_gpu,beta_gpu,YT_gpu,beta_gpu.size)
YRY_full = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, betaYT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
else:
beta_gpu.fill(beta)
betaYT_gpu.fill(0.)
cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
YRY_full = trYYT*beta
if kern.useGPU:
psi1Y_gpu.fill(0.)
psi2_gpu.fill(0.)
psi0_full = 0
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
ndata = n_end - n_start
X_slice = X[n_start:n_end]
beta_gpu_slice = beta_gpu[n_start:n_end]
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
if ndata==self.batchsize:
psi2_t_gpu_slice = psi2_t_gpu
else:
psi2_t_gpu_slice = psi2_t_gpu[:num_inducing*num_inducing*ndata]
if uncertain_inputs:
psi0p_gpu = kern.psi0(Z, X_slice)
psi1p_gpu = kern.psi1(Z, X_slice)
psi2p_gpu = kern.psi2(Z, X_slice)
else:
psi0p_gpu = kern.Kdiag(X_slice)
psi1p_gpu = kern.K(X_slice, Z)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
if het_noise:
psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1)
else:
psi0_full += gpuarray.sum(psi0p_gpu).get()
if uncertain_inputs:
if het_noise:
mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size)
sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata)
else:
sum_axis(psi2_gpu,psi2p_gpu,1,ndata)
else:
if het_noise:
psi1_t_gpu = psi2_t_gpu_slice[:,num_inducing*ndata]
mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
else:
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
if not het_noise:
psi0_full *= beta
if uncertain_inputs:
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
else:
psi2_full = np.zeros((num_inducing,num_inducing),order='F')
psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
psi0_full = 0
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
if het_noise:
beta_slice = beta[n_start:n_end]
psi0_full += (beta_slice*psi0).sum()
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
else:
psi0_full += psi0.sum()
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
if uncertain_inputs:
if het_noise:
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
else:
psi2_full += psi2
else:
if het_noise:
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
else:
psi2_full += np.outer(psi1.T, psi1.T)
if not het_noise:
psi0_full *= beta
psi1Y_full *= beta
psi2_full *= beta
psi1Y_gpu.set(psi1Y_full)
psi2_gpu.set(psi2_full)
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components

View file

@ -73,13 +73,14 @@ class VarDTC_minibatch(LatentFunctionInference):
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise): def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs):
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
if self.batchsize == None: if self.batchsize == None:
self.batchsize = num_data self.batchsize = num_data
het_noise = beta.size > 1
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
@ -89,45 +90,29 @@ class VarDTC_minibatch(LatentFunctionInference):
YRY_full = 0. YRY_full = 0.
for n_start in xrange(0,num_data,self.batchsize): for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
Y_slice = Y[n_start:n_end] Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice) psi2_full += kern.psi2(Z, X_slice)*b
else: else:
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2 = None psi2_full += np.dot(psi1.T,psi1)*b
if het_noise: psi0_full += psi0.sum()*b
beta_slice = beta[n_start:n_end] psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
psi0_full += (beta_slice*psi0).sum()
psi1Y_full += np.dot(beta_slice*Y_slice.T,psi1) # DxM
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else:
psi0_full += psi0.sum()
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
if uncertain_inputs:
if het_noise:
psi2_full += beta_slice*psi2
else:
psi2_full += psi2
else:
if het_noise:
psi2_full += beta_slice*np.outer(psi1,psi1)
else:
psi2_full += np.dot(psi1.T,psi1)
if not het_noise: if not het_noise:
psi0_full *= beta
psi1Y_full *= beta
psi2_full *= beta
YRY_full = trYYT*beta YRY_full = trYYT*beta
if self.mpi_comm != None: if self.mpi_comm != None:
@ -168,7 +153,7 @@ class VarDTC_minibatch(LatentFunctionInference):
if het_noise: if het_noise:
self.batchsize = 1 self.batchsize = 1
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise) psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components

View file

@ -248,7 +248,6 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'), 'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), 'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), 'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'), 'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'), 'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'), 'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
@ -265,6 +264,10 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'), 'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'), 'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
} }
else:
assert N==self.gpuCache['mu_gpu'].shape[0]
assert M==self.gpuCache['Z_gpu'].shape[0]
assert Q==self.gpuCache['l_gpu'].shape[0]
def sync_params(self, lengthscale, Z, mu, S): def sync_params(self, lengthscale, Z, mu, S):
if len(lengthscale)==1: if len(lengthscale)==1:
@ -299,7 +302,6 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
self._initGPUCache(N,M,Q) self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance) self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
psi0_gpu = self.gpuCache['psi0_gpu']
psi1_gpu = self.gpuCache['psi1_gpu'] psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu'] psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu'] psi2n_gpu = self.gpuCache['psi2n_gpu']
@ -308,14 +310,15 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
mu_gpu = self.gpuCache['mu_gpu'] mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu'] S_gpu = self.gpuCache['S_gpu']
psi0_gpu.fill(variance) psi0 = np.empty((N,))
psi0[:] = variance
self.g_psi1computations(psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1)) self.g_psi1computations(psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1))
self.g_psi2computations(psi2_gpu, psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1)) self.g_psi2computations(psi2_gpu, psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1))
if self.GPU_direct: if self.GPU_direct:
return psi0_gpu, psi1_gpu, psi2_gpu return psi0, psi1_gpu, psi2_gpu
else: else:
return psi0_gpu.get(), psi1_gpu.get(), psi2_gpu.get() return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3)) @Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@ -340,17 +343,19 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
if self.GPU_direct: if self.GPU_direct:
dL_dpsi1_gpu = dL_dpsi1 dL_dpsi1_gpu = dL_dpsi1
dL_dpsi2_gpu = dL_dpsi2 dL_dpsi2_gpu = dL_dpsi2
dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
else: else:
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu'] dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu'] dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1)) dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2)) dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
self.reset_derivative() self.reset_derivative()
self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1)) self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1))
self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1)) self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1))
dL_dvar = np.sum(dL_dpsi0) + gpuarray.sum(dvar_gpu).get() dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum) sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get() dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum) sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)

View file

@ -63,6 +63,9 @@ class BayesianGPLVM(SparseGP):
from ..inference.latent_function_inference.var_dtc import VarDTC from ..inference.latent_function_inference.var_dtc import VarDTC
inference_method = VarDTC() inference_method = VarDTC()
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)

View file

@ -12,6 +12,9 @@ from ..util import gpu_init
try: try:
from pycuda.reduction import ReductionKernel from pycuda.reduction import ReductionKernel
from pycuda.elementwise import ElementwiseKernel from pycuda.elementwise import ElementwiseKernel
import scikits.cuda.linalg as culinalg
from scikits.cuda import cublas
from scikits.cuda.cula import culaExceptions
# log|A| for A is a low triangle matrix # log|A| for A is a low triangle matrix
# logDiagSum(A, A.shape[0]+1) # logDiagSum(A, A.shape[0]+1)
@ -60,3 +63,27 @@ try:
except: except:
pass pass
def jitchol(A, L, cublas_handle, maxtries=5):
    # GPU Cholesky with a jitter fallback: copy A into L on the device and
    # attempt an in-place lower-triangular factorisation via CULA; on failure,
    # retry on the host with growing diagonal jitter.
    #
    # Parameters
    # ----------
    # A : device array (exposes .gpudata) holding the symmetric matrix to
    #     factor; it is not modified.
    # L : pre-allocated device array of the same size; receives the factor
    #     when the CULA path succeeds.
    # cublas_handle : cuBLAS context handle used for the device-to-device copy.
    # maxtries : how many times the jitter is grown (x10) before giving up.
    try:
        # Copy A -> L entirely on the device, then factor L in place ('L' =
        # lower triangle).  On success nothing is returned; the caller reads L.
        cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
        culinalg.cho_factor(L,'L')
    except culaExceptions:
        # CULA failed (matrix likely not numerically positive definite).
        # NOTE(review): np.diag/linalg.cholesky below are applied directly to
        # A, which is a device array above — presumably a host copy (A.get())
        # is needed for this fallback to run at all; confirm with callers.
        diagA = np.diag(A)
        if np.any(diagA <= 0.):
            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
        # Start the jitter at a small fraction of the average diagonal scale.
        jitter = diagA.mean() * 1e-6
        while maxtries > 0 and np.isfinite(jitter):
            print 'Warning: adding jitter of {:.10e}'.format(jitter)
            try:
                # NOTE(review): success here returns a HOST array while the
                # CULA path above returns None and leaves the result in L —
                # inconsistent return contract; confirm intent.  Also, .T on
                # the identity matrix is a no-op.
                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
            except:
                jitter *= 10
            finally:
                maxtries -= 1
        raise linalg.LinAlgError, "not positive definite, even with jitter."