This commit is contained in:
Zhenwen Dai 2014-05-08 13:08:16 +01:00
parent 06336bf0d4
commit ddbf15d763
6 changed files with 122 additions and 86 deletions

View file

@ -15,7 +15,7 @@ try:
import scikits.cuda.linalg as culinalg import scikits.cuda.linalg as culinalg
import pycuda.gpuarray as gpuarray import pycuda.gpuarray as gpuarray
from scikits.cuda import cublas from scikits.cuda import cublas
from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod, traceDot
except: except:
pass pass
@ -69,14 +69,14 @@ class VarDTC_GPU(object):
# inference_minibatch # inference_minibatch
'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), 'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'), 'dL_dpsi2_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
'dL_dthetaL_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'dL_dthetaL_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'betapsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), 'betapsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
'thetaL_t_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'thetaL_t_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'betaYT2_gpu' :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'), 'betaYT2_gpu' :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'),
'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), 'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
'psi2p_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'), 'psi2p_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
} }
self.gpuCache['ones_gpu'].fill(1.0) self.gpuCache['ones_gpu'].fill(1.0)
@ -136,6 +136,12 @@ class VarDTC_GPU(object):
num_inducing, input_dim = Z.shape[0], Z.shape[1] num_inducing, input_dim = Z.shape[0], Z.shape[1]
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
if het_noise:
self.batchsize=0
self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y) self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y)
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
@ -143,9 +149,7 @@ class VarDTC_GPU(object):
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
trYYT = self._trYYT trYYT = self._trYYT
psi1Y_gpu = self.gpuCache['psi1Y_gpu'] psi1Y_gpu = self.gpuCache['psi1Y_gpu']
@ -218,7 +222,6 @@ class VarDTC_GPU(object):
psi2_full = np.zeros((num_inducing,num_inducing),order='F') psi2_full = np.zeros((num_inducing,num_inducing),order='F')
psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
psi0_full = 0 psi0_full = 0
# YRY_full = 0
for n_start in xrange(0,num_data,self.batchsize): for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
@ -236,7 +239,6 @@ class VarDTC_GPU(object):
beta_slice = beta[n_start:n_end] beta_slice = beta[n_start:n_end]
psi0_full += (beta_slice*psi0).sum() psi0_full += (beta_slice*psi0).sum()
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
# YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else: else:
psi0_full += psi0.sum() psi0_full += psi0.sum()
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
@ -245,18 +247,17 @@ class VarDTC_GPU(object):
if het_noise: if het_noise:
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2) psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
else: else:
psi2_full += psi2.sum(axis=0) psi2_full += psi2
else: else:
if het_noise: if het_noise:
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1) psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
else: else:
psi2_full += tdot(psi1.T) psi2_full += np.outer(psi1.T, psi1.T)
if not het_noise: if not het_noise:
psi0_full *= beta psi0_full *= beta
psi1Y_full *= beta psi1Y_full *= beta
psi2_full *= beta psi2_full *= beta
# YRY_full = trYYT*beta
psi1Y_gpu.set(psi1Y_full) psi1Y_gpu.set(psi1Y_full)
psi2_gpu.set(psi2_full) psi2_gpu.set(psi2_full)
@ -372,6 +373,17 @@ class VarDTC_GPU(object):
post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get()) post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get())
#======================================================================
# Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
#======================================================================
if uncertain_inputs and not het_noise:
dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2.
dL_dthetaL += -beta*cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1)
dL_dthetaL += -beta*cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1)
dL_dthetaL += traceDot(v_gpu, psi1Y_gpu, num_inducing, output_dim)
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm_gpu.get(), post return logL, dL_dKmm_gpu.get(), post
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
@ -403,6 +415,8 @@ class VarDTC_GPU(object):
nSlice = n_end-n_start nSlice = n_end-n_start
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if het_noise:
beta = beta[n_start] # nSlice==1
if kern.useGPU: if kern.useGPU:
if uncertain_inputs: if uncertain_inputs:
@ -413,8 +427,6 @@ class VarDTC_GPU(object):
psi0p_gpu = kern.Kdiag(X_slice) psi0p_gpu = kern.Kdiag(X_slice)
psi1p_gpu = kern.K(X_slice, Z) psi1p_gpu = kern.K(X_slice, Z)
psi2p_gpu = self.gpuCache['psi2p_gpu'] psi2p_gpu = self.gpuCache['psi2p_gpu']
if psi2p_gpu.shape[0] > nSlice:
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
else: else:
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
@ -430,7 +442,6 @@ class VarDTC_GPU(object):
if psi0p_gpu.shape[0] > nSlice: if psi0p_gpu.shape[0] > nSlice:
psi0p_gpu = psi0p_gpu[:nSlice] psi0p_gpu = psi0p_gpu[:nSlice]
psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
psi0p_gpu.set(np.asfortranarray(psi0)) psi0p_gpu.set(np.asfortranarray(psi0))
psi1p_gpu.set(np.asfortranarray(psi1)) psi1p_gpu.set(np.asfortranarray(psi1))
if uncertain_inputs: if uncertain_inputs:
@ -473,13 +484,13 @@ class VarDTC_GPU(object):
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi0_gpu.fill(0.) dL_dpsi0_gpu.fill(-output_dim *beta/2.)
cublas.cublasDaxpy(self.cublas_handle, dL_dpsi0_gpu.size, output_dim/(-2.), beta_gpu_slice.gpudata, 1, dL_dpsi0_gpu.gpudata, 1)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice) cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice)
if uncertain_inputs: if uncertain_inputs:
outer_prod(dL_dpsi2_gpu,beta_gpu_slice,dL_dpsi2R_gpu,beta_gpu_slice.size) cublas.cublasDcopy(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, dL_dpsi2_gpu.gpudata, 1)
cublas.cublasDscal(self.cublas_handle, dL_dpsi2_gpu.szie, beta, dL_dpsi2_gpu.gpudata, 1)
else: else:
cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice) cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice)
@ -487,34 +498,44 @@ class VarDTC_GPU(object):
# Compute dL_dthetaL # Compute dL_dthetaL
#====================================================================== #======================================================================
if not uncertain_inputs: if uncertain_inputs and not het_noise:
join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing) if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL']
else:
dL_dthetaL = 0
mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice) # if not uncertain_inputs:
# cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, nSlice, beta, psi1p_gpu.gpudata, nSlice, psi1p_gpu.gpudata, nSlice, 1.0, psi2p_gpu.gpudata, num_inducing)
# join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing)
# psiR = cublas.cublasDdot(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, psi2p_gpu.gpudata, 1)
dL_dthetaL_gpu.fill(0.) # # mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice)
#
cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1) # dL_dthetaL_gpu.fill(0.)
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size) # dL_dthetaL = 0.
cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1) #
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim) # dL_dthetaL += cublas.cublasDdot(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata,1,betaYT_gpu_slice.gpudata,1)
#
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1) # cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1)
cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1) # mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size) # cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size) # sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1) #
# cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
thetaL_t_gpu.fill(0.) # cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1)
sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing) # mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size) # mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size) # cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1) #
# thetaL_t_gpu.fill(0.)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim) # sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing)
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size) # mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim) # mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
# cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
#
# cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim)
# mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size)
# sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
if kern.useGPU: if kern.useGPU:
dL_dpsi0 = dL_dpsi0_gpu dL_dpsi0 = dL_dpsi0_gpu

View file

@ -116,7 +116,7 @@ class VarDTC_minibatch(object):
if het_noise: if het_noise:
psi2_full += beta_slice*np.outer(psi1,psi1) psi2_full += beta_slice*np.outer(psi1,psi1)
else: else:
psi2_full += np.outer(psi1,psi1) psi2_full += np.outer(psi1.T,psi1.T)
if not het_noise: if not het_noise:
psi0_full *= beta psi0_full *= beta
@ -170,13 +170,16 @@ class VarDTC_minibatch(object):
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)
#====================================================================== #======================================================================
# phi_u_mean = np.dot(Kmm,v)
# LLInvKmm,_ = dtrtrs(LL,Kmm)
# # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm)
# phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm)
post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
#======================================================================
# Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
#======================================================================
if uncertain_inputs and not het_noise:
dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm, post return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
@ -213,20 +216,21 @@ class VarDTC_minibatch(object):
Y_slice = YYT_factor[n_start:n_end] Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if uncertain_inputs: if not uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2 = None psi2 = None
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
elif het_noise:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
if het_noise: if het_noise:
beta = beta[n_start] # assuming batchsize==1 beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice betaY = beta*Y_slice
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
#====================================================================== #======================================================================
# Load Intermediate Results # Load Intermediate Results
@ -239,7 +243,7 @@ class VarDTC_minibatch(object):
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,))) dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2.
dL_dpsi1 = np.dot(betaY,v.T) dL_dpsi1 = np.dot(betaY,v.T)
@ -254,18 +258,20 @@ class VarDTC_minibatch(object):
#====================================================================== #======================================================================
if het_noise: if het_noise:
if uncertain_inputs:
psiR = np.einsum('mo,nmo->n',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->n',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else:
if uncertain_inputs:
if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL']
else:
dL_dthetaL = 0.
else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum() dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
if uncertain_inputs: if uncertain_inputs:

View file

@ -6,7 +6,7 @@ The package for the psi statistics computation
""" """
import numpy as np import numpy as np
from GPy.util.caching import Cache_this from GPy.util.caching import Cache_this,Cacher
@Cache_this(limit=1) @Cache_this(limit=1)
def psicomputations(variance, lengthscale, Z, mu, S, gamma): def psicomputations(variance, lengthscale, Z, mu, S, gamma):
@ -88,7 +88,7 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
return _psi2 return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def _psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
@ -211,3 +211,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ # _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
psiDerivativecomputations = Cacher(_psiDerivativecomputations, limit=1, ignore_args=(0,1,2,))

View file

@ -58,10 +58,11 @@ try:
__device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx) __device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
{ {
// psi2 (n,m1,m2) // psi2 (n,m1,m2)
int m2 = idx/(M*N); int m2 = idx/M;
int m1 = (idx%(M*N))/N; int m1 = idx%M;
int n = idx%N;
double psi2=0;
for(int n=0;n<N;n++){
double psi2_exp=0; double psi2_exp=0;
for(int q=0;q<Q;q++){ for(int q=0;q<Q;q++){
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)]; double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
@ -70,7 +71,9 @@ try:
double exp2 = log1Gamma[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0); double exp2 = log1Gamma[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
psi2_exp += LOGEXPSUM(exp1,exp2); psi2_exp += LOGEXPSUM(exp1,exp2);
} }
return var*var*exp(psi2_exp); psi2 += exp(psi2_exp);
}
return var*var*psi2;
} }
""") """)
@ -281,7 +284,7 @@ class PSICOMP_SSRBF(object):
'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), 'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'), 'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'), 'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'), 'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
# derivatives psi1 # derivatives psi1
'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),

View file

@ -123,7 +123,8 @@ class Cacher_wrap(object):
def __get__(self, obj, objtype=None): def __get__(self, obj, objtype=None):
return partial(self, obj) return partial(self, obj)
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
obj = args[0] obj = args[0] # TODO: handle the case where the decorated callable is a plain function (no bound instance)
#import ipdb;ipdb.set_trace() #import ipdb;ipdb.set_trace()
try: try:
caches = obj.__cachers caches = obj.__cachers

View file

@ -19,6 +19,9 @@ try:
strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step") strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")
# np.trace(np.dot(A,B)) (also equivalent to (A*B.T).sum() ) A - n x m, B - m x n
traceDot = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="A[i]*B[(i%n)*m+i/n]", arguments="double *A, double *B, int n, int m")
#======================================================================================= #=======================================================================================
# Element-wise functions # Element-wise functions
#======================================================================================= #=======================================================================================