mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-13 22:12:38 +02:00
[GPU] vardtc_likelihood
This commit is contained in:
parent
f1d831c5f1
commit
daf5a877f3
2 changed files with 142 additions and 224 deletions
|
|
@ -15,7 +15,7 @@ try:
|
||||||
from scikits.cuda import cublas
|
from scikits.cuda import cublas
|
||||||
import pycuda.autoinit
|
import pycuda.autoinit
|
||||||
from pycuda.reduction import ReductionKernel
|
from pycuda.reduction import ReductionKernel
|
||||||
from ...util.linalg_gpu import logDiagSum, strideSum
|
from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -49,7 +49,7 @@ class VarDTC_GPU(object):
|
||||||
# Initialize GPU caches
|
# Initialize GPU caches
|
||||||
self.gpuCache = None
|
self.gpuCache = None
|
||||||
|
|
||||||
def _initGPUCache(self, num_inducing, output_dim):
|
def _initGPUCache(self, num_inducing, output_dim, Y):
|
||||||
if self.gpuCache == None:
|
if self.gpuCache == None:
|
||||||
self.gpuCache = {# inference_likelihood
|
self.gpuCache = {# inference_likelihood
|
||||||
'Kmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
'Kmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||||
|
|
@ -63,17 +63,19 @@ class VarDTC_GPU(object):
|
||||||
'KmmInvPsi2P_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
'KmmInvPsi2P_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||||
'dL_dpsi2R_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
'dL_dpsi2R_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||||
'dL_dKmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
'dL_dKmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||||
|
'psi1Y_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||||
|
'psi2_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||||
|
'beta_gpu' :gpuarray.empty((output_dim,),np.float64,order='F'),
|
||||||
|
'Y_gpu' :gpuarray.to_gpu(np.asfortranarray(Y)),
|
||||||
|
'betaY_gpu' :gpuarray.empty(Y.shape,np.float64,order='F'),
|
||||||
|
'psi2_t_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
|
||||||
# inference_minibatch
|
# inference_minibatch
|
||||||
}
|
}
|
||||||
self.gpuCache['ones_gpu'].fill(1.0)
|
self.gpuCache['ones_gpu'].fill(1.0)
|
||||||
|
|
||||||
def set_limit(self, limit):
|
Y_gpu = self.gpuCache['Y_gpu']
|
||||||
self.get_trYYT.limit = limit
|
self._trYYT = cublas.cublasDdot(self.cublas_handle, Y_gpu.size, Y_gpu.gpudata, 1, Y_gpu.gpudata, 1)
|
||||||
self.get_YYTfactor.limit = limit
|
|
||||||
|
|
||||||
def _get_trYYT(self, Y):
|
|
||||||
return param_to_array(np.sum(np.square(Y)))
|
|
||||||
|
|
||||||
def _get_YYTfactor(self, Y):
|
def _get_YYTfactor(self, Y):
|
||||||
"""
|
"""
|
||||||
find a matrix L which satisfies LLT = YYT.
|
find a matrix L which satisfies LLT = YYT.
|
||||||
|
|
@ -94,7 +96,7 @@ class VarDTC_GPU(object):
|
||||||
Cached intermediate results: Kmm, KmmInv,
|
Cached intermediate results: Kmm, KmmInv,
|
||||||
"""
|
"""
|
||||||
|
|
||||||
num_inducing = Z.shape[0]
|
num_inducing = Z.shape[0]
|
||||||
num_data, output_dim = Y.shape
|
num_data, output_dim = Y.shape
|
||||||
|
|
||||||
self._initGPUCache(num_inducing, output_dim)
|
self._initGPUCache(num_inducing, output_dim)
|
||||||
|
|
@ -107,59 +109,120 @@ class VarDTC_GPU(object):
|
||||||
#see whether we've got a different noise variance for each datum
|
#see whether we've got a different noise variance for each datum
|
||||||
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||||
het_noise = beta.size > 1
|
het_noise = beta.size > 1
|
||||||
trYYT = self.get_trYYT(Y)
|
trYYT = self._trYYT
|
||||||
|
|
||||||
|
psi1Y_gpu = self.gpuCache['psi1Y_gpu']
|
||||||
|
psi2_gpu = self.gpuCache['psi2_gpu']
|
||||||
|
beta_gpu = self.gpuCache['beta_gpu']
|
||||||
|
Y_gpu = self.gpuCache['Y_gpu']
|
||||||
|
betaY_gpu = self.gpuCache['betaY_gpu']
|
||||||
|
psi2_t_gpu = self.gpuCache['psi2_t_gpu']
|
||||||
|
|
||||||
psi2_full = np.zeros((num_inducing,num_inducing))
|
if het_noise:
|
||||||
psi1Y_full = np.zeros((num_inducing,output_dim)) # DxM
|
beta_gpu.set(np.asfortranarray(beta))
|
||||||
psi0_full = 0
|
mul_bcast(betaY_gpu,beta_gpu,Y_gpu,beta_gpu.size)
|
||||||
YRY_full = 0
|
YRY_full = cublas.cublasDdot(self.cublas_handle, Y_gpu.size, betaY_gpu.gpudata, 1, Y_gpu.gpudata, 1)
|
||||||
|
else:
|
||||||
for n_start in xrange(0,num_data,self.batchsize):
|
beta_gpu.fill(beta)
|
||||||
|
betaY_gpu.fill(0.)
|
||||||
n_end = min(self.batchsize+n_start, num_data)
|
cublas.cublasDaxpy(self.cublas_handle, betaY_gpu.size, beta, Y_gpu.gpudata, 1, betaY_gpu, 1)
|
||||||
|
|
||||||
Y_slice = Y[n_start:n_end]
|
|
||||||
X_slice = X[n_start:n_end]
|
|
||||||
|
|
||||||
if uncertain_inputs:
|
|
||||||
psi0 = kern.psi0(Z, X_slice)
|
|
||||||
psi1 = kern.psi1(Z, X_slice)
|
|
||||||
psi2 = kern.psi2(Z, X_slice)
|
|
||||||
else:
|
|
||||||
psi0 = kern.Kdiag(X_slice)
|
|
||||||
psi1 = kern.K(X_slice, Z)
|
|
||||||
psi2 = None
|
|
||||||
|
|
||||||
if het_noise:
|
|
||||||
beta_slice = beta[n_start:n_end]
|
|
||||||
psi0_full += (beta_slice*psi0).sum()
|
|
||||||
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
|
|
||||||
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
|
|
||||||
else:
|
|
||||||
psi0_full += psi0.sum()
|
|
||||||
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
|
|
||||||
|
|
||||||
|
|
||||||
if uncertain_inputs:
|
|
||||||
if het_noise:
|
|
||||||
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
|
|
||||||
else:
|
|
||||||
psi2_full += psi2.sum(axis=0)
|
|
||||||
else:
|
|
||||||
if het_noise:
|
|
||||||
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
|
|
||||||
else:
|
|
||||||
psi2_full += tdot(psi1.T)
|
|
||||||
|
|
||||||
if not het_noise:
|
|
||||||
psi0_full *= beta
|
|
||||||
psi1Y_full *= beta
|
|
||||||
psi2_full *= beta
|
|
||||||
YRY_full = trYYT*beta
|
YRY_full = trYYT*beta
|
||||||
|
|
||||||
psi1Y_gpu = gpuarray.to_gpu(np.asfortranarray(psi1Y_full))
|
if kern.useGPU:
|
||||||
psi2_gpu = gpuarray.to_gpu(np.asfortranarray(psi2_full))
|
psi1Y_gpu.fill(0.)
|
||||||
|
psi2_gpu.fill(0.)
|
||||||
|
psi0_full = 0
|
||||||
|
|
||||||
|
for n_start in xrange(0,num_data,self.batchsize):
|
||||||
|
n_end = min(self.batchsize+n_start, num_data)
|
||||||
|
ndata = n_end - n_start
|
||||||
|
Y_slice = Y[n_start:n_end]
|
||||||
|
X_slice = X[n_start:n_end]
|
||||||
|
beta_gpu_slice = beta_gpu[n_start:n_end]
|
||||||
|
betaY_gpu_slice = betaY_gpu[n_start:n_end]
|
||||||
|
if ndata==self.batchsize:
|
||||||
|
psi2_t_gpu_slice = psi2_t_gpu
|
||||||
|
else:
|
||||||
|
psi2_t_gpu_slice = psi2_t_gpu[0:ndata]
|
||||||
|
if uncertain_inputs:
|
||||||
|
psi0p_gpu = kern.psi0(Z, X_slice)
|
||||||
|
psi1p_gpu = kern.psi1(Z, X_slice)
|
||||||
|
psi2p_gpu = kern.psi2(Z, X_slice)
|
||||||
|
else:
|
||||||
|
psi0p_gpu = kern.Kdiag(X_slice)
|
||||||
|
psi1p_gpu = kern.K(X_slice, Z)
|
||||||
|
|
||||||
|
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaY_gpu_slice.gpudata, ndata, 1.0, psi1Y_gpu.gpudata, num_inducing)
|
||||||
|
if het_noise:
|
||||||
|
psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1)
|
||||||
|
else:
|
||||||
|
psi0_full += gpuarray.sum(psi0p_gpu).get()
|
||||||
|
|
||||||
|
if uncertain_inputs:
|
||||||
|
if het_noise:
|
||||||
|
mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size)
|
||||||
|
sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata)
|
||||||
|
else:
|
||||||
|
sum_axis(psi2_gpu,psi2p_gpu,1,ndata)
|
||||||
|
else:
|
||||||
|
if het_noise:
|
||||||
|
psi1_t_gpu = psi2_t_gpu_slice[:,:,0]
|
||||||
|
mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
|
||||||
|
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
|
||||||
|
else:
|
||||||
|
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
|
||||||
|
|
||||||
|
if not het_noise:
|
||||||
|
psi0_full *= beta
|
||||||
|
if uncertain_inputs:
|
||||||
|
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
|
||||||
|
|
||||||
|
else:
|
||||||
|
psi2_full = np.zeros((num_inducing,num_inducing),order='F')
|
||||||
|
psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
|
||||||
|
psi0_full = 0
|
||||||
|
YRY_full = 0
|
||||||
|
|
||||||
|
for n_start in xrange(0,num_data,self.batchsize):
|
||||||
|
n_end = min(self.batchsize+n_start, num_data)
|
||||||
|
Y_slice = Y[n_start:n_end]
|
||||||
|
X_slice = X[n_start:n_end]
|
||||||
|
if uncertain_inputs:
|
||||||
|
psi0 = kern.psi0(Z, X_slice)
|
||||||
|
psi1 = kern.psi1(Z, X_slice)
|
||||||
|
psi2 = kern.psi2(Z, X_slice)
|
||||||
|
else:
|
||||||
|
psi0 = kern.Kdiag(X_slice)
|
||||||
|
psi1 = kern.K(X_slice, Z)
|
||||||
|
|
||||||
|
if het_noise:
|
||||||
|
beta_slice = beta[n_start:n_end]
|
||||||
|
psi0_full += (beta_slice*psi0).sum()
|
||||||
|
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
|
||||||
|
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
|
||||||
|
else:
|
||||||
|
psi0_full += psi0.sum()
|
||||||
|
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
|
||||||
|
|
||||||
|
if uncertain_inputs:
|
||||||
|
if het_noise:
|
||||||
|
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
|
||||||
|
else:
|
||||||
|
psi2_full += psi2.sum(axis=0)
|
||||||
|
else:
|
||||||
|
if het_noise:
|
||||||
|
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
|
||||||
|
else:
|
||||||
|
psi2_full += tdot(psi1.T)
|
||||||
|
|
||||||
|
if not het_noise:
|
||||||
|
psi0_full *= beta
|
||||||
|
psi1Y_full *= beta
|
||||||
|
psi2_full *= beta
|
||||||
|
YRY_full = trYYT*beta
|
||||||
|
|
||||||
|
psi1Y_gpu.set(psi1Y_full)
|
||||||
|
psi2_gpu.set(psi2_full)
|
||||||
|
|
||||||
#======================================================================
|
#======================================================================
|
||||||
# Compute Common Components
|
# Compute Common Components
|
||||||
|
|
|
||||||
|
|
@ -260,8 +260,11 @@ class PSICOMP_SSRBF(object):
|
||||||
self.gpuCache = None
|
self.gpuCache = None
|
||||||
|
|
||||||
def _initGPUCache(self, N, M, Q):
|
def _initGPUCache(self, N, M, Q):
|
||||||
|
if self.gpuCache and self.gpuCacheAll['mu_gpu'].shape[0]<N:
|
||||||
|
self._releaseMemory()
|
||||||
|
|
||||||
if self.gpuCache == None:
|
if self.gpuCache == None:
|
||||||
self.gpuCache = {
|
self.gpuCacheAll = {
|
||||||
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||||
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
|
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
|
||||||
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||||
|
|
@ -301,6 +304,21 @@ class PSICOMP_SSRBF(object):
|
||||||
'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||||
'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||||
}
|
}
|
||||||
|
nonN_list = ['l_gpu','Z_gpu','psi2exp2_gpu','grad_l_gpu','grad_Z_gpu']
|
||||||
|
self._gpuCache_Nlist = [k for k in self.gpuCacheAll.keys() if k not in nonN_list]
|
||||||
|
self.gpuCache = self.gpuCacheAll
|
||||||
|
elif self.gpuCacheAll['mu_gpu'].shape[0]>N:
|
||||||
|
self.gpuCache = self.gpuCacheAll.copy()
|
||||||
|
for k in self._gpuCache_Nlist:
|
||||||
|
self.gpuCache[k] = self.gpuCacheAll[k][0:N]
|
||||||
|
|
||||||
|
def _releaseMemory(self):
|
||||||
|
if not self.gpuCacheAll:
|
||||||
|
for k,v in self.gpuCacheAll:
|
||||||
|
v.gpudata.free()
|
||||||
|
del v
|
||||||
|
self.gpuCacheAll = None
|
||||||
|
self.gpuCache = None
|
||||||
|
|
||||||
def psicomputations(self, variance, lengthscale, Z, mu, S, gamma):
|
def psicomputations(self, variance, lengthscale, Z, mu, S, gamma):
|
||||||
"""Compute Psi statitsitcs"""
|
"""Compute Psi statitsitcs"""
|
||||||
|
|
@ -492,166 +510,3 @@ class PSICOMP_SSRBF(object):
|
||||||
linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M)
|
linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M)
|
||||||
|
|
||||||
return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get()
|
return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get()
|
||||||
|
|
||||||
@Cache_this(limit=1)
|
|
||||||
def _Z_distances(Z):
|
|
||||||
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
|
||||||
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
|
||||||
return Zhat, Zdist
|
|
||||||
|
|
||||||
def _psicomputations(variance, lengthscale, Z, mu, S, gamma):
|
|
||||||
"""
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
@Cache_this(limit=1)
|
|
||||||
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi1 and psi2
|
|
||||||
# Produced intermediate results:
|
|
||||||
# _psi1 NxM
|
|
||||||
# _dpsi1_dvariance NxM
|
|
||||||
# _dpsi1_dlengthscale NxMxQ
|
|
||||||
# _dpsi1_dZ NxMxQ
|
|
||||||
# _dpsi1_dgamma NxMxQ
|
|
||||||
# _dpsi1_dmu NxMxQ
|
|
||||||
# _dpsi1_dS NxMxQ
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
# psi1
|
|
||||||
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
|
|
||||||
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
|
|
||||||
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
|
||||||
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
|
|
||||||
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
|
|
||||||
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
|
|
||||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
|
|
||||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
|
||||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
|
||||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
|
||||||
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
|
|
||||||
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
|
|
||||||
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
|
|
||||||
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
|
||||||
_dpsi1_dvariance = _psi1 / variance # NxM
|
|
||||||
_dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
|
|
||||||
_dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
|
|
||||||
_dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
|
|
||||||
_dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
|
|
||||||
_dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
|
|
||||||
|
|
||||||
N = mu.shape[0]
|
|
||||||
M = Z.shape[0]
|
|
||||||
Q = mu.shape[1]
|
|
||||||
|
|
||||||
l_gpu = gpuarray.empty((Q,),np.float64, order='F')
|
|
||||||
l_gpu.fill(lengthscale2)
|
|
||||||
Z_gpu = gpuarray.to_gpu(np.asfortranarray(Z))
|
|
||||||
mu_gpu = gpuarray.to_gpu(np.asfortranarray(mu))
|
|
||||||
S_gpu = gpuarray.to_gpu(np.asfortranarray(S))
|
|
||||||
gamma_gpu = gpuarray.to_gpu(np.asfortranarray(gamma))
|
|
||||||
logGamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(gamma)))
|
|
||||||
log1Gamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(1.-gamma)))
|
|
||||||
logpsi1denom_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(S/lengthscale2+1.)))
|
|
||||||
psi1_gpu = gpuarray.empty((mu.shape[0],Z.shape[0]),np.float64, order='F')
|
|
||||||
psi1_neq_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
psi1exp1_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
psi1exp2_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
dpsi1_dvar_gpu = gpuarray.empty((N,M),np.float64, order='F')
|
|
||||||
dpsi1_dl_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
dpsi1_dZ_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
dpsi1_dgamma_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
dpsi1_dmu_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
dpsi1_dS_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
|
|
||||||
|
|
||||||
comp_dpsi1_dvar(dpsi1_dvar_gpu,psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
|
|
||||||
comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
|
|
||||||
|
|
||||||
# print np.abs(dpsi1_dmu_gpu.get()-_dpsi1_dmu).max()
|
|
||||||
|
|
||||||
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
|
|
||||||
|
|
||||||
@Cache_this(limit=1)
|
|
||||||
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi1 and psi2
|
|
||||||
# Produced intermediate results:
|
|
||||||
# _psi2 NxMxM
|
|
||||||
# _psi2_dvariance NxMxM
|
|
||||||
# _psi2_dlengthscale NxMxMxQ
|
|
||||||
# _psi2_dZ NxMxMxQ
|
|
||||||
# _psi2_dgamma NxMxMxQ
|
|
||||||
# _psi2_dmu NxMxMxQ
|
|
||||||
# _psi2_dS NxMxMxQ
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
_psi2_Zhat, _psi2_Zdist = _Z_distances(Z)
|
|
||||||
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
|
||||||
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
|
||||||
|
|
||||||
# psi2
|
|
||||||
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
|
|
||||||
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
|
|
||||||
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
|
|
||||||
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
|
|
||||||
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
|
|
||||||
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
|
||||||
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
|
||||||
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
|
||||||
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
|
||||||
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
|
||||||
_psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
|
|
||||||
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
|
|
||||||
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
|
|
||||||
_psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M
|
|
||||||
_dpsi2_dvariance = 2. * _psi2/variance # NxMxM
|
|
||||||
_dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
|
|
||||||
_dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
|
|
||||||
_dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
|
|
||||||
_dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
|
|
||||||
_dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
|
|
||||||
|
|
||||||
N = mu.shape[0]
|
|
||||||
M = Z.shape[0]
|
|
||||||
Q = mu.shape[1]
|
|
||||||
|
|
||||||
l_gpu = gpuarray.empty((Q,),np.float64, order='F')
|
|
||||||
l_gpu.fill(lengthscale2)
|
|
||||||
Z_gpu = gpuarray.to_gpu(np.asfortranarray(Z))
|
|
||||||
mu_gpu = gpuarray.to_gpu(np.asfortranarray(mu))
|
|
||||||
S_gpu = gpuarray.to_gpu(np.asfortranarray(S))
|
|
||||||
gamma_gpu = gpuarray.to_gpu(np.asfortranarray(gamma))
|
|
||||||
logGamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(gamma)))
|
|
||||||
log1Gamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(1.-gamma)))
|
|
||||||
logpsi2denom_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(2.*S/lengthscale2+1.)))
|
|
||||||
psi2_gpu = gpuarray.empty((mu.shape[0],Z.shape[0],Z.shape[0]),np.float64, order='F')
|
|
||||||
psi2_neq_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
psi2exp1_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
psi2exp2_gpu = gpuarray.empty((M,M,Q),np.float64, order='F')
|
|
||||||
dpsi2_dvar_gpu = gpuarray.empty((N,M,M),np.float64, order='F')
|
|
||||||
dpsi2_dl_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
dpsi2_dZ_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
dpsi2_dgamma_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
dpsi2_dmu_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
dpsi2_dS_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
|
|
||||||
|
|
||||||
#comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
|
|
||||||
|
|
||||||
comp_dpsi2_dvar(dpsi2_dvar_gpu,psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
|
|
||||||
comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
|
|
||||||
|
|
||||||
# print np.abs(dpsi2_dvar_gpu.get()-_dpsi2_dvariance).max()
|
|
||||||
|
|
||||||
return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue