diff --git a/GPy/kern/_src/psi_comp/rbf_psi_gpucomp.py b/GPy/kern/_src/psi_comp/rbf_psi_gpucomp.py new file mode 100644 index 00000000..7b604227 --- /dev/null +++ b/GPy/kern/_src/psi_comp/rbf_psi_gpucomp.py @@ -0,0 +1,454 @@ +""" +The module for psi-statistics for RBF kernel +""" + +import numpy as np +from GPy.util.caching import Cacher +from . import PSICOMP_RBF +from ....util import gpu_init + +try: + import pycuda.gpuarray as gpuarray + from pycuda.compiler import SourceModule +except: + pass + +gpu_code = """ + // define THREADNUM + + #define IDX_NMQ(n,m,q) ((q*M+m)*N+n) + #define IDX_NQ(n,q) (q*N+n) + #define IDX_NM(n,m) (m*N+n) + #define IDX_MQ(m,q) (q*M+m) + #define IDX_MM(m1,m2) (m2*M+m1) + #define IDX_NQB(n,q,b) ((b*Q+q)*N+n) + #define IDX_QB(q,b) (b*Q+q) + + // Divide data evenly + __device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) { + int residue = (total_data)%psize; + if(pidx= blockDim.x) { + for(int i=blockDim.x+threadIdx.x; i=1;s=s/2) { + if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];} + __syncthreads(); + } + } + + __global__ void psi1computations(double *psi1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q) + { + int m_start, m_end; + divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end); + + for(int m=m_start; mnm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.) + _psi1 = variance*np.exp(_psi1_log) + + return _psi1 + +def __psi2computations(variance, lengthscale, Z, mu, S): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi2 + # Produced intermediate results: + # _psi2 MxM + + lengthscale2 = np.square(lengthscale) + + _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N + _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM + Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ + denom = 1./(2.*S+lengthscale2) + _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) + _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) + + + return _psi2 + +def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() + + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 + + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS + +def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + _psi1 = _psi1computations(variance, lengthscale, Z, mu, S) + Lpsi1 = dL_dpsi1*_psi1 + Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ + denom = 1./(S+lengthscale2) + Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ + _dL_dvar = Lpsi1.sum()/variance + _dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom) + _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. + _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) + _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) + + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + +def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + denom = 1./(2*S+lengthscale2) + denom2 = np.square(denom) + + _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM + + Lpsi2 = dL_dpsi2[None,:,:]*_psi2 + Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N + Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ + Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ + Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ + Lpsi2Zhat = Lpsi2Z + Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 + + _dL_dvar = Lpsi2sum.sum()*2/variance + _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) + _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] + _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ + 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) + _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))- + (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) + + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + +_psi1computations = Cacher(__psi1computations, limit=1) +_psi2computations = Cacher(__psi2computations, limit=1) diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py index e220e0d0..8f845d32 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py @@ -263,7 +263,7 @@ class PSICOMP_SSRBF(object): self.cublas_handle = gpu_init.cublas_handle self.gpuCache = None self.gpuCacheAll = None - + def _initGPUCache(self, N, M, Q): if self.gpuCache!=None and self.gpuCache['mu_gpu'].shape[0] == N: return diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 221ca593..409c2e41 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -9,6 +9,7 @@ from stationary import Stationary from GPy.util.caching import Cache_this from ...core.parameterization import variational from psi_comp import PSICOMP_RBF +from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU from ...util.config import * class RBF(Stationary): @@ -26,6 +27,8 @@ class RBF(Stationary): self.weave_options = {} self.group_spike_prob = False self.psicomp = PSICOMP_RBF() + if self.useGPU: + self.psicomp = PSICOMP_RBF_GPU() def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -39,25 +42,27 @@ class RBF(Stationary): def psi0(self, Z, variational_posterior): if self.useGPU: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0] else: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0] def psi1(self, Z, variational_posterior): if self.useGPU: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] else: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] def psi2(self, Z, variational_posterior): if self.useGPU: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] else: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): if self.useGPU: - self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2] + self.variance.gradient = dL_dvar + self.lengthscale.gradient = dL_dlengscale else: dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2] self.variance.gradient = dL_dvar @@ -65,12 +70,12 @@ class RBF(Stationary): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): if self.useGPU: - return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2] else: return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2] def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): if self.useGPU: - return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:] else: return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:] diff --git a/GPy/kern/_src/trunclinear.py b/GPy/kern/_src/trunclinear.py index 540c5bc8..76ed31f7 100644 --- a/GPy/kern/_src/trunclinear.py +++ b/GPy/kern/_src/trunclinear.py @@ -73,7 +73,7 @@ class TruncLinear(Kern): return XX def Kdiag(self, X): - return self.variances*(np.square(X-self.delta)).sum(axis=-1) + return (self.variances*np.square(X-self.delta)).sum(axis=-1) def update_gradients_full(self, dL_dK, X, X2=None): dK_dvar = self._product(X, X2)