[GPU] psi2 ssgplvm

This commit is contained in:
Zhenwen Dai 2014-04-01 12:09:40 +01:00
parent b945e8d01f
commit 98816659dd
2 changed files with 123 additions and 21 deletions

View file

@ -90,7 +90,7 @@ try:
# The kernel form computing psi1 het_noise
comp_dpsi1_dvar = ElementwiseKernel(
"double *dpsi1_dvar, double *psi1_neq, double *psi1exp1, double *psi11exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
"double *dpsi1_dvar, double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
"dpsi1_dvar[i] = comp_dpsi1_dvar_element(psi1_neq, psi1exp1, psi1exp2, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
"comp_dpsi1_dvar",
preamble="""
@ -99,7 +99,7 @@ try:
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi11exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
__device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
{
int n = idx%N;
int m = idx/N;
@ -107,9 +107,9 @@ try:
double psi1_sum = 0;
for(int q=0;q<Q;q++){
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double exp1_e = -(muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0
double exp1_e = -(muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)])/2.0 + exp1_e;
double exp2_e = - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0)
double exp2_e = - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
double psi1_q = LOGEXPSUM(exp1,exp2);
psi1_neq[IDX_NMQ(n,m,q)] = -psi1_q;
@ -125,17 +125,16 @@ try:
""")
# The kernel form computing psi1 het_noise
comp_dpsi1_der = ElementwiseKernel(
"double *dpsi1_dl, double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double var, double *psi1_neq, double psi1exp1, double *psi11exp2, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi1_dvar[i] = comp_dpsi1_der_element(dpsi1_dmu, dpsi1_dS, dpsi1_dgamma, dpsi1_dZ, var, psi1_neq, psi1exp1, psi1exp2, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_dpsi1_der",
comp_psi1_der = ElementwiseKernel(
"double *dpsi1_dl, double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi1_dl[i] = comp_psi1_der_element(dpsi1_dmu, dpsi1_dS, dpsi1_dgamma, dpsi1_dZ, psi1_neq, psi1exp1, psi1exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_psi1_der",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi1_der_element(double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double var, double *psi1_neq, double psi1exp1, double *psi11exp2, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
__device__ double comp_psi1_der_element(double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
{
int q = idx/(M*N);
int m = (idx%(M*N))/N;
@ -146,6 +145,7 @@ try:
double Z_c = Z[IDX_MQ(m,q)];
double S_c = S[IDX_NQ(n,q)];
double l_c = l[q];
double l_sqrt_c = sqrt(l[q]);
double psi1exp1_c = psi1exp1[IDX_NMQ(n,m,q)];
double psi1exp2_c = psi1exp2[IDX_MQ(m,q)];
@ -153,13 +153,101 @@ try:
double denom_sqrt = sqrt(denom);
double Zmu = Z_c-mu[IDX_NQ(n,q)];
double psi1_common = gamma_c/(denom_sqrt*denom*l_c);
double gamma1 = 1-gamma_c
double gamma1 = 1-gamma_c;
dpsi1_dgamma[IDX_NMQ(n,m,q)] = var*neq*(psi1exp1_c/denom_sqrt - psi1exp2_c);
dpsi1_dmu[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*Zmu*psi1exp1_c);
dpsi1_dS[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*(Zmu*Zmu/(S_c+l_c)-1.0)*psi1exp1_c)/2.0;
dpsi1_dZ[IDX_NMQ(n,m,q)] = var*neq*(-psi1_common*Zmu*psi1exp1_c-gamma1*Z_c/l_c*psi1exp2_c);
return var*neq*(psi1_common*(S_c/l_c+Zmu*Zmu/(S_c+l_c))*psi1exp1_c+gamma1*Z_c*Z_c/l_c*psi1exp2_c)/2.0;
return var*neq*(psi1_common*(S_c/l_c+Zmu*Zmu/(S_c+l_c))*psi1exp1_c+gamma1*Z_c*Z_c/l_c*psi1exp2_c)*l_sqrt_c;
}
""")
# The kernel form computing psi1 het_noise
comp_dpsi2_dvar = ElementwiseKernel(
"double *dpsi2_dvar, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
"dpsi2_dvar[i] = comp_dpsi2_dvar_element(psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
"comp_dpsi2_dvar",
preamble="""
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi2_dvar_element(double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
{
// psi2 (n,m1,m2)
int m2 = idx/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double psi2_sum=0;
for(int q=0;q<Q;q++){
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
double exp1_e = - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 +exp1_e;
double exp2_e = - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
double psi2_q = LOGEXPSUM(exp1,exp2);
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = -psi2_q;
psi2exp1[IDX_NMMQ(n,m1,m2,q)] = exp(exp1_e);
psi2exp2[IDX_MMQ(m1,m2,q)] = exp(exp2_e);
psi2_sum += psi2_q;
}
for(int q=0;q<Q;q++) {
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = exp(psi2_neq[IDX_NMMQ(n,m1,m2,q)]+psi2_sum);
}
return 2*var*exp(psi2_sum);
}
""")
# The kernel form computing psi1 het_noise
comp_psi2_der = ElementwiseKernel(
"double *dpsi2_dl, double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi2_dl[i] = comp_psi2_der_element(dpsi2_dmu, dpsi2_dS, dpsi2_dgamma, dpsi2_dZ, psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_psi2_der",
preamble="""
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
__device__ double comp_psi2_der_element(double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
{
// dpsi2 (n,m1,m2,q)
int q = idx/(M*M*N);
int m2 = (idx%(M*M*N))/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double neq = psi2_neq[IDX_NMMQ(n,m1,m2,q)];
double gamma_c = gamma[IDX_NQ(n,q)];
double Z1_c = Z[IDX_MQ(m1,q)];
double Z2_c = Z[IDX_MQ(m2,q)];
double S_c = S[IDX_NQ(n,q)];
double l_c = l[q];
double l_sqrt_c = sqrt(l[q]);
double psi2exp1_c = psi1exp1[IDX_NMMQ(n,m1,m2,q)];
double psi2exp2_c = psi1exp2[IDX_MMQ(m1,m2,q)];
double dZ = Z1_c - Z2_c;
double muZ = mu[IDX_NQ(n,q)] - (Z1_c+Z2_c)/2.0;
double Z2 = Z1_c*Z1_c+Z2_c*Z2_c;
double denom = 2.0*S_c/l_c+1.0;
double denom_sqrt = sqrt(denom);
double psi2_common = gamma_c/(denom_sqrt*denom*l_c);
double gamma1 = 1-gamma_c;
double var2 = var*var;
dpsi2_dgamma[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2exp1_c/denom_sqrt - psi2exp2_c);
dpsi2_dmu[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(-2.0*psi2_common*muZ*psi2exp1_c);
dpsi2_dS[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(2.0*muZ*muZ/(2.0*S_c+l_c)-1.0)*psi2exp1_c);
dpsi2_dZ[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(dZ*denom/-2.0+muZ)*psi2exp1_c-gamma1*Z_c/l_c*psi2exp2_c)*2.0;
return var2*neq*(psi2_common*(S_c/l_c+dZ*dZ*denom/(4.0*l_c)+muZ*muZ/(2.0*S_c+l_c))*psi2exp1_c+gamma1*Z2/(2.0*l_c)*psi2exp2_c)*l_sqrt_c*2.0;
}
""")
@ -287,12 +375,12 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
M = Z.shape[0]
Q = mu.shape[1]
l_gpu = gpuarray.gpuarray.empty((Q,),np.float64, order='F')
l_gpu = gpuarray.empty((Q,),np.float64, order='F')
l_gpu.fill(lengthscale2)
Z_gpu = gpuarray.to_gpu(np.asfortranarray(Z))
mu_gpu = gpuarray.to_gpu(np.asfortranarray(mu))
S_gpu = gpuarray.to_gpu(np.asfortranarray(S))
#gamma_gpu = gpuarray.to_gpu(gamma)
gamma_gpu = gpuarray.to_gpu(np.asfortranarray(gamma))
logGamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(gamma)))
log1Gamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(1.-gamma)))
logpsi1denom_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(S/lengthscale2+1.)))
@ -308,8 +396,9 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
dpsi1_dS_gpu = gpuarray.empty((N,M,Q),np.float64, order='F')
comp_dpsi1_dvar(dpsi1_dvar_gpu,psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
print np.abs(dpsi1_dvar_gpu.get()-_dpsi1_dvariance).max()
# print np.abs(dpsi1_dmu_gpu.get()-_dpsi1_dmu).max()
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
@ -363,18 +452,31 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
M = Z.shape[0]
Q = mu.shape[1]
# l_gpu = gpuarray.to_gpu(np.asfortranarray(lengthscale2))
l_gpu = gpuarray.empty((Q,),np.float64, order='F')
l_gpu.fill(lengthscale2)
Z_gpu = gpuarray.to_gpu(np.asfortranarray(Z))
mu_gpu = gpuarray.to_gpu(np.asfortranarray(mu))
S_gpu = gpuarray.to_gpu(np.asfortranarray(S))
#gamma_gpu = gpuarray.to_gpu(gamma)
gamma_gpu = gpuarray.to_gpu(np.asfortranarray(gamma))
logGamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(gamma)))
log1Gamma_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(1.-gamma)))
logpsi2denom_gpu = gpuarray.to_gpu(np.asfortranarray(np.log(2.*S/lengthscale2+1.)))
psi2_gpu = gpuarray.empty((mu.shape[0],Z.shape[0],Z.shape[0]),np.float64, order='F')
psi2_neq_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
psi2exp1_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
psi2exp2_gpu = gpuarray.empty((M,M,Q),np.float64, order='F')
dpsi2_dvar_gpu = gpuarray.empty((N,M,M),np.float64, order='F')
dpsi2_dl_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
dpsi2_dZ_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
dpsi2_dgamma_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
dpsi2_dmu_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
dpsi2_dS_gpu = gpuarray.empty((N,M,M,Q),np.float64, order='F')
comp_psi2(psi2_gpu, variance, lengthscale2, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
#comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
comp_dpsi2_dvar(dpsi2_dvar_gpu,psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
print np.abs(psi2_gpu.get()-_psi2).max()
print np.abs(dpsi2_dvar_gpu.get()-_dpsi2_dvariance).max()
return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale

View file

@ -26,8 +26,8 @@ class RBF(Stationary):
self.weave_options = {}
self.group_spike_prob = False
if self.useGPU:
self.psicomp = PSICOMP_SSRBF()
# if self.useGPU:
# self.psicomp = PSICOMP_SSRBF()
def K_of_r(self, r):