From 5875a33d219189335ba0c7fcbedc48dd274d2cb5 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 1 Apr 2014 18:10:35 +0100 Subject: [PATCH] [GPU] bug fix --- GPy/inference/latent_function_inference/var_dtc_gpu.py | 5 +++-- GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py | 5 ++--- GPy/util/linalg_gpu.py | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py index 75a07992..e223af3c 100644 --- a/GPy/inference/latent_function_inference/var_dtc_gpu.py +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -15,7 +15,7 @@ try: from scikits.cuda import cublas import pycuda.autoinit from pycuda.reduction import ReductionKernel - from ...util.linalg_gpu import logDiagSum + from ...util.linalg_gpu import logDiagSum, strideSum except: pass @@ -212,7 +212,8 @@ class VarDTC_GPU(object): cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, LmInvPsi2LmInvT_gpu.gpudata, 1) cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing) cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing) - tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1) + #tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1) + tr_LmInvPsi2LmInvT = strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1) print np.abs(vvt-vvt_gpu.get()).max() print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT) diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py index b116d9cc..da948661 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py @@ -414,7 +414,7 @@ class PSICOMP_SSRBF(object): grad_dl_gpu = self.gpuCache['grad_l_gpu'] # variance - variance.gradient = cublas.cublasDasum(self.cublas_handle, dL_dpsi0.size, dL_dpsi0, 1) \ + variance.gradient = gpuarray.sum(dL_dpsi0) \ + cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \ + cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1) @@ -429,8 +429,7 @@ class PSICOMP_SSRBF(object): else: linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size) linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size) - lengthscale.gradient = cublas.cublasDasum(self.cublas_handle, psi1_comb_gpu.size, psi1_comb_gpu, 1) \ - + cublas.cublasDasum(self.cublas_handle, psi2_comb_gpu.size, psi2_comb_gpu, 1) + lengthscale.gradient = gpuarray.sum(psi1_comb_gpu) + gpuarray.sum(psi2_comb_gpu) def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): pass diff --git a/GPy/util/linalg_gpu.py b/GPy/util/linalg_gpu.py index 73d57e1f..60eb8101 100644 --- a/GPy/util/linalg_gpu.py +++ b/GPy/util/linalg_gpu.py @@ -16,6 +16,8 @@ try: # logDiagSum(A, A.shape[0]+1) logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step") + strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step") + #======================================================================================= # Element-wise functions #=======================================================================================