mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-11 21:12:38 +02:00
[GPU] bug fix
This commit is contained in:
parent
af56b9951c
commit
5875a33d21
3 changed files with 7 additions and 5 deletions
|
|
@@ -15,7 +15,7 @@ try:
|
|||
from scikits.cuda import cublas
|
||||
import pycuda.autoinit
|
||||
from pycuda.reduction import ReductionKernel
|
||||
from ...util.linalg_gpu import logDiagSum
|
||||
from ...util.linalg_gpu import logDiagSum, strideSum
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
@@ -212,7 +212,8 @@ class VarDTC_GPU(object):
|
|||
cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, LmInvPsi2LmInvT_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
|
||||
tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1)
|
||||
#tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1)
|
||||
tr_LmInvPsi2LmInvT = strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1)
|
||||
print np.abs(vvt-vvt_gpu.get()).max()
|
||||
print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT)
|
||||
|
||||
|
|
|
|||
|
|
@@ -414,7 +414,7 @@ class PSICOMP_SSRBF(object):
|
|||
grad_dl_gpu = self.gpuCache['grad_l_gpu']
|
||||
|
||||
# variance
|
||||
variance.gradient = cublas.cublasDasum(self.cublas_handle, dL_dpsi0.size, dL_dpsi0, 1) \
|
||||
variance.gradient = gpuarray.sum(dL_dpsi0) \
|
||||
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \
|
||||
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1)
|
||||
|
||||
|
|
@@ -429,8 +429,7 @@ class PSICOMP_SSRBF(object):
|
|||
else:
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
|
||||
lengthscale.gradient = cublas.cublasDasum(self.cublas_handle, psi1_comb_gpu.size, psi1_comb_gpu, 1) \
|
||||
+ cublas.cublasDasum(self.cublas_handle, psi2_comb_gpu.size, psi2_comb_gpu, 1)
|
||||
lengthscale.gradient = gpuarray.sum(psi1_comb_gpu) + gpuarray.sum(psi2_comb_gpu)
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
|
||||
pass
|
||||
|
|
|
|||
|
|
@@ -16,6 +16,8 @@ try:
|
|||
# logDiagSum(A, A.shape[0]+1)
|
||||
logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step")
|
||||
|
||||
strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")
|
||||
|
||||
#=======================================================================================
|
||||
# Element-wise functions
|
||||
#=======================================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue