changed gpu interface for mpi

Zhenwen Dai 2015-09-07 16:27:31 +01:00
parent 6f881607d8
commit 607c214284
7 changed files with 46 additions and 89 deletions

View file

@@ -6,12 +6,6 @@ import numpy as np
 from GPy.util.caching import Cacher
 def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
-    """
-    Z - MxQ
-    mu - NxQ
-    S - NxQ
-    gamma - NxQ
-    """
     # here are the "statistics" for psi0, psi1 and psi2
     # Produced intermediate results:
     # _psi1    NxM
@@ -26,12 +20,6 @@ def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2
     return psi0, psi1, psi2
 def __psi1computations(variance, lengthscale, Z, mu, S):
-    """
-    Z - MxQ
-    mu - NxQ
-    S - NxQ
-    gamma - NxQ
-    """
     # here are the "statistics" for psi1
     # Produced intermediate results:
     # _psi1    NxM
@@ -46,12 +34,6 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
     return _psi1
 def __psi2computations(variance, lengthscale, Z, mu, S):
-    """
-    Z - MxQ
-    mu - NxQ
-    S - NxQ
-    gamma - NxQ
-    """
     # here are the "statistics" for psi2
     # Produced intermediate results:
     # _psi2    MxM
@@ -86,13 +68,6 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
     return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
 def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
-    """
-    dL_dpsi1 - NxM
-    Z - MxQ
-    mu - NxQ
-    S - NxQ
-    gamma - NxQ
-    """
     # here are the "statistics" for psi1
     # Produced intermediate results: dL_dparams w.r.t. psi1
     # _dL_dvariance     1
@@ -118,13 +93,6 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
     return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
 def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
-    """
-    Z - MxQ
-    mu - NxQ
-    S - NxQ
-    gamma - NxQ
-    dL_dpsi2 - MxM
-    """
     # here are the "statistics" for psi2
     # Produced the derivatives w.r.t. psi2:
     # _dL_dvariance     1

View file

@@ -7,13 +7,6 @@ from ....util.caching import Cache_this
 from . import PSICOMP_RBF
-from ....util import gpu_init
-try:
-    import pycuda.gpuarray as gpuarray
-    from pycuda.compiler import SourceModule
-    from ....util.linalg_gpu import sum_axis
-except:
-    pass
 gpu_code = """
 // define THREADNUM
@@ -242,6 +235,10 @@ gpu_code = """
 class PSICOMP_RBF_GPU(PSICOMP_RBF):
     def __init__(self, threadnum=256, blocknum=30, GPU_direct=False):
+        from pycuda.compiler import SourceModule
+        from ....util.gpu_init import initGPU
+        initGPU()
         self.GPU_direct = GPU_direct
         self.gpuCache = None
@@ -264,7 +261,8 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
         memo[id(self)] = s
         return s
     def _initGPUCache(self, N, M, Q):
+        import pycuda.gpuarray as gpuarray
         if self.gpuCache == None:
             self.gpuCache = {
                 'l_gpu'    :gpuarray.empty((Q,),np.float64,order='F'),
@@ -320,13 +318,14 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
     def get_dimensions(self, Z, variational_posterior):
         return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
-    @Cache_this(limit=1, ignore_args=(0,))
-    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
+    @Cache_this(limit=5, ignore_args=(0,))
+    def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
         """
         Z - MxQ
         mu - NxQ
         S - NxQ
         """
+        variance, lengthscale = kern.variance, kern.lengthscale
         N,M,Q = self.get_dimensions(Z, variational_posterior)
         self._initGPUCache(N,M,Q)
         self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
@@ -355,8 +354,10 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
         else:
             return psi0, psi1_gpu.get(), psi2_gpu.get()
-    @Cache_this(limit=1, ignore_args=(0,1,2,3))
-    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
+    @Cache_this(limit=5, ignore_args=(0,2,3,4))
+    def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        variance, lengthscale = kern.variance, kern.lengthscale
+        from ....util.linalg_gpu import sum_axis
         ARD = (len(lengthscale)!=1)
         N,M,Q = self.get_dimensions(Z, variational_posterior)
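For orientation, here is a minimal sketch of how the reworked psi-statistics interface is exercised from user code. The GPy.kern.RBF constructor, the kern.psicomp attribute, and the NormalPosterior class are assumptions drawn from the GPy codebase around this commit, not shown in this diff:

    import numpy as np
    import GPy
    from GPy.core.parameterization.variational import NormalPosterior

    N, M, Q = 100, 10, 3              # data points, inducing points, latent dims
    Z = np.random.randn(M, Q)         # inducing inputs, MxQ
    kern = GPy.kern.RBF(Q, ARD=True)  # carries .variance and .lengthscale
    qX = NormalPosterior(np.random.randn(N, Q), np.ones((N, Q)))  # mean NxQ, variance NxQ

    # Old interface: psicomp.psicomputations(kern.variance, kern.lengthscale, Z, qX)
    # New interface: the kernel object is passed and unpacked inside.
    psi0, psi1, psi2 = kern.psicomp.psicomputations(kern, Z, qX)

Passing kern instead of (variance, lengthscale) also explains the decorator change: self stays ignored at position 0, while the three dL_dpsi arguments shift from positions (1,2,3) to (2,3,4) because kern now occupies position 1.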

View file

@@ -9,7 +9,7 @@ from ....util.linalg import tdot
 import numpy as np
-def psicomputations(variance, Z, variational_posterior):
+def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
     """
     Compute psi-statistics for ss-linear kernel
     """

View file

@@ -6,14 +6,7 @@ The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
 import numpy as np
 from ....util.caching import Cache_this
 from . import PSICOMP_RBF
-from ....util import gpu_init
-try:
-    import pycuda.gpuarray as gpuarray
-    from pycuda.compiler import SourceModule
-    from ....util.linalg_gpu import sum_axis
-except:
-    pass
 gpu_code = """
 // define THREADNUM
@@ -292,6 +285,11 @@ gpu_code = """
 class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
     def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
+        from pycuda.compiler import SourceModule
+        from ....util.gpu_init import initGPU
+        initGPU()
         self.GPU_direct = GPU_direct
         self.gpuCache = None
@@ -314,7 +312,8 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
         memo[id(self)] = s
         return s
     def _initGPUCache(self, N, M, Q):
+        import pycuda.gpuarray as gpuarray
         if self.gpuCache == None:
             self.gpuCache = {
                 'l_gpu'    :gpuarray.empty((Q,),np.float64,order='F'),
@@ -377,12 +376,13 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
         return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
     @Cache_this(limit=1, ignore_args=(0,))
-    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
+    def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
         """
         Z - MxQ
         mu - NxQ
         S - NxQ
         """
+        variance, lengthscale = kern.variance, kern.lengthscale
         N,M,Q = self.get_dimensions(Z, variational_posterior)
         self._initGPUCache(N,M,Q)
         self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
@@ -409,8 +409,10 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
         else:
             return psi0, psi1_gpu.get(), psi2_gpu.get()
-    @Cache_this(limit=1, ignore_args=(0,1,2,3))
-    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
+    @Cache_this(limit=1, ignore_args=(0,2,3,4))
+    def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        variance, lengthscale = kern.variance, kern.lengthscale
+        from ....util.linalg_gpu import sum_axis
         ARD = (len(lengthscale)!=1)
         N,M,Q = self.get_dimensions(Z, variational_posterior)

View file

@@ -15,6 +15,5 @@ from . import caching
 from . import diag
 from . import initialization
 from . import multioutput
-from . import linalg_gpu
 from . import parallel

View file

@@ -16,33 +16,27 @@ try:
 except:
     pass
-try:
-    if MPI_enabled and MPI.COMM_WORLD.size>1:
-        from .parallel import get_id_within_node
-        gpuid = get_id_within_node()
-        import pycuda.driver
-        pycuda.driver.init()
-        if gpuid>=pycuda.driver.Device.count():
-            print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!')
-            #MPI.COMM_WORLD.Abort()
-            raise
-        gpu_device = pycuda.driver.Device(gpuid)
-        gpu_context = gpu_device.make_context()
-        gpu_initialized = True
-    else:
-        import pycuda.autoinit
-        gpu_initialized = True
-except:
-    pass
-try:
-    from scikits.cuda import cublas
-    import scikits.cuda.linalg as culinalg
-    culinalg.init()
-    cublas_handle = cublas.cublasCreate()
-except:
-    pass
+def initGPU():
+    try:
+        if MPI_enabled and MPI.COMM_WORLD.size>1:
+            from .parallel import get_id_within_node
+            gpuid = get_id_within_node()
+            import pycuda.driver
+            pycuda.driver.init()
+            if gpuid>=pycuda.driver.Device.count():
+                print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!')
+                raise
+            gpu_device = pycuda.driver.Device(gpuid)
+            gpu_context = gpu_device.make_context()
+            gpu_initialized = True
+        else:
+            import pycuda.autoinit
+            gpu_initialized = True
+    except:
+        pass
 def closeGPU():
     if gpu_context is not None:
         gpu_context.detach()
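The substance of this change: pycuda.autoinit claims a CUDA context at import time, which is wrong under MPI, where each rank on a node should bind a distinct device before any GPU work happens. Wrapping the logic in initGPU() defers that choice until a GPU psi-computation object is actually constructed. As an illustration only (GPy's real helper lives in .parallel and its body is not shown in this diff), get_id_within_node could look like this:

    from mpi4py import MPI

    def get_id_within_node(comm=MPI.COMM_WORLD):
        # My index among the ranks that share my hostname, so that the
        # k-th process on a node claims GPU k via pycuda.driver.Device(k).
        host = MPI.Get_processor_name()
        hosts = comm.allgather(host)
        return sum(1 for r, h in enumerate(hosts) if h == host and r < comm.rank)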

View file

@@ -61,12 +61,5 @@ try:
 except:
     pass
-try:
-    import scikits.cuda.linalg as culinalg
-    from scikits.cuda import cublas
-    from scikits.cuda.cula import culaExceptions
-except:
-    pass
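The same deferral pattern as above: the module-level scikits.cuda imports go away (along with the eager culinalg.init()/cublasCreate() removed from gpu_init.py), so importing GPy no longer touches CUDA at all; each GPU code path imports what it needs at call time, after initGPU() has bound the right device. A generic sketch of the pattern, with a hypothetical helper name:

    def dot_gpu(a, b):
        # Importing here rather than at module scope means a CUDA-less
        # machine, or an MPI rank that has not called initGPU() yet,
        # can still import this module safely.
        import pycuda.gpuarray as gpuarray
        import scikits.cuda.linalg as culinalg
        culinalg.init()  # real code would call this once and cache the fact
        return culinalg.dot(gpuarray.to_gpu(a), gpuarray.to_gpu(b)).get()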