mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-09 03:52:39 +02:00
rbf kernel gpu implementation in progress
This commit is contained in:
parent
0b75aa8b0f
commit
db9b9bc857
4 changed files with 467 additions and 8 deletions
454
GPy/kern/_src/psi_comp/rbf_psi_gpucomp.py
Normal file
454
GPy/kern/_src/psi_comp/rbf_psi_gpucomp.py
Normal file
|
|
@ -0,0 +1,454 @@
|
||||||
|
"""
|
||||||
|
The module for psi-statistics for RBF kernel
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from GPy.util.caching import Cacher
|
||||||
|
from . import PSICOMP_RBF
|
||||||
|
from ....util import gpu_init
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pycuda.gpuarray as gpuarray
|
||||||
|
from pycuda.compiler import SourceModule
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
gpu_code = """
|
||||||
|
// define THREADNUM
|
||||||
|
|
||||||
|
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||||
|
#define IDX_NQ(n,q) (q*N+n)
|
||||||
|
#define IDX_NM(n,m) (m*N+n)
|
||||||
|
#define IDX_MQ(m,q) (q*M+m)
|
||||||
|
#define IDX_MM(m1,m2) (m2*M+m1)
|
||||||
|
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
|
||||||
|
#define IDX_QB(q,b) (b*Q+q)
|
||||||
|
|
||||||
|
// Divide data evenly
|
||||||
|
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
|
||||||
|
int residue = (total_data)%psize;
|
||||||
|
if(pidx<residue) {
|
||||||
|
int size = total_data/psize+1;
|
||||||
|
*start = size*pidx;
|
||||||
|
*end = *start+size;
|
||||||
|
} else {
|
||||||
|
int size = total_data/psize;
|
||||||
|
*start = size*pidx+residue;
|
||||||
|
*end = *start+size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ void reduce_sum(double* array, int array_size) {
|
||||||
|
int s;
|
||||||
|
if(array_size >= blockDim.x) {
|
||||||
|
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
|
||||||
|
array[threadIdx.x] += array[i];
|
||||||
|
}
|
||||||
|
array_size = blockDim.x;
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
for(int i=1; i<=array_size;i*=2) {s=i;}
|
||||||
|
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
|
||||||
|
__syncthreads();
|
||||||
|
for(s=s/2;s>=1;s=s/2) {
|
||||||
|
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void psi1computations(double *psi1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
|
||||||
|
{
|
||||||
|
int m_start, m_end;
|
||||||
|
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
|
||||||
|
|
||||||
|
for(int m=m_start; m<m_end; m++) {
|
||||||
|
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
|
||||||
|
double log_psi1 = 0;
|
||||||
|
for(int q=0;q<Q;q++) {
|
||||||
|
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
|
||||||
|
log_psi1 += (muZ*muZ/(S[IDX_NQ(n,q)]+l[q]))/(-2.);
|
||||||
|
log_psi1 += log(S[IDX_NQ(n,q)]/l[q]+1)/(-2.);
|
||||||
|
}
|
||||||
|
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void psi2computations(double *psi2, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
|
||||||
|
{
|
||||||
|
int psi2_idx_start, psi2_idx_end;
|
||||||
|
__shared__ double psi2_local[THREADNUM];
|
||||||
|
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
|
||||||
|
|
||||||
|
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
|
||||||
|
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
|
||||||
|
int m2 = psi2_idx - (m1+1)*m1/2;
|
||||||
|
|
||||||
|
psi2_local[threadIdx.x] = 0;
|
||||||
|
for(int n=threadIdx.x;n<N;n+=blockDim.x) {
|
||||||
|
double log_psi2_n = 0;
|
||||||
|
for(int q=0;q<Q;q++) {
|
||||||
|
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
|
||||||
|
double muZhat = mu[IDX_NQ(n,q)]- (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.;
|
||||||
|
log_psi2_n += dZ*dZ/(-4.*l[q])-muZhat*muZhat/(2.*S[IDX_NQ(n,q)]+l[q]);
|
||||||
|
log_psi2_n += log(2.*S[IDX_NQ(n,q)]/l[q]+1)/(-2.);
|
||||||
|
}
|
||||||
|
psi2_local[threadIdx.x] += exp(log_psi2_n);
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
reduce_sum(psi2_local, THREADNUM);
|
||||||
|
if(threadIdx.x==0) {
|
||||||
|
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
|
||||||
|
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi1, double *psi1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
|
||||||
|
{
|
||||||
|
int m_start, m_end;
|
||||||
|
__shared__ double g_local[THREADNUM];
|
||||||
|
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
|
||||||
|
int P = int(ceil(double(N)/THREADNUM));
|
||||||
|
|
||||||
|
double dvar_local = 0;
|
||||||
|
for(int q=0;q<Q;q++) {
|
||||||
|
double dl_local = 0;
|
||||||
|
for(int p=0;p<P;p++) {
|
||||||
|
int n = p*THREADNUM + threadIdx.x;
|
||||||
|
double dmu_local = 0;
|
||||||
|
double dS_local = 0;
|
||||||
|
for(int m=m_start; m<m_end; m++) {
|
||||||
|
if(n<N) {
|
||||||
|
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
|
||||||
|
if(q==0) {dvar_local += lpsi1;}
|
||||||
|
|
||||||
|
double Snq = S[IDX_NQ(n,q)];
|
||||||
|
double lq = l[q];
|
||||||
|
double Zmu = Z[IDX_MQ(m,q)] - mu[IDX_NQ(n,q)];
|
||||||
|
double denom = Snq+lq;
|
||||||
|
double Zmu2_denom = Zmu*Zmu/denom;
|
||||||
|
|
||||||
|
dmu_local += lpsi1*Zmu/denom;
|
||||||
|
dS_local += lpsi1*(Zmu2_denom-1.)/denom;
|
||||||
|
dl_local += lpsi1*(Zmu2_denom+Snq/lq)/denom*sqrt(lq);
|
||||||
|
g_local[threadIdx.x] = -lpsi1*Zmu/denom;
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
|
||||||
|
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
|
||||||
|
}
|
||||||
|
if(n<N) {
|
||||||
|
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
|
||||||
|
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
|
||||||
|
}
|
||||||
|
__threadfence_block();
|
||||||
|
}
|
||||||
|
g_local[threadIdx.x] = dl_local;
|
||||||
|
__syncthreads();
|
||||||
|
reduce_sum(g_local, THREADNUM);
|
||||||
|
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
|
||||||
|
}
|
||||||
|
g_local[threadIdx.x] = dvar_local;
|
||||||
|
__syncthreads();
|
||||||
|
reduce_sum(g_local, THREADNUM);
|
||||||
|
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
class PSICOMP_RBF_GPU(PSICOMP_RBF):
    """
    GPU (PyCUDA) implementation of the psi-statistics computations for the
    RBF kernel with a variational posterior.

    Work in progress: psi0/psi1/psi2 are computed on the GPU, while the
    derivative computations still return the CPU reference results
    (``_psi1compDer``/``_psi2compDer`` below); the psi1-derivative kernel is
    launched only to fill the device buffers for development comparison.
    """

    def __init__(self):
        # Fail fast if the CUDA context/cublas handle could not be created.
        assert gpu_init.initSuccess, "GPU initialization failed!"
        self.cublas_handle = gpu_init.cublas_handle
        # Device buffers are allocated lazily on the first psicomputations
        # call (see _initGPUCache).
        self.gpuCache = None

        # Launch configuration: the kernels are compiled with a fixed block
        # size (THREADNUM) and always launched on `blocknum` blocks.
        self.threadnum = 128
        self.blocknum = 3
        module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
        self.g_psi1computations = module.get_function('psi1computations')
        self.g_psi2computations = module.get_function('psi2computations')
        self.g_psi1compDer = module.get_function('psi1compDer')

    def _initGPUCache(self, N, M, Q):
        """
        Allocate all device-side buffers for problem size N (data points),
        M (inducing points), Q (input dimensions).

        NOTE(review): buffers are allocated only once; if N, M or Q change
        on a later call the cached shapes become stale -- TODO reallocate
        when the requested sizes differ from the cached ones.
        """
        if self.gpuCache is None:  # was `== None`; identity test is the idiom
            self.gpuCache = {
                # kernel inputs
                'l_gpu'      :gpuarray.empty((Q,),np.float64,order='F'),
                'Z_gpu'      :gpuarray.empty((M,Q),np.float64,order='F'),
                'mu_gpu'     :gpuarray.empty((N,Q),np.float64,order='F'),
                'S_gpu'      :gpuarray.empty((N,Q),np.float64,order='F'),
                # psi statistics (kernel outputs)
                'psi0_gpu'   :gpuarray.empty((N,),np.float64,order='F'),
                'psi1_gpu'   :gpuarray.empty((N,M),np.float64,order='F'),
                'psi2_gpu'   :gpuarray.empty((M,M),np.float64,order='F'),
                # derivatives (one partial slice per CUDA block)
                'dvar_gpu'   :gpuarray.empty((self.blocknum,),np.float64, order='F'),
                'dl_gpu'     :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
                'dZ_gpu'     :gpuarray.empty((M,Q),np.float64, order='F'),
                'dmu_gpu'    :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
                'dS_gpu'     :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
                # gradients
                'grad_l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
                'grad_Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
                }

    def sync_params(self, lengthscale, Z, mu, S):
        """Copy the current parameter values into the device buffers."""
        # BUGFIX(review): the CUDA kernels use l[q] exactly where the CPU
        # reference uses lengthscale**2 (e.g. log(S/l+1) vs
        # log(S/lengthscale2+1)), so the SQUARED lengthscales must be
        # uploaded.  The original code uploaded the raw lengthscales, which
        # made the GPU results disagree with the CPU reference.
        self.gpuCache['l_gpu'].set(np.asfortranarray(np.square(lengthscale)))
        self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
        self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
        self.gpuCache['S_gpu'].set(np.asfortranarray(S))

    def reset_derivative(self):
        """Zero the derivative accumulators (the kernels accumulate with +=)."""
        self.gpuCache['dvar_gpu'].fill(0.)
        self.gpuCache['dl_gpu'].fill(0.)
        self.gpuCache['dZ_gpu'].fill(0.)
        self.gpuCache['dmu_gpu'].fill(0.)
        self.gpuCache['dS_gpu'].fill(0.)

    # @Cache_this(limit=1, ignore_args=(0,))
    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
        """
        Compute the psi statistics on the GPU.

        Z - MxQ inducing inputs
        variational_posterior - provides mean (NxQ) and variance (NxQ)

        Returns (psi0, psi1, psi2) as host arrays of shapes (N,), (N,M), (M,M).
        """
        mu = variational_posterior.mean
        S = variational_posterior.variance
        N = mu.shape[0]
        M = Z.shape[0]
        Q = Z.shape[1]
        self._initGPUCache(N, M, Q)
        self.sync_params(lengthscale, Z, mu, S)

        psi0_gpu = self.gpuCache['psi0_gpu']
        psi1_gpu = self.gpuCache['psi1_gpu']
        psi2_gpu = self.gpuCache['psi2_gpu']
        l_gpu = self.gpuCache['l_gpu']
        Z_gpu = self.gpuCache['Z_gpu']
        mu_gpu = self.gpuCache['mu_gpu']
        S_gpu = self.gpuCache['S_gpu']

        # psi0 is constant: the kernel variance for every data point.
        psi0_gpu.fill(variance)
        self.g_psi1computations(psi1_gpu, np.float64(variance), l_gpu, Z_gpu, mu_gpu, S_gpu,
                                np.int32(N), np.int32(M), np.int32(Q),
                                block=(self.threadnum,1,1), grid=(self.blocknum,1))
        self.g_psi2computations(psi2_gpu, np.float64(variance), l_gpu, Z_gpu, mu_gpu, S_gpu,
                                np.int32(N), np.int32(M), np.int32(Q),
                                block=(self.threadnum,1,1), grid=(self.blocknum,1))
        # NOTE(review): removed the unreachable Python-2 debug code that
        # followed this return (it compared against the CPU reference).
        return psi0_gpu.get(), psi1_gpu.get(), psi2_gpu.get()

    # @Cache_this(limit=1, ignore_args=(0,1,2,3))
    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
        """
        Gradients of the objective w.r.t. the kernel parameters, the
        inducing inputs and the variational-posterior parameters.

        The returned values currently come from the CPU reference
        implementation; the psi1-derivative kernel is launched only so its
        output can be inspected during development.
        """
        ARD = (len(lengthscale)!=1)

        # CPU reference derivatives -- these are the authoritative results.
        dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
        dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)

        mu = variational_posterior.mean
        N = mu.shape[0]
        M = Z.shape[0]
        Q = Z.shape[1]
        psi1_gpu = self.gpuCache['psi1_gpu']
        l_gpu = self.gpuCache['l_gpu']
        Z_gpu = self.gpuCache['Z_gpu']
        mu_gpu = self.gpuCache['mu_gpu']
        S_gpu = self.gpuCache['S_gpu']
        dvar_gpu = self.gpuCache['dvar_gpu']
        dl_gpu = self.gpuCache['dl_gpu']
        dZ_gpu = self.gpuCache['dZ_gpu']
        dmu_gpu = self.gpuCache['dmu_gpu']
        dS_gpu = self.gpuCache['dS_gpu']

        # Work in progress: fill the per-block partial-derivative buffers on
        # the GPU.  The output is NOT used in the returned gradients yet.
        # (Removed the Python-2 `print` debug statements that were here.)
        dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
        self.reset_derivative()
        self.g_psi1compDer(dvar_gpu, dl_gpu, dZ_gpu, dmu_gpu, dS_gpu, dL_dpsi1_gpu, psi1_gpu,
                           np.float64(variance), l_gpu, Z_gpu, mu_gpu, S_gpu,
                           np.int32(N), np.int32(M), np.int32(Q),
                           block=(self.threadnum,1,1), grid=(self.blocknum,1))

        # psi0 = variance everywhere, so its variance-gradient is the plain sum.
        dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2

        dL_dlengscale = dl_psi1 + dl_psi2
        if not ARD:
            # A single shared lengthscale: collapse the per-dimension gradients.
            dL_dlengscale = dL_dlengscale.sum()

        dL_dmu = dmu_psi1 + dmu_psi2
        dL_dS = dS_psi1 + dS_psi2
        dL_dZ = dZ_psi1 + dZ_psi2

        return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
||||||
|
|
||||||
|
|
||||||
|
def psicomputations(variance, lengthscale, Z, variational_posterior):
    """
    Compute the three psi statistics on the CPU.

    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ
    Returns (psi0, psi1, psi2) of shapes (N,), (N,M), (M,M).
    """
    q_mean = variational_posterior.mean
    q_var = variational_posterior.variance

    # psi0 is the kernel variance replicated over the N data points.
    psi0 = np.full(q_mean.shape[0], variance, dtype=np.float64)
    psi1 = _psi1computations(variance, lengthscale, Z, q_mean, q_var)
    # _psi2computations returns NxMxM; collapse the data dimension.
    psi2 = _psi2computations(variance, lengthscale, Z, q_mean, q_var).sum(axis=0)
    return psi0, psi1, psi2
|
||||||
|
|
||||||
|
def __psi1computations(variance, lengthscale, Z, mu, S):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi1
|
||||||
|
# Produced intermediate results:
|
||||||
|
# _psi1 NxM
|
||||||
|
|
||||||
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
|
||||||
|
# psi1
|
||||||
|
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
|
||||||
|
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
|
||||||
|
_psi1 = variance*np.exp(_psi1_log)
|
||||||
|
|
||||||
|
return _psi1
|
||||||
|
|
||||||
|
def __psi2computations(variance, lengthscale, Z, mu, S):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi2
|
||||||
|
# Produced intermediate results:
|
||||||
|
# _psi2 MxM
|
||||||
|
|
||||||
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
|
||||||
|
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
|
||||||
|
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
|
||||||
|
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
|
||||||
|
denom = 1./(2.*S+lengthscale2)
|
||||||
|
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
|
||||||
|
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
|
||||||
|
|
||||||
|
|
||||||
|
return _psi2
|
||||||
|
|
||||||
|
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
    """
    Combine the psi1- and psi2-derivative contributions into the gradients
    w.r.t. variance, lengthscale, Z, and the variational mean/variance.

    dL_dpsi0 - N
    dL_dpsi1 - NxM
    dL_dpsi2 - MxM
    """
    q_mean = variational_posterior.mean
    q_var = variational_posterior.variance
    ARD = len(lengthscale) != 1

    dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(
        dL_dpsi1, variance, lengthscale, Z, q_mean, q_var)
    dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(
        dL_dpsi2, variance, lengthscale, Z, q_mean, q_var)

    # psi0 = variance for every point, so its variance-gradient is the sum.
    dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2

    dL_dlengscale = dl_psi1 + dl_psi2
    if not ARD:
        # A single shared lengthscale: collapse the per-dimension gradients.
        dL_dlengscale = dL_dlengscale.sum()

    return (dL_dvar,
            dL_dlengscale,
            dZ_psi1 + dZ_psi2,
            dmu_psi1 + dmu_psi2,
            dS_psi1 + dS_psi2)
|
||||||
|
|
||||||
|
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
    """
    Derivatives of the objective through the psi1 statistic.

    dL_dpsi1 - NxM
    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ
    """
    # here are the "statistics" for psi1
    # Produced intermediate results: dL_dparams w.r.t. psi1
    # _dL_dvariance     1
    # _dL_dlengthscale  Q
    # _dL_dZ            MxQ
    # _dL_dgamma        NxQ
    # _dL_dmu           NxQ
    # _dL_dS            NxQ

    lengthscale2 = np.square(lengthscale)

    # psi1 itself (cached via the module-level Cacher wrapper below).
    _psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
    # Chain rule: every gradient below carries dL/dpsi1 * psi1 as a factor.
    Lpsi1 = dL_dpsi1*_psi1
    Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
    denom = 1./(S+lengthscale2)
    Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
    # psi1 is proportional to variance, so dL/dvar = sum(Lpsi1)/variance.
    _dL_dvar = Lpsi1.sum()/variance
    _dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
    _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
    # dZ picks up a sign flip relative to dmu (Zmu = Z - mu).
    _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
    # Gradient w.r.t. the (unsquared) lengthscale: the trailing factor
    # denom*lengthscale converts from the d/d(lengthscale^2) form.
    _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)

    return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||||
|
|
||||||
|
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
    """
    Derivatives of the objective through the psi2 statistic.

    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ
    dL_dpsi2 - MxM
    """
    # here are the "statistics" for psi2
    # Produced the derivatives w.r.t. psi2:
    # _dL_dvariance     1
    # _dL_dlengthscale  Q
    # _dL_dZ            MxQ
    # _dL_dgamma        NxQ
    # _dL_dmu           NxQ
    # _dL_dS            NxQ

    lengthscale2 = np.square(lengthscale)
    denom = 1./(2*S+lengthscale2)
    denom2 = np.square(denom)

    # psi2 per data point (cached via the module-level Cacher wrapper below).
    _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM

    # Chain rule: dL/dpsi2 broadcast over the data axis, times psi2 itself.
    Lpsi2 = dL_dpsi2[None,:,:]*_psi2
    Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
    Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
    Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
    Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
    # Contractions against the pair midpoints Z_hat = (Z_m+Z_o)/2; by the
    # symmetry of Lpsi2 the first equals Lpsi2Z and the second is the
    # average of the two squared contractions.
    Lpsi2Zhat = Lpsi2Z
    Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2

    # psi2 is proportional to variance^2, hence the factor 2/variance.
    _dL_dvar = Lpsi2sum.sum()*2/variance
    _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
    _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
    _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
        2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
    # Gradient w.r.t. the (unsquared) lengthscale; the leading 2*lengthscale
    # converts from the d/d(lengthscale^2) form.
    _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
        (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)

    return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||||
|
|
||||||
|
# Memoised entry points for the reference implementations: Cacher(limit=1)
# keeps only the most recent call's result, so repeated calls with the same
# arguments (as done by the *compDer functions) do not recompute.
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)
|
||||||
|
|
@ -9,6 +9,7 @@ from stationary import Stationary
|
||||||
from GPy.util.caching import Cache_this
|
from GPy.util.caching import Cache_this
|
||||||
from ...core.parameterization import variational
|
from ...core.parameterization import variational
|
||||||
from psi_comp import PSICOMP_RBF
|
from psi_comp import PSICOMP_RBF
|
||||||
|
from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
|
||||||
from ...util.config import *
|
from ...util.config import *
|
||||||
|
|
||||||
class RBF(Stationary):
|
class RBF(Stationary):
|
||||||
|
|
@ -26,6 +27,8 @@ class RBF(Stationary):
|
||||||
self.weave_options = {}
|
self.weave_options = {}
|
||||||
self.group_spike_prob = False
|
self.group_spike_prob = False
|
||||||
self.psicomp = PSICOMP_RBF()
|
self.psicomp = PSICOMP_RBF()
|
||||||
|
if self.useGPU:
|
||||||
|
self.psicomp = PSICOMP_RBF_GPU()
|
||||||
|
|
||||||
def K_of_r(self, r):
|
def K_of_r(self, r):
|
||||||
return self.variance * np.exp(-0.5 * r**2)
|
return self.variance * np.exp(-0.5 * r**2)
|
||||||
|
|
@ -39,25 +42,27 @@ class RBF(Stationary):
|
||||||
|
|
||||||
def psi0(self, Z, variational_posterior):
|
def psi0(self, Z, variational_posterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
||||||
|
|
||||||
def psi1(self, Z, variational_posterior):
|
def psi1(self, Z, variational_posterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
||||||
|
|
||||||
def psi2(self, Z, variational_posterior):
|
def psi2(self, Z, variational_posterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||||
|
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
|
||||||
|
self.variance.gradient = dL_dvar
|
||||||
|
self.lengthscale.gradient = dL_dlengscale
|
||||||
else:
|
else:
|
||||||
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
|
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
|
||||||
self.variance.gradient = dL_dvar
|
self.variance.gradient = dL_dvar
|
||||||
|
|
@ -65,12 +70,12 @@ class RBF(Stationary):
|
||||||
|
|
||||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||||
|
|
||||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ class TruncLinear(Kern):
|
||||||
return XX
|
return XX
|
||||||
|
|
||||||
def Kdiag(self, X):
|
def Kdiag(self, X):
|
||||||
return self.variances*(np.square(X-self.delta)).sum(axis=-1)
|
return (self.variances*np.square(X-self.delta)).sum(axis=-1)
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
dK_dvar = self._product(X, X2)
|
dK_dvar = self._product(X, X2)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue