ENH: implementing the Cholesky backpropagation through Scipy's BLAS

This commit is contained in:
David Menéndez Hurtado 2015-08-17 12:21:54 +02:00
parent 10c19d853f
commit da7e4e3af6
5 changed files with 5156 additions and 3467 deletions

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,7 @@
import numpy as np import numpy as np
from cython.parallel import prange, parallel from cython.parallel import prange, parallel
cimport numpy as np cimport numpy as np
cimport scipy.linalg.cython_blas as cblas
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M): def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
"""take a matrix N x D and return a D X M x M array where """take a matrix N x D and return a D X M x M array where
@ -20,12 +21,13 @@ def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
cdef int count = 0 cdef int count = 0
cdef np.ndarray[double, ndim=3] ret = np.zeros((D, M, M)) cdef np.ndarray[double, ndim=3] ret = np.zeros((D, M, M))
cdef int d, m, mm cdef int d, m, mm
for d in range(D): with nogil:
count = 0 for d in range(D):
for m in range(M): count = 0
for mm in range(m+1): for m in range(M):
ret[d, m, mm] = flat[count,d] for mm in range(m+1):
count += 1 ret[d, m, mm] = flat[count,d]
count += 1
return ret return ret
def triang_to_flat(np.ndarray[double, ndim=3] L): def triang_to_flat(np.ndarray[double, ndim=3] L):
@ -33,75 +35,82 @@ def triang_to_flat(np.ndarray[double, ndim=3] L):
cdef int M = L.shape[1] cdef int M = L.shape[1]
cdef int N = M*(M+1)/2 cdef int N = M*(M+1)/2
cdef int count = 0 cdef int count = 0
cdef np.ndarray[double, ndim=2] flat = np.empty((N, D)) cdef np.ndarray[double, ndim=2, mode='c'] flat = np.empty((N, D))
cdef int d, m, mm cdef int d, m, mm
for d in range(D): with nogil:
count = 0 for d in range(D):
for m in range(M): count = 0
for mm in range(m+1): for m in range(M):
flat[count,d] = L[d, m, mm] for mm in range(m+1):
count += 1 flat[count,d] = L[d, m, mm]
count += 1
return flat return flat
def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L): def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL).copy() cdef np.ndarray[double, ndim=2, mode='c'] dL_dK = np.tril(dL).copy()
cdef int N = L.shape[0] cdef int N = L.shape[0]
cdef int k, j, i cdef int k, j, i
for k in range(N - 1, -1, -1): with nogil:
for j in range(k + 1, N): for k in range(N - 1, -1, -1):
for i in range(j, N): for j in range(k + 1, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k] for i in range(j, N):
dL_dK[j, k] -= dL_dK[i, j] * L[i, k] dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
for j in range(k + 1, N): dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
dL_dK[j, k] /= L[k, k] for j in range(k + 1, N):
dL_dK[k, k] -= L[j, k] * dL_dK[j, k] dL_dK[j, k] /= L[k, k]
dL_dK[k, k] /= (2. * L[k, k]) dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK return dL_dK
def backprop_gradient_par(double[:,:] dL, double[:,:] L): def backprop_gradient_par(double[:,:] dL, double[:,:] L):
cdef double[:,:] dL_dK = np.tril(dL).copy() cdef double[:,::1] dL_dK = np.tril(dL).copy()
cdef int N = L.shape[0] cdef int N = L.shape[0]
cdef int k, j, i cdef int k, j, i
for k in range(N - 1, -1, -1): with nogil:
with nogil, parallel(): for k in range(N - 1, -1, -1):
for i in prange(k + 1, N): with parallel():
for j in range(k+1, i+1): for i in prange(k + 1, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k] for j in range(k+1, i+1):
for j in range(i, N): dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[i, k] -= dL_dK[j, i] * L[j, k] for j in range(i, N):
for j in range(k + 1, N): dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
dL_dK[j, k] /= L[k, k] for j in range(k + 1, N):
dL_dK[k, k] -= L[j, k] * dL_dK[j, k] dL_dK[j, k] /= L[k, k]
dL_dK[k, k] /= (2. * L[k, k]) dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK return dL_dK
#here's a pure C version... cdef void chol_backprop(int N, double[:] dL, double[:] L) nogil:
cdef extern from "cholesky_backprop.h" nogil: cdef int i, k, n
void chol_backprop(int N, double* dL, double* L)
# DSYMV required constant arguments
cdef double alpha=-1, beta=1
cdef int incx=1
# DSCAL required arguments
cdef double scale
dL[N*N - 1] /= (2. * L[N*N - 1])
for k in range(N-2, -1, -1):
n = N-k-1
cblas.dsymv(uplo='l', n=&n, alpha=&alpha, a=&dL[(N*(k+1) + k+1)], lda=&N, x=&L[k*N+k+1], incx=&incx,
beta=&beta, y=&dL[N*(k+1)+k], incy=&N)
for i in xrange(0, N - k - 1):
dL[N * (k + 1 + i) + k] -= dL[N * (k + 1) + k + i * (N + 1) + 1] * L[k * N + k + 1 + i]
scale = 1.0/L[k*N+k]
cblas.dscal(&n, &scale , &dL[(k+1)*N+k], &N)
dL[k*N + k] -= cblas.ddot(&n, &dL[(k+1)*N+k], &N, &L[k*N+k+1], &incx)
dL[k*N + k] /= (2.0 * L[k*N + k])
def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L): def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig cdef np.ndarray[double, ndim=2, mode='c'] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef int N = L.shape[0] cdef int N = L.shape[0]
with nogil: with nogil:
chol_backprop(N, <double*> dL_dK.data, <double*> L.data) chol_backprop(N, dL_dK, L)
return dL_dK return dL_dK
cdef extern from "cholesky_backprop.h" nogil:
void old_chol_backprop(int N, double* dL, double* L)
def backprop_gradient_par_c_old(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef int N = L.shape[0]
with nogil:
old_chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
return dL_dK

View file

@ -1,51 +0,0 @@
#include <cblas.h>
void chol_backprop(int N, double* dL, double* L){
//at the input to this fn, dL is df_dL. after this fn is complet, dL is df_dK
int i,k;
dL[N*N - 1] /= (2. * L[N*N - 1]);
for(k=N-2;k>(-1);k--){
cblas_dsymv(CblasRowMajor, CblasLower,
N-k-1, -1,
&dL[(N*(k+1) + k+1)],N,
&L[k*N+k+1],1,
1, &dL[N*(k+1)+k], N);
for(i=0;i<(N-k-1); i++){
dL[N*(k+1+i)+k] -= dL[N*(k+1)+k+i*(N+1)+1] * L[k*N+k+1+i];
}
cblas_dscal(N-k-1, 1.0/L[k*N+k], &dL[(k+1)*N+k], N);
dL[k*N + k] -= cblas_ddot(N-k-1, &dL[(k+1)*N+k], N, &L[k*N+k+1], 1);
dL[k*N + k] /= (2.0 * L[k*N + k]);
}
}
double mydot(int n, double* a, int stride_a, double* b, int stride_b){
double ret = 0;
for(int i=0; i<n; i++){
ret += a[i*stride_a]*b[i*stride_b];
}
return ret;
}
void old_chol_backprop(int N, double* dL, double* U){
//at the input to this fn, dL is df_dL. after this fn is complet, dL is df_dK
int iN, kN,i,j,k;
dL[N*N-1] /= (2. * U[N*N-1]);
for(k=N-2;k>(-1);k--){
kN = k*N;
#pragma omp parallel for private(i,iN)
for(i=k+1; i<N; i++){
iN = i*N;
dL[iN+k] -= mydot(i-k, &dL[iN+k+1], 1, &U[kN+k+1], 1);
dL[iN+k] -= mydot(N-i, &dL[iN+i], N, &U[kN+i], 1);
}
for(i=(k + 1); i<N; i++){
iN = i*N;
dL[iN + k] /= U[kN + k];
dL[kN + k] -= U[kN + i] * dL[iN + k];
}
dL[kN + k] /= (2. * U[kN + k]);
}
}

View file

@ -1,5 +0,0 @@
#include <cblas.h>
void dsymv(int N, double*A, double*b, double*y);
double mydot(int n, double* a, int stride_a, double* b, int stride_b);
void chol_backprop(int N, double* dL, double* L);

View file

@ -21,9 +21,9 @@ ext_mods = [Extension(name='GPy.kern._src.stationary_cython',
extra_compile_args=compile_flags, extra_compile_args=compile_flags,
extra_link_args = ['-lgomp']), extra_link_args = ['-lgomp']),
Extension(name='GPy.util.choleskies_cython', Extension(name='GPy.util.choleskies_cython',
sources=['GPy/util/choleskies_cython.c', 'GPy/util/cholesky_backprop.c'], sources=['GPy/util/choleskies_cython.c'],
include_dirs=[np.get_include()]+(['/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Versions/Current/Headers'] if sys.platform=='darwin' else []), include_dirs=[np.get_include()]+(['/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Versions/Current/Headers'] if sys.platform=='darwin' else []),
extra_link_args = ['-lgomp', '-lblas'], extra_link_args = ['-lgomp'],
extra_compile_args=compile_flags+['-std=c99']), extra_compile_args=compile_flags+['-std=c99']),
Extension(name='GPy.util.linalg_cython', Extension(name='GPy.util.linalg_cython',
sources=['GPy/util/linalg_cython.c'], sources=['GPy/util/linalg_cython.c'],
@ -64,7 +64,7 @@ setup(name = 'GPy',
py_modules = ['GPy.__init__'], py_modules = ['GPy.__init__'],
test_suite = 'GPy.testing', test_suite = 'GPy.testing',
long_description=read('README.md'), long_description=read('README.md'),
install_requires=['numpy>=1.7', 'scipy>=0.12'], install_requires=['numpy>=1.7', 'scipy>=0.16', 'cython>=0.22'],
extras_require = {'docs':['matplotlib >=1.3','Sphinx','IPython']}, extras_require = {'docs':['matplotlib >=1.3','Sphinx','IPython']},
classifiers=['License :: OSI Approved :: BSD License', classifiers=['License :: OSI Approved :: BSD License',
'Natural Language :: English', 'Natural Language :: English',