parallelizing backprop of cholesky

This commit is contained in:
James Hensman 2015-06-08 15:25:16 +01:00
parent 59771c8956
commit 04c14a9b4c
5 changed files with 14239 additions and 123 deletions

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@
# Copyright James Hensman and Alan Saul 2015
import numpy as np
from cython.parallel import prange, parallel
cimport numpy as np
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
@ -57,3 +58,30 @@ def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK
def backprop_gradient_par(double[:,:] dL, double[:,:] L):
    """
    Back-propagate a gradient through a Cholesky factor, with the inner
    row loop parallelised via ``prange``.

    :param dL: square matrix holding the gradient with respect to the
        Cholesky factor; only its lower triangle is used (``np.tril``).
    :param L: lower-triangular Cholesky factor, same shape as ``dL``.
        Only entries on or below the diagonal are read.
    :returns: a new ``numpy.ndarray`` whose lower triangle holds the
        back-propagated gradient (named ``dL_dK`` here); the strict upper
        triangle is zero. Neither input is modified.
    """
    # np.tril already returns a freshly allocated array, so no additional
    # .copy() is required before updating it in place.
    cdef double[:,:] dL_dK = np.tril(dL)
    cdef int N = L.shape[0]
    cdef int k, j, i
    # Columns are processed right-to-left: column k depends on the already
    # finished columns k+1 .. N-1.
    for k in range(N - 1, -1, -1):
        with nogil, parallel():
            # Iteration i writes only dL_dK[i, k] and reads only entries in
            # columns > k, so the rows of column k can be updated
            # independently of one another.
            for i in prange(k + 1, N):
                for j in range(k+1, i+1):
                    dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
                for j in range(i, N):
                    dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
        # Sequential on purpose: every step accumulates into dL_dK[k, k].
        for j in range(k + 1, N):
            dL_dK[j, k] /= L[k, k]
            dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
        dL_dK[k, k] /= (2. * L[k, k])
    # Hand back a NumPy array rather than a raw Cython memoryview so the
    # result has the same type as the other back-propagation routines.
    return np.asarray(dL_dK)
cdef extern from "cholesky_backprop.h":
    # In-place C kernel: dL holds df/dL on entry and df/dK on return.
    # Both buffers are indexed as row-major N x N arrays (element [i, k]
    # at offset i*N + k).
    void chol_backprop(int N, double* dL, double* L)


def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
    """
    Back-propagate a gradient through a Cholesky factor using the C kernel
    ``chol_backprop``.

    :param dL: square matrix holding the gradient with respect to the
        Cholesky factor; only its lower triangle is used (``np.tril``).
    :param L: lower-triangular Cholesky factor, same shape as ``dL``.
    :returns: a new C-contiguous array holding the back-propagated
        gradient. Neither input is modified.
    :raises ValueError: if ``L`` is not square or ``dL`` has a different
        shape, since the kernel would otherwise index outside the buffers.
    """
    cdef int N = L.shape[0]
    cdef np.ndarray[double, ndim=2, mode="c"] dL_dK
    cdef np.ndarray[double, ndim=2, mode="c"] L_c
    if L.shape[1] != N or dL.shape[0] != N or dL.shape[1] != N:
        raise ValueError("dL and L must be square matrices of the same shape")
    # np.tril returns a new array; make the C ordering explicit instead of
    # relying on it, because the kernel indexes the raw buffer row-major.
    dL_dK = np.ascontiguousarray(np.tril(dL))
    # L may arrive Fortran-ordered or as a strided view; passing its raw
    # data pointer would then silently read the wrong elements.
    # ascontiguousarray is a no-op when L is already C-contiguous.
    L_c = np.ascontiguousarray(L)
    chol_backprop(N, <double*> dL_dK.data, <double*> L_c.data)
    return dL_dK

View file

@ -0,0 +1,22 @@
/*
 * Back-propagate a gradient through a Cholesky factor, in place.
 *
 *   N   dimension of the matrices; N <= 0 leaves both buffers untouched.
 *   dL  row-major N x N buffer. On entry it holds df/dL, when this
 *       function completes it holds df/dK. Only entries on or below the
 *       diagonal are read or written.
 *   L   row-major N x N buffer holding the Cholesky factor. Only entries
 *       on or below the diagonal are read; it is never written.
 */
void chol_backprop(int N, double* dL, double* L){
  /* Flat offsets are computed in 64 bits: i*N + k overflows a 32-bit int
     (undefined behaviour) as soon as N*N exceeds INT_MAX. */
  const long long n = N;
  int i,j,k;
  /* Columns are processed right-to-left; column k depends on the already
     finished columns k+1 .. N-1. */
  for(k=N-1;k>(-1);k--){
    /* Iteration i writes only dL[i*n + k] and reads only entries in
       columns > k, so the rows of column k are independent. */
    #pragma omp parallel for private(i,j)
    for(i=k+1;i<N; i++){
      for(j=k+1;j<(i+1);j++){
        dL[i*n + k] -= dL[i*n + j] * L[j*n + k];
      }
      for(j=i;j<N;j++){
        dL[i*n + k] -= dL[j*n + i] * L[j*n + k];
      }
    }
    /* Sequential on purpose: every step accumulates into dL[k*n + k]. */
    for(i=k + 1; i<N; i++){
      dL[i*n + k] /= L[k*n + k];
      dL[k*n + k] -= L[i*n + k] * dL[i*n + k];
    }
    dL[k*n + k] /= (2. * L[k*n + k]);
  }
}

View file

@ -0,0 +1 @@
/* Back-propagate a gradient through a Cholesky factor, in place.
 * dL and L are row-major N x N buffers; dL holds df/dL on entry and
 * df/dK on return, L is only read. */
void chol_backprop(int N, double* dL, double* L);