ENH: various Cython enhancements, mostly releasing the GIL when not needed

2026-05-15 06:52:39 +02:00 · 2015-08-17 14:42:39 +02:00 · 2015-08-17 14:42:39 +02:00 · a379382dd5
commit a379382dd5
parent 10c19d853f
6 changed files with 6795 additions and 4572 deletions
--- a/GPy/kern/_src/coregionalize_cython.c
+++ b/GPy/kern/_src/coregionalize_cython.c
--- a/GPy/kern/_src/coregionalize_cython.pyx
+++ b/GPy/kern/_src/coregionalize_cython.pyx
@ -1,12 +1,14 @@
-#cython: boundscheck=True
-#cython: wraparound=True
+#cython: boundscheck=False
+#cython: wraparound=False
+#cython: nonecheck=False
 import cython
 import numpy as np
 cimport numpy as np

 def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
    cdef int N = X.size
-    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
+    cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, N))
+    with nogil:
        for n in range(N):
            for m in range(N):
                K[n, m] = B[X[n], X[m]]
@ -15,16 +17,18 @@ def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
 def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
    cdef int N = X.size
    cdef int M = X2.size
-    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
+    cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, M))
+    with nogil:
        for n in range(N):
            for m in range(M):
                K[n, m] = B[X[n], X2[m]]
    return K

 def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
-        cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
+        cdef np.ndarray[np.double_t, ndim=2, mode='c'] dL_dK_small = np.zeros((D, D))
        cdef int N = index.size
        cdef int M = index2.size
+        with nogil:
            for i in range(N):
                for j in range(M):
                    dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
--- a/GPy/kern/_src/stationary_cython.c
+++ b/GPy/kern/_src/stationary_cython.c
--- a/GPy/kern/_src/stationary_cython.pyx
+++ b/GPy/kern/_src/stationary_cython.pyx
@ -4,14 +4,15 @@
 import numpy as np
 cimport numpy as np
 from cython.parallel import prange
+cimport cython

 ctypedef np.float64_t DTYPE_t
 
 cdef extern from "stationary_utils.h":
-    void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
+    void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) nogil

 cdef extern from "stationary_utils.h":
-    void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
+    void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) nogil
 
 def grad_X(int N, int D, int M,
        np.ndarray[DTYPE_t, ndim=2] _X,
@ -22,8 +23,10 @@ def grad_X(int N, int D, int M,
    cdef double *X2 = <double*> _X2.data
    cdef double *tmp = <double*> _tmp.data
    cdef double *grad = <double*> _grad.data
+    with nogil:
        _grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.

+@cython.cdivision(True)
 def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
    cdef int n,d,nd,m
    for nd in prange(N * D, nogil=True):
@ -33,8 +36,6 @@ def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:
        for m in range(M):
            grad[n,d] += tmp[n, m] * (X[n, d] - X2[m, d])

-
-
 def lengthscale_grads_in_c(int N, int M, int Q,
        np.ndarray[DTYPE_t, ndim=2] _tmp,
        np.ndarray[DTYPE_t, ndim=2] _X,
@ -44,16 +45,16 @@ def lengthscale_grads_in_c(int N, int M, int Q,
    cdef double *X = <double*> _X.data
    cdef double *X2 = <double*> _X2.data
    cdef double *grad = <double*> _grad.data
+    with nogil:
        _lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.

 def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
    cdef int q, n, m
    cdef double gradq, dist
+    with nogil:
        for q in range(Q):
            grad[q] = 0.0
            for n in range(N):
                for m in range(M):
                    dist = X[n,q] - X2[m,q]
                    grad[q] += tmp[n, m] * dist * dist
-
-
--- a/GPy/util/linalg_cython.c
+++ b/GPy/util/linalg_cython.c
--- a/GPy/util/linalg_cython.pyx
+++ b/GPy/util/linalg_cython.pyx
@ -1,3 +1,4 @@
+from libc.math cimport sqrt
 cimport numpy as np
 from cpython cimport bool
 import cython
@ -19,16 +20,18 @@ def symmetrify(np.ndarray[double, ndim=2] A, bool upper):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
+@cython.cdivision(True)
 def cholupdate(np.ndarray[double, ndim=1] x, np.ndarray[double, ndim=2] L, int N):
-    cdef double r
-    cdef double c
-    cdef double s
+    cdef double r, c, s
+    cdef int j, i
+
+    with nogil:
        for j in xrange(N):
-        r = np.sqrt(L[j,j]*L[j,j] + x[j]*x[j])
+            r = sqrt(L[j, j] * L[j, j] + x[j] * x[j])
            c = r / L[j, j]
            s = x[j] / L[j, j]
            L[j, j] = r
            for i in xrange(j):
                L[i, j] = (L[i, j] + s * x[i]) / c
-            x[i] = c*x[i] - s*L[i,j];
-        r = np.sqrt(L[j,j])
+                x[i] = c * x[i] - s * L[i, j]
+            r = sqrt(L[j, j])