ENH: various Cython enhancements, mostly releasing the GIL when not needed

2026-05-12 13:32:39 +02:00 · 2015-08-17 14:42:39 +02:00 · 2015-08-17 14:42:39 +02:00 · a379382dd5
commit a379382dd5
parent 10c19d853f
6 changed files with 6795 additions and 4572 deletions
--- a/GPy/kern/_src/coregionalize_cython.c
+++ b/GPy/kern/_src/coregionalize_cython.c
--- a/GPy/kern/_src/coregionalize_cython.pyx
+++ b/GPy/kern/_src/coregionalize_cython.pyx
@@ -1,33 +1,37 @@
-#cython: boundscheck=True
+#cython: boundscheck=False
-#cython: wraparound=True
+#cython: wraparound=False
 #cython: nonecheck=False
 import cython
 import numpy as np
 cimport numpy as np
 def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
    cdef int N = X.size
-    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
+    cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, N))
-    for n in range(N):
+    with nogil:
-        for m in range(N):
+        for n in range(N):
-            K[n,m] = B[X[n],X[m]]
+            for m in range(N):
                K[n, m] = B[X[n], X[m]]
    return K
 def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
    cdef int N = X.size
    cdef int M = X2.size
-    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
+    cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, M))
-    for n in range(N):
+    with nogil:
-        for m in range(M):
+        for n in range(N):
-            K[n,m] = B[X[n],X2[m]]
+            for m in range(M):
                K[n, m] = B[X[n], X2[m]]
    return K
 def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
-        cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
+        cdef np.ndarray[np.double_t, ndim=2, mode='c'] dL_dK_small = np.zeros((D, D))
        cdef int N = index.size
        cdef int M = index2.size
-        for i in range(N):
+        with nogil:
-            for j in range(M):
+            for i in range(N):
-                dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
+                for j in range(M):
                    dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
        return dL_dK_small
--- a/GPy/kern/_src/stationary_cython.c
+++ b/GPy/kern/_src/stationary_cython.c
--- a/GPy/kern/_src/stationary_cython.pyx
+++ b/GPy/kern/_src/stationary_cython.pyx
@@ -4,14 +4,15 @@
 import numpy as np
 cimport numpy as np
 from cython.parallel import prange
 cimport cython
 ctypedef np.float64_t DTYPE_t
 cdef extern from "stationary_utils.h":
-    void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
+    void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) nogil
 cdef extern from "stationary_utils.h":
-    void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
+    void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) nogil
 def grad_X(int N, int D, int M,
        np.ndarray[DTYPE_t, ndim=2] _X,
@@ -22,18 +23,18 @@ def grad_X(int N, int D, int M,
    cdef double *X2 = <double*> _X2.data
    cdef double *tmp = <double*> _tmp.data
    cdef double *grad = <double*> _grad.data
-    _grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
+    with nogil:
        _grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
 @cython.cdivision(True)
 def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
    cdef int n,d,nd,m
-    for nd in prange(N*D, nogil=True):
+    for nd in prange(N * D, nogil=True):
-        n = nd/D
+        n = nd / D
-        d = nd%D
+        d = nd % D
        grad[n,d] = 0.0
        for m in range(M):
-            grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
+            grad[n,d] += tmp[n, m] * (X[n, d] - X2[m, d])
 def lengthscale_grads_in_c(int N, int M, int Q,
        np.ndarray[DTYPE_t, ndim=2] _tmp,
@@ -44,16 +45,16 @@ def lengthscale_grads_in_c(int N, int M, int Q,
    cdef double *X = <double*> _X.data
    cdef double *X2 = <double*> _X2.data
    cdef double *grad = <double*> _grad.data
-    _lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
+    with nogil:
        _lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
 def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
    cdef int q, n, m
    cdef double gradq, dist
-    for q in range(Q):
+    with nogil:
-        grad[q] = 0.0
+        for q in range(Q):
-        for n in range(N):
+            grad[q] = 0.0
-            for m in range(M):
+            for n in range(N):
-                dist = X[n,q] - X2[m,q]
+                for m in range(M):
-                grad[q] += tmp[n,m]*dist*dist
+                    dist = X[n,q] - X2[m,q]
-
+                    grad[q] += tmp[n, m] * dist * dist
--- a/GPy/util/linalg_cython.c
+++ b/GPy/util/linalg_cython.c
--- a/GPy/util/linalg_cython.pyx
+++ b/GPy/util/linalg_cython.pyx
@@ -1,3 +1,4 @@
 from libc.math cimport sqrt
 cimport numpy as np
 from cpython cimport bool
 import cython
@@ -19,16 +20,18 @@ def symmetrify(np.ndarray[double, ndim=2] A, bool upper):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.nonecheck(False)
 @cython.cdivision(True)
 def cholupdate(np.ndarray[double, ndim=1] x, np.ndarray[double, ndim=2] L, int N):
-    cdef double r
+    cdef double r, c, s
-    cdef double c
+    cdef int j, i
-    cdef double s
+
-    for j in xrange(N):
+    with nogil:
-        r = np.sqrt(L[j,j]*L[j,j] + x[j]*x[j])
+        for j in xrange(N):
-        c = r / L[j,j]
+            r = sqrt(L[j, j] * L[j, j] + x[j] * x[j])
-        s = x[j] / L[j,j]
+            c = r / L[j, j]
-        L[j,j] = r
+            s = x[j] / L[j, j]
-        for i in xrange(j):
+            L[j, j] = r
-            L[i,j] = (L[i,j] + s*x[i])/c
+            for i in xrange(j):
-            x[i] = c*x[i] - s*L[i,j];
+                L[i, j] = (L[i, j] + s * x[i]) / c
-        r = np.sqrt(L[j,j])
+                x[i] = c * x[i] - s * L[i, j]
            r = sqrt(L[j, j])