Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
mzwiessele 2015-04-30 15:28:27 +02:00
commit 139fda270c
33 changed files with 26259 additions and 430 deletions

View file

@ -5,12 +5,8 @@ from .kern import Kern
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assessing whether to use weave
try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
from ...util.config import config # for assessing whether to use cython
import coregionalize_cython
class Coregionalize(Kern):
"""
@ -61,13 +57,8 @@ class Coregionalize(Kern):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None):
if config.getboolean('weave', 'working'):
try:
return self._K_weave(X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
if config.getboolean('cython', 'working'):
return self._K_cython(X, X2)
else:
return self._K_numpy(X, X2)
@ -80,36 +71,10 @@ class Coregionalize(Kern):
index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T]
def _K_weave(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int)
def _K_cython(self, X, X2=None):
if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<N; i++){
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])
def Kdiag(self, X):
@ -122,19 +87,13 @@ class Coregionalize(Kern):
else:
index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'):
try:
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
#attempt to use cython for a nasty double indexing loop: fall back to numpy
if config.getboolean('cython', 'working'):
dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
else:
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -142,19 +101,6 @@ class Coregionalize(Kern):
self.W.gradient = dW
self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
def _gradient_reduce_numpy(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
dL_dK_small = np.zeros_like(self.B)
@ -164,6 +110,11 @@ class Coregionalize(Kern):
dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small
def _gradient_reduce_cython(self, dL_dK, index, index2):
    """Sum dL_dK entries into an (output_dim, output_dim) block using the cython helper.

    index/index2 arrive as (N, 1) integer column arrays (they are sliced
    with [:, 0] below); the flattened 1-D views are what the cython
    routine expects.
    """
    # flatten the (N, 1) index columns to 1-D before handing off to cython
    index, index2 = index[:,0], index2[:,0]
    return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,34 @@
#cython: boundscheck=True
#cython: wraparound=True
import cython
import numpy as np
cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
    """Build the symmetric coregionalization kernel K[n, m] = B[X[n], X[m]].

    B: (output_dim, output_dim) coregionalization matrix.
    X: 1-D array of integer output indices, one per data point.
    Returns an (N, N) float64 array.
    """
    cdef int N = X.size
    # typed loop indices keep the double loop at C speed; untyped, Cython
    # falls back to Python-object arithmetic for n and m
    cdef Py_ssize_t n, m
    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
    for n in range(N):
        for m in range(N):
            K[n, m] = B[X[n], X[m]]
    return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
    """Build the cross kernel K[n, m] = B[X[n], X2[m]].

    B: (output_dim, output_dim) coregionalization matrix.
    X, X2: 1-D arrays of integer output indices for the two input sets.
    Returns an (N, M) float64 array, N = X.size, M = X2.size.
    """
    cdef int N = X.size
    cdef int M = X2.size
    # typed loop indices keep the double loop at C speed
    cdef Py_ssize_t n, m
    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
    for n in range(N):
        for m in range(M):
            K[n, m] = B[X[n], X2[m]]
    return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
    """Accumulate dL_dK into a (D, D) block: out[index2[j], index[i]] += dL_dK[i, j].

    D: output_dim (size of the coregionalization matrix B).
    dL_dK: (N, M) gradient of the objective wrt the full kernel.
    index, index2: 1-D integer output indices for rows and columns of dL_dK.
    """
    cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
    cdef int N = index.size
    cdef int M = index2.size
    # typed loop indices keep the accumulation loop at C speed
    cdef Py_ssize_t i, j
    for i in range(N):
        for j in range(M):
            dL_dK_small[index2[j], index[i]] += dL_dK[i, j]
    return dL_dK_small

View file

@ -9,13 +9,15 @@ from ...util.linalg import tdot
from ... import util
import numpy as np
from scipy import integrate
from ...util.config import config # for assessing whether to use weave
from ...util.config import config # for assessing whether to use cython
from ...util.caching import Cache_this
try:
from scipy import weave
import stationary_cython
except ImportError:
config.set('weave', 'working', 'False')
print('warning: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')
class Stationary(Kern):
"""
@ -153,28 +155,18 @@ class Stationary(Kern):
(dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient
"""
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance
#now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD:
#rinv = self._inv_dist(X, X2) # this is rather high memory? Should we loop instead?
#d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
if config.getboolean('cython', 'working'):
self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -189,43 +181,27 @@ class Stationary(Kern):
dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
def _lengthscale_grads_pure(self, tmp, X, X2):
return -np.array([np.sum(tmp * np.square(X[:,q:q+1] - X2[:,q:q+1].T)) for q in range(self.input_dim)])/self.lengthscale**3
def _lengthscale_grads_cython(self, tmp, X, X2):
N,M = tmp.shape
Q = X.shape[1]
if hasattr(X, 'values'):X = X.values
if hasattr(X2, 'values'):X2 = X2.values
Q = self.input_dim
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
grads = np.zeros(self.input_dim)
code = """
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads(q) = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grads)
return -grads/self.lengthscale**3
def gradients_X(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
"""
if config.getboolean('weave', 'working'):
try:
return self.gradients_X_weave(dL_dK, X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
if config.getboolean('cython', 'working'):
return self._gradients_X_cython(dL_dK, X, X2)
else:
return self.gradients_X_(dL_dK, X, X2)
return self._gradients_X_pure(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None):
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
@ -235,54 +211,25 @@ class Stationary(Kern):
#The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :]
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#grad = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64)
grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
ret /= self.lengthscale**2
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=grad[:,q])
return grad/self.lengthscale**2
return ret
def gradients_X_weave(self, dL_dK, X, X2=None):
def _gradients_X_cython(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
code = """
int n,m,d;
double retnd;
#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
N,M = tmp.shape
from scipy import weave
support_code = """
#include <omp.h>
#include <stdio.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
return ret/self.lengthscale**2
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
grad = np.zeros(X.shape)
stationary_cython.grad_X(X.shape[0], X.shape[1], X2.shape[0], X, X2, tmp, grad)
return grad/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
@ -290,6 +237,9 @@ class Stationary(Kern):
def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
    """Construct an Exponential stationary kernel.

    All arguments are passed straight through to Stationary.__init__;
    this subclass adds no extra parameters of its own.
    """
    super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
#cython: boundscheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as np
ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
def grad_X(int N, int D, int M,
           np.ndarray[DTYPE_t, ndim=2] _X,
           np.ndarray[DTYPE_t, ndim=2] _X2,
           np.ndarray[DTYPE_t, ndim=2] _tmp,
           np.ndarray[DTYPE_t, ndim=2] _grad):
    """Thin wrapper around the C routine _grad_X.

    Hands the raw data pointers of the (contiguous) input arrays to C;
    the result is written into _grad in place, nothing is returned.
    """
    cdef double *x_ptr = <double*> _X.data
    cdef double *x2_ptr = <double*> _X2.data
    cdef double *tmp_ptr = <double*> _tmp.data
    cdef double *grad_ptr = <double*> _grad.data
    _grad_X(N, D, M, x_ptr, x2_ptr, tmp_ptr, grad_ptr)
def lengthscale_grads(int N, int M, int Q,
                      np.ndarray[DTYPE_t, ndim=2] _tmp,
                      np.ndarray[DTYPE_t, ndim=2] _X,
                      np.ndarray[DTYPE_t, ndim=2] _X2,
                      np.ndarray[DTYPE_t, ndim=1] _grad):
    """Thin wrapper around the C routine _lengthscale_grads.

    Hands the raw data pointers of the (contiguous) input arrays to C;
    the per-dimension gradients are written into _grad in place.
    """
    cdef double *tmp_ptr = <double*> _tmp.data
    cdef double *x_ptr = <double*> _X.data
    cdef double *x2_ptr = <double*> _X2.data
    cdef double *grad_ptr = <double*> _grad.data
    _lengthscale_grads(N, M, Q, tmp_ptr, x_ptr, x2_ptr, grad_ptr)

View file

@ -0,0 +1,35 @@
/* Gradient wrt X: grad[n*D+d] = sum_m tmp[n*M+m] * (X[n*D+d] - X2[m*D+d]).
 *
 * X is N-by-D, X2 is M-by-D, tmp is N-by-M, all row-major contiguous.
 * grad (N-by-D) is written in place; nothing is returned.
 */
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
    int n, m, d;
    double acc;
    /* OpenMP parallelisation of the outer loop is intentionally disabled:
     * //#pragma omp parallel for private(n, d, acc, m)
     */
    for(d = 0; d < D; d++){
        for(n = 0; n < N; n++){
            acc = 0.0;
            for(m = 0; m < M; m++){
                acc += tmp[n*M + m] * (X[n*D + d] - X2[m*D + d]);
            }
            grad[n*D + d] = acc;
        }
    }
} /* _grad_X */
/* Lengthscale gradients: grad[q] = sum_{n,m} tmp[n*M+m] * (X[n*Q+q] - X2[m*Q+q])^2.
 *
 * X is N-by-Q, X2 is M-by-Q, tmp is N-by-M, all row-major contiguous.
 * grad (length Q) is written in place; nothing is returned.
 */
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
    int n, m, q;
    double acc, diff;
    /* each dimension q is independent, so the outer loop parallelises cleanly */
    #pragma omp parallel for private(n, m, acc, diff)
    for(q = 0; q < Q; q++){
        acc = 0.0;
        for(n = 0; n < N; n++){
            for(m = 0; m < M; m++){
                diff = X[n*Q + q] - X2[m*Q + q];
                acc += tmp[n*M + m] * diff * diff;
            }
        }
        grad[q] = acc;
    }
} /* _lengthscale_grads */

View file

@ -0,0 +1,3 @@
#include <omp.h>
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);