Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
mzwiessele 2015-04-30 15:28:27 +02:00
commit 139fda270c
33 changed files with 26259 additions and 430 deletions

View file

@ -18,7 +18,8 @@ before_install:
install: install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
- pip install . #- pip install .
- python setup.py build_ext --inplace
#--use-mirrors #--use-mirrors
# #
# command to run tests, e.g. python setup.py test # command to run tests, e.g. python setup.py test

View file

@ -485,3 +485,38 @@ class GP(Model):
""" """
from ..inference.latent_function_inference.inferenceX import infer_newX from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize) return infer_newX(self, Y_new, optimize=optimize)
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param Y_metadata: metadata associated with the test points
"""
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
"""
Calculation of the log predictive density by sampling
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param Y_metadata: metadata associated with the test points
:param num_samples: number of samples to use in monte carlo integration
:type num_samples: int
"""
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)

View file

@ -5,6 +5,7 @@ import numpy
from numpy.lib.function_base import vectorize from numpy.lib.function_base import vectorize
from .lists_and_dicts import IntArrayDict from .lists_and_dicts import IntArrayDict
from functools import reduce from functools import reduce
from transformations import Transformation
def extract_properties_to_index(index, props): def extract_properties_to_index(index, props):
prop_index = dict() prop_index = dict()

View file

@ -6,10 +6,10 @@ import numpy; np = numpy
import itertools import itertools
from re import compile, _pattern_type from re import compile, _pattern_type
from .param import ParamConcatenation from .param import ParamConcatenation
from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging import logging
from GPy.core.parameterization.index_operations import ParameterIndexOperationsView from index_operations import ParameterIndexOperationsView
logger = logging.getLogger("parameters changed meta") logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type): class ParametersChangedMeta(type):

View file

@ -730,7 +730,7 @@ class DGPLVM(Prior):
# ****************************************** # ******************************************
from parameterized import Parameterized from .. import Parameterized
from .. import Param from .. import Param
class DGPLVM_Lamda(Prior, Parameterized): class DGPLVM_Lamda(Prior, Parameterized):
""" """

View file

@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf
class SVGP(SparseGP): class SVGP(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None): def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None, num_latent_functions=None):
""" """
Stochastic Variational GP. Stochastic Variational GP.
@ -41,8 +41,12 @@ class SVGP(SparseGP):
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method, SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
name=name, Y_metadata=Y_metadata, normalizer=False) name=name, Y_metadata=Y_metadata, normalizer=False)
self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1]))) #assume the number of latent functions is one per col of Y unless specified
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,Y.shape[1]))) if num_latent_functions is None:
num_latent_functions = Y.shape[1]
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
self.chol = Param('q_u_chol', chol) self.chol = Param('q_u_chol', chol)
self.link_parameter(self.chol) self.link_parameter(self.chol)
self.link_parameter(self.m) self.link_parameter(self.m)

View file

@ -25,3 +25,6 @@ MKL = False
[weave] [weave]
#if true, try to use weave, and fall back to numpy. if false, just use numpy. #if true, try to use weave, and fall back to numpy. if false, just use numpy.
working = True working = True
[cython]
working = True

View file

@ -8,12 +8,16 @@ class SVGP(LatentFunctionInference):
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0): def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
num_inducing = Z.shape[0] num_data, _ = Y.shape
num_data, num_outputs = Y.shape num_inducing, num_outputs = q_u_mean.shape
#expand cholesky representation #expand cholesky representation
L = choleskies.flat_to_triang(q_u_chol) L = choleskies.flat_to_triang(q_u_chol)
S = np.einsum('ijk,ljk->ilk', L, L) #L.dot(L.T)
S = np.empty((num_outputs, num_inducing, num_inducing))
[np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
S = S.swapaxes(0,2)
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1) #Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
Si = choleskies.multiple_dpotri(L) Si = choleskies.multiple_dpotri(L)
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])]) logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
@ -41,11 +45,12 @@ class SVGP(LatentFunctionInference):
#compute the marginal means and variances of q(f) #compute the marginal means and variances of q(f)
A = np.dot(Knm, Kmmi) A = np.dot(Knm, Kmmi)
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u) mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1) #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
#compute the KL term #compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean) Kmmim = np.dot(Kmmi, q_u_mean)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0) KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum() KL = KLs.sum()
#gradient of the KL term (assuming zero mean function) #gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy() dKL_dm = Kmmim.copy()
@ -78,11 +83,14 @@ class SVGP(LatentFunctionInference):
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
Admu = A.T.dot(dF_dmu) Admu = A.T.dot(dF_dmu)
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)]) AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi) #tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug? dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None]) #tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T) tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
#dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
dF_dm = Admu dF_dm = Admu
dF_dS = AdvA dF_dS = AdvA

View file

@ -5,12 +5,8 @@ from .kern import Kern
import numpy as np import numpy as np
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use weave from ...util.config import config # for assesing whether to use cython
import coregionalize_cython
try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
class Coregionalize(Kern): class Coregionalize(Kern):
""" """
@ -61,13 +57,8 @@ class Coregionalize(Kern):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None): def K(self, X, X2=None):
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: return self._K_cython(X, X2)
return self._K_weave(X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
else: else:
return self._K_numpy(X, X2) return self._K_numpy(X, X2)
@ -80,36 +71,10 @@ class Coregionalize(Kern):
index2 = np.asarray(X2, dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T] return self.B[index,index2.T]
def _K_weave(self, X, X2=None): def _K_cython(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int)
if X2 is None: if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64) return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
code=""" return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])
for(int i=0;i<N; i++){
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
def Kdiag(self, X): def Kdiag(self, X):
@ -122,19 +87,13 @@ class Coregionalize(Kern):
else: else:
index2 = np.asarray(X2, dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy #attempt to use cython for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
else: else:
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2) dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small) dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0) dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -142,19 +101,6 @@ class Coregionalize(Kern):
self.W.gradient = dW self.W.gradient = dW
self.kappa.gradient = dkappa self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
def _gradient_reduce_numpy(self, dL_dK, index, index2): def _gradient_reduce_numpy(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0] index, index2 = index[:,0], index2[:,0]
dL_dK_small = np.zeros_like(self.B) dL_dK_small = np.zeros_like(self.B)
@ -164,6 +110,11 @@ class Coregionalize(Kern):
dL_dK_small[j,i] = tmp1[:,index2==j].sum() dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small return dL_dK_small
def _gradient_reduce_cython(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten() index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)]) dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,34 @@
#cython: boundscheck=True
#cython: wraparound=True
import cython
import numpy as np
cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
cdef int N = X.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
for n in range(N):
for m in range(N):
K[n,m] = B[X[n],X[m]]
return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
cdef int N = X.size
cdef int M = X2.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
for n in range(N):
for m in range(M):
K[n,m] = B[X[n],X2[m]]
return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
cdef int N = index.size
cdef int M = index2.size
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
return dL_dK_small

View file

@ -9,13 +9,15 @@ from ...util.linalg import tdot
from ... import util from ... import util
import numpy as np import numpy as np
from scipy import integrate from scipy import integrate
from ...util.config import config # for assesing whether to use weave from ...util.config import config # for assesing whether to use cython
from ...util.caching import Cache_this from ...util.caching import Cache_this
try: try:
from scipy import weave import stationary_cython
except ImportError: except ImportError:
config.set('weave', 'working', 'False') print('warning: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')
class Stationary(Kern): class Stationary(Kern):
""" """
@ -153,28 +155,18 @@ class Stationary(Kern):
(dL_dK), compute the gradient wrt the parameters of this kernel, (dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient and store in the parameters object as e.g. self.variance.gradient
""" """
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance) self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance
#now the lengthscale gradient(s) #now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD: if self.ARD:
#rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
#d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2) tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X if X2 is None: X2 = X
if config.getboolean('cython', 'working'):
self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
else: else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)]) self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
else: else:
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -189,43 +181,27 @@ class Stationary(Kern):
dist = self._scaled_dist(X, X2).copy() dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf) return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2): def _lengthscale_grads_pure(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales""" return -np.array([np.sum(tmp * np.square(X[:,q:q+1] - X2[:,q:q+1].T)) for q in range(self.input_dim)])/self.lengthscale**3
def _lengthscale_grads_cython(self, tmp, X, X2):
N,M = tmp.shape N,M = tmp.shape
Q = X.shape[1] Q = self.input_dim
if hasattr(X, 'values'):X = X.values X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
if hasattr(X2, 'values'):X2 = X2.values
grads = np.zeros(self.input_dim) grads = np.zeros(self.input_dim)
code = """ stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grads)
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads(q) = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
return -grads/self.lengthscale**3 return -grads/self.lengthscale**3
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
""" """
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
""" """
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: return self._gradients_X_cython(dL_dK, X, X2)
return self.gradients_X_weave(dL_dK, X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
else: else:
return self.gradients_X_(dL_dK, X, X2) return self._gradients_X_pure(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None): def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr tmp = invdist*dL_dr
@ -235,54 +211,25 @@ class Stationary(Kern):
#The high-memory numpy way: #The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :] #d = X[:, None, :] - X2[None, :, :]
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2 #grad = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#the lower memory way with a loop #the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64) grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim): for q in range(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q]) np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=grad[:,q])
ret /= self.lengthscale**2 return grad/self.lengthscale**2
return ret def _gradients_X_cython(self, dL_dK, X, X2=None):
def gradients_X_weave(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr tmp = invdist*dL_dr
if X2 is None: if X2 is None:
tmp = tmp + tmp.T tmp = tmp + tmp.T
X2 = X X2 = X
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
code = """ grad = np.zeros(X.shape)
int n,m,d; stationary_cython.grad_X(X.shape[0], X.shape[1], X2.shape[0], X, X2, tmp, grad)
double retnd; return grad/self.lengthscale**2
#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
N,M = tmp.shape
from scipy import weave
support_code = """
#include <omp.h>
#include <stdio.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
return ret/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
@ -290,6 +237,9 @@ class Stationary(Kern):
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2 return self.variance*np.ones(self.input_dim)/self.lengthscale**2
class Exponential(Stationary): class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
#cython: boundscheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as np
ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
def grad_X(int N, int D, int M,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _grad):
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *tmp = <double*> _tmp.data
cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
np.ndarray[DTYPE_t, ndim=1] _grad):
cdef double *tmp = <double*> _tmp.data
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.

View file

@ -0,0 +1,35 @@
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
int n,m,d;
double retnd;
//#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
}
grad[n*D+d] = retnd;
}
}
} //grad_X
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,q;
double gradq, dist;
#pragma omp parallel for private(n,m, gradq, dist)
for(q=0; q<Q; q++){
gradq = 0;
for(n=0; n<N; n++){
for(m=0; m<M; m++){
dist = X[n*Q+q]-X2[m*Q+q];
gradq += tmp[n*M+m]*dist*dist;
}
}
grad[q] = gradq;
}
} //lengthscale_grads

View file

@ -0,0 +1,3 @@
#include <omp.h>
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);

View file

@ -41,6 +41,14 @@ class Likelihood(Parameterized):
self.log_concave = False self.log_concave = False
self.not_block_really = False self.not_block_really = False
def request_num_latent_functions(self, Y):
"""
The likelihood should infer how many latent functions are needed for the likelihood
Default is the number of outputs
"""
return Y.shape[1]
def _gradients(self,partial): def _gradients(self,partial):
return np.zeros(0) return np.zeros(0)
@ -118,15 +126,19 @@ class Likelihood(Parameterized):
"""Generate a function which can be integrated """Generate a function which can be integrated
to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*""" to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(fi_star): def f(fi_star):
#exponent = np.exp(-(1./(2*v))*np.square(m-f_star)) #exponent = np.exp(-(1./(2*vi))*np.square(mi-fi_star))
#from GPy.util.misc import safe_exp #from GPy.util.misc import safe_exp
#exponent = safe_exp(exponent) #exponent = safe_exp(exponent)
#return self.pdf(f_star, y, y_m)*exponent #res = safe_exp(self.logpdf(fi_star, yi, yi_m))*exponent
#More stable in the log space #More stable in the log space
return np.exp(self.logpdf(fi_star, yi, yi_m) res = np.exp(self.logpdf(fi_star, yi, yi_m)
- 0.5*np.log(2*np.pi*vi) - 0.5*np.log(2*np.pi*vi)
- 0.5*np.square(mi-fi_star)/vi) - 0.5*np.square(fi_star-mi)/vi)
if not np.isfinite(res):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return res
return f return f
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf) p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
@ -134,6 +146,36 @@ class Likelihood(Parameterized):
p_ystar = np.array(p_ystar).reshape(-1, 1) p_ystar = np.array(p_ystar).reshape(-1, 1)
return np.log(p_ystar) return np.log(p_ystar)
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
"""
Calculation of the log predictive density via sampling
.. math:
log p(y_{*}|D) = log 1/num_samples prod^{S}_{s=1} p(y_{*}|f_{*s})
f_{*s} ~ p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
:param num_samples: num samples of p(f_{*}|mu_{*}, var_{*}) to take
:type num_samples: int
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
#Take samples of p(f*|y)
#fi_samples = np.random.randn(num_samples)*np.sqrt(var_star) + mu_star
fi_samples = np.random.normal(mu_star, np.sqrt(var_star), size=(mu_star.shape[0], num_samples))
from scipy.misc import logsumexp
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
return log_p_ystar
def _moments_match_ep(self,obs,tau,v): def _moments_match_ep(self,obs,tau,v):
""" """
Calculation of moments using quadrature Calculation of moments using quadrature

View file

@ -10,6 +10,7 @@ from scipy.special import gammaln, gamma
from .likelihood import Likelihood from .likelihood import Likelihood
from ..core.parameterization import Param from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp from ..core.parameterization.transformations import Logexp
from scipy.special import psi as digamma
class StudentT(Likelihood): class StudentT(Likelihood):
""" """
@ -28,16 +29,13 @@ class StudentT(Likelihood):
super(StudentT, self).__init__(gp_link, name='Student_T') super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale. # sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_scale2', float(sigma2), Logexp()) self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free)) self.v = Param('deg_free', float(deg_free), Logexp())
self.link_parameter(self.sigma2) self.link_parameter(self.sigma2)
self.link_parameter(self.v) self.link_parameter(self.v)
self.v.constrain_fixed() #self.v.constrain_fixed()
self.log_concave = False self.log_concave = False
#def parameters_changed(self):
#self.variance = (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, grads): def update_gradients(self, grads):
""" """
Pull out the gradients, be careful as the order must match the order Pull out the gradients, be careful as the order must match the order
@ -224,20 +222,46 @@ class StudentT(Likelihood):
) )
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
dlogpdf_dv = 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
dlogpdf_dv += 0.5*(df+1)*e2/(df*(e2 + s2*df))
dlogpdf_dv -= 0.5*np.log1p(e2/(s2*df))
return dlogpdf_dv
def dlogpdf_dlink_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
dlogpdf_df_dv = e*(e2 - self.sigma2)/(e2 + s2*df)**2
return dlogpdf_df_dv
def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
e2_s2v = e**2 + s2*df
d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3
return d2logpdf_df2_dv
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet dlogpdf_dv = self.dlogpdf_link_dv(f, y, Y_metadata=Y_metadata)
return np.array((dlogpdf_dvar, dlogpdf_dv)) return np.array((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet dlogpdf_dlink_dv = self.dlogpdf_dlink_dv(f, y, Y_metadata=Y_metadata)
return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet d2logpdf_dlink2_dv = self.d2logpdf_dlink2_dv(f, y, Y_metadata=Y_metadata)
return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):

View file

@ -219,7 +219,7 @@ def plot_fit_f(model, *args, **kwargs):
kwargs['plot_raw'] = True kwargs['plot_raw'] = True
plot_fit(model,*args, **kwargs) plot_fit(model,*args, **kwargs)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median'): def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
""" """
Convenience function for returning back fixed_inputs where the other inputs Convenience function for returning back fixed_inputs where the other inputs
are fixed using fix_routine are fixed using fix_routine
@ -229,6 +229,8 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
:type non_fixed_inputs: list :type non_fixed_inputs: list
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero' :param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
:type fix_routine: string :type fix_routine: string
:param as_list: if true, will return a list of tuples with (dimension, fixed_val) otherwise it will create the corresponding X matrix
:type as_list: boolean
""" """
f_inputs = [] f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
@ -241,6 +243,11 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
f_inputs.append( (i, np.mean(X[:,i])) ) f_inputs.append( (i, np.mean(X[:,i])) )
if fix_routine == 'median': if fix_routine == 'median':
f_inputs.append( (i, np.median(X[:,i])) ) f_inputs.append( (i, np.median(X[:,i])) )
elif fix_routine == 'zero': else: # set to zero zero
f_inputs.append( (i, 0) ) f_inputs.append( (i, 0) )
if not as_list:
X[:,i] = f_inputs[-1][1]
if as_list:
return f_inputs return f_inputs
else:
return X

View file

@ -0,0 +1,65 @@
import numpy as np
import scipy as sp
from GPy.util import choleskies
import GPy
"""
These tests make sure that the pure python and cython codes work the same These tests make sure that the pure python and cython codes work the same
"""
class CythonTestChols(np.testing.TestCase):
    """Check the cython and pure-python flat/triangular conversions agree."""
    def setUp(self):
        # 45 = 9*10/2 packed entries per column -> 9x9 triangles, 5 of them
        self.flat = np.random.randn(45, 5)
        # stack of three 20x20 identity matrices
        self.triang = np.dstack([np.eye(20)[:, :, None] for i in range(3)])

    def test_flat_to_triang(self):
        expected = choleskies._flat_to_triang_pure(self.flat)
        actual = choleskies._flat_to_triang_cython(self.flat)
        np.testing.assert_allclose(expected, actual)

    def test_triang_to_flat(self):
        expected = choleskies._triang_to_flat_pure(self.triang)
        actual = choleskies._triang_to_flat_cython(self.triang)
        np.testing.assert_allclose(expected, actual)
class test_stationary(np.testing.TestCase):
    """Check cython vs pure-python stationary-kernel gradient routines."""
    def setUp(self):
        # one square problem (X against X) and one rectangular (X against Z)
        self.k = GPy.kern.RBF(10)
        self.X = np.random.randn(300, 10)
        self.Z = np.random.randn(20, 10)
        self.dKxx = np.random.randn(300, 300)
        self.dKzz = np.random.randn(20, 20)
        self.dKxz = np.random.randn(300, 20)

    def test_square_gradX(self):
        args = (self.dKxx, self.X)
        np.testing.assert_allclose(self.k._gradients_X_cython(*args),
                                   self.k._gradients_X_pure(*args))

    def test_rect_gradx(self):
        args = (self.dKxz, self.X, self.Z)
        np.testing.assert_allclose(self.k._gradients_X_cython(*args),
                                   self.k._gradients_X_pure(*args))

    def test_square_lengthscales(self):
        args = (self.dKxx, self.X, self.X)
        np.testing.assert_allclose(self.k._lengthscale_grads_pure(*args),
                                   self.k._lengthscale_grads_cython(*args))

    def test_rect_lengthscales(self):
        args = (self.dKxz, self.X, self.Z)
        np.testing.assert_allclose(self.k._lengthscale_grads_pure(*args),
                                   self.k._lengthscale_grads_cython(*args))
class test_choleskies_backprop(np.testing.TestCase):
    """Check the cython cholesky backprop matches the pure-python version."""
    def setUp(self):
        # draw both the upstream gradient and the factor in one call
        self.dL, self.L = np.random.randn(2, 100, 100)

    def test(self):
        pure = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L)
        fast = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L)
        np.testing.assert_allclose(pure, fast)

View file

@ -366,9 +366,9 @@ class KernelTestsNonContinuous(unittest.TestCase):
X2 = self.X2[self.X2[:,-1]!=2] X2 = self.X2[self.X2[:,-1]!=2]
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
class Coregionalize_weave_test(unittest.TestCase): class Coregionalize_cython_test(unittest.TestCase):
""" """
Make sure that the coregionalize kernel work with and without weave enabled Make sure that the coregionalize kernel work with and without cython enabled
""" """
def setUp(self): def setUp(self):
self.k = GPy.kern.Coregionalize(1, output_dim=12) self.k = GPy.kern.Coregionalize(1, output_dim=12)
@ -378,36 +378,42 @@ class Coregionalize_weave_test(unittest.TestCase):
def test_sym(self): def test_sym(self):
dL_dK = np.random.randn(self.N1, self.N1) dL_dK = np.random.randn(self.N1, self.N1)
GPy.util.config.config.set('weave', 'working', 'True') GPy.util.config.config.set('cython', 'working', 'True')
K_weave = self.k.K(self.X) K_cython = self.k.K(self.X)
self.k.update_gradients_full(dL_dK, self.X) self.k.update_gradients_full(dL_dK, self.X)
grads_weave = self.k.gradient.copy() grads_cython = self.k.gradient.copy()
GPy.util.config.config.set('weave', 'working', 'False') GPy.util.config.config.set('cython', 'working', 'False')
K_numpy = self.k.K(self.X) K_numpy = self.k.K(self.X)
self.k.update_gradients_full(dL_dK, self.X) self.k.update_gradients_full(dL_dK, self.X)
grads_numpy = self.k.gradient.copy() grads_numpy = self.k.gradient.copy()
self.assertTrue(np.allclose(K_numpy, K_weave)) self.assertTrue(np.allclose(K_numpy, K_cython))
self.assertTrue(np.allclose(grads_numpy, grads_weave)) self.assertTrue(np.allclose(grads_numpy, grads_cython))
#reset the cython state for any other tests
GPy.util.config.config.set('cython', 'working', 'true')
def test_nonsym(self): def test_nonsym(self):
dL_dK = np.random.randn(self.N1, self.N2) dL_dK = np.random.randn(self.N1, self.N2)
GPy.util.config.config.set('weave', 'working', 'True') GPy.util.config.config.set('cython', 'working', 'True')
K_weave = self.k.K(self.X, self.X2) K_cython = self.k.K(self.X, self.X2)
self.k.gradient = 0.
self.k.update_gradients_full(dL_dK, self.X, self.X2) self.k.update_gradients_full(dL_dK, self.X, self.X2)
grads_weave = self.k.gradient.copy() grads_cython = self.k.gradient.copy()
GPy.util.config.config.set('weave', 'working', 'False') GPy.util.config.config.set('cython', 'working', 'False')
K_numpy = self.k.K(self.X, self.X2) K_numpy = self.k.K(self.X, self.X2)
self.k.gradient = 0.
self.k.update_gradients_full(dL_dK, self.X, self.X2) self.k.update_gradients_full(dL_dK, self.X, self.X2)
grads_numpy = self.k.gradient.copy() grads_numpy = self.k.gradient.copy()
self.assertTrue(np.allclose(K_numpy, K_weave)) self.assertTrue(np.allclose(K_numpy, K_cython))
self.assertTrue(np.allclose(grads_numpy, grads_weave)) self.assertTrue(np.allclose(grads_numpy, grads_cython))
#reset the cython state for any other tests
GPy.util.config.config.set('cython', 'working', 'true')
#reset the weave state for any other tests
GPy.util.config.config.set('weave', 'working', 'False')
class KernelTestsProductWithZeroValues(unittest.TestCase): class KernelTestsProductWithZeroValues(unittest.TestCase):

View file

@ -93,6 +93,9 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
if not grad.checkgrad(verbose=True): if not grad.checkgrad(verbose=True):
gradchecking = False gradchecking = False
if not grad.checkgrad(verbose=True):
gradchecking = False
return gradchecking return gradchecking
@ -116,6 +119,7 @@ class TestNoiseModels(object):
self.integer_Y = np.where(tmp > 0, tmp, 0) self.integer_Y = np.where(tmp > 0, tmp, 0)
self.var = 0.2 self.var = 0.2
self.deg_free = 4.0
#Make a bigger step as lower bound can be quite curved #Make a bigger step as lower bound can be quite curved
self.step = 1e-4 self.step = 1e-4
@ -135,56 +139,56 @@ class TestNoiseModels(object):
} }
""" """
self.noise_models = {"Student_t_default": { self.noise_models = {"Student_t_default": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_scale2"], "names": [".*t_scale2", ".*deg_free"],
"vals": [self.var], "vals": [self.var, self.deg_free],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_1_var": { "Student_t_1_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_scale2"], "names": [".*t_scale2", ".*deg_free"],
"vals": [1.0], "vals": [1.0, 8.0],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_small_deg_free": { "Student_t_small_deg_free": {
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_scale2"], "names": [".*t_scale2", ".*deg_free"],
"vals": [self.var], "vals": [self.var, 1.5],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_small_var": { "Student_t_small_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_scale2"], "names": [".*t_scale2", ".*deg_free"],
"vals": [0.001], "vals": [0.001, self.deg_free],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_large_var": { "Student_t_large_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_scale2"], "names": [".*t_scale2", ".*deg_free"],
"vals": [10.0], "vals": [10.0, self.deg_free],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_approx_gauss": { "Student_t_approx_gauss": {
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_scale2"], "names": [".*t_scale2", ".*deg_free"],
"vals": [self.var], "vals": [self.var, 1000],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
}, },
"laplace": True "laplace": True
}, },

View file

@ -1,6 +1,7 @@
import numpy as np import numpy as np
import scipy as sp import scipy as sp
from ..util.linalg import jitchol from GPy.util.linalg import jitchol
import GPy
class LinalgTests(np.testing.TestCase): class LinalgTests(np.testing.TestCase):
def setUp(self): def setUp(self):
@ -35,3 +36,17 @@ class LinalgTests(np.testing.TestCase):
return False return False
except sp.linalg.LinAlgError: except sp.linalg.LinAlgError:
return True return True
def test_einsum_ijk_jlk_to_il(self):
    """ijk_jlk_to_il should match the reference einsum contraction."""
    A = np.random.randn(50, 150, 5)
    B = np.random.randn(150, 100, 5)
    reference = np.einsum('ijk,jlk->il', A, B)
    fast = GPy.util.linalg.ijk_jlk_to_il(A, B)
    np.testing.assert_allclose(reference, fast)
def test_einsum_ij_jlk_to_ilk(self):
    """ij_jlk_to_ilk should match the reference einsum contraction.

    The previous version was a copy-paste of test_einsum_ijk_jlk_to_il:
    it built a 3-d A and compared 'ijk,jlk->il' against ijk_jlk_to_il,
    so ij_jlk_to_ilk was never exercised.  Here A is 2-d, matching the
    'ij' operand the function is named for.
    """
    A = np.random.randn(15, 150)
    B = np.random.randn(150, 50, 5)
    pure = np.einsum('ij,jlk->ilk', A, B)
    quick = GPy.util.linalg.ij_jlk_to_ilk(A, B)
    np.testing.assert_allclose(pure, quick)

View file

@ -1,14 +1,11 @@
# Copyright James Hensman and Max Zwiessele 2014 # Copyright James Hensman and Max Zwiessele 2014, 2015
# Licensed under the GNU GPL version 3.0 # Licensed under the GNU GPL version 3.0
import numpy as np import numpy as np
from . import linalg from . import linalg
from .config import config from .config import config
try: import choleskies_cython
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
def safe_root(N): def safe_root(N):
i = np.sqrt(N) i = np.sqrt(N)
@ -17,36 +14,6 @@ def safe_root(N):
raise ValueError("N is not square!") raise ValueError("N is not square!")
return j return j
def _flat_to_triang_weave(flat):
"""take a matrix N x D and return a M X M x D array where
N = M(M+1)/2
the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat.
This is the weave implementation
"""
N, D = flat.shape
M = (-1 + safe_root(8*N+1))/2
ret = np.zeros((M, M, D))
flat = np.ascontiguousarray(flat)
code = """
int count = 0;
for(int m=0; m<M; m++)
{
for(int mm=0; mm<=m; mm++)
{
for(int d=0; d<D; d++)
{
ret[d + m*D*M + mm*D] = flat[count];
count++;
}
}
}
"""
weave.inline(code, ['flat', 'ret', 'D', 'M'])
return ret
def _flat_to_triang_pure(flat_mat): def _flat_to_triang_pure(flat_mat):
N, D = flat_mat.shape N, D = flat_mat.shape
M = (-1 + safe_root(8*N+1))//2 M = (-1 + safe_root(8*N+1))//2
@ -59,34 +26,11 @@ def _flat_to_triang_pure(flat_mat):
count = count+1 count = count+1
return ret return ret
if config.getboolean('weave', 'working'): def _flat_to_triang_cython(flat_mat):
flat_to_triang = _flat_to_triang_weave N, D = flat_mat.shape
else: M = (-1 + safe_root(8*N+1))//2
flat_to_triang = _flat_to_triang_pure return choleskies_cython.flat_to_triang(flat_mat, M)
def _triang_to_flat_weave(L):
M, _, D = L.shape
L = np.ascontiguousarray(L) # should do nothing if L was created by flat_to_triang
N = M*(M+1)/2
flat = np.empty((N, D))
code = """
int count = 0;
for(int m=0; m<M; m++)
{
for(int mm=0; mm<=m; mm++)
{
for(int d=0; d<D; d++)
{
flat[count] = L[d + m*D*M + mm*D];
count++;
}
}
}
"""
weave.inline(code, ['flat', 'L', 'D', 'M'])
return flat
def _triang_to_flat_pure(L): def _triang_to_flat_pure(L):
M, _, D = L.shape M, _, D = L.shape
@ -101,41 +45,41 @@ def _triang_to_flat_pure(L):
count = count +1 count = count +1
return flat return flat
if config.getboolean('weave', 'working'): def _triang_to_flat_cython(L):
triang_to_flat = _triang_to_flat_weave return choleskies_cython.triang_to_flat(L)
else:
triang_to_flat = _triang_to_flat_pure def _backprop_gradient_pure(dL, L):
"""
Given the derivative of an objective fn with respect to the cholesky L,
compute the derivate with respect to the original matrix K, defined as
K = LL^T
where L was obtained by Cholesky decomposition
"""
dL_dK = np.tril(dL).copy()
N = L.shape[0]
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in xrange(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2 * L[k, k])
return dL_dK
def triang_to_cov(L): def triang_to_cov(L):
return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])]) return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])
def multiple_dpotri_old(Ls):
M, _, D = Ls.shape
Kis = np.rollaxis(Ls, -1).copy()
[dpotri(Kis[i,:,:], overwrite_c=1, lower=1) for i in range(D)]
code = """
for(int d=0; d<D; d++)
{
for(int m=0; m<M; m++)
{
for(int mm=0; mm<m; mm++)
{
Kis[d*M*M + mm*M + m ] = Kis[d*M*M + m*M + mm];
}
}
}
"""
weave.inline(code, ['Kis', 'D', 'M'])
Kis = np.rollaxis(Kis, 0, 3) #wtf rollaxis?
return Kis
def multiple_dpotri(Ls): def multiple_dpotri(Ls):
return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])]) return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])])
def indexes_to_fix_for_low_rank(rank, size): def indexes_to_fix_for_low_rank(rank, size):
""" """
work out which indexes of the flatteneed array should be fixed if we want the cholesky to represent a low rank matrix Work out which indexes of the flatteneed array should be fixed if we want
the cholesky to represent a low rank matrix
""" """
#first we'll work out what to keep, and the do the set difference. #first we'll work out what to keep, and the do the set difference.
@ -153,15 +97,11 @@ def indexes_to_fix_for_low_rank(rank, size):
return np.setdiff1d(np.arange((size**2+size)/2), keep) return np.setdiff1d(np.arange((size**2+size)/2), keep)
if config.getboolean('cython', 'working'):
#class cholchecker(GPy.core.Model): triang_to_flat = _triang_to_flat_cython
#def __init__(self, L, name='cholchecker'): flat_to_triang = _flat_to_triang_cython
#super(cholchecker, self).__init__(name) backprop_gradient = choleskies_cython.backprop_gradient
#self.L = GPy.core.Param('L',L) else:
#self.link_parameter(self.L) backprop_gradient = _backprop_gradient_pure
#def parameters_changed(self): triang_to_flat = _triang_to_flat_pure
#LL = flat_to_triang(self.L) flat_to_triang = _flat_to_triang_pure
#Ki = multiple_dpotri(LL)
#self.L.gradient = 2*np.einsum('ijk,jlk->ilk', Ki, LL)
#self._loglik = np.sum([np.sum(np.log(np.abs(np.diag()))) for i in range(self.L.shape[-1])])
#

6700
GPy/util/choleskies_cython.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,59 @@
#cython: wraparound=False
#cython: boundscheck=False
#cython: nonecheck=False
# Copyright James Hensman and Alan Saul 2015
import numpy as np
cimport numpy as np
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
    """Unpack an N x D matrix into an M x M x D stack of lower triangles,
    where N = M(M+1)/2.

    Column d of `flat` fills the lower triangular portion of slice
    ret[:, :, d] row by row; the strict upper triangle is left at zero.
    """
    cdef int D = flat.shape[1]
    cdef np.ndarray[double, ndim=3] ret = np.zeros((M, M, D))
    cdef int d, m, mm
    for d in range(D):
        for m in range(M):
            for mm in range(m+1):
                # row-major position of (m, mm) within the packed triangle
                ret[m, mm, d] = flat[m*(m+1)//2 + mm, d]
    return ret
def triang_to_flat(np.ndarray[double, ndim=3] L):
    """Pack the lower triangles of an M x M x D stack into an N x D matrix,
    N = M(M+1)/2.  Inverse of flat_to_triang."""
    cdef int M = L.shape[0]
    cdef int D = L.shape[2]
    cdef np.ndarray[double, ndim=2] flat = np.empty((M*(M+1)//2, D))
    cdef int d, m, mm
    for d in range(D):
        for m in range(M):
            for mm in range(m+1):
                # row-major position of (m, mm) within the packed triangle
                flat[m*(m+1)//2 + mm, d] = L[m, mm, d]
    return flat
def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
    """
    Given the derivative dL of an objective with respect to the Cholesky
    factor L, compute the derivative with respect to the original matrix
    K, where K = L L^T.  (Cython port of _backprop_gradient_pure.)
    """
    # only the lower triangle of dL is meaningful for a lower Cholesky factor
    cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL).copy()
    cdef int N = L.shape[0]
    cdef int k, j, i
    # reverse-mode sweep of the Cholesky algorithm: process columns from
    # last to first, undoing the elimination steps in reverse order.
    # NOTE: dL_dK is updated in place and the statement order matters.
    for k in range(N - 1, -1, -1):
        for j in range(k + 1, N):
            for i in range(j, N):
                dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
                dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
        for j in range(k + 1, N):
            # account for the division by the pivot L[k, k]
            dL_dK[j, k] /= L[k, k]
            dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
        # pivot itself came from a square root: d(sqrt)/dx = 1/(2 sqrt(x))
        dL_dK[k, k] /= (2. * L[k, k])
    return dL_dK

View file

@ -15,11 +15,7 @@ import warnings
import os import os
from .config import config from .config import config
import logging import logging
import linalg_cython
try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
_scipyversion = np.float64((scipy.__version__).split('.')[:2]) _scipyversion = np.float64((scipy.__version__).split('.')[:2])
@ -422,114 +418,33 @@ def DSYR(*args, **kwargs):
def symmetrify(A, upper=False): def symmetrify(A, upper=False):
""" """
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper Take the square matrix A and make it symmetrical by copting elements from
the lower half to the upper
works IN PLACE. works IN PLACE.
note: tries to use weave, falls back to a slower numpy version note: tries to use cython, falls back to a slower numpy version
""" """
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: _symmetrify_cython(A, upper)
symmetrify_weave(A, upper)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
symmetrify_numpy(A, upper)
else: else:
symmetrify_numpy(A, upper) _symmetrify_numpy(A, upper)
def symmetrify_weave(A, upper=False): def _symmetrify_cython(A, upper=False):
""" return linalg_cython.symmetrify(A, upper)
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
works IN PLACE. def _symmetrify_numpy(A, upper=False):
"""
N, M = A.shape
assert N == M
c_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[i+j*N] = A[iN+j];
}
}
"""
f_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[iN+j] = A[i+j*N];
}
}
"""
N = int(N) # for safe type casting
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
else:
if upper:
tmp = np.tril(A.T)
else:
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp, -1).T
def symmetrify_numpy(A, upper=False):
"""
Force a matrix to be symmetric
"""
triu = np.triu_indices_from(A,k=1) triu = np.triu_indices_from(A,k=1)
if upper: if upper:
A.T[triu] = A[triu] A.T[triu] = A[triu]
else: else:
A[triu] = A.T[triu] A[triu] = A.T[triu]
#This function appears to be unused. It's use of weave makes it problematic
#Commenting out for now
#def cholupdate(L, x):
# """
# update the LOWER cholesky factor of a pd matrix IN PLACE
#
# if L is the lower chol. of K, then this function computes L\_
# where L\_ is the lower chol of K + x*x^T
# """
# support_code = """
# #include <math.h>
# """
# code = """
# double r,c,s;
# int j,i;
# for(j=0; j<N; j++){
# r = sqrt(L(j,j)*L(j,j) + x(j)*x(j));
# c = r / L(j,j);
# s = x(j) / L(j,j);
# L(j,j) = r;
# for (i=j+1; i<N; i++){
# L(i,j) = (L(i,j) + s*x(i))/c;
# x(i) = c*x(i) - s*L(i,j);
# }
# }
# """
# x = x.copy()
# N = x.size
# weave.inline(code, support_code=support_code, arg_names=['N', 'L', 'x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X, transpose='left'): def backsub_both_sides(L, X, transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" """
Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky
"""
if transpose == 'left': if transpose == 'left':
tmp, _ = dtrtrs(L, X, lower=1, trans=1) tmp, _ = dtrtrs(L, X, lower=1, trans=1)
return dtrtrs(L, tmp.T, lower=1, trans=1)[0].T return dtrtrs(L, tmp.T, lower=1, trans=1)[0].T
@ -537,3 +452,16 @@ def backsub_both_sides(L, X, transpose='left'):
tmp, _ = dtrtrs(L, X, lower=1, trans=0) tmp, _ = dtrtrs(L, X, lower=1, trans=0)
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
def ij_jlk_to_ilk(A, B):
    """
    Faster version of np.einsum('ij,jlk->ilk', A, B).

    Flattens the trailing two axes of B so the contraction becomes a single
    matrix product, then restores the (l, k) axes on the result.
    """
    j_dim, l_dim, k_dim = B.shape
    flat_product = A.dot(B.reshape(j_dim, l_dim * k_dim))
    return flat_product.reshape(A.shape[0], l_dim, k_dim)
def ijk_jlk_to_il(A, B):
    """
    Faster version of np.einsum('ijk,jlk->il', A, B).

    Sums the slice-wise matrix products A[:, :, k] @ B[:, :, k] over the
    trailing axis.
    """
    res = np.zeros((A.shape[0], B.shape[1]))
    # plain accumulation loop: the original built a throwaway list purely
    # for the np.add side effect, which obscures intent
    for k in range(B.shape[-1]):
        res += np.dot(A[:, :, k], B[:, :, k])
    return res

6191
GPy/util/linalg_cython.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,34 @@
cimport numpy as np
from cpython cimport bool
import cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def symmetrify(np.ndarray[double, ndim=2] A, bool upper):
    """Make the square matrix A symmetric, in place.

    If `upper` is true the upper triangle is copied onto the lower;
    otherwise the lower triangle is copied onto the upper.
    """
    cdef int N = A.shape[0]
    cdef int i, j
    for i in xrange(N):
        for j in xrange(i):
            # (i, j) is below the diagonal, (j, i) above it
            if upper:
                A[i, j] = A[j, i]
            else:
                A[j, i] = A[i, j]
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def cholupdate(np.ndarray[double, ndim=1] x, np.ndarray[double, ndim=2] L, int N):
    """Rank-1 update of a LOWER Cholesky factor, in place.

    If L is the lower Cholesky factor of K, on exit L is the lower
    Cholesky factor of K + x x^T.  x is overwritten as workspace.

    :param x: update vector of length N (destroyed)
    :param L: N x N lower-triangular factor, updated in place
    :param N: size of the system
    """
    cdef double r, c, s
    cdef int i, j
    for j in xrange(N):
        # Givens-style rotation absorbing x[j] into the pivot
        r = np.sqrt(L[j, j]*L[j, j] + x[j]*x[j])
        c = r / L[j, j]
        s = x[j] / L[j, j]
        L[j, j] = r
        # update the sub-diagonal entries of column j.  The previous code
        # iterated i in xrange(j), touching rows ABOVE the diagonal (the
        # zero upper triangle) and leaving the actual factor stale; the
        # standard algorithm walks i = j+1 .. N-1.
        for i in xrange(j + 1, N):
            L[i, j] = (L[i, j] + s*x[i])/c
            x[i] = c*x[i] - s*L[i, j]
        # (the old trailing "r = np.sqrt(L[j,j])" was dead code: r is
        # reassigned at the top of the loop and unused after it)

View file

@ -42,9 +42,6 @@ def chain_1(df_dg, dg_dx):
""" """
if np.all(dg_dx==1.): if np.all(dg_dx==1.):
return df_dg return df_dg
if len(df_dg) > 1 and len(df_dg.shape)>1 and df_dg.shape[-1] > 1:
#import ipdb; ipdb.set_trace() # XXX BREAKPOINT
raise NotImplementedError('Not implemented for matricies yet')
return df_dg * dg_dx return df_dg * dg_dx
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
@ -56,8 +53,6 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
""" """
if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0): if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
return d2f_dg2 return d2f_dg2
if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
raise NotImplementedError('Not implemented for matricies yet')
dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2 dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2
#dg_dx_2 = dg_dx**2 #dg_dx_2 = dg_dx**2
return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2 return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
@ -71,11 +66,7 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
""" """
if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0): if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0):
return d3f_dg3 return d3f_dg3
if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
raise NotImplementedError('Not implemented for matricies yet')
dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3 dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3
#dg_dx_3 = dg_dx**3
return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3 return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
def opt_wrapper(m, **kwargs): def opt_wrapper(m, **kwargs):
@ -133,10 +124,11 @@ def kmm_init(X, m = 10):
### make a parameter to its corresponding array: ### make a parameter to its corresponding array:
def param_to_array(*param): def param_to_array(*param):
""" """
Convert an arbitrary number of parameters to :class:ndarray class objects. This is for Convert an arbitrary number of parameters to :class:ndarray class objects.
converting parameter objects to numpy arrays, when using scipy.weave.inline routine. This is for converting parameter objects to numpy arrays, when using
In scipy.weave.blitz there is no automatic array detection (even when the array inherits scipy.weave.inline routine. In scipy.weave.blitz there is no automatic
from :class:ndarray)""" array detection (even when the array inherits from :class:ndarray)
"""
import warnings import warnings
warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning) warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning)
assert len(param) > 0, "At least one parameter needed" assert len(param) > 0, "At least one parameter needed"

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from GPy.core.parameterization import Parameterized, Param from ..core.parameterization import Parameterized, Param
from ..core.parameterization.transformations import Logexp from ..core.parameterization.transformations import Logexp
class WarpingFunction(Parameterized): class WarpingFunction(Parameterized):

View file

@ -6,3 +6,6 @@ include *.cfg
recursive-include doc *.cfg recursive-include doc *.cfg
include *.json include *.json
recursive-include doc *.json recursive-include doc *.json
recursive-include GPy *.c
recursive-include GPy *.so
recursive-include GPy *.pyx

View file

@ -2,7 +2,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os import os
from setuptools import setup from setuptools import setup, Extension
import numpy as np
# Version number # Version number
version = '0.6.1' version = '0.6.1'
@ -10,6 +11,27 @@ version = '0.6.1'
def read(fname): def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read() return open(os.path.join(os.path.dirname(__file__), fname)).read()
#compile_flags = ["-march=native", '-fopenmp', '-O3', ]
compile_flags = [ '-fopenmp', '-O3', ]
ext_mods = [Extension(name='GPy.kern._src.stationary_cython',
sources=['GPy/kern/_src/stationary_cython.c','GPy/kern/_src/stationary_utils.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags,
extra_link_args = ['-lgomp']),
Extension(name='GPy.util.choleskies_cython',
sources=['GPy/util/choleskies_cython.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags),
Extension(name='GPy.util.linalg_cython',
sources=['GPy/util/linalg_cython.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags),
Extension(name='GPy.kern._src.coregionalize_cython',
sources=['GPy/kern/_src/coregionalize_cython.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags)]
setup(name = 'GPy', setup(name = 'GPy',
version = version, version = version,
author = read('AUTHORS.txt'), author = read('AUTHORS.txt'),
@ -18,6 +40,7 @@ setup(name = 'GPy',
license = "BSD 3-clause", license = "BSD 3-clause",
keywords = "machine-learning gaussian-processes kernels", keywords = "machine-learning gaussian-processes kernels",
url = "http://sheffieldml.github.com/GPy/", url = "http://sheffieldml.github.com/GPy/",
ext_modules = ext_mods,
packages = ["GPy.models", packages = ["GPy.models",
"GPy.inference.optimization", "GPy.inference.optimization",
"GPy.inference.mcmc", "GPy.inference.mcmc",