Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-05-08 19:42:39 +02:00 · 2015-04-30 15:28:27 +02:00 · 2015-04-30 15:28:27 +02:00 · 139fda270c
commit 139fda270c
parent 435308d5da 55b77064d4
33 changed files with 26259 additions and 430 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -18,7 +18,8 @@ before_install:

 install:
  - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
-  - pip install . 
+  #- pip install . 
+  - python setup.py build_ext --inplace
  #--use-mirrors
  #
 # command to run tests, e.g. python setup.py test
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -485,3 +485,38 @@ class GP(Model):
        """
        from ..inference.latent_function_inference.inferenceX import infer_newX
        return infer_newX(self, Y_new, optimize=optimize)
+
+    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
+        """
+        Calculation of the log predictive density at the test points.
+
+        The latent predictive mean and variance at x_test are taken from
+        self._raw_predict and handed to the likelihood's
+        log_predictive_density, which is responsible for evaluating
+
+        .. math::
+            \\log p(y_{*}|D) = \\log \\int p(y_{*}|f_{*}) p(f_{*}|\\mu_{*}, \\sigma^{2}_{*}) df_{*}
+
+        :param x_test: test locations (x_{*})
+        :type x_test: (Nx1) array
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param Y_metadata: metadata associated with the test points
+        :returns: the value returned by self.likelihood.log_predictive_density
+        """
+        # mean and variance of the latent function at the test inputs
+        mu_star, var_star = self._raw_predict(x_test)
+        return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
+
+    def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
+        """
+        Calculation of the log predictive density by sampling
+
+        The latent predictive mean and variance at x_test are taken from
+        self._raw_predict and handed to the likelihood's
+        log_predictive_density_sampling, which estimates
+
+        .. math::
+            \\log p(y_{*}|D) = \\log \\int p(y_{*}|f_{*}) p(f_{*}|\\mu_{*}, \\sigma^{2}_{*}) df_{*}
+
+        by Monte Carlo integration over samples of f_{*}.
+
+        :param x_test: test locations (x_{*})
+        :type x_test: (Nx1) array
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param Y_metadata: metadata associated with the test points
+        :param num_samples: number of samples to use in monte carlo integration
+        :type num_samples: int
+        :returns: the value returned by self.likelihood.log_predictive_density_sampling
+        """
+        # mean and variance of the latent function at the test inputs
+        mu_star, var_star = self._raw_predict(x_test)
+        return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
+
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@ -5,6 +5,7 @@ import numpy
 from numpy.lib.function_base import vectorize
 from .lists_and_dicts import IntArrayDict
 from functools import reduce
+from transformations import Transformation

 def extract_properties_to_index(index, props):
    prop_index = dict()
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -6,10 +6,10 @@ import numpy; np = numpy
 import itertools
 from re import compile, _pattern_type
 from .param import ParamConcatenation
-from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
+from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing

 import logging
-from GPy.core.parameterization.index_operations import ParameterIndexOperationsView
+from index_operations import ParameterIndexOperationsView
 logger = logging.getLogger("parameters changed meta")

 class ParametersChangedMeta(type):
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -730,7 +730,7 @@ class DGPLVM(Prior):

 # ******************************************

-from parameterized import Parameterized
+from .. import Parameterized
 from .. import Param
 class DGPLVM_Lamda(Prior, Parameterized):
    """
--- a/GPy/core/svgp.py
+++ b/GPy/core/svgp.py
@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf


 class SVGP(SparseGP):
-    def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None):
+    def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None, num_latent_functions=None):
        """
        Stochastic Variational GP.

@ -41,8 +41,12 @@ class SVGP(SparseGP):
        SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
                 name=name, Y_metadata=Y_metadata, normalizer=False)

-        self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1])))
-        chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,Y.shape[1])))
+        #assume the number of latent functions is one per col of Y unless specified
+        if num_latent_functions is None:
+            num_latent_functions = Y.shape[1]
+
+        self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
+        chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
        self.chol = Param('q_u_chol', chol)
        self.link_parameter(self.chol)
        self.link_parameter(self.m)
--- a/GPy/defaults.cfg
+++ b/GPy/defaults.cfg
@ -25,3 +25,6 @@ MKL = False
 [weave]
 #if true, try to use weave, and fall back to numpy. if false, just use numpy.
 working = True
+
+[cython]
+working = True
--- a/GPy/inference/latent_function_inference/svgp.py
+++ b/GPy/inference/latent_function_inference/svgp.py
@ -8,12 +8,16 @@ class SVGP(LatentFunctionInference):

    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):

-        num_inducing = Z.shape[0]
-        num_data, num_outputs = Y.shape
+        num_data, _ = Y.shape
+        num_inducing, num_outputs = q_u_mean.shape

        #expand cholesky representation
        L = choleskies.flat_to_triang(q_u_chol)
-        S = np.einsum('ijk,ljk->ilk', L, L) #L.dot(L.T)
+
+
+        S = np.empty((num_outputs, num_inducing, num_inducing))
+        [np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
+        S = S.swapaxes(0,2)
        #Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
        Si = choleskies.multiple_dpotri(L)
        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
@ -41,11 +45,12 @@ class SVGP(LatentFunctionInference):
        #compute the marginal means and variances of q(f)
        A = np.dot(Knm, Kmmi)
        mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
-        v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1)
+        #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
+        v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)

        #compute the KL term
        Kmmim = np.dot(Kmmi, q_u_mean)
-        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
+        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KL = KLs.sum()
        #gradient of the KL term (assuming zero mean function)
        dKL_dm = Kmmim.copy()
@ -78,11 +83,14 @@ class SVGP(LatentFunctionInference):
        Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
        Admu = A.T.dot(dF_dmu)
        AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
-        tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
+        #tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
+        tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
        dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
        dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
-        tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
-        dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
+        #tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
+        tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
+        #dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
+        dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
        dF_dm = Admu
        dF_dS = AdvA

--- a/GPy/kern/_src/coregionalize.py
+++ b/GPy/kern/_src/coregionalize.py
@ -5,12 +5,8 @@ from .kern import Kern
 import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
-from ...util.config import config # for assesing whether to use weave
-
-try:
-    from scipy import weave
-except ImportError:
-    config.set('weave', 'working', 'False')
+from ...util.config import config # for assessing whether to use cython
+import coregionalize_cython

 class Coregionalize(Kern):
    """
@ -61,13 +57,8 @@ class Coregionalize(Kern):
        self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)

    def K(self, X, X2=None):
-        if config.getboolean('weave', 'working'):
-            try:
-                return self._K_weave(X, X2)
-            except:
-                print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
-                config.set('weave', 'working', 'False')
-                return self._K_numpy(X, X2)
+        if config.getboolean('cython', 'working'):
+            return self._K_cython(X, X2)
        else:
            return self._K_numpy(X, X2)

@ -80,36 +71,10 @@ class Coregionalize(Kern):
            index2 = np.asarray(X2, dtype=np.int)
            return self.B[index,index2.T]

-    def _K_weave(self, X, X2=None):
-        """compute the kernel function using scipy.weave"""
-        index = np.asarray(X, dtype=np.int)
-
+    def _K_cython(self, X, X2=None):
        if X2 is None:
-            target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
-            code="""
-            for(int i=0;i<N; i++){
-              target[i+i*N] = B[index[i]+output_dim*index[i]];
-              for(int j=0; j<i; j++){
-                  target[j+i*N] = B[index[i]+output_dim*index[j]];
-                  target[i+j*N] = target[j+i*N];
-                }
-              }
-            """
-            N, B, output_dim = index.size, self.B, self.output_dim
-            weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
-        else:
-            index2 = np.asarray(X2, dtype=np.int)
-            target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
-            code="""
-            for(int i=0;i<num_inducing; i++){
-              for(int j=0; j<N; j++){
-                  target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
-                }
-              }
-            """
-            N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
-            weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
-        return target
+            return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
+        return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])


    def Kdiag(self, X):
@ -122,19 +87,13 @@ class Coregionalize(Kern):
        else:
            index2 = np.asarray(X2, dtype=np.int)

-        #attempt to use weave for a nasty double indexing loop: fall back to numpy
-        if config.getboolean('weave', 'working'):
-            try:
-                dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
-            except:
-                print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
-                config.set('weave', 'working', 'False')
-                dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
+        #attempt to use cython for a nasty double indexing loop: fall back to numpy
+        if config.getboolean('cython', 'working'):
+            dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
        else:
            dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)


-
        dkappa = np.diag(dL_dK_small)
        dL_dK_small += dL_dK_small.T
        dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -142,19 +101,6 @@ class Coregionalize(Kern):
        self.W.gradient = dW
        self.kappa.gradient = dkappa

-    def _gradient_reduce_weave(self, dL_dK, index, index2):
-        dL_dK_small = np.zeros_like(self.B)
-        code="""
-        for(int i=0; i<num_inducing; i++){
-          for(int j=0; j<N; j++){
-            dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
-          }
-        }
-        """
-        N, num_inducing, output_dim = index.size, index2.size, self.output_dim
-        weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
-        return dL_dK_small
-
    def _gradient_reduce_numpy(self, dL_dK, index, index2):
        index, index2 = index[:,0], index2[:,0]
        dL_dK_small = np.zeros_like(self.B)
@ -164,6 +110,11 @@ class Coregionalize(Kern):
                dL_dK_small[j,i] = tmp1[:,index2==j].sum()
        return dL_dK_small

+    def _gradient_reduce_cython(self, dL_dK, index, index2):
+        """
+        Reduce dL_dK onto an array with the shape of self.B using the compiled
+        helper coregionalize_cython.gradient_reduce.
+
+        :param dL_dK: 2-d array of gradients, one entry per (index, index2) pair
+        :param index: 2-d integer array; only its first column is used
+        :param index2: 2-d integer array; only its first column is used
+        :returns: square array with self.B.shape[0] rows and columns
+        """
+        # gradient_reduce declares np.int64_t index buffers, whereas the callers
+        # build the indices with dtype=np.int, which is not 64 bits wide on every
+        # platform; cast explicitly, exactly as _K_cython does (no copy is made
+        # when the dtype already matches).
+        index = np.asarray(index[:,0], dtype=np.int64)
+        index2 = np.asarray(index2[:,0], dtype=np.int64)
+        return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
+
+
    def update_gradients_diag(self, dL_dKdiag, X):
        index = np.asarray(X, dtype=np.int).flatten()
        dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])
--- a/GPy/kern/_src/coregionalize_cython.c
+++ b/GPy/kern/_src/coregionalize_cython.c
--- a/GPy/kern/_src/coregionalize_cython.pyx
+++ b/GPy/kern/_src/coregionalize_cython.pyx
@ -0,0 +1,34 @@
+#cython: boundscheck=True
+#cython: wraparound=True
+import cython
+import numpy as np
+cimport numpy as np
+
+def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
+    """
+    Build the N x N matrix K with K[n,m] = B[X[n], X[m]], where N = X.size.
+
+    :param B: 2-d double array that is looked up with the entries of X
+    :param X: 1-d int64 array of indices into both axes of B
+    :returns: newly allocated N x N double array
+    """
+    cdef int N = X.size
+    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
+    # every entry is written below, so the uninitialised np.empty buffer is safe
+    for n in range(N):
+        for m in range(N):
+            K[n,m] = B[X[n],X[m]]
+    return K
+
+def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
+    """
+    Build the N x M matrix K with K[n,m] = B[X[n], X2[m]],
+    where N = X.size and M = X2.size.
+
+    :param B: 2-d double array that is looked up with the entries of X and X2
+    :param X: 1-d int64 array of row indices into B
+    :param X2: 1-d int64 array of column indices into B
+    :returns: newly allocated N x M double array
+    """
+    cdef int N = X.size
+    cdef int M = X2.size
+    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
+    # every entry is written below, so the uninitialised np.empty buffer is safe
+    for n in range(N):
+        for m in range(M):
+            K[n,m] = B[X[n],X2[m]]
+    return K
+
+def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
+        """
+        Accumulate the entries of dL_dK into a D x D array:
+        dL_dK_small[index2[j], index[i]] += dL_dK[i, j] for every i, j.
+
+        :param D: number of rows and columns of the returned array
+        :param dL_dK: 2-d double array, read at [i, j] for i < index.size, j < index2.size
+        :param index: 1-d int64 array giving the target column for each row of dL_dK
+        :param index2: 1-d int64 array giving the target row for each column of dL_dK
+        :returns: newly allocated D x D double array holding the sums
+        """
+        # starts from zeros because the loop below accumulates with +=
+        cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
+        cdef int N = index.size
+        cdef int M = index2.size
+        for i in range(N):
+            for j in range(M):
+                dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
+        return dL_dK_small
+
+
+
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -9,13 +9,15 @@ from ...util.linalg import tdot
 from ... import util
 import numpy as np
 from scipy import integrate
-from ...util.config import config # for assesing whether to use weave
+from ...util.config import config # for assessing whether to use cython
 from ...util.caching import Cache_this

 try:
-    from scipy import weave
+    import stationary_cython
 except ImportError:
-    config.set('weave', 'working', 'False')
+    print('warning: failed to import cython module: falling back to numpy')
+    config.set('cython', 'working', 'false')
+

 class Stationary(Kern):
    """
@ -153,28 +155,18 @@ class Stationary(Kern):
        (dL_dK), compute the gradient wrt the parameters of this kernel,
        and store in the parameters object as e.g. self.variance.gradient
        """
-        self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
+        self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance

        #now the lengthscale gradient(s)
        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
        if self.ARD:
-            #rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
-            #d =  X[:, None, :] - X2[None, :, :]
-            #x_xl3 = np.square(d)
-            #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
+
            tmp = dL_dr*self._inv_dist(X, X2)
            if X2 is None: X2 = X
-
-
-            if config.getboolean('weave', 'working'):
-                try:
-                    self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
-                except:
-                    print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
-                    config.set('weave', 'working', 'False')
-                    self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
+            if config.getboolean('cython', 'working'):
+                self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
            else:
-                self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
+                self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
        else:
            r = self._scaled_dist(X, X2)
            self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -189,43 +181,27 @@ class Stationary(Kern):
        dist = self._scaled_dist(X, X2).copy()
        return 1./np.where(dist != 0., dist, np.inf)

-    def weave_lengthscale_grads(self, tmp, X, X2):
-        """Use scipy.weave to compute derivatives wrt the lengthscales"""
+    def _lengthscale_grads_pure(self, tmp, X, X2):
+        """
+        numpy-only computation of the lengthscale gradients.
+
+        For each input dimension q the result is
+        -sum(tmp * (X[:,q:q+1] - X2[:,q:q+1].T)**2) / self.lengthscale**3,
+        looping over dimensions so that only one pairwise-difference matrix
+        is held at a time.
+
+        :param tmp: 2-d array multiplied elementwise with the squared differences
+        :param X: 2-d array of inputs, one column per input dimension
+        :param X2: 2-d array of inputs, one column per input dimension
+        :returns: array with one entry per input dimension
+        """
+        return -np.array([np.sum(tmp * np.square(X[:,q:q+1] - X2[:,q:q+1].T)) for q in range(self.input_dim)])/self.lengthscale**3
+
+    def _lengthscale_grads_cython(self, tmp, X, X2):
        N,M = tmp.shape
-        Q = X.shape[1]
-        if hasattr(X, 'values'):X = X.values
-        if hasattr(X2, 'values'):X2 = X2.values
+        Q = self.input_dim
+        X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
        grads = np.zeros(self.input_dim)
-        code = """
-        double gradq;
-        for(int q=0; q<Q; q++){
-          gradq = 0;
-          for(int n=0; n<N; n++){
-            for(int m=0; m<M; m++){
-              gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
-            }
-          }
-          grads(q) = gradq;
-        }
-        """
-        weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
+        stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grads)
        return -grads/self.lengthscale**3

    def gradients_X(self, dL_dK, X, X2=None):
        """
        Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
        """
-        if config.getboolean('weave', 'working'):
-            try:
-                return self.gradients_X_weave(dL_dK, X, X2)
-            except:
-                print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
-                config.set('weave', 'working', 'False')
-                return self.gradients_X_(dL_dK, X, X2)
+        if config.getboolean('cython', 'working'):
+            return self._gradients_X_cython(dL_dK, X, X2)
        else:
-            return self.gradients_X_(dL_dK, X, X2)
+            return self._gradients_X_pure(dL_dK, X, X2)

-    def gradients_X_(self, dL_dK, X, X2=None):
+    def _gradients_X_pure(self, dL_dK, X, X2=None):
        invdist = self._inv_dist(X, X2)
        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
        tmp = invdist*dL_dr
@ -235,54 +211,25 @@ class Stationary(Kern):

        #The high-memory numpy way:
        #d =  X[:, None, :] - X2[None, :, :]
-        #ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
+        #grad = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2

        #the lower memory way with a loop
-        ret = np.empty(X.shape, dtype=np.float64)
+        grad = np.empty(X.shape, dtype=np.float64)
        for q in range(self.input_dim):
-            np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
-        ret /= self.lengthscale**2
+            np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=grad[:,q])
+        return grad/self.lengthscale**2

-        return ret
-
-    def gradients_X_weave(self, dL_dK, X, X2=None):
+    def _gradients_X_cython(self, dL_dK, X, X2=None):
        invdist = self._inv_dist(X, X2)
        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
        tmp = invdist*dL_dr
        if X2 is None:
            tmp = tmp + tmp.T
            X2 = X
-
-        code = """
-        int n,m,d;
-        double retnd;
-        #pragma omp parallel for private(n,d, retnd, m)
-        for(d=0;d<D;d++){
-          for(n=0;n<N;n++){
-            retnd = 0.0;
-            for(m=0;m<M;m++){
-              retnd += tmp(n,m)*(X(n,d)-X2(m,d));
-            }
-            ret(n,d) = retnd;
-          }
-        }
-
-        """
-        if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
-        if hasattr(X2, 'values'):X2 = X2.values
-        ret = np.zeros(X.shape)
-        N,D = X.shape
-        N,M = tmp.shape
-        from scipy import weave
-        support_code = """
-        #include <omp.h>
-        #include <stdio.h>
-        """
-        weave_options = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
-                         'extra_link_args'   : ['-lgomp']}
-        weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
-        return ret/self.lengthscale**2
+        X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
+        grad = np.zeros(X.shape)
+        stationary_cython.grad_X(X.shape[0], X.shape[1], X2.shape[0], X, X2, tmp, grad)
+        return grad/self.lengthscale**2

    def gradients_X_diag(self, dL_dKdiag, X):
        return np.zeros(X.shape)
@ -290,6 +237,9 @@ class Stationary(Kern):
    def input_sensitivity(self, summarize=True):
        return self.variance*np.ones(self.input_dim)/self.lengthscale**2

+
+
+
 class Exponential(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
        super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
--- a/GPy/kern/_src/stationary_cython.c
+++ b/GPy/kern/_src/stationary_cython.c
--- a/GPy/kern/_src/stationary_cython.pyx
+++ b/GPy/kern/_src/stationary_cython.pyx
@ -0,0 +1,36 @@
+#cython: boundscheck=False
+#cython: wraparound=False
+import numpy as np
+cimport numpy as np
+
+ctypedef np.float64_t DTYPE_t
+ 
+cdef extern from "stationary_utils.h":
+    void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
+
+cdef extern from "stationary_utils.h":
+    void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
+ 
+def grad_X(int N, int D, int M,
+        np.ndarray[DTYPE_t, ndim=2] _X,
+        np.ndarray[DTYPE_t, ndim=2] _X2,
+        np.ndarray[DTYPE_t, ndim=2] _tmp,
+        np.ndarray[DTYPE_t, ndim=2] _grad):
+    """
+    Thin wrapper around the C routine _grad_X declared in stationary_utils.h.
+
+    The raw data pointers of the four float64 arrays are passed straight to C,
+    and the result is written into _grad in place; nothing is returned.
+
+    NOTE(review): only the data pointers are handed over (no strides), so the
+    arrays are presumably expected to be C-contiguous -- confirm at call sites.
+    """
+    cdef double *X = <double*> _X.data
+    cdef double *X2 = <double*> _X2.data
+    cdef double *tmp = <double*> _tmp.data
+    cdef double *grad = <double*> _grad.data
+    _grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
+
+def lengthscale_grads(int N, int M, int Q,
+        np.ndarray[DTYPE_t, ndim=2] _tmp,
+        np.ndarray[DTYPE_t, ndim=2] _X,
+        np.ndarray[DTYPE_t, ndim=2] _X2,
+        np.ndarray[DTYPE_t, ndim=1] _grad):
+    """
+    Thin wrapper around the C routine _lengthscale_grads declared in
+    stationary_utils.h.
+
+    The raw data pointers of the float64 arrays are passed straight to C, and
+    the result is written into the 1-d array _grad in place; nothing is returned.
+
+    NOTE(review): only the data pointers are handed over (no strides), so the
+    arrays are presumably expected to be C-contiguous -- confirm at call sites.
+    """
+    cdef double *tmp = <double*> _tmp.data
+    cdef double *X = <double*> _X.data
+    cdef double *X2 = <double*> _X2.data
+    cdef double *grad = <double*> _grad.data
+    _lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
+
+
--- a/GPy/kern/_src/stationary_utils.c
+++ b/GPy/kern/_src/stationary_utils.c
@ -0,0 +1,35 @@
+/*
+ * For every n < N and d < D compute
+ *   grad[n*D+d] = sum over m < M of tmp[n*M+m] * (X[n*D+d] - X2[m*D+d]).
+ * All arrays are flat buffers indexed in row-major order: X and grad as
+ * N x D, X2 as M x D, tmp as N x M. grad is overwritten in place.
+ */
+void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
+int n,m,d;
+double retnd;
+/* the OpenMP pragma below is commented out, so this loop nest runs serially */
+//#pragma omp parallel for private(n,d, retnd, m)
+for(d=0;d<D;d++){
+  for(n=0;n<N;n++){
+    retnd = 0.0;
+    for(m=0;m<M;m++){
+      retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
+    }
+    grad[n*D+d] = retnd;
+  }
+}
+} //grad_X
+
+
+/*
+ * For every q < Q compute
+ *   grad[q] = sum over n < N, m < M of tmp[n*M+m] * (X[n*Q+q] - X2[m*Q+q])^2.
+ * All arrays are flat buffers indexed in row-major order: X as N x Q,
+ * X2 as M x Q, tmp as N x M. grad (length Q) is overwritten in place.
+ */
+void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
+int n,m,q;
+double gradq, dist;
+/* the outer loop over q is marked for OpenMP; each iteration writes only grad[q] */
+#pragma omp parallel for private(n,m, gradq, dist)
+for(q=0; q<Q; q++){
+  gradq = 0;
+  for(n=0; n<N; n++){
+    for(m=0; m<M; m++){
+        dist = X[n*Q+q]-X2[m*Q+q];
+        gradq += tmp[n*M+m]*dist*dist;
+    }
+  }
+  grad[q] = gradq;
+}
+} //lengthscale_grads
+
+
+
+
--- a/GPy/kern/_src/stationary_utils.h
+++ b/GPy/kern/_src/stationary_utils.h
@ -0,0 +1,3 @@
+/* Prototypes for the routines defined in stationary_utils.c. */
+/* stationary_utils.c only uses an OpenMP pragma, so the header is needed
+ * solely when the compiler actually has OpenMP enabled. */
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+/* grad (N x D) <- sum over m of tmp (N x M) times (X - X2) differences */
+void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);
+/* parameter names and order match the definition: sizes N, M, Q, then tmp, X, X2 */
+void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad);
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@ -41,6 +41,14 @@ class Likelihood(Parameterized):
        self.log_concave = False
        self.not_block_really = False

+    def request_num_latent_functions(self, Y):
+        """
+        Report how many latent functions this likelihood needs for the
+        observations Y.
+
+        The default is one latent function per output, i.e. the number of
+        columns of Y.
+
+        :param Y: observations; only Y.shape[1] is read
+        :returns: Y.shape[1]
+        """
+        return Y.shape[1]
+
    def _gradients(self,partial):
        return np.zeros(0)

@ -118,15 +126,19 @@ class Likelihood(Parameterized):
            """Generate a function which can be integrated
            to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
            def f(fi_star):
-                #exponent = np.exp(-(1./(2*v))*np.square(m-f_star))
+                #exponent = np.exp(-(1./(2*vi))*np.square(mi-fi_star))
                #from GPy.util.misc import safe_exp
                #exponent = safe_exp(exponent)
-                #return self.pdf(f_star, y, y_m)*exponent
+                #res = safe_exp(self.logpdf(fi_star, yi, yi_m))*exponent

                #More stable in the log space
-                return np.exp(self.logpdf(fi_star, yi, yi_m)
+                # integrand: likelihood of yi at fi_star times the Gaussian
+                # density of fi_star with mean mi and variance vi
+                res = np.exp(self.logpdf(fi_star, yi, yi_m)
                              - 0.5*np.log(2*np.pi*vi)
-                              - 0.5*np.square(mi-fi_star)/vi)
+                              - 0.5*np.square(fi_star-mi)/vi)
+                # a non-finite value is handed back to the integrator unchanged;
+                # library code must not drop into an interactive debugger
+                return res
+
            return f

        p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
@ -134,6 +146,36 @@ class Likelihood(Parameterized):
        p_ystar = np.array(p_ystar).reshape(-1, 1)
        return np.log(p_ystar)

+    def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
+        """
+        Calculation of the log predictive density via sampling
+
+        .. math::
+            \\log p(y_{*}|D) \\approx \\log \\frac{1}{S} \\sum^{S}_{s=1} p(y_{*}|f_{*s}), \\quad
+            f_{*s} \\sim p(f_{*}|\\mu_{*}, \\sigma^{2}_{*})
+
+        where S is num_samples.
+
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type mu_star: (Nx1) array
+        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type var_star: (Nx1) array
+        :param Y_metadata: metadata passed through to self.logpdf
+        :param num_samples: num samples of p(f_{*}|mu_{*}, var_{*}) to take
+        :type num_samples: int
+        :returns: log of the sample average of the likelihood, reduced over the sample axis
+        """
+        assert y_test.shape==mu_star.shape
+        assert y_test.shape==var_star.shape
+        assert y_test.shape[1] == 1
+
+        # logsumexp lives in scipy.special in current SciPy; scipy.misc is
+        # kept as a fallback for the old releases that only have it there
+        try:
+            from scipy.special import logsumexp
+        except ImportError:
+            from scipy.misc import logsumexp
+
+        #Take samples of p(f*|y): one row per test point, one column per sample
+        fi_samples = np.random.normal(mu_star, np.sqrt(var_star), size=(mu_star.shape[0], num_samples))
+
+        # log(1/S * sum_s p(y|f_s)) evaluated stably in log space
+        log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
+        return log_p_ystar
+
+
    def _moments_match_ep(self,obs,tau,v):
        """
        Calculation of moments using quadrature
--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@ -10,6 +10,7 @@ from scipy.special import gammaln, gamma
 from .likelihood import Likelihood
 from ..core.parameterization import Param
 from ..core.parameterization.transformations import Logexp
+from scipy.special import psi as digamma

 class StudentT(Likelihood):
    """
@ -28,16 +29,13 @@ class StudentT(Likelihood):
        super(StudentT, self).__init__(gp_link, name='Student_T')
        # sigma2 is not a noise parameter, it is a squared scale.
        self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
-        self.v = Param('deg_free', float(deg_free))
+        self.v = Param('deg_free', float(deg_free), Logexp())
        self.link_parameter(self.sigma2)
        self.link_parameter(self.v)
-        self.v.constrain_fixed()
+        #self.v.constrain_fixed()

        self.log_concave = False

-    #def parameters_changed(self):
-        #self.variance = (self.v / float(self.v - 2)) * self.sigma2
-
    def update_gradients(self, grads):
        """
        Pull out the gradients, be careful as the order must match the order
@ -224,20 +222,46 @@ class StudentT(Likelihood):
                           )
        return d2logpdf_dlink2_dvar

+    def dlogpdf_link_dv(self, inv_link_f, y, Y_metadata=None):
+        """
+        Gradient of the log-density with respect to the degrees of freedom self.v.
+
+        :param inv_link_f: latent function values passed through the inverse link
+        :param y: observations
+        :param Y_metadata: not used here
+        :returns: array with the shape of y - inv_link_f
+        """
+        e = y - inv_link_f
+        e2 = np.square(e)
+        df = float(self.v[:])
+        s2 = float(self.sigma2[:])
+        # terms that do not depend on the residual (digamma functions and -1/(2 df))
+        dlogpdf_dv =  0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
+        # residual-dependent terms
+        dlogpdf_dv += 0.5*(df+1)*e2/(df*(e2 + s2*df))
+        dlogpdf_dv -= 0.5*np.log1p(e2/(s2*df))
+        return dlogpdf_dv
+
+    def dlogpdf_dlink_dv(self, inv_link_f, y, Y_metadata=None):
+        """
+        Derivative with respect to the degrees of freedom self.v of the
+        gradient of the log-density with respect to the link function:
+        e*(e^2 - s2)/(e^2 + s2*v)^2 with e = y - inv_link_f.
+
+        :param inv_link_f: latent function values passed through the inverse link
+        :param y: observations
+        :param Y_metadata: not used here
+        :returns: array with the shape of y - inv_link_f
+        """
+        e = y - inv_link_f
+        e2 = np.square(e)
+        df = float(self.v[:])
+        s2 = float(self.sigma2[:])
+        # use the plain float s2 throughout (as the sibling *_dv methods do)
+        # instead of mixing the parameter object self.sigma2 into the result
+        dlogpdf_df_dv = e*(e2 - s2)/(e2 + s2*df)**2
+        return dlogpdf_df_dv
+
+    def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None):
+        """
+        Derivative with respect to the degrees of freedom self.v of the
+        second derivative of the log-density with respect to the link function.
+
+        :param inv_link_f: latent function values passed through the inverse link
+        :param y: observations
+        :param Y_metadata: not used here
+        :returns: array with the shape of y - inv_link_f
+        """
+        e = y - inv_link_f
+        e2 = np.square(e)
+        df = float(self.v[:])
+        s2 = float(self.sigma2[:])
+        # shared denominator e^2 + s2*v
+        e2_s2v = e**2 + s2*df
+        # quotient rule applied to (v+1)*(e^2 - s2*v)/(e^2 + s2*v)^2
+        d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3
+        return d2logpdf_df2_dv
+
    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
-        dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
+        dlogpdf_dv = self.dlogpdf_link_dv(f, y, Y_metadata=Y_metadata)
        return np.array((dlogpdf_dvar, dlogpdf_dv))

    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
-        dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
+        dlogpdf_dlink_dv = self.dlogpdf_dlink_dv(f, y, Y_metadata=Y_metadata)
        return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))

    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
-        d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
-
+        d2logpdf_dlink2_dv = self.d2logpdf_dlink2_dv(f, y, Y_metadata=Y_metadata)
        return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

    def predictive_mean(self, mu, sigma, Y_metadata=None):
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -219,7 +219,7 @@ def plot_fit_f(model, *args, **kwargs):
    kwargs['plot_raw'] = True
    plot_fit(model,*args, **kwargs)

-def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
+def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
    """
    Convenience function for returning back fixed_inputs where the other inputs
    are fixed using fix_routine
@ -229,6 +229,8 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
    :type non_fixed_inputs: list
    :param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
    :type fix_routine: string
+    :param as_list: if true, will return a list of tuples with (dimension, fixed_val) otherwise it will create the corresponding X matrix
+    :type as_list: boolean
    """
    f_inputs = []
    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
@ -241,6 +243,11 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
                f_inputs.append( (i, np.mean(X[:,i])) )
            if fix_routine == 'median':
                f_inputs.append( (i, np.median(X[:,i])) )
-            elif fix_routine == 'zero':
+            else: # set to zero
                f_inputs.append( (i, 0) )
-    return f_inputs
+            if not as_list:
+                X[:,i] = f_inputs[-1][1]
+    if as_list:
+        return f_inputs
+    else:
+        return X
--- a/GPy/testing/cython_tests.py
+++ b/GPy/testing/cython_tests.py
@ -0,0 +1,65 @@
+import numpy as np
+import scipy as sp
+from GPy.util import choleskies
+import GPy
+
+"""
+These tests make sure that the pure python and cython codes work the same
+"""
+
+class CythonTestChols(np.testing.TestCase):
+    def setUp(self):
+        self.flat = np.random.randn(45, 5)
+        self.triang = np.dstack([np.eye(20)[:,:,None] for i in range(3)])
+    def test_flat_to_triang(self):
+        L1 = choleskies._flat_to_triang_pure(self.flat)
+        L2 = choleskies._flat_to_triang_cython(self.flat)
+        np.testing.assert_allclose(L1, L2)
+    def test_triang_to_flat(self):
+        A1 = choleskies._triang_to_flat_pure(self.triang)
+        A2 = choleskies._triang_to_flat_cython(self.triang)
+        np.testing.assert_allclose(A1, A2)
+
+class test_stationary(np.testing.TestCase):
+    def setUp(self):
+        self.k = GPy.kern.RBF(10)
+        self.X = np.random.randn(300,10)
+        self.Z = np.random.randn(20,10)
+        self.dKxx = np.random.randn(300,300)
+        self.dKzz = np.random.randn(20,20)
+        self.dKxz = np.random.randn(300,20)
+
+    def test_square_gradX(self):
+        g1 = self.k._gradients_X_cython(self.dKxx, self.X)
+        g2 = self.k._gradients_X_pure(self.dKxx, self.X)
+        np.testing.assert_allclose(g1, g2)
+
+    def test_rect_gradx(self):
+        g1 = self.k._gradients_X_cython(self.dKxz, self.X, self.Z)
+        g2 = self.k._gradients_X_pure(self.dKxz, self.X, self.Z)
+        np.testing.assert_allclose(g1, g2)
+
+    def test_square_lengthscales(self):
+        g1 = self.k._lengthscale_grads_pure(self.dKxx, self.X, self.X)
+        g2 = self.k._lengthscale_grads_cython(self.dKxx, self.X, self.X)
+        np.testing.assert_allclose(g1, g2)
+
+    def test_rect_lengthscales(self):
+        g1 = self.k._lengthscale_grads_pure(self.dKxz, self.X, self.Z)
+        g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z)
+        np.testing.assert_allclose(g1, g2)
+
+class test_choleskies_backprop(np.testing.TestCase):
+    def setUp(self):
+        self.dL, self.L = np.random.randn(2, 100, 100)
+    def test(self):
+        r1 = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L)
+        r2 = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L)
+        np.testing.assert_allclose(r1, r2)
+
+
+
+
+
+
+
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -366,9 +366,9 @@ class KernelTestsNonContinuous(unittest.TestCase):
        X2 = self.X2[self.X2[:,-1]!=2]
        self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))

-class Coregionalize_weave_test(unittest.TestCase):
+class Coregionalize_cython_test(unittest.TestCase):
    """
-    Make sure that the coregionalize kernel work with and without weave enabled
+    Make sure that the coregionalize kernel work with and without cython enabled
    """
    def setUp(self):
        self.k = GPy.kern.Coregionalize(1, output_dim=12)
@ -378,36 +378,42 @@ class Coregionalize_weave_test(unittest.TestCase):

    def test_sym(self):
        dL_dK = np.random.randn(self.N1, self.N1)
-        GPy.util.config.config.set('weave', 'working', 'True')
-        K_weave = self.k.K(self.X)
+        GPy.util.config.config.set('cython', 'working', 'True')
+        K_cython = self.k.K(self.X)
        self.k.update_gradients_full(dL_dK, self.X)
-        grads_weave = self.k.gradient.copy()
+        grads_cython = self.k.gradient.copy()

-        GPy.util.config.config.set('weave', 'working', 'False')
+        GPy.util.config.config.set('cython', 'working', 'False')
        K_numpy = self.k.K(self.X)
        self.k.update_gradients_full(dL_dK, self.X)
        grads_numpy = self.k.gradient.copy()

-        self.assertTrue(np.allclose(K_numpy, K_weave))
-        self.assertTrue(np.allclose(grads_numpy, grads_weave))
+        self.assertTrue(np.allclose(K_numpy, K_cython))
+        self.assertTrue(np.allclose(grads_numpy, grads_cython))
+
+        #reset the cython state for any other tests
+        GPy.util.config.config.set('cython', 'working', 'true')

    def test_nonsym(self):
        dL_dK = np.random.randn(self.N1, self.N2)
-        GPy.util.config.config.set('weave', 'working', 'True')
-        K_weave = self.k.K(self.X, self.X2)
+        GPy.util.config.config.set('cython', 'working', 'True')
+        K_cython = self.k.K(self.X, self.X2)
+        self.k.gradient = 0.
        self.k.update_gradients_full(dL_dK, self.X, self.X2)
-        grads_weave = self.k.gradient.copy()
+        grads_cython = self.k.gradient.copy()

-        GPy.util.config.config.set('weave', 'working', 'False')
+        GPy.util.config.config.set('cython', 'working', 'False')
        K_numpy = self.k.K(self.X, self.X2)
+        self.k.gradient = 0.
        self.k.update_gradients_full(dL_dK, self.X, self.X2)
        grads_numpy = self.k.gradient.copy()

-        self.assertTrue(np.allclose(K_numpy, K_weave))
-        self.assertTrue(np.allclose(grads_numpy, grads_weave))
+        self.assertTrue(np.allclose(K_numpy, K_cython))
+        self.assertTrue(np.allclose(grads_numpy, grads_cython))
+
+        #reset the cython state for any other tests
+        GPy.util.config.config.set('cython', 'working', 'true')

-    #reset the weave state for any other tests
-    GPy.util.config.config.set('weave', 'working', 'False')


 class KernelTestsProductWithZeroValues(unittest.TestCase):
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@ -93,6 +93,9 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
            if not grad.checkgrad(verbose=True):
                gradchecking = False

+            if not grad.checkgrad(verbose=True):
+                gradchecking = False
+
    return gradchecking


@ -116,6 +119,7 @@ class TestNoiseModels(object):
        self.integer_Y = np.where(tmp > 0, tmp, 0)

        self.var = 0.2
+        self.deg_free = 4.0

        #Make a bigger step as lower bound can be quite curved
        self.step = 1e-4
@ -135,56 +139,56 @@ class TestNoiseModels(object):
                }
        """
        self.noise_models = {"Student_t_default": {
-            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+            "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
            "grad_params": {
-                "names": [".*t_scale2"],
-                "vals": [self.var],
-                "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                "names": [".*t_scale2", ".*deg_free"],
+                "vals": [self.var, self.deg_free],
+                "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
            },
            "laplace": True
            },
            "Student_t_1_var": {
-                "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
                "grad_params": {
-                    "names": [".*t_scale2"],
-                    "vals": [1.0],
-                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                    "names": [".*t_scale2", ".*deg_free"],
+                    "vals": [1.0, 8.0],
+                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
                },
                "laplace": True
            },
            "Student_t_small_deg_free": {
                "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
                "grad_params": {
-                    "names": [".*t_scale2"],
-                    "vals": [self.var],
-                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                    "names": [".*t_scale2", ".*deg_free"],
+                    "vals": [self.var, 1.5],
+                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
                },
                "laplace": True
            },
            "Student_t_small_var": {
-                "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
                "grad_params": {
-                    "names": [".*t_scale2"],
-                    "vals": [0.001],
-                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                    "names": [".*t_scale2", ".*deg_free"],
+                    "vals": [0.001, self.deg_free],
+                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
                },
                "laplace": True
            },
            "Student_t_large_var": {
-                "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                "model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
                "grad_params": {
-                    "names": [".*t_scale2"],
-                    "vals": [10.0],
-                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                    "names": [".*t_scale2", ".*deg_free"],
+                    "vals": [10.0, self.deg_free],
+                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
                },
                "laplace": True
            },
            "Student_t_approx_gauss": {
                "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
                "grad_params": {
-                    "names": [".*t_scale2"],
-                    "vals": [self.var],
-                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                    "names": [".*t_scale2", ".*deg_free"],
+                    "vals": [self.var, 1000],
+                    "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_positive)]
                },
                "laplace": True
            },
--- a/GPy/testing/linalg_test.py
+++ b/GPy/testing/linalg_test.py
@ -1,6 +1,7 @@
 import numpy as np
 import scipy as sp
-from ..util.linalg import jitchol
+from GPy.util.linalg import jitchol
+import GPy

 class LinalgTests(np.testing.TestCase):
    def setUp(self):
@ -35,3 +36,17 @@ class LinalgTests(np.testing.TestCase):
            return False
        except sp.linalg.LinAlgError:
            return True
+
+    def test_einsum_ijk_jlk_to_il(self):
+        A = np.random.randn(50, 150, 5)
+        B = np.random.randn(150, 100, 5)
+        pure = np.einsum('ijk,jlk->il', A, B)
+        quick = GPy.util.linalg.ijk_jlk_to_il(A, B)
+        np.testing.assert_allclose(pure, quick)
+
+    def test_einsum_ij_jlk_to_ilk(self):
+        """Check ij_jlk_to_ilk against the einsum contraction it replaces."""
+        # A is 2-D for this contraction ('ij,jlk->ilk'); the 3-D case
+        # 'ijk,jlk->il' is covered by test_einsum_ijk_jlk_to_il.
+        A = np.random.randn(15, 150)
+        B = np.random.randn(150, 50, 5)
+        pure = np.einsum('ij,jlk->ilk', A, B)
+        quick = GPy.util.linalg.ij_jlk_to_ilk(A, B)
+        np.testing.assert_allclose(pure, quick)
--- a/GPy/util/choleskies.py
+++ b/GPy/util/choleskies.py
@ -1,14 +1,11 @@
-# Copyright James Hensman and Max Zwiessele 2014
+# Copyright James Hensman and Max Zwiessele 2014, 2015
 # Licensed under the GNU GPL version 3.0

 import numpy as np
 from . import linalg
 from .config import config

-try:
-    from scipy import weave
-except ImportError:
-    config.set('weave', 'working', 'False')
+import choleskies_cython

 def safe_root(N):
    i = np.sqrt(N)
@ -17,36 +14,6 @@ def safe_root(N):
        raise ValueError("N is not square!")
    return j

-def _flat_to_triang_weave(flat):
-    """take a matrix N x D and return a M X M x D array where
-
-    N = M(M+1)/2
-
-    the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat.
-    This is the weave implementation
-    """
-    N, D = flat.shape
-    M = (-1 + safe_root(8*N+1))/2
-    ret = np.zeros((M, M, D))
-    flat = np.ascontiguousarray(flat)
-
-    code = """
-    int count = 0;
-    for(int m=0; m<M; m++)
-    {
-      for(int mm=0; mm<=m; mm++)
-      {
-        for(int d=0; d<D; d++)
-        {
-          ret[d + m*D*M + mm*D] = flat[count];
-          count++;
-        }
-      }
-    }
-    """
-    weave.inline(code, ['flat', 'ret', 'D', 'M'])
-    return ret
-
 def _flat_to_triang_pure(flat_mat):
    N, D = flat_mat.shape
    M = (-1 + safe_root(8*N+1))//2
@ -59,34 +26,11 @@ def _flat_to_triang_pure(flat_mat):
              count = count+1
    return ret

-if config.getboolean('weave', 'working'):
-	flat_to_triang =  _flat_to_triang_weave
-else:
-        flat_to_triang =  _flat_to_triang_pure
+def _flat_to_triang_cython(flat_mat):
+    N, D = flat_mat.shape
+    M = (-1 + safe_root(8*N+1))//2
+    return choleskies_cython.flat_to_triang(flat_mat, M)

-def _triang_to_flat_weave(L):
-    M, _, D = L.shape
-
-    L = np.ascontiguousarray(L) # should do nothing if L was created by flat_to_triang
-
-    N = M*(M+1)/2
-    flat = np.empty((N, D))
-    code = """
-    int count = 0;
-    for(int m=0; m<M; m++)
-    {
-      for(int mm=0; mm<=m; mm++)
-      {
-        for(int d=0; d<D; d++)
-        {
-          flat[count] = L[d + m*D*M + mm*D];
-          count++;
-        }
-      }
-    }
-    """
-    weave.inline(code, ['flat', 'L', 'D', 'M'])
-    return flat

 def _triang_to_flat_pure(L):
    M, _, D = L.shape
@ -101,41 +45,41 @@ def _triang_to_flat_pure(L):
                count = count +1
    return flat

-if config.getboolean('weave', 'working'):
-    triang_to_flat =  _triang_to_flat_weave
-else:
-    triang_to_flat =  _triang_to_flat_pure
+def _triang_to_flat_cython(L):
+    return choleskies_cython.triang_to_flat(L)
+
+def _backprop_gradient_pure(dL, L):
+    """
+    Given the derivative of an objective fn with respect to the cholesky L,
+    compute the derivative with respect to the original matrix K, defined as
+
+        K = LL^T
+
+    where L was obtained by Cholesky decomposition.
+
+    :param dL: derivative of the objective w.r.t. L; only its lower
+        triangle is read (np.tril is applied first)
+    :param L: the Cholesky factor, indexed as a square 2-D array
+    :returns: a new lower-triangular array; the inputs are not modified
+    """
+    dL_dK = np.tril(dL).copy()
+    N = L.shape[0]
+    # Sweep the columns from last to first. range (not xrange) keeps this
+    # runnable on both Python 2 and Python 3.
+    for k in range(N - 1, -1, -1):
+        for j in range(k + 1, N):
+            for i in range(j, N):
+                dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
+                dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
+        for j in range(k + 1, N):
+            dL_dK[j, k] /= L[k, k]
+            dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
+        dL_dK[k, k] /= (2 * L[k, k])
+    return dL_dK

 def triang_to_cov(L):
    return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])

-def multiple_dpotri_old(Ls):
-    M, _, D = Ls.shape
-    Kis = np.rollaxis(Ls, -1).copy()
-    [dpotri(Kis[i,:,:], overwrite_c=1, lower=1) for i in range(D)]
-    code = """
-    for(int d=0; d<D; d++)
-    {
-      for(int m=0; m<M; m++)
-      {
-        for(int mm=0; mm<m; mm++)
-        {
-          Kis[d*M*M + mm*M + m ] = Kis[d*M*M + m*M + mm];
-        }
-      }
-    }
-
-    """
-    weave.inline(code, ['Kis', 'D', 'M'])
-    Kis = np.rollaxis(Kis, 0, 3) #wtf rollaxis?
-    return Kis
-
 def multiple_dpotri(Ls):
    return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])])

 def indexes_to_fix_for_low_rank(rank, size):
    """
-    work out which indexes of the flatteneed array should be fixed if we want the cholesky to represent a low rank matrix
+    Work out which indexes of the flattened array should be fixed if we want
+    the cholesky to represent a low rank matrix
    """
    #first we'll work out what to keep, and the do the set difference.

@ -153,15 +97,11 @@ def indexes_to_fix_for_low_rank(rank, size):
    return np.setdiff1d(np.arange((size**2+size)/2), keep)


-
-#class cholchecker(GPy.core.Model):
-    #def __init__(self, L, name='cholchecker'):
-        #super(cholchecker, self).__init__(name)
-        #self.L = GPy.core.Param('L',L)
-        #self.link_parameter(self.L)
-    #def parameters_changed(self):
-        #LL = flat_to_triang(self.L)
-        #Ki = multiple_dpotri(LL)
-        #self.L.gradient = 2*np.einsum('ijk,jlk->ilk', Ki, LL)
-        #self._loglik = np.sum([np.sum(np.log(np.abs(np.diag()))) for i in range(self.L.shape[-1])])
-#
+if config.getboolean('cython', 'working'):
+    triang_to_flat = _triang_to_flat_cython
+    flat_to_triang = _flat_to_triang_cython
+    backprop_gradient = choleskies_cython.backprop_gradient
+else:
+    backprop_gradient = _backprop_gradient_pure
+    triang_to_flat =  _triang_to_flat_pure
+    flat_to_triang = _flat_to_triang_pure
--- a/GPy/util/choleskies_cython.c
+++ b/GPy/util/choleskies_cython.c
--- a/GPy/util/choleskies_cython.pyx
+++ b/GPy/util/choleskies_cython.pyx
@ -0,0 +1,59 @@
+#cython: wraparound=False
+#cython: boundscheck=False
+#cython: nonecheck=False
+
+# Copyright James Hensman and Alan Saul 2015
+
+import numpy as np
+cimport numpy as np
+
+def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
+    """take a matrix N x D and return a M X M x D array where
+
+    N = M(M+1)/2
+
+    the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat.
+    """
+    cdef int N = flat.shape[0]
+    cdef int D = flat.shape[1]
+    cdef int count = 0
+    cdef np.ndarray[double, ndim=3] ret = np.zeros((M, M, D))
+    cdef int d, m, mm
+    for d in range(D):
+        count = 0
+        for m in range(M):
+            for mm in range(m+1):
+                ret[m, mm, d] = flat[count,d]
+                count += 1
+    return ret
+
+def triang_to_flat(np.ndarray[double, ndim=3] L):
+    cdef int M = L.shape[0]
+    cdef int D = L.shape[2]
+    cdef int N = M*(M+1)/2
+    cdef int count = 0
+    cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))
+    cdef int d, m, mm
+    for d in range(D):
+        count = 0
+        for m in range(M):
+            for mm in range(m+1):
+                flat[count,d] = L[m, mm, d]
+                count += 1
+    return flat
+
+
+def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
+    cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL).copy()
+    cdef int N = L.shape[0]
+    cdef int k, j, i
+    for k in range(N - 1, -1, -1):
+        for j in range(k + 1, N):
+            for i in range(j, N):
+                dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
+                dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
+        for j in range(k + 1, N):
+            dL_dK[j, k] /= L[k, k]
+            dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
+        dL_dK[k, k] /= (2. * L[k, k])
+    return dL_dK
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -15,11 +15,7 @@ import warnings
 import os
 from .config import config
 import logging
-
-try:
-    from scipy import weave
-except ImportError:
-    config.set('weave', 'working', 'False')
+import linalg_cython


 _scipyversion = np.float64((scipy.__version__).split('.')[:2])
@ -422,114 +418,33 @@ def DSYR(*args, **kwargs):

 def symmetrify(A, upper=False):
    """
-    Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
+    Take the square matrix A and make it symmetrical by copying elements from
+    the lower half to the upper

    works IN PLACE.

-    note: tries to use weave, falls back to a slower numpy version
+    note: tries to use cython, falls back to a slower numpy version
    """
-    if config.getboolean('weave', 'working'):
-        try:
-            symmetrify_weave(A, upper)
-        except:
-            print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
-            config.set('weave', 'working', 'False')
-            symmetrify_numpy(A, upper)
+    if config.getboolean('cython', 'working'):
+        _symmetrify_cython(A, upper)
    else:
-        symmetrify_numpy(A, upper)
+        _symmetrify_numpy(A, upper)


-def symmetrify_weave(A, upper=False):
-    """
-    Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
+def _symmetrify_cython(A, upper=False):
+    return linalg_cython.symmetrify(A, upper)

-    works IN PLACE.
-
-
-    """
-    N, M = A.shape
-    assert N == M
-
-    c_contig_code = """
-    int iN;
-    for (int i=1; i<N; i++){
-      iN = i*N;
-      for (int j=0; j<i; j++){
-        A[i+j*N] = A[iN+j];
-      }
-    }
-    """
-    f_contig_code = """
-    int iN;
-    for (int i=1; i<N; i++){
-      iN = i*N;
-      for (int j=0; j<i; j++){
-        A[iN+j] = A[i+j*N];
-      }
-    }
-    """
-
-    N = int(N) # for safe type casting
-    if A.flags['C_CONTIGUOUS'] and upper:
-        weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
-    elif A.flags['C_CONTIGUOUS'] and not upper:
-        weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
-    elif A.flags['F_CONTIGUOUS'] and upper:
-        weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
-    elif A.flags['F_CONTIGUOUS'] and not upper:
-        weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
-    else:
-        if upper:
-            tmp = np.tril(A.T)
-        else:
-            tmp = np.tril(A)
-        A[:] = 0.0
-        A += tmp
-        A += np.tril(tmp, -1).T
-
-
-def symmetrify_numpy(A, upper=False):
-    """
-    Force a matrix to be symmetric
-    """
+def _symmetrify_numpy(A, upper=False):
    triu = np.triu_indices_from(A,k=1)
    if upper:
        A.T[triu] = A[triu]
    else:
        A[triu] = A.T[triu]

-#This function appears to be unused. It's use of weave makes it problematic
-#Commenting out for now
-#def cholupdate(L, x):
-#    """
-#    update the LOWER cholesky factor of a pd matrix IN PLACE
-#
-#    if L is the lower chol. of K, then this function computes L\_
-#    where L\_ is the lower chol of K + x*x^T
-#    """
-#    support_code = """
-#    #include <math.h>
-#    """
-#    code = """
-#    double r,c,s;
-#    int j,i;
-#    for(j=0; j<N; j++){
-#      r = sqrt(L(j,j)*L(j,j) + x(j)*x(j));
-#      c = r / L(j,j);
-#      s = x(j) / L(j,j);
-#      L(j,j) = r;
-#      for (i=j+1; i<N; i++){
-#        L(i,j) = (L(i,j) + s*x(i))/c;
-#        x(i) = c*x(i) - s*L(i,j);
-#      }
-#    }
-#    """
-#    x = x.copy()
-#    N = x.size
-#    weave.inline(code, support_code=support_code, arg_names=['N', 'L', 'x'], type_converters=weave.converters.blitz)
-
 def backsub_both_sides(L, X, transpose='left'):
-    """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
+    """
+    Return L^-T * X * L^-1, assuming X is symmetrical and L is lower cholesky
+    """
    if transpose == 'left':
        tmp, _ = dtrtrs(L, X, lower=1, trans=1)
        return dtrtrs(L, tmp.T, lower=1, trans=1)[0].T
@ -537,3 +452,16 @@ def backsub_both_sides(L, X, transpose='left'):
        tmp, _ = dtrtrs(L, X, lower=1, trans=0)
        return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T

+def ij_jlk_to_ilk(A, B):
+    """
+    Faster equivalent of np.einsum('ij,jlk->ilk', A, B).
+
+    B is flattened over its trailing axes so the contraction becomes a
+    single matrix product, which is then reshaped back to three axes.
+    """
+    flat_B = B.reshape(B.shape[0], -1)
+    product = A.dot(flat_B)
+    out_shape = (A.shape[0], B.shape[1], B.shape[2])
+    return product.reshape(out_shape)
+
+def ijk_jlk_to_il(A, B):
+    """
+    Faster version of np.einsum('ijk,jlk->il', A, B).
+
+    :param A: 3-D array indexed (i, j, k)
+    :param B: 3-D array indexed (j, l, k)
+    :returns: new array of shape (A.shape[0], B.shape[1])
+    """
+    res = np.zeros((A.shape[0], B.shape[1]))
+    # One matrix product per slice of the shared last axis, accumulated
+    # in place. A plain loop is used because the work is a side effect.
+    for k in range(B.shape[-1]):
+        res += np.dot(A[:,:,k], B[:,:,k])
+    return res
--- a/GPy/util/linalg_cython.c
+++ b/GPy/util/linalg_cython.c
--- a/GPy/util/linalg_cython.pyx
+++ b/GPy/util/linalg_cython.pyx
@ -0,0 +1,34 @@
+cimport numpy as np
+from cpython cimport bool
+import cython
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+def symmetrify(np.ndarray[double, ndim=2] A, bool upper):
+    """
+    Make A symmetric IN PLACE by mirroring one triangle onto the other.
+
+    upper=False copies the lower triangle into the upper one; upper=True
+    copies the upper triangle into the lower one. Only A.shape[0] is read,
+    so A is assumed to be square.
+    """
+    cdef int N = A.shape[0]
+    # Typed loop indices let Cython compile these loops to plain C loops
+    # instead of iterating over Python integer objects.
+    cdef int i, j
+    if not upper:
+        for i in range(N):
+            for j in range(i):
+                A[j, i] = A[i, j]
+    else:
+        for j in range(N):
+            for i in range(j):
+                A[j, i] = A[i, j]
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+def cholupdate(np.ndarray[double, ndim=1] x, np.ndarray[double, ndim=2] L, int N):
+    """
+    Rank-one update of the LOWER cholesky factor L, IN PLACE.
+
+    If L is the lower cholesky factor of K, then on return L holds the
+    lower cholesky factor of K + x*x^T.
+
+    :param x: update vector; it is overwritten as scratch space, so pass
+        a copy if the caller still needs it
+    :param L: lower cholesky factor, modified in place
+    :param N: number of leading rows/columns to process
+    """
+    cdef double r
+    cdef double c
+    cdef double s
+    cdef int i, j
+    for j in range(N):
+        # NOTE(review): only `cimport numpy as np` is visible at the top of
+        # this module -- confirm that np.sqrt resolves here.
+        r = np.sqrt(L[j,j]*L[j,j] + x[j]*x[j])
+        c = r / L[j,j]
+        s = x[j] / L[j,j]
+        L[j,j] = r
+        # Update the entries of column j BELOW the diagonal (rows j+1..N-1);
+        # iterating over rows 0..j-1 would touch the upper triangle instead.
+        for i in range(j + 1, N):
+            L[i,j] = (L[i,j] + s*x[i])/c
+            x[i] = c*x[i] - s*L[i,j]
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@ -42,9 +42,6 @@ def chain_1(df_dg, dg_dx):
    """
    if np.all(dg_dx==1.):
        return df_dg
-    if len(df_dg) > 1 and len(df_dg.shape)>1 and df_dg.shape[-1] > 1:
-        #import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
-        raise NotImplementedError('Not implemented for matricies yet')
    return df_dg * dg_dx

 def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
@ -56,8 +53,6 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
    """
    if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
        return d2f_dg2
-    if  len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
-        raise NotImplementedError('Not implemented for matricies yet')
    dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2
    #dg_dx_2 = dg_dx**2
    return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
@ -71,11 +66,7 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
    """
    if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0):
        return d3f_dg3
-    if (  (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
-           or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
-        raise NotImplementedError('Not implemented for matricies yet')
    dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3
-    #dg_dx_3 = dg_dx**3
    return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3

 def opt_wrapper(m, **kwargs):
@ -133,10 +124,11 @@ def kmm_init(X, m = 10):
 ### make a parameter to its corresponding array:
 def param_to_array(*param):
    """
-Convert an arbitrary number of parameters to :class:ndarray class objects. This is for
-converting parameter objects to numpy arrays, when using scipy.weave.inline routine.
-In scipy.weave.blitz there is no automatic array detection (even when the array inherits
-from :class:ndarray)"""
+    Convert an arbitrary number of parameters to :class:ndarray class objects.
+    This is for converting parameter objects to numpy arrays, when using
+    scipy.weave.inline routine.  In scipy.weave.blitz there is no automatic
+    array detection (even when the array inherits from :class:ndarray)
+    """
    import warnings
    warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning)
    assert len(param) > 0, "At least one parameter needed"
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from GPy.core.parameterization import Parameterized, Param
+from ..core.parameterization import Parameterized, Param
 from ..core.parameterization.transformations import Logexp

 class WarpingFunction(Parameterized):
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -6,3 +6,6 @@ include *.cfg
 recursive-include doc *.cfg
 include *.json
 recursive-include doc *.json
+recursive-include GPy *.c
+recursive-include GPy *.so
+recursive-include GPy *.pyx
--- a/setup.py
+++ b/setup.py
@ -2,7 +2,8 @@
 # -*- coding: utf-8 -*-

 import os
-from setuptools import setup
+from setuptools import setup, Extension
+import numpy as np

 # Version number
 version = '0.6.1'
@ -10,6 +11,27 @@ version = '0.6.1'
 def read(fname):
    return open(os.path.join(os.path.dirname(__file__), fname)).read()

+#compile_flags = ["-march=native", '-fopenmp', '-O3', ]
+compile_flags = [ '-fopenmp', '-O3', ]
+
+ext_mods = [Extension(name='GPy.kern._src.stationary_cython',
+                      sources=['GPy/kern/_src/stationary_cython.c','GPy/kern/_src/stationary_utils.c'],
+                      include_dirs=[np.get_include()],
+                      extra_compile_args=compile_flags,
+                      extra_link_args = ['-lgomp']),
+            Extension(name='GPy.util.choleskies_cython',
+                      sources=['GPy/util/choleskies_cython.c'],
+                      include_dirs=[np.get_include()],
+                      extra_compile_args=compile_flags),
+            Extension(name='GPy.util.linalg_cython',
+                      sources=['GPy/util/linalg_cython.c'],
+                      include_dirs=[np.get_include()],
+                      extra_compile_args=compile_flags),
+            Extension(name='GPy.kern._src.coregionalize_cython',
+                      sources=['GPy/kern/_src/coregionalize_cython.c'],
+                      include_dirs=[np.get_include()],
+                      extra_compile_args=compile_flags)]
+
 setup(name = 'GPy',
      version = version,
      author = read('AUTHORS.txt'),
@ -18,6 +40,7 @@ setup(name = 'GPy',
      license = "BSD 3-clause",
      keywords = "machine-learning gaussian-processes kernels",
      url = "http://sheffieldml.github.com/GPy/",
+      ext_modules = ext_mods,
      packages = ["GPy.models",
                  "GPy.inference.optimization",
                  "GPy.inference.mcmc",