Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-09 20:12:38 +02:00)

Commit 4ca4916cc0: Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

28 changed files with 14715 additions and 197 deletions
@@ -5,7 +5,7 @@ import numpy
 from numpy.lib.function_base import vectorize
 from .lists_and_dicts import IntArrayDict
 from functools import reduce
-from transformations import Transformation
+from .transformations import Transformation

 def extract_properties_to_index(index, props):
     prop_index = dict()
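Note: this hunk, and several like it below, converts an implicit intra-package import into an explicit relative import; Python 3 dropped the implicit form, so `from transformations import Transformation` no longer resolves from inside the package. A minimal sketch of the two working spellings (the module path is only illustrative):

    # inside a package module such as GPy/core/parameterization/index_operations.py
    # implicit, Python 2 only:  from transformations import Transformation
    from .transformations import Transformation                            # explicit relative
    from GPy.core.parameterization.transformations import Transformation   # fully absolute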
@@ -440,7 +440,7 @@ class Indexable(Nameable, Updateable):
         log_j = 0.
         priored_indexes = np.hstack([i for p, i in self.priors.items()])
         for c,j in self.constraints.items():
-            if c is 'fixed':continue
+            if not isinstance(c, Transformation):continue
             for jj in j:
                 if jj in priored_indexes:
                     log_j += c.log_jacobian(x[jj])
@@ -457,6 +457,7 @@ class Indexable(Nameable, Updateable):
         #add in jacobian derivatives if transformed
         priored_indexes = np.hstack([i for p, i in self.priors.items()])
         for c,j in self.constraints.items():
+            if not isinstance(c, Transformation):continue
             for jj in j:
                 if jj in priored_indexes:
                     ret[jj] += c.log_jacobian_grad(x[jj])
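Note: the two hunks above make the log-Jacobian correction apply exactly when a constraint is a Transformation object. The term itself is the usual change-of-variables rule: when a prior sits on a transformed parameter y = f(x) but the optimiser works on x, log|dy/dx| must be added to the objective, and its derivative to the gradient. A self-contained toy sketch of that rule (not GPy code; the prior and transform are made up):

    import numpy as np

    log_prior = lambda y: -0.5 * y ** 2      # toy (unnormalised) log prior on y
    f = np.exp                               # transform y = f(x), keeps y positive
    log_jacobian = lambda x: x               # log|dy/dx| = log(exp(x)) = x

    def log_posterior_term(x):
        # optimising over x: the prior on y needs the Jacobian correction
        return log_prior(f(x)) + log_jacobian(x)

    def log_posterior_term_grad(x):
        y = f(x)
        # chain rule: dlog_prior/dy * dy/dx + dlog_jacobian/dx = -y*y + 1
        return -y * y + 1.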
@@ -6,10 +6,10 @@ import numpy; np = numpy
 import itertools
 from re import compile, _pattern_type
 from .param import ParamConcatenation
-from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
+from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing

 import logging
-from index_operations import ParameterIndexOperationsView
+from .index_operations import ParameterIndexOperationsView
 logger = logging.getLogger("parameters changed meta")

 class ParametersChangedMeta(type):
@@ -758,12 +758,12 @@ class DGPLVM_Lamda(Prior, Parameterized):
         self.sigma2 = sigma2
         # self.x = x
         self.lbl = lbl
         self.lamda = lamda
         self.classnum = lbl.shape[1]
         self.datanum = lbl.shape[0]
         self.x_shape = x_shape
         self.dim = x_shape[1]
         self.lamda = Param('lamda', np.diag(lamda))
         self.link_parameter(self.lamda)

     def get_class_label(self, y):
@@ -789,7 +789,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
         M_i = np.zeros((self.classnum, self.dim))
         for i in cls:
             # Mean of each class
             class_i = cls[i]
             M_i[i] = np.mean(class_i, axis=0)
         return M_i

@@ -899,8 +899,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
         #!!!!!!!!!!!!!!!!!!!!!!!!!!!
         #self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()

         xprime = x.dot(np.diagflat(self.lamda))
         x = xprime
         # print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
@@ -910,14 +910,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
         #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
         Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
         return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))

     # This function calculates derivative of the log of prior function
     def lnpdf_grad(self, x):
         x = x.reshape(self.x_shape)
         xprime = x.dot(np.diagflat(self.lamda))
         x = xprime
         # print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
@@ -934,7 +934,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
         #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
         Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
         Sb_inv_N_trans = np.transpose(Sb_inv_N)
         Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
         Sw_trans = np.transpose(Sw)
@@ -951,14 +951,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
         # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
         DPxprim_Dx = DPxprim_Dx.T

         DPxprim_Dlamda = DPx_Dx.dot(x)

         # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
         DPxprim_Dlamda = DPxprim_Dlamda.T

         self.lamda.gradient = np.diag(DPxprim_Dlamda)
         # print DPxprim_Dx
         return DPxprim_Dx


     # def frb(self, x):
@@ -1139,8 +1139,8 @@ class DGPLVM_T(Prior):
     # This function calculates log of our prior
     def lnpdf(self, x):
         x = x.reshape(self.x_shape)
         xprim = x.dot(self.vec)
         x = xprim
         # print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
@@ -1156,11 +1156,11 @@ class DGPLVM_T(Prior):

     # This function calculates derivative of the log of prior function
     def lnpdf_grad(self, x):
         x = x.reshape(self.x_shape)
         xprim = x.dot(self.vec)
         x = xprim
         # print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
         M_i = self.compute_Mi(cls)
         Sb = self.compute_Sb(cls, M_i, M_0)
@@ -35,12 +35,12 @@ class Transformation(object):
        """
        compute the log of the jacobian of f, evaluated at f(x)= model_param
        """
        raise NotImplementedError
    def log_jacobian_grad(self, model_param):
        """
        compute the drivative of the log of the jacobian of f, evaluated at f(x)= model_param
        """
        raise NotImplementedError
    def gradfactor(self, model_param, dL_dmodel_param):
        """ df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
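Note: the docstrings above define the contract for log_jacobian / log_jacobian_grad. A sketch of how a concrete subclass could satisfy it, using an exponential transform where model_param = f(x) = exp(x) (illustrative only, not the GPy implementation of any existing transform):

    import numpy as np
    from GPy.core.parameterization.transformations import Transformation

    class ExpTransform(Transformation):
        def f(self, opt_param):
            return np.exp(opt_param)
        def finv(self, model_param):
            return np.log(model_param)
        def log_jacobian(self, model_param):
            # |df/dx| = exp(x) = model_param, so log|df/dx| = log(model_param)
            return np.log(model_param)
        def log_jacobian_grad(self, model_param):
            # derivative of log(model_param) with respect to model_param
            return 1. / model_param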
@@ -45,17 +45,23 @@ class InferenceX(Model):
         super(InferenceX, self).__init__(name)
         self.likelihood = model.likelihood.copy()
         self.kern = model.kern.copy()
-        if model.kern.useGPU:
-            from ...models import SSGPLVM
-            if isinstance(model, SSGPLVM):
-                self.kern.GPU_SSRBF(True)
-            else:
-                self.kern.GPU(True)
+        # if model.kern.useGPU:
+        # from ...models import SSGPLVM
+        # if isinstance(model, SSGPLVM):
+        # self.kern.GPU_SSRBF(True)
+        # else:
+        # self.kern.GPU(True)
         from copy import deepcopy
         self.posterior = deepcopy(model.posterior)
         if hasattr(model, 'variational_prior'):
             self.uncertain_input = True
-            self.variational_prior = model.variational_prior.copy()
+            from ...models.ss_gplvm import IBPPrior
+            from ...models.ss_mrd import IBPPrior_SSMRD
+            if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
+                from ...core.parameterization.variational import SpikeAndSlabPrior
+                self.variational_prior = SpikeAndSlabPrior(pi=05,learnPi=False, group_spike=False)
+            else:
+                self.variational_prior = model.variational_prior.copy()
         else:
             self.uncertain_input = False
         if hasattr(model, 'inducing_inputs'):
@@ -147,9 +153,9 @@ class InferenceX(Model):
         from ...core.parameterization.variational import SpikeAndSlabPrior
         if isinstance(self.variational_prior, SpikeAndSlabPrior):
             # Update Log-likelihood
-            KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0])
+            KL_div = self.variational_prior.KL_divergence(self.X)
             # update for the KL divergence
-            self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0])
+            self.variational_prior.update_gradients_KL(self.X)
         else:
             # Update Log-likelihood
             KL_div = self.variational_prior.KL_divergence(self.X)
@@ -142,7 +142,7 @@ class opt_lbfgsb(Optimizer):

         #a more helpful error message is available in opt_result in the Error case
         if opt_result[2]['warnflag']==2:
-            self.status = 'Error' + opt_result[2]['task']
+            self.status = 'Error' + str(opt_result[2]['task'])

 class opt_simplex(Optimizer):
     def __init__(self, *args, **kwargs):
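Note: the str() wrapper guards against opt_result[2]['task'] not being a plain string; under Python 3, SciPy's L-BFGS-B typically reports the task diagnostic as a bytes object, which cannot be concatenated to a str. A tiny sketch of the failure mode (the message text here is made up):

    task = b'ABNORMAL_TERMINATION_IN_LNSRCH'   # shape of opt_result[2]['task'] on Python 3
    # status = 'Error' + task                  # TypeError: can only concatenate str (not "bytes") to str
    status = 'Error' + str(task)               # works on both Python 2 and 3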
@@ -19,5 +19,5 @@ from ._src.splitKern import SplitKern,DEtime
 from ._src.splitKern import DEtime as DiffGenomeKern


-from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
+from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

@@ -6,7 +6,7 @@ import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 from ...util.config import config # for assesing whether to use cython
-import coregionalize_cython
+from . import coregionalize_cython

 class Coregionalize(Kern):
     """
@@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
         if X2 is None:
             # TODO: make use of index_to_slices
             # FIXME: Broken as X is already sliced out
-            print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
+            print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
             values = np.unique(X[:,self.index_dim])
             slices = [X[:,self.index_dim]==i for i in values]
             [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
@@ -20,7 +20,6 @@ class RBF(Stationary):
     _support_GPU = True
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
-        self.psicomp = PSICOMP_RBF()
         if self.useGPU:
             self.psicomp = PSICOMP_RBF_GPU()
         else:
@@ -36,6 +35,7 @@ class RBF(Stationary):
         dc = super(RBF, self).__getstate__()
         if self.useGPU:
             dc['psicomp'] = PSICOMP_RBF()
+            dc['useGPU'] = False
         return dc

     def __setstate__(self, state):
@@ -13,7 +13,7 @@ from ...util.config import config # for assesing whether to use cython
 from ...util.caching import Cache_this

 try:
-    import stationary_cython
+    from . import stationary_cython
 except ImportError:
     print('warning in sationary: failed to import cython module: falling back to numpy')
     config.set('cython', 'working', 'false')
@@ -266,8 +266,8 @@ class Likelihood(Parameterized):
             stop

         if self.size:
-            dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
-            dF_dtheta = np.dot(dF_dtheta, gh_w)
+            dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None], Y_metadata=Y_metadata) # Ntheta x (orig size) x N_{quad_points}
+            dF_dtheta = np.dot(dF_dtheta, gh_w)/np.sqrt(np.pi)
             dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
         else:
             dF_dtheta = None # Not yet implemented
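Note: the added /np.sqrt(np.pi) is consistent with the Gauss-Hermite normalisation: with nodes and weights (x_i, w_i) for the weight function exp(-x^2), an expectation under N(mu, sigma^2) is approximated as (1/sqrt(pi)) * sum_i w_i * g(sqrt(2)*sigma*x_i + mu). A small numeric sketch of that identity (not GPy code):

    import numpy as np

    mu, sigma = 0.3, 0.7
    gh_x, gh_w = np.polynomial.hermite.hermgauss(20)   # nodes/weights for weight exp(-x**2)
    g = np.tanh                                        # any smooth integrand

    approx = np.dot(g(np.sqrt(2.) * sigma * gh_x + mu), gh_w) / np.sqrt(np.pi)

    # brute-force comparison against the Gaussian density on a fine grid
    f = np.linspace(-6., 6., 20001)
    dens = np.exp(-0.5 * ((f - mu) / sigma) ** 2) / (sigma * np.sqrt(2. * np.pi))
    print(approx, np.trapz(g(f) * dens, f))            # the two values should agree closely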
@@ -64,9 +64,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         self.logger.debug("creating inference_method var_dtc")
         inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])

-        if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
-            kernel.psicomp.GPU_direct = True
-
         super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
                                                     name=name, inference_method=inference_method,
                                                     normalizer=normalizer,
@@ -3,7 +3,7 @@

 import numpy as np
 from .. import kern
-from bayesian_gplvm import BayesianGPLVM
+from .bayesian_gplvm import BayesianGPLVM
 from ..core.parameterization.variational import NormalPosterior, NormalPrior

 class DPBayesianGPLVM(BayesianGPLVM):
@@ -15,5 +15,5 @@ class DPBayesianGPLVM(BayesianGPLVM):
                  name='bayesian gplvm', mpi_comm=None, normalizer=None,
                  missing_data=False, stochastic=False, batchsize=1):
         super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
         self.X.mean.set_prior(X_prior)
         self.link_parameter(X_prior)
@@ -16,6 +16,7 @@ class GPRegression(GP):
     :param Y: observed values
     :param kernel: a GPy kernel, defaults to rbf
     :param Norm normalizer: [False]
+    :param noise_var: the noise variance for Gaussian likelhood, defaults to 1.

         Normalize Y with the norm given.
         If normalizer is False, no normalization will be done
@@ -25,12 +26,12 @@

     """

-    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
+    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):

         if kernel is None:
             kernel = kern.RBF(X.shape[1])

-        likelihood = likelihoods.Gaussian()
+        likelihood = likelihoods.Gaussian(variance=noise_var)

         super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)

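Note: with the new keyword the initial Gaussian noise variance can be chosen at construction time instead of always starting at 1. A minimal usage sketch on synthetic data:

    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = np.sin(6. * X) + 0.05 * np.random.randn(50, 1)

    # noise_var seeds likelihoods.Gaussian(variance=noise_var); the variance is
    # still a free parameter unless it is fixed afterwards
    m = GPy.models.GPRegression(X, Y, kernel=GPy.kern.RBF(1), noise_var=0.05)
    m.optimize()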
@@ -228,14 +228,14 @@ class HessianChecker(GradientChecker):

         if verbose:
             if block_indices:
-                print "\nBlock {}".format(block_indices)
+                print("\nBlock {}".format(block_indices))
             else:
-                print "\nAll blocks"
+                print("\nAll blocks")

             header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
             header_string = map(lambda x: ' | '.join(header), [header])
             separator = '-' * len(header_string[0])
-            print '\n'.join([header_string[0], separator])
+            print('\n'.join([header_string[0], separator]))
             min_r = '%.6f' % float(numpy.min(ratio))
             max_r = '%.6f' % float(numpy.max(ratio))
             max_d = '%.6f' % float(numpy.max(difference))
@@ -248,7 +248,7 @@ class HessianChecker(GradientChecker):
                 checked = "\033[91m False \033[0m"

             grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
-            print grad_string
+            print(grad_string)

         if plot:
             import pylab as pb
@@ -348,7 +348,7 @@ class SkewChecker(HessianChecker):
         numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
         numeric_hess = numeric_hess_partial(x)

-        print "Done making numerical hessian"
+        print("Done making numerical hessian")
         if analytic_hess.dtype is np.dtype('object'):
             #Blockify numeric_hess aswell
             blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
@@ -365,7 +365,7 @@ class SkewChecker(HessianChecker):
             #Unless super_plot is set, just plot the first one
             p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
             if verbose:
-                print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
+                print("Checking derivative of hessian wrt parameter number {}".format(block_ind))
             check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)

             current_index += current_size
@@ -451,7 +451,7 @@ class mocap_data_show(matplotlib_show):
         self.initialize_axes_modify()
         self.draw_vertices()
         self.initialize_axes()
-        self.finalize_axes_modify()
+        #self.finalize_axes_modify()
         self.draw_edges()
         self.axes.figure.canvas.draw()

@@ -470,12 +470,20 @@ class mocap_data_show(matplotlib_show):
             self.line_handle[0].remove()

     def finalize_axes(self):
-        self.axes.set_xlim(self.x_lim)
-        self.axes.set_ylim(self.y_lim)
-        self.axes.set_zlim(self.z_lim)
-        self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
+        # self.axes.set_xlim(self.x_lim)
+        # self.axes.set_ylim(self.y_lim)
+        # self.axes.set_zlim(self.z_lim)
+        # self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])

-        # self.axes.set_aspect('equal')
+        extents = np.array([getattr(self.axes, 'get_{}lim'.format(dim))() for dim in 'xyz'])
+        sz = extents[:,1] - extents[:,0]
+        centers = np.mean(extents, axis=1)
+        maxsize = max(abs(sz))
+        r = maxsize/2
+        for ctr, dim in zip(centers, 'xyz'):
+            getattr(self.axes, 'set_{}lim'.format(dim))(ctr - r, ctr + r)
+
+        # self.axes.set_aspect('equal')
         # self.axes.autoscale(enable=False)

     def finalize_axes_modify(self):
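Note: the rewritten finalize_axes above replaces the hard-coded [-1, 1] cube with limits derived from the current data extents, so all three axes share one scale (3D axes in matplotlib of that era did not honour set_aspect('equal')). The same trick as a standalone matplotlib sketch (illustrative, not GPy code):

    import numpy as np
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, registers the 3d projection

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(np.random.randn(100), 3. * np.random.randn(100), 0.2 * np.random.randn(100))

    # force a cubic bounding box around whatever is currently plotted
    extents = np.array([getattr(ax, 'get_{}lim'.format(dim))() for dim in 'xyz'])
    centers = extents.mean(axis=1)
    r = (extents[:, 1] - extents[:, 0]).max() / 2.
    for ctr, dim in zip(centers, 'xyz'):
        getattr(ax, 'set_{}lim'.format(dim))(ctr - r, ctr + r)
    plt.show()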
@@ -49,7 +49,7 @@ class LinkFunctionTests(np.testing.TestCase):
         self.assertTrue(grad3.checkgrad(verbose=True))

         if test_lim:
-            print "Testing limits"
+            print("Testing limits")
             #Remove some otherwise we are too close to the limit for gradcheck to work effectively
             lim_of_inf = lim_of_inf - 1e-4
             grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
@@ -100,10 +100,10 @@ def block_dot(A, B, diagonal=False):
        Dshape = D.shape
        if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
           or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
-           print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
+           print("Broadcasting, C: {} D:{}".format(C.shape, D.shape))
            return C*D
        else:
-           print "Dotting, C: {} C:{}".format(C.shape, D.shape)
+           print("Dotting, C: {} C:{}".format(C.shape, D.shape))
            return np.dot(C,D)
    dot = np.vectorize(f, otypes = [np.object])
    return dot(A,B)
@@ -5,7 +5,7 @@ import numpy as np
 from . import linalg
 from .config import config

-import choleskies_cython
+from . import choleskies_cython

 def safe_root(N):
     i = np.sqrt(N)
@@ -59,12 +59,12 @@ def _backprop_gradient_pure(dL, L):
    """
    dL_dK = np.tril(dL).copy()
    N = L.shape[0]
-   for k in xrange(N - 1, -1, -1):
-       for j in xrange(k + 1, N):
-           for i in xrange(j, N):
+   for k in range(N - 1, -1, -1):
+       for j in range(k + 1, N):
+           for i in range(j, N):
                dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
                dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
-       for j in xrange(k + 1, N):
+       for j in range(k + 1, N):
            dL_dK[j, k] /= L[k, k]
            dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
        dL_dK[k, k] /= (2 * L[k, k])
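Note: _backprop_gradient_pure takes df/dL, the sensitivity of a scalar f with respect to the Cholesky factor L, and pushes it back to df/dK for K = L L^T (the comment in the new cholesky_backprop.c below states the same contract). A rough finite-difference sanity check of that contract, as a self-contained sketch (it restates the routine, and assumes, as numpy documents, that np.linalg.cholesky reads only the lower triangle of its argument):

    import numpy as np

    def backprop_gradient_pure(dL, L):
        # same algorithm as _backprop_gradient_pure above
        dL_dK = np.tril(dL).copy()
        N = L.shape[0]
        for k in range(N - 1, -1, -1):
            for j in range(k + 1, N):
                for i in range(j, N):
                    dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
                    dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
            for j in range(k + 1, N):
                dL_dK[j, k] /= L[k, k]
                dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
            dL_dK[k, k] /= (2 * L[k, k])
        return dL_dK

    rng = np.random.RandomState(0)
    A = rng.randn(4, 4)
    K = A.dot(A.T) + 4. * np.eye(4)                        # a positive definite test matrix
    L = np.linalg.cholesky(K)

    f = lambda M: np.sum(np.sin(np.linalg.cholesky(M)))    # scalar function of K through L
    df_dL = np.tril(np.cos(L))                             # analytic df/dL
    df_dK = backprop_gradient_pure(df_dL, L)               # backpropagated df/dK (lower triangle)

    i, j, eps = 2, 1, 1e-6                                 # check one lower-triangular entry
    K_eps = K.copy()
    K_eps[i, j] += eps
    print(df_dK[i, j], (f(K_eps) - f(K)) / eps)            # the two numbers should agree closely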
@@ -100,7 +100,7 @@ def indexes_to_fix_for_low_rank(rank, size):
 if config.getboolean('cython', 'working'):
     triang_to_flat = _triang_to_flat_cython
     flat_to_triang = _flat_to_triang_cython
-    backprop_gradient = choleskies_cython.backprop_gradient
+    backprop_gradient = choleskies_cython.backprop_gradient_par_c
 else:
     backprop_gradient = _backprop_gradient_pure
     triang_to_flat = _triang_to_flat_pure
(File diff suppressed because it is too large.)
@@ -5,6 +5,7 @@
 # Copyright James Hensman and Alan Saul 2015

 import numpy as np
+from cython.parallel import prange, parallel
 cimport numpy as np

 def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
@@ -57,3 +58,50 @@ def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2]
             dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
         dL_dK[k, k] /= (2. * L[k, k])
     return dL_dK
+
+def backprop_gradient_par(double[:,:] dL, double[:,:] L):
+    cdef double[:,:] dL_dK = np.tril(dL).copy()
+    cdef int N = L.shape[0]
+    cdef int k, j, i
+    for k in range(N - 1, -1, -1):
+        with nogil, parallel():
+            for i in prange(k + 1, N):
+                for j in range(k+1, i+1):
+                    dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
+                for j in range(i, N):
+                    dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
+        for j in range(k + 1, N):
+            dL_dK[j, k] /= L[k, k]
+            dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
+        dL_dK[k, k] /= (2. * L[k, k])
+    return dL_dK
+
+#here's a pure C version...
+cdef extern from "cholesky_backprop.h" nogil:
+    void chol_backprop(int N, double* dL, double* L)
+
+def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
+    cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
+    cdef int N = L.shape[0]
+    with nogil:
+        chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
+    return dL_dK
+
+cdef extern from "cholesky_backprop.h" nogil:
+    void old_chol_backprop(int N, double* dL, double* L)
+
+def backprop_gradient_par_c_old(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
+    cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
+    cdef int N = L.shape[0]
+    with nogil:
+        old_chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
+    return dL_dK
GPy/util/cholesky_backprop.c (new file, 51 lines)
@@ -0,0 +1,51 @@
+#include <cblas.h>
+void chol_backprop(int N, double* dL, double* L){
+    //at the input to this fn, dL is df_dL. after this fn is complet, dL is df_dK
+    int i,k;
+
+    dL[N*N - 1] /= (2. * L[N*N - 1]);
+    for(k=N-2;k>(-1);k--){
+        cblas_dsymv(CblasRowMajor, CblasLower,
+                    N-k-1, -1,
+                    &dL[(N*(k+1) + k+1)],N,
+                    &L[k*N+k+1],1,
+                    1, &dL[N*(k+1)+k], N);
+        for(i=0;i<(N-k-1); i++){
+            dL[N*(k+1+i)+k] -= dL[N*(k+1)+k+i*(N+1)+1] * L[k*N+k+1+i];
+        }
+
+        cblas_dscal(N-k-1, 1.0/L[k*N+k], &dL[(k+1)*N+k], N);
+        dL[k*N + k] -= cblas_ddot(N-k-1, &dL[(k+1)*N+k], N, &L[k*N+k+1], 1);
+        dL[k*N + k] /= (2.0 * L[k*N + k]);
+    }
+}
+
+double mydot(int n, double* a, int stride_a, double* b, int stride_b){
+    double ret = 0;
+    for(int i=0; i<n; i++){
+        ret += a[i*stride_a]*b[i*stride_b];
+    }
+    return ret;
+}
+void old_chol_backprop(int N, double* dL, double* U){
+    //at the input to this fn, dL is df_dL. after this fn is complet, dL is df_dK
+    int iN, kN,i,j,k;
+    dL[N*N-1] /= (2. * U[N*N-1]);
+    for(k=N-2;k>(-1);k--){
+        kN = k*N;
+        #pragma omp parallel for private(i,iN)
+        for(i=k+1; i<N; i++){
+            iN = i*N;
+            dL[iN+k] -= mydot(i-k, &dL[iN+k+1], 1, &U[kN+k+1], 1);
+            dL[iN+k] -= mydot(N-i, &dL[iN+i], N, &U[kN+i], 1);
+
+        }
+        for(i=(k + 1); i<N; i++){
+            iN = i*N;
+            dL[iN + k] /= U[kN + k];
+            dL[kN + k] -= U[kN + i] * dL[iN + k];
+        }
+        dL[kN + k] /= (2. * U[kN + k]);
+    }
+}
+
GPy/util/cholesky_backprop.h (new file, 5 lines)
@@ -0,0 +1,5 @@
+#include <cblas.h>
+
+void dsymv(int N, double*A, double*b, double*y);
+double mydot(int n, double* a, int stride_a, double* b, int stride_b);
+void chol_backprop(int N, double* dL, double* L);
@@ -15,7 +15,7 @@ import warnings
 import os
 from .config import config
 import logging
-import linalg_cython
+from . import linalg_cython


 _scipyversion = np.float64((scipy.__version__).split('.')[:2])
@@ -13,7 +13,6 @@ Continuous integration status: ,
                      extra_link_args = ['-lgomp']),
            Extension(name='GPy.util.choleskies_cython',
-                     sources=['GPy/util/choleskies_cython.c'],
+                     sources=['GPy/util/choleskies_cython.c', 'GPy/util/cholesky_backprop.c'],
                      include_dirs=[np.get_include()],
-                     extra_compile_args=compile_flags),
+                     extra_link_args = ['-lgomp', '-lblas'],
+                     extra_compile_args=compile_flags+['-std=c99']),
            Extension(name='GPy.util.linalg_cython',
                      sources=['GPy/util/linalg_cython.c'],
                      include_dirs=[np.get_include()],