Merge pull request #442 from SheffieldML/devel

New Major for GPy
2026-07-11 16:22:13 +02:00 · 2016-09-06 13:07:02 +01:00 · 2016-09-06 13:07:02 +01:00 · abf3c377bf
commit abf3c377bf
parent 2bb8161937 69f0ea1b11
35 changed files with 11791 additions and 79 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/GPy/version.py
+++ b/GPy/version.py
@ -1 +1 @@
-__version__ = "1.4.0"
+__version__ = "1.5.3"
--- a/GPy/core/init.py
+++ b/GPy/core/init.py
@ -8,6 +8,7 @@ from . import parameterization
 from .gp import GP
 from .svgp import SVGP
 from .sparse_gp import SparseGP
+from .gp_grid import GpGrid
 from .mapping import *


--- a/GPy/core/gp_grid.py
+++ b/GPy/core/gp_grid.py
@ -0,0 +1,116 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+#This implementation of Gaussian process regression for inputs on a grid is based on the article:
+
+#@article{Gilboa:2015,
+#  title={Scaling multidimensional inference for structured Gaussian processes},
+#  author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
+#  journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
+#  volume={37},
+#  number={2},
+#  pages={424--436},
+#  year={2015},
+#  publisher={IEEE}
+#}
+
+import numpy as np
+import scipy.linalg as sp
+from .gp import GP
+from .parameterization.param import Param
+from ..inference.latent_function_inference import gaussian_grid_inference
+from .. import likelihoods
+
+import logging
+from GPy.inference.latent_function_inference.posterior import Posterior
+logger = logging.getLogger("gp grid")
+
+class GpGrid(GP):
+    """
+    A GP model for Grid inputs
+
+    :param X: inputs
+    :type X: np.ndarray (num_data x input_dim)
+    :param Y: observed outputs
+    :param kernel: the kernel (covariance function). See link kernels
+    :type kernel: a GPy.kern.kern instance
+    :param likelihood: a likelihood instance; its ``variance`` attribute and
+        ``update_gradients`` method are used by this class
+    :param inference_method: inference object to use. When None, a
+        GaussianGridInference instance is created. Any replacement must return
+        the same (posterior, log marginal likelihood, gradient dict) triple
+        that parameters_changed and _raw_predict consume.
+    :param name: model name
+    :param Y_metadata: forwarded unchanged to GP
+    :param normalizer: forwarded unchanged to GP
+
+    """
+
+    def __init__(self, X, Y, kernel, likelihood, inference_method=None,
+                 name='gp grid', Y_metadata=None, normalizer=False):
+        # Only fall back to grid inference when the caller gave no method;
+        # the argument used to be overwritten unconditionally.
+        if inference_method is None:
+            inference_method = gaussian_grid_inference.GaussianGridInference()
+
+        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
+        self.posterior = None
+
+    def parameters_changed(self):
+        """
+        Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
+        It reruns inference, storing the posterior, the log marginal likelihood and the
+        gradient dictionary, and then pushes the gradients to the likelihood and the kernel.
+
+        .. warning::
+            This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
+            this method yourself, there may be unexpected consequences.
+        """
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
+        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
+        self.kern.update_gradients_direct(self.grad_dict['dL_dVar'], self.grad_dict['dL_dLen'])
+
+    def kron_mmprod(self, A, B):
+        """
+        Multiply the Kronecker product of the matrices in A with every column of B,
+        without forming the full Kronecker product.
+
+        :param A: sequence of per-dimension matrices; len(A[d]) is the grid size of dimension d
+        :param B: 2-d array with prod(len(A[d])) rows
+        :returns: array with one result column per column of B
+        """
+        D = len(A)
+        # Sizes are kept as plain ints: np.reshape rejects float dimensions.
+        G = [len(A[d]) for d in range(D)]
+        N = int(np.prod(G))
+
+        columns = []
+        for b in B.T:
+            x = b
+            for d in range(D-1, -1, -1):
+                X = np.reshape(x, (G[d], N // G[d]), order='F')
+                Z = np.dot(A[d], X).T
+                x = np.reshape(Z, (-1, 1), order='F')
+            columns.append(x)
+
+        if not columns:
+            # B had no columns; return an empty result instead of failing.
+            return np.zeros((N, 0))
+        return np.column_stack(columns)
+
+    def _raw_predict(self, Xnew, full_cov=False, kern=None):
+        """
+        Make a prediction for the latent function values
+
+        :param Xnew: points at which to predict
+        :param full_cov: when True return the full predictive covariance matrix,
+            otherwise only its diagonal as a column vector
+        :param kern: kernel to use for the cross-covariances; defaults to self.kern
+        :returns: tuple (mean as a column vector, variance or covariance)
+        """
+        if kern is None:
+            kern = self.kern
+
+        # compute mean predictions
+        Kmn = kern.K(Xnew, self.X)
+        mu = np.dot(Kmn, self.posterior.alpha).reshape(-1, 1)
+
+        # apply Q * diag(1 / (V + noise)) * Q^T to Knm using the stored
+        # per-dimension eigenvectors and the Kronecker product of eigenvalues
+        noise = self.likelihood.variance
+        V_kron = self.posterior.V_kron.reshape(-1, 1)
+        A = self.kron_mmprod(self.posterior.QTs, Kmn.T)
+        A = A / (V_kron + noise)
+        A = self.kron_mmprod(self.posterior.Qs, A)
+
+        cov = kern.K(Xnew) - np.dot(Kmn, A)
+        if full_cov:
+            return mu, cov
+
+        var = np.diag(cov).copy().reshape(-1, 1)
+        return mu, var
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -1309,3 +1309,52 @@ class Exponential(Prior):

    def rvs(self, n):
        return np.random.exponential(scale=self.l, size=n)
+
+class StudentT(Prior):
+    """
+    Implementation of the student t probability function, coupled with random variables.
+
+    :param mu: location (passed as ``loc`` to scipy.stats.t)
+    :param sigma: scale (passed as ``scale`` to scipy.stats.t)
+    :param nu: degrees of freedom
+
+    .. Note:: Bishop 2006 notation is used throughout the code
+
+    """
+    domain = _REAL
+    _instances = []
+
+    def __new__(cls, mu=0, sigma=1, nu=4):  # Singleton:
+        # Reuse a still-alive instance with identical parameters, if any.
+        if cls._instances:
+            # drop weak references whose target has been garbage collected
+            cls._instances[:] = [instance for instance in cls._instances if instance()]
+            for instance in cls._instances:
+                if instance().mu == mu and instance().sigma == sigma and instance().nu == nu:
+                    return instance()
+        newfunc = super(Prior, cls).__new__
+        if newfunc is object.__new__:
+            o = newfunc(cls)
+        else:
+            o = newfunc(cls, mu, sigma, nu)
+        cls._instances.append(weakref.ref(o))
+        return cls._instances[-1]()
+
+    def __init__(self, mu=0, sigma=1, nu=4):
+        # Defaults mirror __new__, so StudentT() no longer fails with a
+        # missing-argument TypeError after __new__ has succeeded.
+        self.mu = float(mu)
+        self.sigma = float(sigma)
+        self.sigma2 = np.square(self.sigma)
+        self.nu = float(nu)
+
+    def __str__(self):
+        return "St({:.2g}, {:.2g}, {:.2g})".format(self.mu, self.sigma, self.nu)
+
+    def lnpdf(self, x):
+        """Log density at x, evaluated with scipy.stats.t.logpdf."""
+        from scipy.stats import t
+        return t.logpdf(x,self.nu,self.mu,self.sigma)
+
+    def lnpdf_grad(self, x):
+        """Derivative of the log density with respect to x."""
+        return -(self.nu + 1.)*(x - self.mu)/( self.nu*self.sigma2 + np.square(x - self.mu) )
+
+    def rvs(self, n):
+        """Draw n samples with scipy.stats.t.rvs."""
+        from scipy.stats import t
+        ret = t.rvs(self.nu, loc=self.mu, scale=self.sigma, size=n)
+        return ret
--- a/GPy/inference/latent_function_inference/init.py
+++ b/GPy/inference/latent_function_inference/init.py
@ -69,6 +69,8 @@ from .dtc import DTC
 from .fitc import FITC
 from .var_dtc_parallel import VarDTC_minibatch
 from .var_gauss import VarGauss
+from .gaussian_grid_inference import GaussianGridInference
+

 # class FullLatentFunctionData(object):
 #
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@ -21,7 +21,7 @@ class ExactGaussianInference(LatentFunctionInference):
    def __init__(self):
        pass#self._YYTfactor_cache = caching.cache()

-    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None, Z_tilde=None):
+    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, variance=None, Z_tilde=None):
        """
        Returns a Posterior class containing essential quantities of the posterior
        """
@ -31,8 +31,8 @@ class ExactGaussianInference(LatentFunctionInference):
        else:
            m = mean_function.f(X)

-        if precision is None:
-            precision = likelihood.gaussian_variance(Y_metadata)
+        if variance is None:
+            variance = likelihood.gaussian_variance(Y_metadata)

        YYT_factor = Y-m

@ -40,7 +40,7 @@ class ExactGaussianInference(LatentFunctionInference):
            K = kern.K(X)

        Ky = K.copy()
-        diag.add(Ky, precision+1e-8)
+        diag.add(Ky, variance+1e-8)

        Wi, LW, LWi, W_logdet = pdinv(Ky)

--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@ -66,7 +66,7 @@ class EP(EPBase, ExactGaussianInference):
            #if we've already run EP, just use the existing approximation stored in self._ep_approximation
            mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation

-        return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())
+        return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, variance=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())

    def expectation_propagation(self, K, Y, likelihood, Y_metadata):

--- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py
+++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
@ -0,0 +1,114 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+# This implementation of Gaussian process regression for inputs on a grid is based on the article:
+
+#@article{Gilboa:2015,
+#  title={Scaling multidimensional inference for structured Gaussian processes},
+#  author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
+#  journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
+#  volume={37},
+#  number={2},
+#  pages={424--436},
+#  year={2015},
+#  publisher={IEEE}
+#}
+
+from .grid_posterior import GridPosterior
+import numpy as np
+from . import LatentFunctionInference
+log_2_pi = np.log(2*np.pi)
+
+class GaussianGridInference(LatentFunctionInference):
+    """
+    An object for inference when the likelihood is Gaussian and inputs are on a grid.
+
+    The function self.inference returns a GridPosterior object, which summarizes
+    the posterior.
+
+    """
+    def __init__(self):
+        pass
+
+    def kron_mvprod(self, A, b):
+        """
+        Multiply the Kronecker product of the matrices in A with the vector b,
+        without forming the full Kronecker product.
+
+        :param A: sequence of per-dimension matrices; len(A[d]) is the grid size of dimension d
+        :param b: array with prod(len(A[d])) entries
+        :returns: the product as a column vector
+        """
+        D = len(A)
+        # Sizes are kept as plain ints: np.reshape rejects float dimensions.
+        G = [len(A[d]) for d in range(D)]
+        N = int(np.prod(G))
+
+        x = b
+        for d in range(D-1, -1, -1):
+            X = np.reshape(x, (G[d], N // G[d]), order='F')
+            Z = np.dot(A[d], X).T
+            x = np.reshape(Z, (-1, 1), order='F')
+        return x
+
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
+        """
+        Returns a GridPosterior class containing essential quantities of the posterior
+
+        :param kern: kernel providing get_one_dimensional_kernel and one lengthscale per input dimension
+        :param X: inputs (num_data x input_dim).
+            NOTE(review): assumed to form a full grid whose row order matches the
+            sorted per-dimension values -- confirm against callers
+        :param likelihood: likelihood whose ``variance`` is used as the noise level
+        :param Y: observed outputs
+        :param Y_metadata: not used by this method
+        :returns: tuple (GridPosterior, log marginal likelihood,
+            dict with keys 'dL_dLen', 'dL_dVar' and 'dL_dthetaL')
+        """
+        N = X.shape[0] #number of training points
+        D = X.shape[1] #number of dimensions
+
+        Qs = np.zeros(D, dtype=object) #vector for holding eigenvectors of covariance per dimension
+        QTs = np.zeros(D, dtype=object) #vector for holding transposed eigenvectors of covariance per dimension
+        V_kron = 1 # kronecker product of eigenvalues
+
+        # retrieve the one-dimensional variation of the designated kernel
+        oneDkernel = kern.get_one_dimensional_kernel(D)
+
+        # Unique values per dimension, computed once. np.unique returns them
+        # sorted, which makes the grid order deterministic (set() did not).
+        grids = [np.unique(np.asarray(X[:, d])).reshape(-1, 1) for d in range(D)]
+
+        for d in range(D):
+            oneDkernel.lengthscale = kern.lengthscale[d]
+            Kd = oneDkernel.K(grids[d])
+            # The covariance matrix is symmetric: eigh returns real eigenvalues
+            # and orthonormal eigenvectors, so Q.T really is the inverse of Q.
+            V, Q = np.linalg.eigh(Kd)
+            V_kron = np.kron(V_kron, V)
+            Qs[d] = Q
+            QTs[d] = Q.T
+
+        noise = likelihood.variance + 1e-8
+
+        alpha_kron = self.kron_mvprod(QTs, Y)
+        V_kron = V_kron.reshape(-1, 1)
+        alpha_kron = alpha_kron / (V_kron + noise)
+        alpha_kron = self.kron_mvprod(Qs, alpha_kron)
+
+        log_likelihood = -0.5 * (np.dot(Y.T, alpha_kron) + np.sum((np.log(V_kron + noise))) + N*log_2_pi)
+
+        # compute derivatives wrt parameters Theta:
+        # entries 0..D-1 are the lengthscales, D is the kernel variance, D+1 the noise
+        derivs = np.zeros(D+2, dtype='object')
+        for t in range(len(derivs)):
+            dKd_dTheta = np.zeros(D, dtype='object')
+            gam = 1
+            for d in range(D):
+                xg = grids[d]
+                oneDkernel.lengthscale = kern.lengthscale[d]
+                if t < D:
+                    dKd_dTheta[d] = oneDkernel.dKd_dLen(xg, (t==d), lengthscale=kern.lengthscale[t]) #derivative wrt lengthscale
+                elif (t == D):
+                    dKd_dTheta[d] = oneDkernel.dKd_dVar(xg) #derivative wrt variance
+                else:
+                    dKd_dTheta[d] = np.identity(len(xg)) #derivative wrt noise
+                gamma_d = np.diag(np.dot(np.dot(QTs[d], dKd_dTheta[d].T), Qs[d]))
+                gam = np.kron(gam, gamma_d)
+
+            gam = gam.reshape(-1,1)
+            kappa = self.kron_mvprod(dKd_dTheta, alpha_kron)
+            derivs[t] = 0.5*np.dot(alpha_kron.T,kappa) - 0.5*np.sum(gam / (V_kron + noise))
+
+        # separate derivatives
+        dL_dLen = derivs[:D]
+        dL_dVar = derivs[D]
+        dL_dThetaL = derivs[D+1]
+
+        return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), \
+                log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL}
--- a/GPy/inference/latent_function_inference/grid_posterior.py
+++ b/GPy/inference/latent_function_inference/grid_posterior.py
@ -0,0 +1,62 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+
+class GridPosterior(object):
+    """
+    Container for the quantities produced by grid regression inference.
+
+    It represents a Gaussian posterior over latent function values, p(f|D),
+    and serves as the interface between the inference scheme and the model
+    class: the values given at construction are exposed as read-only
+    properties.
+
+    """
+    def __init__(self, alpha_kron=None, QTs=None, Qs=None, V_kron=None):
+        """
+        alpha_kron : value exposed through the ``alpha`` property
+        QTs : transpose of eigen vectors resulting from decomposition of single dimension covariance matrices
+        Qs : eigen vectors resulting from decomposition of single dimension covariance matrices
+        V_kron : kronecker product of eigenvalues resulting from decomposition of single dimension covariance matrices
+
+        Raises ValueError when any of the four arguments is None.
+        """
+        supplied = (alpha_kron, QTs, Qs, V_kron)
+        if any(item is None for item in supplied):
+            # all four quantities are needed to compute predictions
+            raise ValueError("insufficient information for predictions")
+
+        self._alpha_kron = alpha_kron
+        self._qTs = QTs
+        self._qs = Qs
+        self._v_kron = V_kron
+
+    @property
+    def alpha(self):
+        """
+        the alpha_kron value given at construction
+        """
+        return self._alpha_kron
+
+    @property
+    def QTs(self):
+        """
+        array of transposed eigenvectors, one entry per single dimension covariance
+        """
+        return self._qTs
+
+    @property
+    def Qs(self):
+        """
+        array of eigenvectors, one entry per single dimension covariance
+        """
+        return self._qs
+
+    @property
+    def V_kron(self):
+        """
+        kronecker product of the eigenvalues
+        """
+        return self._v_kron
+    
--- a/GPy/kern/init.py
+++ b/GPy/kern/init.py
@ -33,6 +33,7 @@ from .src.splitKern import SplitKern,DEtime
 from .src.splitKern import DEtime as DiffGenomeKern
 from .src.spline import Spline
 from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
+from .src.grid_kerns import GridRBF

 from .src.sde_matern import sde_Matern32
 from .src.sde_matern import sde_Matern52
--- a/GPy/kern/src/basis_funcs.py
+++ b/GPy/kern/src/basis_funcs.py
@ -15,6 +15,7 @@ class BasisFuncKernel(Kern):
        This class does NOT automatically add an offset to the design matrix phi!
        """
        super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
+        assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
        self.ARD = ARD
        if self.ARD:
            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
@ -60,6 +61,11 @@ class BasisFuncKernel(Kern):
            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta

    def concatenate_offset(self, X):
+        """
+        Convenience function to add an offset column to phi.
+        You can use this function to add an offset (bias on y axis)
+        to phi in your custom self._phi(X).
+        """
        return np.c_[np.ones((X.shape[0], 1)), X]

    def posterior_inf(self, X=None, posterior=None):
@ -120,6 +126,12 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel):
        return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.

 class ChangePointBasisFuncKernel(BasisFuncKernel):
+    """
+    The basis function has a changepoint. That is, it is constant, jumps at a
+    single point (given as changepoint) and is constant again. You can
+    give multiple changepoints. The changepoints are calculated using
+    np.where((X < self.changepoint), -1, 1)
+    """
    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
        self.changepoint = np.array(changepoint)
        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@ -129,6 +141,11 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
        return np.where((X < self.changepoint), -1, 1)

 class DomainKernel(LinearSlopeBasisFuncKernel):
+    """
+    Create a constant plateau of correlation between start and stop and zero
+    elsewhere. This is a constant shift of the outputs along the yaxis
+    in the range from start to stop.
+    """
    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)

@ -138,19 +155,25 @@ class DomainKernel(LinearSlopeBasisFuncKernel):
        return phi#((phi-self.start)/(self.stop-self.start))-.5

 class LogisticBasisFuncKernel(BasisFuncKernel):
+    """
+    Create a series of logistic basis functions with centers given. The
+    slope gets computed by datafit. The number of centers determines the
+    number of logistic functions.
+    """
    def __init__(self, input_dim, centers, variance=1., slope=1., active_dims=None, ARD=False, ARD_slope=True, name='logistic'):
        self.centers = np.atleast_2d(centers)
+        if ARD:
+            assert ARD_slope, "If we have one variance per center, we want also one slope per center."
        self.ARD_slope = ARD_slope
        if self.ARD_slope:
-            self.slope = Param('slope', slope * np.ones(self.centers.size), Logexp())
+            self.slope = Param('slope', slope * np.ones(self.centers.size))
        else:
-            self.slope = Param('slope', slope, Logexp())
+            self.slope = Param('slope', slope)
        super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
        self.link_parameter(self.slope)

    @Cache_this(limit=3, ignore_args=())
    def _phi(self, X):
-        import scipy as sp
        phi = 1/(1+np.exp(-((X-self.centers)*self.slope)))
        return np.where(np.isnan(phi), 0, phi)#((phi-self.start)/(self.stop-self.start))-.5

@ -167,7 +190,7 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
            if self.ARD_slope:
                self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
            else:
-                self.slope.gradient = self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum()
+                self.slope.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum())
        else:
            phi1 = self.phi(X)
            phi2 = self.phi(X2)
@ -179,5 +202,5 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
            if self.ARD_slope:
                self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
            else:
-                self.slope.gradient = self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum()
+                self.slope.gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum())
        self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
--- a/GPy/kern/src/grid_kerns.py
+++ b/GPy/kern/src/grid_kerns.py
@ -0,0 +1,76 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+from .stationary import Stationary
+from paramz.caching import Cache_this
+
+
+class GridKern(Stationary):
+	"""
+	Stationary kernel extended with the derivative entry points used by
+	Gaussian grid regression. Subclasses supply dKdVar_of_r and dKdLen_of_r.
+	"""
+
+	def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=False):
+		super(GridKern, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
+		self.originalDimensions = originalDimensions
+
+	@Cache_this(limit=3, ignore_args=())
+	def dKd_dVar(self, X, X2=None):
+		"""
+		Derivative of the kernel function wrt variance for inputs X and X2.
+
+		The scaled distance r between X and X2 is computed first and then
+		handed to dKdVar_of_r:
+
+		dKd_dVar(X, X2) = dKdVar_of_r(_scaled_dist(X, X2))
+		"""
+		return self.dKdVar_of_r(self._scaled_dist(X, X2))
+
+	@Cache_this(limit=3, ignore_args=())
+	def dKd_dLen(self, X, dimension, lengthscale, X2=None):
+		"""
+		Derivative of the kernel function wrt lengthscale for inputs X and X2.
+
+		The scaled distance r between X and X2 is computed first and then
+		handed, together with dimension and lengthscale, to dKdLen_of_r:
+
+		dKd_dLen(X, dimension, lengthscale, X2) = dKdLen_of_r(_scaled_dist(X, X2), dimension, lengthscale)
+		"""
+		scaled_r = self._scaled_dist(X, X2)
+		return self.dKdLen_of_r(scaled_r, dimension, lengthscale)
+
+class GridRBF(GridKern):
+	"""
+	Radial Basis Function kernel (aka squared-exponential, exponentiated quadratic
+	or Gaussian kernel) supplemented with the methods required for Gaussian grid
+	regression. The regular RBF form is
+
+	.. math::
+
+	   k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
+
+	Here the variance enters raised to the power 1/originalDimensions.
+	"""
+	_support_GPU = True
+	def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='gridRBF', originalDimensions=1, useGPU=False):
+		super(GridRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=useGPU)
+
+	def K_of_r(self, r):
+		"""
+		Kernel value as a function of the scaled distance r
+		"""
+		amplitude = self.variance**(1.0/self.originalDimensions)
+		return amplitude * np.exp(-0.5 *  r**2)
+
+	def dKdVar_of_r(self, r):
+		"""
+		Per-dimension factor for the derivative of the kernel wrt variance
+		"""
+		return np.exp(-0.5 * r**2)
+
+	def dKdLen_of_r(self, r, dimCheck, lengthscale):
+		"""
+		Per-dimension factor for the derivative of the kernel wrt lengthscale.
+		An extra r**2 factor is applied when dimCheck equals True, i.e. when the
+		lengthscale belongs to the dimension whose derivative is being computed.
+		"""
+		root = 1.0/self.originalDimensions
+		value = (self.variance**root) * np.exp(-0.5 * r**2)
+		# comparison kept as "== True" so non-bool inputs behave as before
+		if dimCheck == True:
+			value = value * (r**2)
+		return value / (lengthscale**root)
+
+	def dK_dr(self, r):
+		"""
+		Derivative of K_of_r wrt r
+		"""
+		return -r*self.K_of_r(r)
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@ -49,7 +49,7 @@ class Kern(Parameterized):
        if active_dims is None:
            active_dims = np.arange(input_dim)

-        self.active_dims = np.asarray(active_dims, np.int_)
+        self.active_dims = np.atleast_1d(np.asarray(active_dims, np.int_))

        self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)

--- a/GPy/kern/src/kernel_slice_operations.py
+++ b/GPy/kern/src/kernel_slice_operations.py
@ -20,6 +20,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
    def __new__(cls, name, bases, dct):
        put_clean(dct, 'K', _slice_K)
        put_clean(dct, 'Kdiag', _slice_Kdiag)
+        put_clean(dct, 'phi', _slice_Kdiag)
        put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
        put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
        put_clean(dct, 'gradients_X', _slice_gradients_X)
--- a/GPy/kern/src/rbf.py
+++ b/GPy/kern/src/rbf.py
@ -7,6 +7,7 @@ from .stationary import Stationary
 from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU
 from ...core import Param
 from paramz.transformations import Logexp
+from .grid_kerns import GridRBF

 class RBF(Stationary):
    """
@ -60,6 +61,14 @@ class RBF(Stationary):
        if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
        super(RBF,self).parameters_changed()

+
+    def get_one_dimensional_kernel(self, dim):
+        """
+        Build the single-dimension kernel used by Grid regression.
+
+        :param dim: forwarded to GridRBF as ``originalDimensions``
+        :returns: a GridRBF with input_dim=1 and a copy of this kernel's variance
+        """
+        return GridRBF(input_dim=1, variance=self.variance.copy(), originalDimensions=dim)
+
    #---------------------------------------#
    #             PSI statistics            #
    #---------------------------------------#
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@ -52,8 +52,8 @@ class Stationary(Kern):
    The lengthscale(s) and variance parameters are added to the structure automatically.

    Thanks to @strongh:
-    In Stationary, a covariance function is defined in GPy as stationary when it depends only on the l2-norm |x_1 - x_2 |. 
-    However this is the typical definition of isotropy, while stationarity is usually a bit more relaxed. 
+    In Stationary, a covariance function is defined in GPy as stationary when it depends only on the l2-norm |x_1 - x_2 |.
+    However this is the typical definition of isotropy, while stationarity is usually a bit more relaxed.
    The more common version of stationarity is that the covariance is a function of x_1 - x_2 (See e.g. R&W first paragraph of section 4.1).
    """

@ -198,6 +198,16 @@ class Stationary(Kern):
            self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale


+    def update_gradients_direct(self, dL_dVar, dL_dLen):
+        """
+        Store gradients that have already been computed elsewhere.
+
+        Specially intended for the Grid regression case, where the log
+        likelihood derivatives are available before this call.
+
+        :param dL_dVar: value assigned to the gradient of ``variance``
+        :param dL_dLen: value assigned to the gradient of ``lengthscale``
+        """
+        self.lengthscale.gradient = dL_dLen
+        self.variance.gradient = dL_dVar
+
    def _inv_dist(self, X, X2=None):
        """
        Compute the elementwise inverse of the distance matrix, expecpt on the
@ -319,6 +329,15 @@ class Stationary(Kern):
    def input_sensitivity(self, summarize=True):
        return self.variance*np.ones(self.input_dim)/self.lengthscale**2

+    def get_one_dimensional_kernel(self, dimensions):
+        """
+        Hook for the grid regression case, to be overridden by subclasses.
+
+        An implementation returns the single-dimension counterpart of this
+        covariance kernel; the base version always raises NotImplementedError.
+
+        :param dimensions: not used by the base implementation
+        """
+        raise NotImplementedError("implement one dimensional variation of kernel")
+



--- a/GPy/likelihoods/binomial.py
+++ b/GPy/likelihoods/binomial.py
@ -63,14 +63,17 @@ class Binomial(Likelihood):
        :rtype: float
        """
        N = Y_metadata['trials']
+        assert N.shape == y.shape
        nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
-
        return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)

    def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
        """
        Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.

+        .. math::
+            \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f)} - \\frac{(N-y_{i})}{(1-\\lambda(f))}
+
        :param inv_link_f: latent variables inverse link of f.
        :type inv_link_f: Nx1 array
        :param y: data
@ -80,7 +83,8 @@ class Binomial(Likelihood):
        :rtype: Nx1 array
        """
        N = Y_metadata['trials']
-        return y/inv_link_f - (N-y)/(1-inv_link_f)
+        assert N.shape == y.shape
+        return y/inv_link_f - (N-y)/(1.-inv_link_f)

    def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
        """
@ -89,7 +93,7 @@ class Binomial(Likelihood):


        .. math::
-            \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
+            \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(N-y_{i})}{(1-\\lambda(f))^{2}}

        :param inv_link_f: latent variables inverse link of f.
        :type inv_link_f: Nx1 array
@ -104,7 +108,32 @@ class Binomial(Likelihood):
            (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
        """
        N = Y_metadata['trials']
-        return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f)
+        assert N.shape == y.shape
+        return -y/np.square(inv_link_f) - (N-y)/np.square(1.-inv_link_f)
+
+    def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
+        """
+        Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
+
+        .. math::
+            \\frac{d^{3}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{3}} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(N-y_{i})}{(1-\\lambda(f))^{3}}
+
+        :param inv_link_f: latent variables inverse link of f.
+        :type inv_link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param Y_metadata: dictionary whose 'trials' entry holds the number of trials N, with the same shape as y
+        :returns: third derivative of the log likelihood evaluated at points inverse link of f.
+        :rtype: Nx1 array
+
+        .. Note::
+            Only the per-point derivative is returned, as the likelihood factorizes over cases
+            (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
+        """
+        N = Y_metadata['trials']
+        assert N.shape == y.shape
+        return 2*y/inv_link_f**3 - 2*(N-y)/(1.-inv_link_f)**3

    def samples(self, gp, Y_metadata=None, **kw):
        """
--- a/GPy/models/init.py
+++ b/GPy/models/init.py
@ -22,9 +22,7 @@ from .gp_var_gauss import GPVariationalGaussianApproximation
 from .one_vs_all_classification import OneVsAllClassification
 from .one_vs_all_sparse_classification import OneVsAllSparseClassification
 from .dpgplvm import DPBayesianGPLVM
-
 from .state_space_model import StateSpace
-
 from .ibp_lfm import IBPLFM
-
 from .gp_offset_regression import GPOffsetRegression
+from .gp_grid_regression import GPRegressionGrid
--- a/GPy/models/gp_grid_regression.py
+++ b/GPy/models/gp_grid_regression.py
@ -0,0 +1,36 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+from ..core import GpGrid
+from .. import likelihoods
+from .. import kern
+
+class GPRegressionGrid(GpGrid):
+    """
+    Gaussian Process model for grid inputs using Kronecker products
+
+    This is a thin wrapper around the core GpGrid class: it always uses a
+    Gaussian likelihood and supplies a default kernel when none is given.
+
+    :param X: input observations
+    :param Y: observed values
+    :param kernel: a GPy kernel, defaults to kern.RBF(1)
+    :param Y_metadata: passed through unchanged to GpGrid
+    :param Norm normalizer: [None]
+
+        Passed through unchanged to GpGrid, which decides whether and how
+        Y is normalized.
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
+
+    """
+
+    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
+        # no other kernels implemented so far
+        grid_kernel = kern.RBF(1) if kernel is None else kernel
+        noise_model = likelihoods.Gaussian()
+
+        super(GPRegressionGrid, self).__init__(
+            X, Y, grid_kernel, noise_model,
+            name='GP Grid regression',
+            Y_metadata=Y_metadata,
+            normalizer=normalizer)
+
--- a/GPy/models/gp_kronecker_gaussian_regression.py
+++ b/GPy/models/gp_kronecker_gaussian_regression.py
@ -30,6 +30,7 @@ class GPKroneckerGaussianRegression(Model):
    """
    def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
        Model.__init__(self, name=name)
+
        # accept the construction arguments
        self.X1 = ObsAr(X1)
        self.X2 = ObsAr(X2)
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -195,11 +195,11 @@ class MRD(BayesianGPLVMMiniBatch):
            fracs = [fracs]*len(Ylist)
        elif init in "PCA_single":
            X = np.zeros((Ylist[0].shape[0], self.input_dim))
-            fracs = []
+            fracs = np.empty((len(Ylist), self.input_dim))
            for qs, Y in zip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
-                x,frcs = initialize_latent('PCA', len(qs), Y)
+                x, frcs = initialize_latent('PCA', len(qs), Y)
                X[:, qs] = x
-                fracs.append(frcs)
+                fracs[:, qs] = frcs
        else: # init == 'random':
            X = np.random.randn(Ylist[0].shape[0], self.input_dim)
            fracs = X.var(0)
@ -208,9 +208,7 @@ class MRD(BayesianGPLVMMiniBatch):
        X /= X.std()
        return X, fracs

-    def _init_Z(self, init="permute", X=None):
-        if X is None:
-            X = self.X
+    def _init_Z(self, init, X):
        if init in "permute":
            Z = np.random.permutation(X.copy())[:self.num_inducing]
        elif init in "random":
--- a/GPy/testing/baseline/sparse_gp_class_likelihood.npz
+++ b/GPy/testing/baseline/sparse_gp_class_likelihood.npz
--- a/GPy/testing/baseline/sparse_gp_class_raw.npz
+++ b/GPy/testing/baseline/sparse_gp_class_raw.npz
--- a/GPy/testing/baseline/sparse_gp_class_raw_link.npz
+++ b/GPy/testing/baseline/sparse_gp_class_raw_link.npz
--- a/GPy/testing/baseline/sparse_gp_data_error.npz
+++ b/GPy/testing/baseline/sparse_gp_data_error.npz
--- a/GPy/testing/grid_tests.py
+++ b/GPy/testing/grid_tests.py
@ -0,0 +1,51 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import unittest
+import numpy as np
+import GPy
+
+class GridModelTest(unittest.TestCase):
+    # Compares GPRegressionGrid against plain GPRegression on a small
+    # 3 dimensional grid: both models are built from the same data and from
+    # identically configured (but separate) ARD RBF kernels.
+
+    def setUp(self):
+        # inputs: every point of the 2x2x2 grid over {0, 1}, last column varying fastest
+        corners = (0, 1)
+        self.X = np.array([[a, b, c] for a in corners for b in corners for c in corners])
+        # outputs: one random column of targets, drawn afresh for every test
+        self.Y = np.random.randn(8, 1) * 100
+        self.dim = self.X.shape[1]
+
+    def _build_models(self):
+        # Returns (grid model, reference model, grid kernel, reference kernel)
+        grid_kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
+        grid_model = GPy.models.GPRegressionGrid(self.X, self.Y, grid_kernel)
+
+        ref_kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
+        ref_model = GPy.models.GPRegression(self.X, self.Y, ref_kernel)
+
+        return grid_model, ref_model, grid_kernel, ref_kernel
+
+    def test_alpha_match(self):
+        # the grid posterior's alpha must equal the reference woodbury vector
+        grid_model, ref_model, _, _ = self._build_models()
+
+        np.testing.assert_almost_equal(grid_model.posterior.alpha,
+                                       ref_model.posterior.woodbury_vector)
+
+    def test_gradient_match(self):
+        # kernel and noise gradients must agree between the two models
+        grid_model, ref_model, grid_kernel, ref_kernel = self._build_models()
+
+        np.testing.assert_almost_equal(grid_kernel.variance.gradient,
+                                       ref_kernel.variance.gradient)
+        np.testing.assert_almost_equal(grid_kernel.lengthscale.gradient,
+                                       ref_kernel.lengthscale.gradient)
+        np.testing.assert_almost_equal(grid_model.likelihood.variance.gradient,
+                                       ref_model.likelihood.variance.gradient)
+
+    def test_prediction_match(self):
+        # predictions at two points outside the training grid must agree
+        grid_model, ref_model, _, _ = self._build_models()
+
+        test = np.array([[0,0,2],[-1,3,-4]])
+
+        np.testing.assert_almost_equal(grid_model.predict(test), ref_model.predict(test))
+
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -477,6 +477,34 @@ class KernelGradientTestsContinuous(unittest.TestCase):
        k.randomize()
        self.assertTrue(check_kernel_gradient_functions(k, X=X, X2=X2, verbose=verbose, fixed_X_dims=[0]))

+    def test_basis_func_linear_slope(self):
+        # Gradient check for a sum of LinearSlopeBasisFuncKernel, one per input dimension.
+        # Four random values are drawn per dimension and sorted; the lower two
+        # become `start`, the upper two `stop`. ARD alternates between dimensions.
+        bounds = np.random.uniform(self.X.min(0), self.X.max(0), (4, self.X.shape[1])).T
+        bounds.sort(axis=1)
+        halves = [np.split(row, 2) for row in bounds]
+        ks = [GPy.kern.LinearSlopeBasisFuncKernel(1, start, stop, ARD=(dim % 2 == 0), active_dims=[dim])
+              for dim, (start, stop) in enumerate(halves)]
+        k = GPy.kern.Add(ks)
+        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+
+    def test_basis_func_changepoint(self):
+        # Gradient check for a sum of ChangePointBasisFuncKernel, one per input dimension,
+        # each with a random change point inside the range of its dimension.
+        # ARD alternates between dimensions.
+        changepoints = np.random.uniform(self.X.min(0), self.X.max(0), self.X.shape[1])
+        ks = [GPy.kern.ChangePointBasisFuncKernel(1, point, ARD=(dim % 2 == 0), active_dims=[dim])
+              for dim, point in enumerate(changepoints)]
+        k = GPy.kern.Add(ks)
+        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+
+    def test_basis_func_domain(self):
+        # Gradient check for a sum of DomainKernel, one per input dimension.
+        # Four random values are drawn per dimension and sorted; the lower two
+        # become `start`, the upper two `stop`. ARD alternates between dimensions.
+        bounds = np.random.uniform(self.X.min(0), self.X.max(0), (4, self.X.shape[1])).T
+        bounds.sort(axis=1)
+        halves = [np.split(row, 2) for row in bounds]
+        ks = [GPy.kern.DomainKernel(1, start, stop, ARD=(dim % 2 == 0), active_dims=[dim])
+              for dim, (start, stop) in enumerate(halves)]
+        k = GPy.kern.Add(ks)
+        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+
 class KernelTestsMiscellaneous(unittest.TestCase):
    def setUp(self):
        N, D = 100, 10
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@ -117,7 +117,10 @@ class TestNoiseModels(object):
        self.positive_Y = np.exp(self.Y.copy())
        tmp = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None]
        self.integer_Y = np.where(tmp > 0, tmp, 0)
-
+        self.ns = np.random.poisson(50, size=self.N)[:, None]
+        p = np.abs(np.cos(2*np.pi*self.X + np.random.normal(scale=.2, size=(self.N, self.D)))).mean(1)
+        self.binomial_Y = np.array([np.random.binomial(self.ns[i], p[i]) for i in range(p.shape[0])])[:, None]
+        
        self.var = 0.2
        self.deg_free = 4.0

@ -204,15 +207,6 @@ class TestNoiseModels(object):
                },
                "laplace": True
            },
-            #"Student_t_log": {
-            #"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
-            #"grad_params": {
-            #"names": [".*t_noise"],
-            #"vals": [self.var],
-            #"constraints": [(".*t_noise", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
-            #},
-            #"laplace": True
-            #},
            "Gaussian_default": {
                "model": GPy.likelihoods.Gaussian(variance=self.var),
                "grad_params": {
@ -273,6 +267,13 @@ class TestNoiseModels(object):
                "laplace": True,
                "ep": False #Should work though...
            },
+            "Binomial_default": {
+                "model": GPy.likelihoods.Binomial(),
+                "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
+                "Y": self.binomial_Y,
+                "Y_metadata": {'trials': self.ns},
+                "laplace": True,
+            },
            #,
            #GAMMA needs some work!"Gamma_default": {
            #"model": GPy.likelihoods.Gamma(),
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -91,23 +91,23 @@ class MiscTests(unittest.TestCase):
        k = GPy.kern.RBF(1)
        m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
        m2[:] = m[:]
-        
+
        mu1, var1 = m.predict(m.X, full_cov=True)
        mu2, var2 = m2.predict(m2.X, full_cov=True)
        np.testing.assert_allclose(mu1, (mu2*std)+mu)
        np.testing.assert_allclose(var1, var2*std**2)
-        
+
        mu1, var1 = m.predict(m.X, full_cov=False)
        mu2, var2 = m2.predict(m2.X, full_cov=False)
-        
+
        np.testing.assert_allclose(mu1, (mu2*std)+mu)
        np.testing.assert_allclose(var1, var2*std**2)

        q50n = m.predict_quantiles(m.X, (50,))
        q50 = m2.predict_quantiles(m2.X, (50,))
-        
+
        np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
-        
+
        # Test variance component:
        qs = np.array([2.5, 97.5])
        # The quantiles get computed before unormalization
@ -181,8 +181,8 @@ class MiscTests(unittest.TestCase):
        Y_mu_true = 2*X_pred_mu
        Y_var_true = 4*X_pred_var
        Y_mu_pred, Y_var_pred = m.predict_noiseless(X_pred)
-        np.testing.assert_allclose(Y_mu_true, Y_mu_pred, rtol=1e-4)
-        np.testing.assert_allclose(Y_var_true, Y_var_pred, rtol=1e-4)
+        np.testing.assert_allclose(Y_mu_true, Y_mu_pred, rtol=1e-3)
+        np.testing.assert_allclose(Y_var_true, Y_var_pred, rtol=1e-3)

    def test_sparse_raw_predict(self):
        k = GPy.kern.RBF(1)
@ -328,6 +328,41 @@ class MiscTests(unittest.TestCase):
        m.checkgrad()
        print(m)

+    def test_mrd(self):
+        # Gradient checks for MRD on three random views sharing 40 data points,
+        # built four ways: defaults; per-view kernels / inference methods /
+        # likelihoods with 'PCA_single' latent initialisation; a single shared
+        # kernel with random initialisation; and an explicitly supplied latent X.
+        from GPy.inference.latent_function_inference import InferenceMethodList, VarDTC
+        from GPy.likelihoods import Gaussian
+        Y1 = np.random.normal(0, 1, (40, 13))
+        Y2 = np.random.normal(0, 1, (40, 6))
+        Y3 = np.random.normal(0, 1, (40, 8))
+        Q = 5
+        m = GPy.models.MRD(dict(data1=Y1, data2=Y2, data3=Y3), Q)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+        # every likelihood carries the same name: the string has no format
+        # placeholder, so no per-view suffix was ever applied
+        m = GPy.models.MRD(dict(data1=Y1, data2=Y2, data3=Y3), Q, initx='PCA_single',
+                           initz='random',
+                           kernel=[GPy.kern.RBF(Q, ARD=1) for _ in range(3)],
+                           inference_method=InferenceMethodList([VarDTC() for _ in range(3)]),
+                           likelihoods=[Gaussian(name='Gaussian_noise') for _ in range(3)])
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+        m = GPy.models.MRD(dict(data1=Y1, data2=Y2, data3=Y3), Q, initx='random',
+                           initz='random',
+                           kernel=GPy.kern.RBF(Q, ARD=1))
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+        m = GPy.models.MRD(dict(data1=Y1, data2=Y2, data3=Y3), Q, X=np.random.normal(0, 1, size=(40, Q)),
+                           X_variance=False,
+                           kernel=GPy.kern.RBF(Q, ARD=1),
+                           likelihoods=[Gaussian(name='Gaussian_noise') for _ in range(3)])
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+
    def test_model_set_params(self):
        m = GPy.models.GPRegression(self.X, self.Y)
        lengthscale = np.random.uniform()
@ -376,18 +411,18 @@ class MiscTests(unittest.TestCase):

        warp_k = GPy.kern.RBF(1)
        warp_f = GPy.util.warping_functions.IdentityFunction(closed_inverse=False)
-        warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k, 
+        warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k,
                                     warping_function=warp_f)
        warp_m.optimize()
        warp_preds = warp_m.predict(self.X)

        warp_k_exact = GPy.kern.RBF(1)
        warp_f_exact = GPy.util.warping_functions.IdentityFunction()
-        warp_m_exact = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k_exact, 
+        warp_m_exact = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k_exact,
                                           warping_function=warp_f_exact)
        warp_m_exact.optimize()
        warp_preds_exact = warp_m_exact.predict(self.X)
- 
+
        np.testing.assert_almost_equal(preds, warp_preds, decimal=4)
        np.testing.assert_almost_equal(preds, warp_preds_exact, decimal=4)

@ -406,18 +441,18 @@ class MiscTests(unittest.TestCase):

        warp_k = GPy.kern.RBF(1)
        warp_f = GPy.util.warping_functions.LogFunction(closed_inverse=False)
-        warp_m = GPy.models.WarpedGP(self.X, Y, kernel=warp_k, 
+        warp_m = GPy.models.WarpedGP(self.X, Y, kernel=warp_k,
                                     warping_function=warp_f)
        warp_m.optimize()
        warp_preds = warp_m.predict(self.X, median=True)[0]

        warp_k_exact = GPy.kern.RBF(1)
        warp_f_exact = GPy.util.warping_functions.LogFunction()
-        warp_m_exact = GPy.models.WarpedGP(self.X, Y, kernel=warp_k_exact, 
+        warp_m_exact = GPy.models.WarpedGP(self.X, Y, kernel=warp_k_exact,
                                           warping_function=warp_f_exact)
        warp_m_exact.optimize(messages=True)
        warp_preds_exact = warp_m_exact.predict(self.X, median=True)[0]
- 
+
        np.testing.assert_almost_equal(np.exp(preds), warp_preds, decimal=4)
        np.testing.assert_almost_equal(np.exp(preds), warp_preds_exact, decimal=4)

@ -435,7 +470,7 @@ class MiscTests(unittest.TestCase):

        warp_m = GPy.models.WarpedGP(X, Y)#, kernel=warp_k)#, warping_function=warp_f)
        warp_m['.*\.d'].constrain_fixed(1.0)
-        warp_m.optimize_restarts(parallel=False, robust=False, num_restarts=5, 
+        warp_m.optimize_restarts(parallel=False, robust=False, num_restarts=5,
                                 max_iters=max_iters)
        warp_m.predict(X)
        warp_m.predict_quantiles(X)
@ -444,7 +479,7 @@ class MiscTests(unittest.TestCase):
        warp_m.plot()
        warp_m.predict_in_warped_space = True
        warp_m.plot()
-        
+
    def test_offset_regression(self):
        #Tests GPy.models.GPOffsetRegression. Using two small time series
        #from a sine wave, we confirm the algorithm determines that the
@ -465,6 +500,53 @@ class MiscTests(unittest.TestCase):
        m.optimize()
        assert np.abs(m.offset[0]-offset)<0.1, ("GPOffsetRegression model failing to estimate correct offset (value estimated = %0.2f instead of %0.2f)" % (m.offset[0], offset))

+    def test_logistic_basis_func_gradients(self):
+        # Builds one LogisticBasisFuncKernel per input dimension with varying
+        # ARD / ARD_slope flags and checks the model gradients of their sum.
+        X = np.random.uniform(-4, 4, (20, 5))
+        points = np.random.uniform(X.min(0), X.max(0), X.shape[1])
+        ks = []
+        for i in range(points.shape[0]):
+            ard = (i % 2 == 0)
+            ard_slope = (i % 3 == 0)
+            if ard and not ard_slope:
+                # ARD=True with ARD_slope=False is expected to be rejected by the
+                # constructor (dimensions 2 and 4 here)
+                self.assertRaises(AssertionError, GPy.kern.LogisticBasisFuncKernel, 1, points, ARD=ard, ARD_slope=ard_slope, active_dims=[i])
+            else:
+                # NOTE(review): every kernel receives the full `points` array as
+                # its centers, not just points[i] - confirm this is intended
+                ks.append(GPy.kern.LogisticBasisFuncKernel(1, points, ARD=ard, ARD_slope=ard_slope, active_dims=[i]))
+        k = GPy.kern.Add(ks)
+        k.randomize()
+
+        Y = np.random.normal(0, 1, (X.shape[0], 1))
+        m = GPy.models.GPRegression(X, Y, kernel=k.copy())
+        # assertTrue (not a bare assert) so the check also runs under `python -O`
+        self.assertTrue(m.checkgrad())
+
+    def test_posterior_inf_basis_funcs(self):
+        # Fits a GPRegression with a LogisticBasisFuncKernel to data generated
+        # from a weighted sum of two logistic functions, then checks that the
+        # values returned by posterior_inf() and the fitted slopes match the
+        # generating weights and slopes.
+        X = np.random.uniform(-4, 1, (50, 1))
+
+        # Logistic:
+        k = GPy.kern.LogisticBasisFuncKernel(1, [0, -2])
+
+        true_w = [1, 2]
+        true_slope = [5, -2]
+
+        # Y is the sum over basis functions of w / (1 + exp(-s * (X - c))),
+        # with the centers c taken from the kernel itself
+        Y = 0
+        for w, s, c in zip(true_w, true_slope, k.centers[0]):
+            Y += w/(1+np.exp(-s*(X-c)))
+        # no `size` is given, so one scalar draw is added to every point
+        # NOTE(review): confirm that per-point noise was not intended here
+        Y += np.random.normal(0, .000001)
+
+        # the model gets a copy, so `k` itself is not linked into the model
+        m = GPy.models.GPRegression(X,Y,kernel=k.copy())
+        #m.likelihood.fix(1e-6)
+        m.optimize()
+
+        # presumably the posterior mean (wu) and covariance (wv) of the basis
+        # function weights - verify against posterior_inf()
+        wu, wv = m.kern.posterior_inf()
+        #_sort = np.argsort(wu.flat)
+
+        #from scipy.stats import norm
+        #confidence_intervals = np.array(norm.interval(.95, loc=wu.flat[_sort], scale=np.sqrt(np.diag(wv))[_sort])).T
+        #for i in range(wu.size):
+        #    s,t = confidence_intervals[i]
+        #    v = true_w[i]
+        #    assert ((s<v)&(v<t)), "didnt find true w within the 95% confidence interval of the predicted values"
+
+        # values are sorted before comparing, so the order in which the basis
+        # functions are recovered does not matter
+        np.testing.assert_allclose(np.sort(wu.flat), np.sort(true_w), rtol=1e-4)
+        np.testing.assert_allclose(np.diag(wv), 0, atol=1e-4)
+        np.testing.assert_allclose(np.sort(m.kern.slope.flat), np.sort(true_slope), rtol=1e-4)

 class GradientTests(np.testing.TestCase):
    def setUp(self):
--- a/GPy/testing/prior_tests.py
+++ b/GPy/testing/prior_tests.py
@ -6,6 +6,29 @@ import numpy as np
 import GPy

 class PriorTests(unittest.TestCase):
+    def test_studentT(self):
+        # StudentT prior on the inducing inputs of a sparse GP regression:
+        # it must be rejected on the rbf parameters, leave the gradients
+        # correct, and follow the singleton pattern.
+        lower, upper = 1, 2.5*np.pi
+        slope, offset = 1, 0
+        inputs = np.linspace(lower, upper, 500)
+        # noisy line plus sine wave
+        targets = slope*inputs + offset + 1*np.sin(inputs)
+        targets += 0.05*np.random.randn(len(inputs))
+        prior = GPy.priors.StudentT(1, 2, 4)
+
+        m = GPy.models.SparseGPRegression(inputs[:, None], targets[:, None])
+        m.Z.set_prior(prior)
+
+        # setting a StudentT prior on non-negative parameters
+        # should raise an assertionerror.
+        with self.assertRaises(AssertionError):
+            m.rbf.set_prior(prior)
+
+        # The gradients need to be checked
+        self.assertTrue(m.checkgrad())
+
+        # Check the singleton pattern: equal arguments give the same object,
+        # different arguments a different one
+        self.assertIs(prior, GPy.priors.StudentT(1, 2, 4))
+        self.assertIsNot(prior, GPy.priors.StudentT(2, 2, 4))
+    
    def test_lognormal(self):
        xmin, xmax = 1, 2.5*np.pi
        b, C, SNR = 1, 0, 0.1
@ -74,7 +97,7 @@ class PriorTests(unittest.TestCase):
        # setting a Gaussian prior on non-negative parameters
        # should raise an assertionerror.
        #self.assertRaises(AssertionError, m.Z.set_prior, gaussian)
-
+        self.assertTrue(m.checkgrad())


    def test_fixed_domain_check(self):
@ -107,8 +130,6 @@ class PriorTests(unittest.TestCase):
        # should raise an assertionerror.
        self.assertRaises(AssertionError, m.rbf.set_prior, gaussian)

-
-
 if __name__ == "__main__":
    print("Running unit tests, please be (very) patient...")
    unittest.main()
--- a/README.md
+++ b/README.md
@ -11,6 +11,46 @@ The Gaussian processes framework in Python.

 [![deploystat](https://travis-ci.org/SheffieldML/GPy.svg?branch=deploy)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![coverallsdevel](https://coveralls.io/repos/github/SheffieldML/GPy/badge.svg?branch=devel)](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel)

+## What's new:
+
+From now on we keep track of changes in the CHANGELOG.md. 
+If you want your changes to show up there, follow the [guidelines](#gl).
+In particular, tag your commits using the [gitchangelog](https://github.com/vaab/gitchangelog) commit message format. 
+
+## Contributing to GPy
+
+We welcome any contributions to GPy, after all it is an open source project. We use the GitHub feature of pull requests for contributions.
+
+For an in depth description of pull requests, please visit https://help.github.com/articles/using-pull-requests/ .
+
+### Steps to a successful contribution:
+
+ 1. Fork GPy: https://help.github.com/articles/fork-a-repo/
+ 2. Make your changes to the source in your fork.
+ 3. Make sure the [guidelines](#gl) are met.
+ 4. Set up tests to test your code. We are using unittests in the testing subfolder of GPy. There is a good chance that there is already a framework set up to test your new model in model_tests.py or kernel in kernel_tests.py. Have a look at the source and you might be able to just add your model (or kernel or others) as an additional test in the appropriate file. There are more frameworks for testing the other bits and pieces; just head over to the testing folder and have a look.
+ 5. Create a pull request to the devel branch in GPy, see above.
+ 6. The tests will be running on your pull request. In the comments section we will be able to discuss the changes and help you with any problems. Let us know if there are any in the comments, so we can help.
+ 7. The pull request gets accepted and your awesome new feature will be in the next GPy release :)
+
+For any further questions/suggestions head over to the issues section in GPy. 
+
+<a name=gl></a>
+### Pull Request Guidelines
+
+ - Check your code with PEP8 or pylint. Try to stick to 80 columns wide.
+ - Separate commits per smallest concern.
+ - Each functionality/bugfix commit should contain code, tests, and doc.
+ - We are using gitchangelog to keep track of changes and log new features. So if you want your changes to show up in the changelog, make sure you follow the [gitchangelog](https://github.com/vaab/gitchangelog) commit message format.
+
+## Support and questions to the community
+
+We have set up a mailing list for any questions you might have or problems you feel others have encountered:
+
+gpy-users@lists.shef.ac.uk
+
+Feel free to join the discussions on the issues section, too.
+
 ## Updated Structure

 We have pulled the core parameterization out of GPy. It is a package called [paramz](https://github.com/sods/paramz) and is the pure gradient based model optimization.
@ -105,13 +145,34 @@ m.optimize()
 np.save('model_save.npy', m.param_array)
 # 2: loading a model
 # Model creation, without initialization:
-m = GPy.models(GPRegression(X,Y,initialize=False)
-m[:] = np.load('model_save.npy')
-m.initialize_parameter()
-print m
+m_load = GPy.models.GPRegression(X, Y, initialize=False)
+m_load.update_model(False) # do not call the underlying expensive algebra on load
+m_load.initialize_parameter() # Initialize the parameters (connect the parameters up)
+m_load[:] = np.load('model_save.npy') # Load the parameters
+m_load.update_model(True) # Call the algebra only once
+print(m_load)
 ```
+## For Admins and Developers:

-## Running unit tests:
+### Running unit tests:
+
+New way of running tests is using coverage:
+
+Ensure nose and coverage are installed:
+
+    pip install nose coverage
+    
+Run nosetests from root directory of repository:
+
+    coverage run travis_tests.py
+    
+Create coverage report in htmlcov/
+
+    coverage html
+    
+The coverage report is located in htmlcov/index.html
+
+##### Legacy: using nosetests

 Ensure nose is installed via pip:

@ -129,22 +190,8 @@ or using setuptools

    python setup.py test

-## Ubuntu hackers

-> Note: Right now the Ubuntu package index does not include scipy 0.16.0, and thus, cannot
-> be used for GPy. We hope this gets fixed soon.
-
-For the most part, the developers are using ubuntu. To install the required packages:
-
-    sudo apt-get install python-numpy python-scipy python-matplotlib
-
-clone this git repository and add it to your path:
-
-    git clone git@github.com:SheffieldML/GPy.git ~/SheffieldML
-    echo 'PYTHONPATH=$PYTHONPATH:~/SheffieldML' >> ~/.bashrc
-
-
-## Compiling documentation:
+### Compiling documentation:

 The documentation is stored in doc/ and is compiled with the Sphinx Python documentation generator, and is written in the reStructuredText format.

@ -167,6 +214,50 @@ The documentation can be compiled as follows:

 The HTML files are then stored in doc/build/html

+### Commit new patch to devel
+
+If you want to merge a branch into devel make sure the following steps are met:
+
+ - Create a local branch from the pull request and merge the current devel in.
+ - Look through the changes on the pull request.
+ - Check that tests are there and are checking code where applicable.
+ - [optional] Make changes if necessary and commit and push to run tests.
+ - [optional] Repeat the above until tests pass.
+ - [optional] bump up the version of GPy using bumpversion. The configuration is done, so all you need is bumpversion [major|minor|patch]. 
+ - Update the changelog using gitchangelog: `gitchangelog > CHANGELOG.md`
+ - Commit the changes of the changelog as silent update: `git commit -m "chg: pkg: CHANGELOG update" CHANGELOG.md`
+ - Push the changes into devel.
+
+A usual workflow should look like this:
+
+    $ git fetch origin
+    $ git checkout -b <pull-origin>-devel origin/<pull-origin>-devel
+    $ git merge devel
+    $ coverage run travis_tests.py
+
+**Make changes for tests to cover corner cases (if statements, None arguments etc.)**
+Then we are ready to make the last changes for the changelog and versioning:
+
+    $ git commit -am "fix: Fixed tests for <pull-origin>"
+    $ bumpversion patch # [optional]
+    $ gitchangelog > CHANGELOG.md
+    $ git commit -m "chg: pkg: CHANGELOG update" CHANGELOG.md
+
+Now we can merge the pull request into devel:
+
+    $ git checkout devel
+    $ git merge --no-ff <pull-origin>-devel
+    $ git push origin devel
+    
+This will update the devel branch of GPy.
+
+### Deploying GPy
+
+We have fully automated the deployment. 
+Thus, all you need to do is create a pull request from devel to deploy. 
+Wait for the tests to finish (successfully!) and merge the pull request. 
+This will update the package on pypi for all platforms fully automatically.
+
 ## Funding Acknowledgements

 Current support for the GPy software is coming through the following projects.
--- a/appveyor.yml
+++ b/appveyor.yml
@ -3,7 +3,7 @@ environment:
    secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00=
  COVERALLS_REPO_TOKEN:
    secure: d3Luic/ESkGaWnZrvWZTKrzO+xaVwJWaRCEP0F+K/9DQGPSRZsJ/Du5g3s4XF+tS
-  gpy_version: 1.4.0
+  gpy_version: 1.5.3
  matrix:
    - PYTHON_VERSION: 2.7
      MINICONDA: C:\Miniconda-x64
--- a/setup.cfg
+++ b/setup.cfg
@ -1,6 +1,6 @@
 [bumpversion]
-current_version = 1.4.0
-tag = False
+current_version = 1.5.3
+tag = True
 commit = True

 [bumpversion:file:GPy/__version__.py]
--- a/setup.py
+++ b/setup.py
@ -148,7 +148,7 @@ setup(name = 'GPy',
      include_package_data = True,
      py_modules = ['GPy.__init__'],
      test_suite = 'GPy.testing',
-      install_requires = ['numpy>=1.7', 'scipy>=0.16', 'six', 'paramz>=0.6.6'],
+      install_requires = ['numpy>=1.7', 'scipy>=0.16', 'six', 'paramz>=0.6.8'],
      extras_require = {'docs':['sphinx'],
                        'optional':['mpi4py',
                                    'ipython>=4.0.0',