From ea2f814d557f5a130fd1fbe98b387c2df1c1e63c Mon Sep 17 00:00:00 2001 From: kcutajar Date: Tue, 22 Mar 2016 08:53:39 +0100 Subject: [PATCH 01/13] Added core code for GpSSM and GpGrid --- GPy/core/__init__.py | 2 + GPy/core/gp_grid.py | 116 ++++++++ GPy/core/gp_ssm.py | 253 ++++++++++++++++++ .../latent_function_inference/__init__.py | 3 + 4 files changed, 374 insertions(+) create mode 100644 GPy/core/gp_grid.py create mode 100644 GPy/core/gp_ssm.py diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index b0743916..76d8bb90 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -8,6 +8,8 @@ from . import parameterization from .gp import GP from .svgp import SVGP from .sparse_gp import SparseGP +from .gp_ssm import GpSSM +from .gp_grid import GpGrid from .mapping import * diff --git a/GPy/core/gp_grid.py b/GPy/core/gp_grid.py new file mode 100644 index 00000000..3d061bb0 --- /dev/null +++ b/GPy/core/gp_grid.py @@ -0,0 +1,116 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +#This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +import numpy as np +import scipy.linalg as sp +from gp import GP +from parameterization.param import Param +from ..inference.latent_function_inference import gaussian_grid_inference +from .. import likelihoods + +import logging +from GPy.inference.latent_function_inference.posterior import Posterior +logger = logging.getLogger("gp grid") + +class GpGrid(GP): + """ + A GP model for Grid inputs + + :param X: inputs + :type X: np.ndarray (num_data x input_dim) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) + :param kernel: the kernel (covariance function). See link kernels + :type kernel: a GPy.kern.kern instance + + """ + + def __init__(self, X, Y, kernel, likelihood, inference_method=None, + name='gp grid', Y_metadata=None, normalizer=False): + #pick a sensible inference method + + inference_method = gaussian_grid_inference.GaussianGridInference() + + GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + self.posterior = None + + def parameters_changed(self): + """ + Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model. + In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood and gradients of the model + + .. warning:: + This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call + this method yourself, there may be unexpected consequences. 
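+
+            For instance, assigning to a parameter (e.g. ``m.kern.lengthscale = 2.``
+            on a hypothetical GpGrid model ``m``) is expected to trigger this method
+            automatically through the parameterization framework.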
+ """ + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) + self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) + self.kern.update_gradients_direct(self.grad_dict['dL_dVar'], self.grad_dict['dL_dLen']) + + def kron_mmprod(self, A, B): + count = 0 + D = len(A) + for b in (B.T): + x = b + N = 1 + G = np.zeros(D) + for d in xrange(D): + G[d] = len(A[d]) + N = np.prod(G) + for d in xrange(D-1, -1, -1): + X = np.reshape(x, (G[d], round(N/G[d])), order='F') + Z = np.dot(A[d], X) + Z = Z.T + x = np.reshape(Z, (-1, 1), order='F') + if (count == 0): + result = x + else: + result = np.column_stack((result, x)) + count+=1 + return result + + def _raw_predict(self, Xnew, full_cov=False, kern=None): + """ + Make a prediction for the latent function values + """ + if kern is None: + kern = self.kern + + # compute mean predictions + Kmn = kern.K(Xnew, self.X) + alpha_kron = self.posterior.alpha + mu = np.dot(Kmn, alpha_kron) + mu = mu.reshape(-1,1) + + # compute variance of predictions + Knm = Kmn.T + noise = self.likelihood.variance + V_kron = self.posterior.V_kron + Qs = self.posterior.Qs + QTs = self.posterior.QTs + A = self.kron_mmprod(QTs, Knm) + V_kron = V_kron.reshape(-1, 1) + A = A / (V_kron + noise) + A = self.kron_mmprod(Qs, A) + + Kmm = kern.K(Xnew) + var = np.diag(Kmm - np.dot(Kmn, A)).copy() + #var = np.zeros((Xnew.shape[0])) + var = var.reshape(-1, 1) + + return mu, var diff --git a/GPy/core/gp_ssm.py b/GPy/core/gp_ssm.py new file mode 100644 index 00000000..5426d010 --- /dev/null +++ b/GPy/core/gp_ssm.py @@ -0,0 +1,253 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +# This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +import numpy as np +import scipy.linalg as sp +from gp import GP +from parameterization.param import Param +from ..inference.latent_function_inference import gaussian_ssm_inference +from .. import likelihoods +from ..inference import optimization +from parameterization.transformations import Logexp +from GPy.inference.latent_function_inference.posterior import Posterior + +class GpSSM(GP): + """ + A GP model for sorted one-dimensional inputs + + This model allows the representation of a Gaussian Process as a Gauss-Markov State Machine. + + :param X: inputs + :type X: np.ndarray (num_data x input_dim) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) + :param kernel: the kernel (covariance function). 
See link kernels
+    :type kernel: a GPy.kern.kern instance
+
+    """
+
+    def __init__(self, X, Y, kernel, likelihood, inference_method=None,
+                 name='gp ssm', Y_metadata=None, normalizer=False):
+        #pick a sensible inference method
+
+        inference_method = gaussian_ssm_inference.GaussianSSMInference()
+
+        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
+
+        self.posterior = None
+
+    def optimize(self, optimizer=None, start=None, **kwargs):
+        # EM-style loop: alternate state inference (E-step) with hyperparameter updates (M-step)
+        prevLikelihood = 0
+        count = 0
+        change = 1
+        while ((change > 0.001) and (count < 50)):
+            posterior, likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata)
+            expectations = posterior.expectations
+            self.optimize_params(expectations=expectations)
+            change = np.abs(likelihood - prevLikelihood)
+            prevLikelihood = likelihood
+            count = count + 1
+
+    def optimize_params(self, optimizer=None, start=None, expectations=None, **kwargs):
+        if self.is_fixed:
+            print("nothing to optimize")
+        if self.size == 0:
+            print("nothing to optimize")
+
+        if not self.update_model():
+            print("Updates were off, setting updates on again")
+            self.update_model(True)
+
+        if start is None:
+            start = self.optimizer_array
+
+        if optimizer is None:
+            optimizer = self.preferred_optimizer
+
+        if isinstance(optimizer, optimization.Optimizer):
+            opt = optimizer
+            opt.model = self
+        else:
+            optimizer = optimization.get_optimizer(optimizer)
+            opt = optimizer(start, model=self, **kwargs)
+
+        opt.max_iters = 1
+
+        opt.run(f_fp=self.param_maximisation_step, args=(self.X, self.Y, expectations))
+        self.optimization_runs.append(opt)
+        self.optimizer_array = opt.x_opt
+
+    def param_maximisation_step(self, loghyper, X, Y, E, *args):
+        # M-step objective: the expected complete-data negative log likelihood Ub
+        # (up to constants) and its gradients wrt (lambda, sigma, noise)
+        loghyper = np.log(np.exp(loghyper) + 1) - 1e-20
+        lam = loghyper[0]
+        sig = loghyper[1]
+        noise = loghyper[2]
+
+        kern = self.kern
+        order = kern.order
+
+        K = len(X)
+        mu_0 = np.zeros((order, 1))
+        v_0 = kern.Phi_of_r(-1)
+        dvSig = v_0/sig
+        dvLam = kern.dQ(-1)[0]
+
+        mu = E[0][0]
+        V = E[0][1]
+        E11 = V + np.dot(mu, mu.T)
+        V0_inv = np.linalg.solve(v_0, np.eye(len(mu)))
+        Ub = np.log(np.linalg.det(v_0)) + np.trace(np.dot(V0_inv, E11))
+        dUb_lam = np.trace(np.dot(V0_inv, dvLam.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvLam.T),V0_inv), E11))
+        dUb_sig = np.trace(np.dot(V0_inv, dvSig.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvSig.T),V0_inv), E11))
+
+        for t in range(1, K):
+            delta = X[t] - X[t-1]
+            Q = kern.Q_of_r(delta)
+            Phi = kern.Phi_of_r(delta)
+            dPhi = kern.dPhidLam(delta)
+            [dLam, dSig] = kern.dQ(delta)
+            mu_prev = E[t-1][0]
+            V_prev = E[t-1][1]
+            mu = E[t][0]
+            V = E[t][1]
+            Ett_prev = V_prev + np.dot(mu_prev, mu_prev.T)
+            Eadj = E[t-1][3] + np.dot(mu, mu_prev.T)
+            Ett = V + np.dot(mu, mu.T)
+            CC = sp.cholesky(Q, lower=True)
+            Q_inv = np.linalg.solve(CC.T, np.linalg.solve(CC, np.eye(len(mu))))
+
+            Ub = Ub + np.log(np.linalg.det(Q)) + np.trace(np.dot(Q_inv, Ett)) - 2*np.trace(np.dot(np.dot(Phi.T, Q_inv), Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), Phi), Ett_prev))
+            A = np.dot(dPhi.T, Q_inv) - np.dot(Phi.T, np.dot(np.dot(Q_inv, dLam), Q_inv))
+            dUb_lam = dUb_lam + np.trace(np.dot(Q_inv, dLam.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dLam.T), Q_inv), Ett)) - 2*np.trace(np.dot(A,Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), dPhi) + np.dot(A, Phi), Ett_prev))
+            A = -1 * np.dot(Phi.T, np.dot(np.dot(Q_inv, dSig), Q_inv))
+            dUb_sig = dUb_sig + np.trace(np.dot(Q_inv, dSig.T)) - np.
trace(np.dot(np.dot(np.dot(Q_inv, dSig.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(A, Phi), Ett_prev)) + + dUb_noise = 0 + for t in xrange(K): + mu = E[t][0] + V = E[t][1] + Ett = V + np.dot(mu, mu.T) + Ub = Ub + np.log(noise) + (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/noise + dUb_noise = dUb_noise + 1/noise - (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/(noise**2) + + dUb = np.array([lam, sig, noise]) * np.array([dUb_lam.item(), dUb_sig.item(), dUb_noise.item()]) + + return Ub.item(), dUb + + + def parameters_changed(self): + """ + Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model. + In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood + + .. warning:: + This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call + this method yourself, there may be unexpected consequences. + + We override the method in the parent class since we do not handle updates to the standard gradients. + """ + self.posterior, self._log_marginal_likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) + + def _raw_predict(self, Xnew, full_cov=False, kern=None): + """ + Make a prediction for the latent function values + N.B. It is assumed that input points are sorted + """ + if kern is None: + kern = self.kern + + X = self.X + K = X.shape[0] + K_new = Xnew.shape[0] + order = kern.order + mean_pred = np.zeros(K_new) + var_pred = np.zeros(K_new) + H = np.zeros((1,order)) + H[0][0] = 1 + + count = 0 + for t in xrange(K_new): + while ((count < K) and (Xnew[t] > X[count])): + count += 1 + + if (count == 0): + mu = np.zeros((order, 1)) + V = kern.Phi_of_r(-1) + + delta = np.abs(Xnew[t] - X[count]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + P = np.dot(np.dot(Phi, V), Phi.T) + Q + + mu_s = self.posterior.mu_s[count] + V_s = self.posterior.V_s[count] + L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) + mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) + V_s = V + np.dot(np.dot(L, V_s - P), L.T) + + mean_pred[t] = np.dot(H, mu_s) + var_pred[t] = np.dot(np.dot(H, V_s), H.T) + + elif (count == K): + # forwards phase + delta = np.abs(Xnew[t] - X[count-1]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + mu_f = self.posterior.mu_f[count-1] + V_f = self.posterior.V_f[count-1] + + mu = np.dot(Phi, mu_f) + P = np.dot(np.dot(Phi, V_f), Phi.T) + Q + V = P + + mean_pred[t] = np.dot(H,mu) + var_pred[t] = np.dot(np.dot(H, V), H.T) + + else: + # forwards phase + delta = np.abs(Xnew[t] - X[count-1]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + mu_f = self.posterior.mu_f[count-1] + V_f = self.posterior.V_f[count-1] + mu = np.dot(Phi, mu_f) + P = np.dot(np.dot(Phi, V_f), Phi.T) + Q + V = P + + delta = np.abs(Xnew[t] - X[count]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + P = np.dot(np.dot(Phi, V), Phi.T) + Q + + # backwards phase + mu_s = self.posterior.mu_s[count] + V_s = self.posterior.V_s[count] + + L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) + mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) + V_s = V + np.dot(np.dot(L, V_s - P), L.T) + + mean_pred[t] = np.dot(H, mu_s) + var_pred[t] = np.dot(np.dot(H, V_s), H.T) + + + mean_pred = mean_pred.reshape(-1, 1) + var_pred = var_pred.reshape(-1, 1) + + return mean_pred, var_pred diff --git 
a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index eabc6cc9..ba9099b6 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -69,6 +69,9 @@ from .dtc import DTC from .fitc import FITC from .var_dtc_parallel import VarDTC_minibatch from .var_gauss import VarGauss +from .gaussian_ssm_inference import GaussianSSMInference +from .gaussian_grid_inference import GaussianGridInference + # class FullLatentFunctionData(object): # From 08fa69fdd198efe7305756ec47e8f20cc01ecdc3 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Tue, 22 Mar 2016 09:09:30 +0100 Subject: [PATCH 02/13] Added kernels for GpGrid and GpSsm regression --- .../gaussian_grid_inference.py | 113 +++++++++++++ .../gaussian_ssm_inference.py | 140 +++++++++++++++++ .../grid_posterior.py | 62 ++++++++ .../ssm_posterior.py | 73 +++++++++ GPy/kern/__init__.py | 2 + GPy/kern/src/gridKerns.py | 76 +++++++++ GPy/kern/src/rbf.py | 8 + GPy/kern/src/ssm_kerns.py | 148 ++++++++++++++++++ GPy/kern/src/stationary.py | 19 +++ 9 files changed, 641 insertions(+) create mode 100644 GPy/inference/latent_function_inference/gaussian_grid_inference.py create mode 100644 GPy/inference/latent_function_inference/gaussian_ssm_inference.py create mode 100644 GPy/inference/latent_function_inference/grid_posterior.py create mode 100644 GPy/inference/latent_function_inference/ssm_posterior.py create mode 100644 GPy/kern/src/gridKerns.py create mode 100644 GPy/kern/src/ssm_kerns.py diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py new file mode 100644 index 00000000..8917abea --- /dev/null +++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py @@ -0,0 +1,113 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +# This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +from grid_posterior import GridPosterior +import numpy as np +from . import LatentFunctionInference +log_2_pi = np.log(2*np.pi) + +class GaussianGridInference(LatentFunctionInference): + """ + An object for inference when the likelihood is Gaussian and inputs are on a grid. + + The function self.inference returns a GridPosterior object, which summarizes + the posterior. 
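+
+    A minimal usage sketch (hypothetical setup: assumes ``X`` lies on a Cartesian
+    grid, ``Y`` has shape (num_data, 1), and ``kern`` and ``likelihood`` are built
+    as in GpGrid)::
+
+        inf = GaussianGridInference()
+        posterior, log_lik, grad_dict = inf.inference(kern, X, likelihood, Y)
+        # posterior.alpha holds (K + noise*I)^{-1} Y, computed through the
+        # eigendecompositions of the per-dimension covariance matrices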
+ + """ + def __init__(self): + pass + + def kron_mvprod(self, A, b): + x = b + N = 1 + D = len(A) + G = np.zeros((D,1)) + for d in xrange(0, D): + G[d] = len(A[d]) + N = np.prod(G) + for d in xrange(D-1, -1, -1): + X = np.reshape(x, (G[d], round(N/G[d])), order='F') + Z = np.dot(A[d], X) + Z = Z.T + x = np.reshape(Z, (-1, 1), order='F') + return x + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + """ + Returns a GridPosterior class containing essential quantities of the posterior + """ + N = X.shape[0] #number of training points + D = X.shape[1] #number of dimensions + + Kds = np.zeros(D, dtype=object) #vector for holding covariance per dimension + Qs = np.zeros(D, dtype=object) #vector for holding eigenvectors of covariance per dimension + QTs = np.zeros(D, dtype=object) #vector for holding transposed eigenvectors of covariance per dimension + V_kron = 1 # kronecker product of eigenvalues + + # retrieve the one-dimensional variation of the designated kernel + oneDkernel = kern.getOneDimensionalKernel(D) + + for d in xrange(D): + xg = list(set(X[:,d])) #extract unique values for a dimension + xg = np.reshape(xg, (len(xg), 1)) + oneDkernel.lengthscale = kern.lengthscale[d] + Kds[d] = oneDkernel.K(xg) + [V, Q] = np.linalg.eig(Kds[d]) + V_kron = np.kron(V_kron, V) + Qs[d] = Q + QTs[d] = Q.T + + noise = likelihood.variance + 1e-8 + + alpha_kron = self.kron_mvprod(QTs, Y) + V_kron = V_kron.reshape(-1, 1) + alpha_kron = alpha_kron / (V_kron + noise) + alpha_kron = self.kron_mvprod(Qs, alpha_kron) + + log_likelihood = -0.5 * (np.dot(Y.T, alpha_kron) + np.sum((np.log(V_kron + noise))) + N*log_2_pi) + + # compute derivatives wrt parameters Thete + derivs = np.zeros(D+2, dtype='object') + for t in xrange(len(derivs)): + dKd_dTheta = np.zeros(D, dtype='object') + gamma = np.zeros(D, dtype='object') + gam = 1 + for d in xrange(D): + xg = list(set(X[:,d])) + xg = np.reshape(xg, (len(xg), 1)) + oneDkernel.lengthscale = kern.lengthscale[d] + if t < D: + dKd_dTheta[d] = oneDkernel.dKd_dLen(xg, (t==d), lengthscale=kern.lengthscale[t]) #derivative wrt lengthscale + elif (t == D): + dKd_dTheta[d] = oneDkernel.dKd_dVar(xg) #derivative wrt variance + else: + dKd_dTheta[d] = np.identity(len(xg)) #derivative wrt noise + gamma[d] = np.diag(np.dot(np.dot(QTs[d], dKd_dTheta[d].T), Qs[d])) + gam = np.kron(gam, gamma[d]) + + gam = gam.reshape(-1,1) + kappa = self.kron_mvprod(dKd_dTheta, alpha_kron) + derivs[t] = 0.5*np.dot(alpha_kron.T,kappa) - 0.5*np.sum(gam / (V_kron + noise)) + + # separate derivatives + dL_dLen = derivs[:D] + dL_dVar = derivs[D] + dL_dThetaL = derivs[D+1] + + return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL} diff --git a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py new file mode 100644 index 00000000..4eb441cb --- /dev/null +++ b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py @@ -0,0 +1,140 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +# This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +from ssm_posterior import SsmPosterior +from ...util.linalg import pdinv, dpotrs, tdot +from ...util import diag +import numpy as np +import math as mt +from . import LatentFunctionInference +log_2_pi = np.log(2*np.pi) + + +class GaussianSSMInference(LatentFunctionInference): + """ + An object for inference when the likelihood is Gaussian. + + The function self.inference returns a Posterior object, which summarizes + the posterior. + + """ + def __init__(self): + pass#self._YYTfactor_cache = caching.cache() + + def get_YYTfactor(self, Y): + """ + find a matrix L which satisfies LL^T = YY^T. + + Note that L may have fewer columns than Y, else L=Y. + """ + N, D = Y.shape + if (N>D): + return Y + else: + #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. + #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" + return Y + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + """ + Returns a Posterior class containing essential quantities of the posterior + """ + order = kern.order + K = X.shape[0] + log_likelihood = 0 + results = np.zeros((K,4),dtype=object) + H = np.zeros((1,order)) + H[0][0] = 1 + v_0 = kern.Phi_of_r(-1) + mu_0 = np.zeros((order, 1)) + noise_var = likelihood.variance + 1e-8 + + # carry out forward filtering + for t in range(K): + if (t == 0): + prior_m = np.dot(H,mu_0) + prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var + + log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v) + + kalman_gain = np.dot(v_0, H.T) / prior_v + mu = mu_0 + kalman_gain*(Y[0] - prior_m) + + + V = np.dot(np.eye(order) - np.dot(kalman_gain,H), v_0) + results[0][0] = mu + results[0][1] = V + else: + delta = X[t] - X[t-1] + Q = kern.Q_of_r(delta) + Phi = kern.Phi_of_r(delta) + P = np.dot(np.dot(Phi, V), Phi.T) + Q + PhiMu = np.dot(Phi, mu) + prior_m = np.dot(H, PhiMu) + prior_v = np.dot(np.dot(H, P), H.T) + noise_var + + log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v) + log_likelihood += log_likelihood_i + + kalman_gain = np.dot(P, H.T)/prior_v + mu = PhiMu + kalman_gain*(Y[t] - prior_m) + V = np.dot((np.eye(order) - np.dot(kalman_gain, H)), P) + + results[t-1][2] = Phi + results[t-1][3] = P + results[t][0] = mu + results[t][1] = V + + # carry out backwards smoothing + + W = np.dot((np.eye(order) - np.dot(kalman_gain,H)),(np.dot(Phi,results[K-2][1]))) + + mu_s = results[K-1][0] + V_s = results[K-1][1] + + posterior_mean = np.zeros((K,1)) + posterior_var = np.zeros((K,1)) + E = np.zeros((K,4), dtype='object') + + posterior_mean[K-1] = np.dot(H, mu_s) + posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T) + E[K-1][0] = mu_s + E[K-1][1] = V_s + + for t in range(K-2, -1, -1): + mu = results[t][0] + V = results[t][1] + Phi = results[t][2] + P = results[t][3] + + L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order))) # forward substitution + mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) + V_s = V + np.dot(np.dot(L, V_s - P), L.T) + posterior_mean[t] 
= np.dot(H, mu_s)
+            posterior_var[t] = np.dot(np.dot(H, V_s), H.T)
+
+            if (t < K-2):
+                W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T))
+
+            E[t][0] = mu_s
+            E[t][1] = V_s
+            E[t][2] = L
+            E[t][3] = W
+
+        return SsmPosterior(mu_f=results[:,0], V_f=results[:,1], mu_s=E[:,0], V_s=E[:,1], expectations=E), log_likelihood
diff --git a/GPy/inference/latent_function_inference/grid_posterior.py b/GPy/inference/latent_function_inference/grid_posterior.py
new file mode 100644
index 00000000..da86dd84
--- /dev/null
+++ b/GPy/inference/latent_function_inference/grid_posterior.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+
+class GridPosterior(object):
+    """
+    Specially intended for the grid regression case.
+    An object to represent a Gaussian posterior over latent function values, p(f|D).
+
+    The purpose of this class is to serve as an interface between the inference
+    schemes and the model classes.
+
+    """
+    def __init__(self, alpha_kron=None, QTs=None, Qs=None, V_kron=None):
+        """
+        alpha_kron : posterior weight vector (K + noise*I)^{-1} Y, computed via the Kronecker eigendecomposition
+        QTs : transposed eigenvectors from the decomposition of the single-dimension covariance matrices
+        Qs : eigenvectors from the decomposition of the single-dimension covariance matrices
+        V_kron : Kronecker product of the eigenvalues from the decomposition of the single-dimension covariance matrices
+        """
+
+        if ((alpha_kron is not None) and (QTs is not None)
+                and (Qs is not None) and (V_kron is not None)):
+            pass # we have sufficient information to compute the posterior
+        else:
+            raise ValueError("insufficient information for predictions")
+
+        self._alpha_kron = alpha_kron
+        self._qTs = QTs
+        self._qs = Qs
+        self._v_kron = V_kron
+
+    @property
+    def alpha(self):
+        """
+        posterior weight vector (K + noise*I)^{-1} Y
+        """
+        return self._alpha_kron
+
+    @property
+    def QTs(self):
+        """
+        array of transposed eigenvectors from the single-dimension covariance decompositions
+        """
+        return self._qTs
+
+    @property
+    def Qs(self):
+        """
+        array of eigenvectors from the single-dimension covariance decompositions
+        """
+        return self._qs
+
+    @property
+    def V_kron(self):
+        """
+        Kronecker product of the eigenvalues
+        """
+        return self._v_kron
+
diff --git a/GPy/inference/latent_function_inference/ssm_posterior.py b/GPy/inference/latent_function_inference/ssm_posterior.py
new file mode 100644
index 00000000..1b87ddb3
--- /dev/null
+++ b/GPy/inference/latent_function_inference/ssm_posterior.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+
+class SsmPosterior(object):
+    """
+    Specially intended for the SSM regression case.
+    An object to represent a Gaussian posterior over latent function values, p(f|D).
+
+    The purpose of this class is to serve as an interface between the inference
+    schemes and the model classes.
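+
+    A hedged usage sketch (``kern``, ``X``, ``likelihood`` and ``Y`` as set up by
+    GpSSM; ``t`` is a hypothetical time index)::
+
+        posterior, log_lik = GaussianSSMInference().inference(kern, X, likelihood, Y)
+        mu_t = posterior.mu_s[t]   # smoothed state mean at step t
+        V_t = posterior.V_s[t]     # smoothed state covariance at step t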
+ + """ + def __init__(self, mu_f = None, V_f=None, mu_s=None, V_s=None, expectations=None): + """ + mu_f : mean values predicted during kalman filtering step + var_f : variance predicted during the kalman filtering step + mu_s : mean values predicted during backwards smoothing step + var_s : variance predicted during backwards smoothing step + expectations : posterior expectations + """ + + if ((mu_f is not None) and (V_f is not None) and + (mu_s is not None) and (V_s is not None) and + (expectations is not None)): + pass # we have sufficient to compute the posterior + else: + raise ValueError("insufficient information to compute predictions") + + self._mu_f = mu_f + self._V_f = V_f + self._mu_s = mu_s + self._V_s = V_s + self._expectations = expectations + + @property + def mu_f(self): + """ + Mean values predicted during kalman filtering step mean + """ + return self._mu_f + + @property + def V_f(self): + """ + Variance predicted during the kalman filtering step + """ + return self._V_f + + @property + def mu_s(self): + """ + Mean values predicted during kalman backwards smoothin mean + """ + return self._mu_s + + @property + def V_s(self): + """ + Variance predicted during backwards smoothing step + """ + return self._V_s + + @property + def expectations(self): + """ + Posterior expectations + """ + return self._expectations + diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 62796d93..ffa906eb 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -29,3 +29,5 @@ from .src.splitKern import SplitKern,DEtime from .src.splitKern import DEtime as DiffGenomeKern from .src.spline import Spline from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel +from .src.ssmKerns import Matern32_SSM +from .src.gridKerns import GridRBF diff --git a/GPy/kern/src/gridKerns.py b/GPy/kern/src/gridKerns.py new file mode 100644 index 00000000..98804544 --- /dev/null +++ b/GPy/kern/src/gridKerns.py @@ -0,0 +1,76 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +import numpy as np +from stationary import Stationary +from ...util.config import * +from ...util.caching import Cache_this + +class GridKern(Stationary): + + def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=False): + super(GridKern, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU) + self.originalDimensions = originalDimensions + + @Cache_this(limit=3, ignore_args=()) + def dKd_dVar(self, X, X2=None): + """ + Derivative of Kernel function wrt variance applied on inputs X and X2. + In the stationary case there is an inner function depending on the + distances from X to X2, called r. + + dKd_dVar(X, X2) = dKdVar_of_r((X-X2)**2) + """ + r = self._scaled_dist(X, X2) + return self.dKdVar_of_r(r) + + @Cache_this(limit=3, ignore_args=()) + def dKd_dLen(self, X, dimension, lengthscale, X2=None): + """ + Derivate of Kernel function wrt lengthscale applied on inputs X and X2. + In the stationary case there is an inner function depending on the + distances from X to X2, called r. 
+ + dKd_dLen(X, X2) = dKdLen_of_r((X-X2)**2) + """ + r = self._scaled_dist(X, X2) + return self.dKdLen_of_r(r, dimension, lengthscale) + +class GridRBF(GridKern): + """ + Similar to regular RBF but supplemented with methods required for Gaussian grid regression + Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: + + .. math:: + + k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) + + """ + _support_GPU = True + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='gridRBF', originalDimensions=1, useGPU=False): + super(GridRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=useGPU) + + def K_of_r(self, r): + return (self.variance**(float(1)/self.originalDimensions)) * np.exp(-0.5 * r**2) + + def dKdVar_of_r(self, r): + """ + Compute derivative of kernel wrt variance + """ + return np.exp(-0.5 * r**2) + + def dKdLen_of_r(self, r, dimCheck, lengthscale): + """ + Compute derivative of kernel for dimension wrt lengthscale + Computation of derivative changes when lengthscale corresponds to + the dimension of the kernel whose derivate is being computed. + """ + if (dimCheck == True): + return (self.variance**(float(1)/self.originalDimensions)) * np.exp(-0.5 * r**2) * (r**2) / (lengthscale**(float(1)/self.originalDimensions)) + else: + return (self.variance**(float(1)/self.originalDimensions)) * np.exp(-0.5 * r**2) / (lengthscale**(float(1)/self.originalDimensions)) + + def dK_dr(self, r): + return -r*self.K_of_r(r) \ No newline at end of file diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index ff86561d..557366ae 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -58,6 +58,14 @@ class RBF(Stationary): if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200) super(RBF,self).parameters_changed() + + def get_one_dimensional_kernel(self, dim): + """ + Specially intended for Grid regression. + """ + oneDkernel = GridRBF(input_dim=1, variance=self.variance.copy(), originalDimensions=dim) + return oneDkernel + #---------------------------------------# # PSI statistics # #---------------------------------------# diff --git a/GPy/kern/src/ssm_kerns.py b/GPy/kern/src/ssm_kerns.py new file mode 100644 index 00000000..aa988e52 --- /dev/null +++ b/GPy/kern/src/ssm_kerns.py @@ -0,0 +1,148 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +# +# Kurt Cutajar + +from kern import Kern +from ... 
import util +import numpy as np +from scipy import integrate, weave +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp + +class SsmKerns(Kern): + """ + Kernels suitable for GP SSM regression with one-dimensional inputs + + """ + + def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False): + + super(SsmKerns, self).__init__(input_dim, active_dims, name,useGPU=useGPU) + self.lengthscale = np.abs(lengthscale) + self.variance = Param('variance', variance, Logexp()) + + def K_of_r(self, r): + raise NotImplementedError("implement the covariance function as a fn of r to use this class") + + def dK_dr(self, r): + raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") + + def Q_of_r(self, r): + raise NotImplementedError("implement covariance function of SDE wrt r to use this class") + + def Phi_of_r(self, r): + raise NotImplementedError("implement transition function of SDE wrt r to use this class") + + def noise(self): + raise NotImplementedError("implement noise function of SDE to use this class") + + def dPhidLam(self, r): + raise NotImplementedError("implement derivative of the transition function wrt to lambda to use this class") + + def dQ(self, r): + raise NotImplementedError("implement detivatives of Q wrt to lambda and variance to use this class") + + +class Matern32_SSM(SsmKerns): + """ + Matern 3/2 kernel: + + .. math:: + + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } + + """ + + def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'): + super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name) + lambd = np.sqrt(2*1.5)/lengthscale # additional paramter of model (derived from lengthscale) + self.lam = Param('lambda', lambd, Logexp()) + self.link_parameters(self.lam, self.variance) + self.order = 2 + + def noise(self): + """ + Compute noise for the kernel + """ + p = 1 + lp_fact = np.sum(np.log(range(1,p+1))) + l2p_fact = np.sum(np.log(range(1,2*p +1))) + logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact + q = np.exp(logq) + return q + + def K_of_r(self, r): + lengthscale = np.sqrt(3.) / self.lam + r = r / lengthscale + return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) 
* r)
+
+    def dK_dr(self,r):
+        return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
+
+    def Q_of_r(self, r):
+        """
+        Compute the process noise covariance (Q)
+        """
+        q = self.noise()
+        Q = np.zeros((2,2))
+        Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2)
+            + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam))
+        Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam))
+        Q[1][0] = Q[0][1]
+        Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2)
+            - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam))
+        return q*Q
+
+    def Phi_of_r(self, r):
+        """
+        Compute the transition function (Phi); a negative r is used as a
+        sentinel requesting the stationary (prior) state covariance
+        """
+        if r < 0:
+            phi = np.zeros((2,2))
+            phi[0][0] = self.variance
+            phi[0][1] = 0
+            phi[1][0] = 0
+            phi[1][1] = np.power(self.lam,2)*self.variance
+            return phi
+        else:
+            mult = np.exp(-self.lam*r)
+            phi = np.zeros((2, 2))
+            phi[0][0] = (1 + self.lam*r)*mult
+            phi[0][1] = mult*r
+            phi[1][0] = -r*mult*self.lam**2
+            phi[1][1] = mult*(1 - self.lam*r)
+            return phi
+
+    def dPhidLam(self, r):
+        """
+        Compute the derivative of the transition function (Phi) wrt lambda
+        """
+        mult = np.exp(self.lam*r)
+        dPhi = np.zeros((2, 2))
+        dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult
+        dPhi[0][1] = (-1 * (r**2)) / mult
+        dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult
+        dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult
+        return dPhi
+
+    def dQ(self, r):
+        """
+        Compute the derivatives of Q with respect to lambda and variance
+        """
+        if (r == -1):
+            dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]])
+            dQdVar = None
+        else:
+            q = self.noise()
+            Q = self.Q_of_r(r)
+            dQdLam = np.zeros((2, 2))
+            mult = np.exp(2*self.lam*r)
+            dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult)
+            dQdLam[0][1] = -(r**3)/mult
+            dQdLam[1][0] = dQdLam[0][1]
+            dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult)
+            dq = (q*(2+1))/self.lam
+            dQdLam = q*dQdLam + dq*(Q/q)
+            dQdVar = Q / self.variance
+        return [dQdLam, dQdVar]
\ No newline at end of file
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 286edcc2..197d2ac4 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -189,6 +189,16 @@ class Stationary(Kern):
         self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
 
+    def update_gradients_direct(self, dL_dVar, dL_dLen):
+        """
+        Specially intended for the Grid regression case.
+        Given the computed log likelihood derivatives, update the corresponding
+        kernel and likelihood gradients.
+        Useful for when gradients have been computed a priori.
+        """
+        self.variance.gradient = dL_dVar
+        self.lengthscale.gradient = dL_dLen
+
     def _inv_dist(self, X, X2=None):
         """
         Compute the elementwise inverse of the distance matrix, expecpt on the
@@ -307,6 +317,15 @@ class Stationary(Kern):
     def input_sensitivity(self, summarize=True):
         return self.variance*np.ones(self.input_dim)/self.lengthscale**2
 
+    def get_one_dimensional_kernel(self, dimensions):
+        """
+        Specially intended for the grid regression case.
+        For a given covariance kernel, this method returns the corresponding kernel for
+        a single dimension. The resulting values can then be used in the algorithm for
+        reconstructing the full covariance matrix.
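+
+    For example, Gaussian grid inference rebuilds the per-dimension covariances
+    along these lines (a sketch mirroring gaussian_grid_inference)::
+
+        oneDkernel = kern.get_one_dimensional_kernel(X.shape[1])
+        oneDkernel.lengthscale = kern.lengthscale[d]   # per-dimension lengthscale
+        Kd = oneDkernel.K(xg)   # covariance over the unique inputs xg of dimension d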
+ """ + raise NotImplementedError("implement one dimensional variation of kernel") + From 8e634722375e016140c06e93ea78db4b4e340c7d Mon Sep 17 00:00:00 2001 From: kcutajar Date: Sun, 1 May 2016 21:50:34 +0200 Subject: [PATCH 03/13] Removed SSM functionality - updated Kronecker grid case --- GPy/core/__init__.py | 3 +- GPy/core/gp_ssm.py | 253 ------------------ .../latent_function_inference/__init__.py | 1 - .../gaussian_grid_inference.py | 2 +- .../gaussian_ssm_inference.py | 140 ---------- .../ssm_posterior.py | 73 ----- GPy/kern/__init__.py | 3 +- GPy/kern/src/{gridKerns.py => grid_kerns.py} | 4 +- GPy/kern/src/rbf.py | 1 + GPy/kern/src/ssm_kerns.py | 148 ---------- GPy/kern/src/stationary.py | 2 +- GPy/models/__init__.py | 1 + GPy/models/gp_grid_regression.py | 36 +++ GPy/testing/grid_tests.py | 51 ++++ doc/source/GPy.core.rst | 8 + ...Py.inference.latent_function_inference.rst | 16 ++ doc/source/GPy.kern.src.rst | 8 + doc/source/GPy.testing.rst | 8 + 18 files changed, 135 insertions(+), 623 deletions(-) delete mode 100644 GPy/core/gp_ssm.py delete mode 100644 GPy/inference/latent_function_inference/gaussian_ssm_inference.py delete mode 100644 GPy/inference/latent_function_inference/ssm_posterior.py rename GPy/kern/src/{gridKerns.py => grid_kerns.py} (96%) delete mode 100644 GPy/kern/src/ssm_kerns.py create mode 100644 GPy/models/gp_grid_regression.py create mode 100644 GPy/testing/grid_tests.py diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 76d8bb90..a6abfb06 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -3,12 +3,11 @@ from GPy.core.model import Model from .parameterization import Param, Parameterized -from . import parameterization +#from . import parameterization from .gp import GP from .svgp import SVGP from .sparse_gp import SparseGP -from .gp_ssm import GpSSM from .gp_grid import GpGrid from .mapping import * diff --git a/GPy/core/gp_ssm.py b/GPy/core/gp_ssm.py deleted file mode 100644 index 5426d010..00000000 --- a/GPy/core/gp_ssm.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Kurt Cutajar - -# This implementation of converting GPs to state space models is based on the article: - -#@article{Gilboa:2015, -# title={Scaling multidimensional inference for structured Gaussian processes}, -# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, -# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, -# volume={37}, -# number={2}, -# pages={424--436}, -# year={2015}, -# publisher={IEEE} -#} - -import numpy as np -import scipy.linalg as sp -from gp import GP -from parameterization.param import Param -from ..inference.latent_function_inference import gaussian_ssm_inference -from .. import likelihoods -from ..inference import optimization -from parameterization.transformations import Logexp -from GPy.inference.latent_function_inference.posterior import Posterior - -class GpSSM(GP): - """ - A GP model for sorted one-dimensional inputs - - This model allows the representation of a Gaussian Process as a Gauss-Markov State Machine. - - :param X: inputs - :type X: np.ndarray (num_data x input_dim) - :param likelihood: a likelihood instance, containing the observed data - :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) - :param kernel: the kernel (covariance function). 
See link kernels - :type kernel: a GPy.kern.kern instance - - """ - - def __init__(self, X, Y, kernel, likelihood, inference_method=None, - name='gp ssm', Y_metadata=None, normalizer=False): - #pick a sensible inference method - - inference_method = gaussian_ssm_inference.GaussianSSMInference() - - GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) - - self.posterior = None - - def optimize(self, optimizer=None, start=None, **kwargs): - prevLikelihood = 0 - count = 0 - change = 1 - while ((change > 0.001) and (count < 50)): - posterior, likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata) - expectations = posterior.expectations - self.optimize_params(expectations=expectations) - change = np.abs(likelihood - prevLikelihood) - prevLikelihood = likelihood - count = count + 1 - - def optimize_params(self, optimizer=None, start=None, expectations=None, **kwargs): - if self.is_fixed: - print("nothing to optimize") - if self.size == 0: - print("nothing to optimize") - - if not self.update_model(): - print("Updates were off, setting updates on again") - self.update_model(True) - - if start == None: - start = self.optimizer_array - - if optimizer is None: - optimizer = self.preferred_optimizer - - if isinstance(optimizer, optimization.Optimizer): - opt = optimizer - opt.model = self - else: - optimizer = optimization.get_optimizer(optimizer) - opt = optimizer(start, model=self, **kwargs) - - opt.max_iters = 1 - - opt.run(f_fp=self.param_maximisation_step, args=(self.X, self.Y, expectations)) - self.optimization_runs.append(opt) - self.optimizer_array = opt.x_opt - - def param_maximisation_step(self, loghyper, X, Y, E, *args): - - loghyper = np.log(np.exp(loghyper) + 1) - 1e-20 - lam = loghyper[0] - sig = loghyper[1] - noise = loghyper[2] - - kern = self.kern - order = kern.order - - K = len(X) - mu_0 = np.zeros((order, 1)) - v_0 = kern.Phi_of_r(-1) - dvSig = v_0/sig - dvLam = kern.dQ(-1)[0] - - mu = E[0][0] - V = E[0][1] - E11 = V + np.dot(mu, mu.T) - V0_inv = np.linalg.solve(v_0, np.eye(len(mu))) - Ub = np.log(np.linalg.det(v_0)) + np.trace(np.dot(V0_inv, E11)) - dUb_lam = np.trace(np.dot(V0_inv, dvLam.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvLam.T),V0_inv), E11)) - dUb_sig = np.trace(np.dot(V0_inv, dvSig.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvSig.T),V0_inv), E11)) - - for t in range(1, K): - delta = X[t] - X[t-1] - Q = kern.Q_of_r(delta) - Phi = kern.Phi_of_r(delta) - dPhi = kern.dPhidLam(delta) - [dLam, dSig] = kern.dQ(delta) - mu_prev = E[t-1][0] - V_prev = E[t-1][1] - mu = E[t][0] - V = E[t][1] - Ett_prev = V_prev + np.dot(mu_prev, mu_prev.T) - Eadj = E[t-1][3] + np.dot(mu, mu_prev.T) - Ett = V + np.dot(mu, mu.T) - CC = sp.cholesky(Q, lower=True) - Q_inv = np.linalg.solve(CC.T, np.linalg.solve(CC, np.eye(len(mu)))) - - Ub = Ub + np.log(np.linalg.det(Q)) + np.trace(np.dot(Q_inv, Ett)) - 2*np.trace(np.dot(np.dot(Phi.T, Q_inv), Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), Phi), Ett_prev)) - A = np.dot(dPhi.T, Q_inv) - np.dot(Phi.T, np.dot(np.dot(Q_inv, dLam), Q_inv)) - dUb_lam = dUb_lam + np.trace(np.dot(Q_inv, dLam.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dLam.T), Q_inv), Ett)) - 2*np.trace(np.dot(A,Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), dPhi) + np.dot(A, Phi), Ett_prev)) - A = -1 * np.dot(Phi.T, np.dot(np.dot(Q_inv, dSig), Q_inv)) - dUb_sig = dUb_sig + np.trace(np.dot(Q_inv, dSig.T)) - np . 
trace(np.dot(np.dot(np.dot(Q_inv, dSig.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(A, Phi), Ett_prev)) - - dUb_noise = 0 - for t in xrange(K): - mu = E[t][0] - V = E[t][1] - Ett = V + np.dot(mu, mu.T) - Ub = Ub + np.log(noise) + (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/noise - dUb_noise = dUb_noise + 1/noise - (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/(noise**2) - - dUb = np.array([lam, sig, noise]) * np.array([dUb_lam.item(), dUb_sig.item(), dUb_noise.item()]) - - return Ub.item(), dUb - - - def parameters_changed(self): - """ - Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model. - In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood - - .. warning:: - This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call - this method yourself, there may be unexpected consequences. - - We override the method in the parent class since we do not handle updates to the standard gradients. - """ - self.posterior, self._log_marginal_likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) - - def _raw_predict(self, Xnew, full_cov=False, kern=None): - """ - Make a prediction for the latent function values - N.B. It is assumed that input points are sorted - """ - if kern is None: - kern = self.kern - - X = self.X - K = X.shape[0] - K_new = Xnew.shape[0] - order = kern.order - mean_pred = np.zeros(K_new) - var_pred = np.zeros(K_new) - H = np.zeros((1,order)) - H[0][0] = 1 - - count = 0 - for t in xrange(K_new): - while ((count < K) and (Xnew[t] > X[count])): - count += 1 - - if (count == 0): - mu = np.zeros((order, 1)) - V = kern.Phi_of_r(-1) - - delta = np.abs(Xnew[t] - X[count]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - P = np.dot(np.dot(Phi, V), Phi.T) + Q - - mu_s = self.posterior.mu_s[count] - V_s = self.posterior.V_s[count] - L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) - mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) - V_s = V + np.dot(np.dot(L, V_s - P), L.T) - - mean_pred[t] = np.dot(H, mu_s) - var_pred[t] = np.dot(np.dot(H, V_s), H.T) - - elif (count == K): - # forwards phase - delta = np.abs(Xnew[t] - X[count-1]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - mu_f = self.posterior.mu_f[count-1] - V_f = self.posterior.V_f[count-1] - - mu = np.dot(Phi, mu_f) - P = np.dot(np.dot(Phi, V_f), Phi.T) + Q - V = P - - mean_pred[t] = np.dot(H,mu) - var_pred[t] = np.dot(np.dot(H, V), H.T) - - else: - # forwards phase - delta = np.abs(Xnew[t] - X[count-1]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - mu_f = self.posterior.mu_f[count-1] - V_f = self.posterior.V_f[count-1] - mu = np.dot(Phi, mu_f) - P = np.dot(np.dot(Phi, V_f), Phi.T) + Q - V = P - - delta = np.abs(Xnew[t] - X[count]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - P = np.dot(np.dot(Phi, V), Phi.T) + Q - - # backwards phase - mu_s = self.posterior.mu_s[count] - V_s = self.posterior.V_s[count] - - L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) - mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) - V_s = V + np.dot(np.dot(L, V_s - P), L.T) - - mean_pred[t] = np.dot(H, mu_s) - var_pred[t] = np.dot(np.dot(H, V_s), H.T) - - - mean_pred = mean_pred.reshape(-1, 1) - var_pred = var_pred.reshape(-1, 1) - - return mean_pred, var_pred diff --git 
a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index ba9099b6..90fbf0f1 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -69,7 +69,6 @@ from .dtc import DTC from .fitc import FITC from .var_dtc_parallel import VarDTC_minibatch from .var_gauss import VarGauss -from .gaussian_ssm_inference import GaussianSSMInference from .gaussian_grid_inference import GaussianGridInference diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py index 8917abea..6b2315b5 100644 --- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py +++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py @@ -61,7 +61,7 @@ class GaussianGridInference(LatentFunctionInference): V_kron = 1 # kronecker product of eigenvalues # retrieve the one-dimensional variation of the designated kernel - oneDkernel = kern.getOneDimensionalKernel(D) + oneDkernel = kern.get_one_dimensional_kernel(D) for d in xrange(D): xg = list(set(X[:,d])) #extract unique values for a dimension diff --git a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py deleted file mode 100644 index 4eb441cb..00000000 --- a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Kurt Cutajar - -# This implementation of converting GPs to state space models is based on the article: - -#@article{Gilboa:2015, -# title={Scaling multidimensional inference for structured Gaussian processes}, -# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, -# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, -# volume={37}, -# number={2}, -# pages={424--436}, -# year={2015}, -# publisher={IEEE} -#} - -from ssm_posterior import SsmPosterior -from ...util.linalg import pdinv, dpotrs, tdot -from ...util import diag -import numpy as np -import math as mt -from . import LatentFunctionInference -log_2_pi = np.log(2*np.pi) - - -class GaussianSSMInference(LatentFunctionInference): - """ - An object for inference when the likelihood is Gaussian. - - The function self.inference returns a Posterior object, which summarizes - the posterior. - - """ - def __init__(self): - pass#self._YYTfactor_cache = caching.cache() - - def get_YYTfactor(self, Y): - """ - find a matrix L which satisfies LL^T = YY^T. - - Note that L may have fewer columns than Y, else L=Y. - """ - N, D = Y.shape - if (N>D): - return Y - else: - #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. - #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" 
- return Y - - def inference(self, kern, X, likelihood, Y, Y_metadata=None): - - """ - Returns a Posterior class containing essential quantities of the posterior - """ - order = kern.order - K = X.shape[0] - log_likelihood = 0 - results = np.zeros((K,4),dtype=object) - H = np.zeros((1,order)) - H[0][0] = 1 - v_0 = kern.Phi_of_r(-1) - mu_0 = np.zeros((order, 1)) - noise_var = likelihood.variance + 1e-8 - - # carry out forward filtering - for t in range(K): - if (t == 0): - prior_m = np.dot(H,mu_0) - prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var - - log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v) - - kalman_gain = np.dot(v_0, H.T) / prior_v - mu = mu_0 + kalman_gain*(Y[0] - prior_m) - - - V = np.dot(np.eye(order) - np.dot(kalman_gain,H), v_0) - results[0][0] = mu - results[0][1] = V - else: - delta = X[t] - X[t-1] - Q = kern.Q_of_r(delta) - Phi = kern.Phi_of_r(delta) - P = np.dot(np.dot(Phi, V), Phi.T) + Q - PhiMu = np.dot(Phi, mu) - prior_m = np.dot(H, PhiMu) - prior_v = np.dot(np.dot(H, P), H.T) + noise_var - - log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v) - log_likelihood += log_likelihood_i - - kalman_gain = np.dot(P, H.T)/prior_v - mu = PhiMu + kalman_gain*(Y[t] - prior_m) - V = np.dot((np.eye(order) - np.dot(kalman_gain, H)), P) - - results[t-1][2] = Phi - results[t-1][3] = P - results[t][0] = mu - results[t][1] = V - - # carry out backwards smoothing - - W = np.dot((np.eye(order) - np.dot(kalman_gain,H)),(np.dot(Phi,results[K-2][1]))) - - mu_s = results[K-1][0] - V_s = results[K-1][1] - - posterior_mean = np.zeros((K,1)) - posterior_var = np.zeros((K,1)) - E = np.zeros((K,4), dtype='object') - - posterior_mean[K-1] = np.dot(H, mu_s) - posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T) - E[K-1][0] = mu_s - E[K-1][1] = V_s - - for t in range(K-2, -1, -1): - mu = results[t][0] - V = results[t][1] - Phi = results[t][2] - P = results[t][3] - - L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order))) # forward substitution - mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) - V_s = V + np.dot(np.dot(L, V_s - P), L.T) - posterior_mean[t] = np.dot(H, mu_s) - posterior_var[t] = np.dot(np.dot(H, V_s), H.T) - - if (t < K-2): - W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T)) - - E[t][0] = mu_s - E[t][1] = V_s - E[t][2] = L - E[t][3] = W - - return SsmPosterior(mu_f=results[:,0], V_f=results[:,1], mu_s=E[:,0], V_s=E[:,1], expectations=E), log_likelihood diff --git a/GPy/inference/latent_function_inference/ssm_posterior.py b/GPy/inference/latent_function_inference/ssm_posterior.py deleted file mode 100644 index 1b87ddb3..00000000 --- a/GPy/inference/latent_function_inference/ssm_posterior.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Kurt Cutajar - -import numpy as np - -class SsmPosterior(object): - """ - Specially intended for the SSM Regression case - An object to represent a Gaussian posterior over latent function values, p(f|D). - - The purpose of this class is to serve as an interface between the inference - schemes and the model classes. 
- - """ - def __init__(self, mu_f = None, V_f=None, mu_s=None, V_s=None, expectations=None): - """ - mu_f : mean values predicted during kalman filtering step - var_f : variance predicted during the kalman filtering step - mu_s : mean values predicted during backwards smoothing step - var_s : variance predicted during backwards smoothing step - expectations : posterior expectations - """ - - if ((mu_f is not None) and (V_f is not None) and - (mu_s is not None) and (V_s is not None) and - (expectations is not None)): - pass # we have sufficient to compute the posterior - else: - raise ValueError("insufficient information to compute predictions") - - self._mu_f = mu_f - self._V_f = V_f - self._mu_s = mu_s - self._V_s = V_s - self._expectations = expectations - - @property - def mu_f(self): - """ - Mean values predicted during kalman filtering step mean - """ - return self._mu_f - - @property - def V_f(self): - """ - Variance predicted during the kalman filtering step - """ - return self._V_f - - @property - def mu_s(self): - """ - Mean values predicted during kalman backwards smoothin mean - """ - return self._mu_s - - @property - def V_s(self): - """ - Variance predicted during backwards smoothing step - """ - return self._V_s - - @property - def expectations(self): - """ - Posterior expectations - """ - return self._expectations - diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index ffa906eb..65ca12de 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -29,5 +29,4 @@ from .src.splitKern import SplitKern,DEtime from .src.splitKern import DEtime as DiffGenomeKern from .src.spline import Spline from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel -from .src.ssmKerns import Matern32_SSM -from .src.gridKerns import GridRBF +from .src.grid_kerns import GridRBF diff --git a/GPy/kern/src/gridKerns.py b/GPy/kern/src/grid_kerns.py similarity index 96% rename from GPy/kern/src/gridKerns.py rename to GPy/kern/src/grid_kerns.py index 98804544..a043e159 100644 --- a/GPy/kern/src/gridKerns.py +++ b/GPy/kern/src/grid_kerns.py @@ -5,8 +5,8 @@ import numpy as np from stationary import Stationary -from ...util.config import * -from ...util.caching import Cache_this +from paramz.caching import Cache_this + class GridKern(Stationary): diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index 557366ae..a51a4ba4 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -7,6 +7,7 @@ from .stationary import Stationary from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU from ...core import Param from paramz.transformations import Logexp +from .gridKerns import GridRBF class RBF(Stationary): """ diff --git a/GPy/kern/src/ssm_kerns.py b/GPy/kern/src/ssm_kerns.py deleted file mode 100644 index aa988e52..00000000 --- a/GPy/kern/src/ssm_kerns.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) -# -# Kurt Cutajar - -from kern import Kern -from ... 
import util -import numpy as np -from scipy import integrate, weave -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp - -class SsmKerns(Kern): - """ - Kernels suitable for GP SSM regression with one-dimensional inputs - - """ - - def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False): - - super(SsmKerns, self).__init__(input_dim, active_dims, name,useGPU=useGPU) - self.lengthscale = np.abs(lengthscale) - self.variance = Param('variance', variance, Logexp()) - - def K_of_r(self, r): - raise NotImplementedError("implement the covariance function as a fn of r to use this class") - - def dK_dr(self, r): - raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") - - def Q_of_r(self, r): - raise NotImplementedError("implement covariance function of SDE wrt r to use this class") - - def Phi_of_r(self, r): - raise NotImplementedError("implement transition function of SDE wrt r to use this class") - - def noise(self): - raise NotImplementedError("implement noise function of SDE to use this class") - - def dPhidLam(self, r): - raise NotImplementedError("implement derivative of the transition function wrt to lambda to use this class") - - def dQ(self, r): - raise NotImplementedError("implement detivatives of Q wrt to lambda and variance to use this class") - - -class Matern32_SSM(SsmKerns): - """ - Matern 3/2 kernel: - - .. math:: - - k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - - def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'): - super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name) - lambd = np.sqrt(2*1.5)/lengthscale # additional paramter of model (derived from lengthscale) - self.lam = Param('lambda', lambd, Logexp()) - self.link_parameters(self.lam, self.variance) - self.order = 2 - - def noise(self): - """ - Compute noise for the kernel - """ - p = 1 - lp_fact = np.sum(np.log(range(1,p+1))) - l2p_fact = np.sum(np.log(range(1,2*p +1))) - logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact - q = np.exp(logq) - return q - - def K_of_r(self, r): - lengthscale = np.sqrt(3.) / self.lam - r = r / lengthscale - return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) 
* r) - - def dK_dr(self,r): - return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r) - - def Q_of_r(self, r): - """ - Compute process variance (Q) - """ - q = self.noise() - Q = np.zeros((2,2)) - Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2) - + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam)) - Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam)) - Q[1][0] = Q[0][1] - Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2) - - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam)) - return q*Q - - def Phi_of_r(self, r): - """ - Compute transition function (Phi) - """ - if r < 0: - phi = np.zeros((2,2)) - phi[0][0] = self.variance - phi[0][1] = 0 - phi[1][0] = 0 - phi[1][1] = np.power(self.lam,2)*self.variance - return phi - else: - mult = np.exp(-self.lam*r) - phi = np.zeros((2, 2)) - phi[0][0] = (1 + self.lam*r)*mult - phi[0][1] = mult*r - phi[1][0] = -r*mult*self.lam**2 - phi[1][1] = mult*(1 - self.lam*r) - return phi - - def dPhidLam(self, r): - """ - Compute derivative of transition function (Phi) wrt lambda - """ - mult = np.exp(self.lam*r) - dPhi = np.zeros((2, 2)) - dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult - dPhi[0][1] = (-1 * (r**2)) / mult - dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult - dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult - return dPhi - - def dQ(self, r): - """ - Compute derivatives of Q with respect to lambda and variance - """ - if (r == -1): - dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]]) - dQdVar = None - else: - q = self.noise() - Q = self.Q_of_r(r) - dQdLam = np.zeros((2, 2)) - mult = np.exp(2*self.lam*r) - dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult) - dQdLam[0][1] = -(r**3)/mult - dQdLam[1][0] = dQdLam[0][1] - dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult) - dq = (q*(2+1))/self.lam - dQdLam = q*dQdLam + dq*(Q/q) - dQdVar = Q / self.variance - return [dQdLam, dQdVar] \ No newline at end of file diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 197d2ac4..1452c562 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -317,7 +317,7 @@ class Stationary(Kern): def input_sensitivity(self, summarize=True): return self.variance*np.ones(self.input_dim)/self.lengthscale**2 - def get_one_dimensional_kernel(self, dimensions): + def get_one_dimensional_kernel(self, dimensions): """ Specially intended for the grid regression case For a given covariance kernel, this method returns the corresponding kernel for diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 4d645bea..3e76a477 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -22,3 +22,4 @@ from .gp_var_gauss import GPVariationalGaussianApproximation from .one_vs_all_classification import OneVsAllClassification from .one_vs_all_sparse_classification import OneVsAllSparseClassification from .dpgplvm import DPBayesianGPLVM +from .gp_grid_regression import GPRegressionGrid \ No newline at end of file diff --git a/GPy/models/gp_grid_regression.py b/GPy/models/gp_grid_regression.py new file mode 100644 index 00000000..fa4838fd --- /dev/null +++ b/GPy/models/gp_grid_regression.py @@ -0,0 +1,36 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +from ..core import GpGrid +from .. import likelihoods +from .. import kern + +class GPRegressionGrid(GpGrid): + """ + Gaussian Process model for grid inputs using Kronecker products + + This is a thin wrapper around the models.GpGrid class, with a set of sensible defaults + + :param X: input observations + :param Y: observed values + :param kernel: a GPy kernel, defaults to the kron variation of SqExp + :param Norm normalizer: [False] + + Normalize Y with the norm given. + If normalizer is False, no normalization will be done + If it is None, we use GaussianNorm(alization) + + .. Note:: Multiple independent outputs are allowed using columns of Y + + """ + + def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None): + + if kernel is None: + kernel = kern.RBF(1) # no other kernels implemented so far + + likelihood = likelihoods.Gaussian() + super(GPRegressionGrid, self).__init__(X, Y, kernel, likelihood, name='GP Grid regression', Y_metadata=Y_metadata, normalizer=normalizer) + diff --git a/GPy/testing/grid_tests.py b/GPy/testing/grid_tests.py new file mode 100644 index 00000000..e55efb18 --- /dev/null +++ b/GPy/testing/grid_tests.py @@ -0,0 +1,51 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +import unittest +import numpy as np +import GPy + +class GridModelTest(unittest.TestCase): + def setUp(self): + ###################################### + # # 3 dimensional example + + # sample inputs and outputs + self.X = np.array([[0,0,0],[0,0,1],[0,1,0],[0,1,1],[1,0,0],[1,0,1],[1,1,0],[1,1,1]]) + self.Y = np.random.randn(8, 1) * 100 + self.dim = self.X.shape[1] + + def test_alpha_match(self): + kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel) + + kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m2 = GPy.models.GPRegression(self.X, self.Y, kernel2) + + np.testing.assert_almost_equal(m.posterior.alpha, m2.posterior.woodbury_vector) + + def test_gradient_match(self): + kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel) + + kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m2 = GPy.models.GPRegression(self.X, self.Y, kernel2) + + np.testing.assert_almost_equal(kernel.variance.gradient, kernel2.variance.gradient) + np.testing.assert_almost_equal(kernel.lengthscale.gradient, kernel2.lengthscale.gradient) + np.testing.assert_almost_equal(m.likelihood.variance.gradient, m2.likelihood.variance.gradient) + + + def test_prediction_match(self): + kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel) + + kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m2 = GPy.models.GPRegression(self.X, self.Y, kernel2) + + test = np.array([[0,0,2],[-1,3,-4]]) + + np.testing.assert_almost_equal(m.predict(test), m2.predict(test)) + diff --git a/doc/source/GPy.core.rst b/doc/source/GPy.core.rst index 66878101..a42acd9c 100644 --- a/doc/source/GPy.core.rst +++ b/doc/source/GPy.core.rst @@ -59,6 +59,14 @@ GPy.core.svgp module :undoc-members: :show-inheritance: +GPy.core.gp_grid module +------------------------ + +.. 
automodule:: GPy.core.gp_grid + :members: + :undoc-members: + :show-inheritance: + GPy.core.symbolic module ------------------------ diff --git a/doc/source/GPy.inference.latent_function_inference.rst b/doc/source/GPy.inference.latent_function_inference.rst index c374e73b..f170003c 100644 --- a/doc/source/GPy.inference.latent_function_inference.rst +++ b/doc/source/GPy.inference.latent_function_inference.rst @@ -60,6 +60,14 @@ GPy.inference.latent_function_inference.posterior module :undoc-members: :show-inheritance: +GPy.inference.latent_function_inference.grid_posterior module +-------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.grid_posterior + :members: + :undoc-members: + :show-inheritance: + GPy.inference.latent_function_inference.svgp module --------------------------------------------------- @@ -92,6 +100,14 @@ GPy.inference.latent_function_inference.var_gauss module :undoc-members: :show-inheritance: +GPy.inference.latent_function_inference.gaussian_grid_inference module +-------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.gaussian_grid_inference + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/doc/source/GPy.kern.src.rst b/doc/source/GPy.kern.src.rst index ccbc3f99..166b3cba 100644 --- a/doc/source/GPy.kern.src.rst +++ b/doc/source/GPy.kern.src.rst @@ -171,6 +171,14 @@ GPy.kern.src.spline module :undoc-members: :show-inheritance: +GPy.kern.src.grid_kerns module +----------------------------- + +.. automodule:: GPy.kern.src.grid_kerns + :members: + :undoc-members: + :show-inheritance: + GPy.kern.src.splitKern module ----------------------------- diff --git a/doc/source/GPy.testing.rst b/doc/source/GPy.testing.rst index a10c3d18..5cd144cb 100644 --- a/doc/source/GPy.testing.rst +++ b/doc/source/GPy.testing.rst @@ -197,6 +197,14 @@ GPy.testing.svgp_tests module :show-inheritance: +GPy.testing.grid_tests module +----------------------------- + +.. automodule:: GPy.testing.grid_tests + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- From 1fc93236c46ddd1b7bd7f73ef26dc51af4cd2181 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Sun, 1 May 2016 22:20:26 +0200 Subject: [PATCH 04/13] Minor fix --- GPy/core/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index a6abfb06..49e9c3c7 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -3,7 +3,7 @@ from GPy.core.model import Model from .parameterization import Param, Parameterized -#from . import parameterization +from . import parameterization from .gp import GP from .svgp import SVGP From 9a2670b98e064ff0afdf4a10b1ca949f66098f67 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Tue, 22 Mar 2016 08:53:39 +0100 Subject: [PATCH 05/13] Added core code for GpSSM and GpGrid --- GPy/core/__init__.py | 2 + GPy/core/gp_grid.py | 116 ++++++++ GPy/core/gp_ssm.py | 253 ++++++++++++++++++ .../latent_function_inference/__init__.py | 3 + 4 files changed, 374 insertions(+) create mode 100644 GPy/core/gp_grid.py create mode 100644 GPy/core/gp_ssm.py diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index b0743916..76d8bb90 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -8,6 +8,8 @@ from . 
import parameterization from .gp import GP from .svgp import SVGP from .sparse_gp import SparseGP +from .gp_ssm import GpSSM +from .gp_grid import GpGrid from .mapping import * diff --git a/GPy/core/gp_grid.py b/GPy/core/gp_grid.py new file mode 100644 index 00000000..3d061bb0 --- /dev/null +++ b/GPy/core/gp_grid.py @@ -0,0 +1,116 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +#This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +import numpy as np +import scipy.linalg as sp +from gp import GP +from parameterization.param import Param +from ..inference.latent_function_inference import gaussian_grid_inference +from .. import likelihoods + +import logging +from GPy.inference.latent_function_inference.posterior import Posterior +logger = logging.getLogger("gp grid") + +class GpGrid(GP): + """ + A GP model for Grid inputs + + :param X: inputs + :type X: np.ndarray (num_data x input_dim) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) + :param kernel: the kernel (covariance function). See link kernels + :type kernel: a GPy.kern.kern instance + + """ + + def __init__(self, X, Y, kernel, likelihood, inference_method=None, + name='gp grid', Y_metadata=None, normalizer=False): + #pick a sensible inference method + + inference_method = gaussian_grid_inference.GaussianGridInference() + + GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + self.posterior = None + + def parameters_changed(self): + """ + Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model. + In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood and gradients of the model + + .. warning:: + This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call + this method yourself, there may be unexpected consequences. 
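To illustrate this hook: assigning to any linked parameter re-runs inference automatically, so the posterior and marginal likelihood are always current. An illustrative session with hypothetical data on a full grid, using the GPRegressionGrid wrapper from this series (a sketch, not a guaranteed API transcript):

import numpy as np
import GPy

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])   # a full 2-D grid
Y = np.random.randn(4, 1)
m = GPy.models.GPRegressionGrid(X, Y, GPy.kern.RBF(input_dim=2, ARD=True))

before = m.log_likelihood()
m.kern.lengthscale = 2.0      # assignment fires parameters_changed()
after = m.log_likelihood()    # posterior and gradients already refreshed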
+ """ + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) + self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) + self.kern.update_gradients_direct(self.grad_dict['dL_dVar'], self.grad_dict['dL_dLen']) + + def kron_mmprod(self, A, B): + count = 0 + D = len(A) + for b in (B.T): + x = b + N = 1 + G = np.zeros(D) + for d in xrange(D): + G[d] = len(A[d]) + N = np.prod(G) + for d in xrange(D-1, -1, -1): + X = np.reshape(x, (G[d], round(N/G[d])), order='F') + Z = np.dot(A[d], X) + Z = Z.T + x = np.reshape(Z, (-1, 1), order='F') + if (count == 0): + result = x + else: + result = np.column_stack((result, x)) + count+=1 + return result + + def _raw_predict(self, Xnew, full_cov=False, kern=None): + """ + Make a prediction for the latent function values + """ + if kern is None: + kern = self.kern + + # compute mean predictions + Kmn = kern.K(Xnew, self.X) + alpha_kron = self.posterior.alpha + mu = np.dot(Kmn, alpha_kron) + mu = mu.reshape(-1,1) + + # compute variance of predictions + Knm = Kmn.T + noise = self.likelihood.variance + V_kron = self.posterior.V_kron + Qs = self.posterior.Qs + QTs = self.posterior.QTs + A = self.kron_mmprod(QTs, Knm) + V_kron = V_kron.reshape(-1, 1) + A = A / (V_kron + noise) + A = self.kron_mmprod(Qs, A) + + Kmm = kern.K(Xnew) + var = np.diag(Kmm - np.dot(Kmn, A)).copy() + #var = np.zeros((Xnew.shape[0])) + var = var.reshape(-1, 1) + + return mu, var diff --git a/GPy/core/gp_ssm.py b/GPy/core/gp_ssm.py new file mode 100644 index 00000000..5426d010 --- /dev/null +++ b/GPy/core/gp_ssm.py @@ -0,0 +1,253 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +# This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +import numpy as np +import scipy.linalg as sp +from gp import GP +from parameterization.param import Param +from ..inference.latent_function_inference import gaussian_ssm_inference +from .. import likelihoods +from ..inference import optimization +from parameterization.transformations import Logexp +from GPy.inference.latent_function_inference.posterior import Posterior + +class GpSSM(GP): + """ + A GP model for sorted one-dimensional inputs + + This model allows the representation of a Gaussian Process as a Gauss-Markov State Machine. + + :param X: inputs + :type X: np.ndarray (num_data x input_dim) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) + :param kernel: the kernel (covariance function). 
See link kernels + :type kernel: a GPy.kern.kern instance + + """ + + def __init__(self, X, Y, kernel, likelihood, inference_method=None, + name='gp ssm', Y_metadata=None, normalizer=False): + #pick a sensible inference method + + inference_method = gaussian_ssm_inference.GaussianSSMInference() + + GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + + self.posterior = None + + def optimize(self, optimizer=None, start=None, **kwargs): + prevLikelihood = 0 + count = 0 + change = 1 + while ((change > 0.001) and (count < 50)): + posterior, likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata) + expectations = posterior.expectations + self.optimize_params(expectations=expectations) + change = np.abs(likelihood - prevLikelihood) + prevLikelihood = likelihood + count = count + 1 + + def optimize_params(self, optimizer=None, start=None, expectations=None, **kwargs): + if self.is_fixed: + print("nothing to optimize") + if self.size == 0: + print("nothing to optimize") + + if not self.update_model(): + print("Updates were off, setting updates on again") + self.update_model(True) + + if start == None: + start = self.optimizer_array + + if optimizer is None: + optimizer = self.preferred_optimizer + + if isinstance(optimizer, optimization.Optimizer): + opt = optimizer + opt.model = self + else: + optimizer = optimization.get_optimizer(optimizer) + opt = optimizer(start, model=self, **kwargs) + + opt.max_iters = 1 + + opt.run(f_fp=self.param_maximisation_step, args=(self.X, self.Y, expectations)) + self.optimization_runs.append(opt) + self.optimizer_array = opt.x_opt + + def param_maximisation_step(self, loghyper, X, Y, E, *args): + + loghyper = np.log(np.exp(loghyper) + 1) - 1e-20 + lam = loghyper[0] + sig = loghyper[1] + noise = loghyper[2] + + kern = self.kern + order = kern.order + + K = len(X) + mu_0 = np.zeros((order, 1)) + v_0 = kern.Phi_of_r(-1) + dvSig = v_0/sig + dvLam = kern.dQ(-1)[0] + + mu = E[0][0] + V = E[0][1] + E11 = V + np.dot(mu, mu.T) + V0_inv = np.linalg.solve(v_0, np.eye(len(mu))) + Ub = np.log(np.linalg.det(v_0)) + np.trace(np.dot(V0_inv, E11)) + dUb_lam = np.trace(np.dot(V0_inv, dvLam.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvLam.T),V0_inv), E11)) + dUb_sig = np.trace(np.dot(V0_inv, dvSig.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvSig.T),V0_inv), E11)) + + for t in range(1, K): + delta = X[t] - X[t-1] + Q = kern.Q_of_r(delta) + Phi = kern.Phi_of_r(delta) + dPhi = kern.dPhidLam(delta) + [dLam, dSig] = kern.dQ(delta) + mu_prev = E[t-1][0] + V_prev = E[t-1][1] + mu = E[t][0] + V = E[t][1] + Ett_prev = V_prev + np.dot(mu_prev, mu_prev.T) + Eadj = E[t-1][3] + np.dot(mu, mu_prev.T) + Ett = V + np.dot(mu, mu.T) + CC = sp.cholesky(Q, lower=True) + Q_inv = np.linalg.solve(CC.T, np.linalg.solve(CC, np.eye(len(mu)))) + + Ub = Ub + np.log(np.linalg.det(Q)) + np.trace(np.dot(Q_inv, Ett)) - 2*np.trace(np.dot(np.dot(Phi.T, Q_inv), Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), Phi), Ett_prev)) + A = np.dot(dPhi.T, Q_inv) - np.dot(Phi.T, np.dot(np.dot(Q_inv, dLam), Q_inv)) + dUb_lam = dUb_lam + np.trace(np.dot(Q_inv, dLam.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dLam.T), Q_inv), Ett)) - 2*np.trace(np.dot(A,Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), dPhi) + np.dot(A, Phi), Ett_prev)) + A = -1 * np.dot(Phi.T, np.dot(np.dot(Q_inv, dSig), Q_inv)) + dUb_sig = dUb_sig + np.trace(np.dot(Q_inv, dSig.T)) - np . 
trace(np.dot(np.dot(np.dot(Q_inv, dSig.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(A, Phi), Ett_prev)) + + dUb_noise = 0 + for t in xrange(K): + mu = E[t][0] + V = E[t][1] + Ett = V + np.dot(mu, mu.T) + Ub = Ub + np.log(noise) + (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/noise + dUb_noise = dUb_noise + 1/noise - (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/(noise**2) + + dUb = np.array([lam, sig, noise]) * np.array([dUb_lam.item(), dUb_sig.item(), dUb_noise.item()]) + + return Ub.item(), dUb + + + def parameters_changed(self): + """ + Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model. + In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood + + .. warning:: + This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call + this method yourself, there may be unexpected consequences. + + We override the method in the parent class since we do not handle updates to the standard gradients. + """ + self.posterior, self._log_marginal_likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) + + def _raw_predict(self, Xnew, full_cov=False, kern=None): + """ + Make a prediction for the latent function values + N.B. It is assumed that input points are sorted + """ + if kern is None: + kern = self.kern + + X = self.X + K = X.shape[0] + K_new = Xnew.shape[0] + order = kern.order + mean_pred = np.zeros(K_new) + var_pred = np.zeros(K_new) + H = np.zeros((1,order)) + H[0][0] = 1 + + count = 0 + for t in xrange(K_new): + while ((count < K) and (Xnew[t] > X[count])): + count += 1 + + if (count == 0): + mu = np.zeros((order, 1)) + V = kern.Phi_of_r(-1) + + delta = np.abs(Xnew[t] - X[count]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + P = np.dot(np.dot(Phi, V), Phi.T) + Q + + mu_s = self.posterior.mu_s[count] + V_s = self.posterior.V_s[count] + L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) + mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) + V_s = V + np.dot(np.dot(L, V_s - P), L.T) + + mean_pred[t] = np.dot(H, mu_s) + var_pred[t] = np.dot(np.dot(H, V_s), H.T) + + elif (count == K): + # forwards phase + delta = np.abs(Xnew[t] - X[count-1]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + mu_f = self.posterior.mu_f[count-1] + V_f = self.posterior.V_f[count-1] + + mu = np.dot(Phi, mu_f) + P = np.dot(np.dot(Phi, V_f), Phi.T) + Q + V = P + + mean_pred[t] = np.dot(H,mu) + var_pred[t] = np.dot(np.dot(H, V), H.T) + + else: + # forwards phase + delta = np.abs(Xnew[t] - X[count-1]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + mu_f = self.posterior.mu_f[count-1] + V_f = self.posterior.V_f[count-1] + mu = np.dot(Phi, mu_f) + P = np.dot(np.dot(Phi, V_f), Phi.T) + Q + V = P + + delta = np.abs(Xnew[t] - X[count]) + Phi = kern.Phi_of_r(delta) + Q = kern.Q_of_r(delta) + P = np.dot(np.dot(Phi, V), Phi.T) + Q + + # backwards phase + mu_s = self.posterior.mu_s[count] + V_s = self.posterior.V_s[count] + + L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) + mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) + V_s = V + np.dot(np.dot(L, V_s - P), L.T) + + mean_pred[t] = np.dot(H, mu_s) + var_pred[t] = np.dot(np.dot(H, V_s), H.T) + + + mean_pred = mean_pred.reshape(-1, 1) + var_pred = var_pred.reshape(-1, 1) + + return mean_pred, var_pred diff --git 
a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index eabc6cc9..ba9099b6 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -69,6 +69,9 @@ from .dtc import DTC from .fitc import FITC from .var_dtc_parallel import VarDTC_minibatch from .var_gauss import VarGauss +from .gaussian_ssm_inference import GaussianSSMInference +from .gaussian_grid_inference import GaussianGridInference + # class FullLatentFunctionData(object): # From 3d346cbdd6e0b418b047ee0ffb9a6b938d236faa Mon Sep 17 00:00:00 2001 From: kcutajar Date: Tue, 22 Mar 2016 09:09:30 +0100 Subject: [PATCH 06/13] Added kernels for GpGrid and GpSsm regression --- .../gaussian_grid_inference.py | 113 +++++++++++++ .../gaussian_ssm_inference.py | 140 +++++++++++++++++ .../grid_posterior.py | 62 ++++++++ .../ssm_posterior.py | 73 +++++++++ GPy/kern/__init__.py | 3 +- GPy/kern/src/gridKerns.py | 76 +++++++++ GPy/kern/src/rbf.py | 8 + GPy/kern/src/ssm_kerns.py | 148 ++++++++++++++++++ GPy/kern/src/stationary.py | 19 +++ 9 files changed, 641 insertions(+), 1 deletion(-) create mode 100644 GPy/inference/latent_function_inference/gaussian_grid_inference.py create mode 100644 GPy/inference/latent_function_inference/gaussian_ssm_inference.py create mode 100644 GPy/inference/latent_function_inference/grid_posterior.py create mode 100644 GPy/inference/latent_function_inference/ssm_posterior.py create mode 100644 GPy/kern/src/gridKerns.py create mode 100644 GPy/kern/src/ssm_kerns.py diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py new file mode 100644 index 00000000..8917abea --- /dev/null +++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py @@ -0,0 +1,113 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +# This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +from grid_posterior import GridPosterior +import numpy as np +from . import LatentFunctionInference +log_2_pi = np.log(2*np.pi) + +class GaussianGridInference(LatentFunctionInference): + """ + An object for inference when the likelihood is Gaussian and inputs are on a grid. + + The function self.inference returns a GridPosterior object, which summarizes + the posterior. 
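The key primitive in the kron_mvprod routine below is multiplying a Kronecker product of small per-dimension matrices by a vector without ever forming the full matrix, using the identity (A ⊗ B) vec(X) = vec(B X Aᵀ). A self-contained sketch of the same algorithm, with a direct check against np.kron (assumes square factors, as in the grid case):

import numpy as np

def kron_mvprod(As, b):
    # computes (As[0] kron As[1] kron ...) dot b using only small matmuls
    x = b
    N = int(np.prod([A.shape[0] for A in As]))
    for A in reversed(As):
        G = A.shape[0]
        X = np.reshape(x, (G, N // G), order='F')
        x = np.reshape(np.dot(A, X).T, (-1, 1), order='F')
    return x

As = [np.random.randn(3, 3), np.random.randn(4, 4)]
b = np.random.randn(12, 1)
assert np.allclose(kron_mvprod(As, b), np.dot(np.kron(As[0], As[1]), b))

This reduces the cost of the matrix-vector product from O(N^2) to a sum of small dense multiplications, which is what makes the grid inference below tractable.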
+ + """ + def __init__(self): + pass + + def kron_mvprod(self, A, b): + x = b + N = 1 + D = len(A) + G = np.zeros((D,1)) + for d in xrange(0, D): + G[d] = len(A[d]) + N = np.prod(G) + for d in xrange(D-1, -1, -1): + X = np.reshape(x, (G[d], round(N/G[d])), order='F') + Z = np.dot(A[d], X) + Z = Z.T + x = np.reshape(Z, (-1, 1), order='F') + return x + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + """ + Returns a GridPosterior class containing essential quantities of the posterior + """ + N = X.shape[0] #number of training points + D = X.shape[1] #number of dimensions + + Kds = np.zeros(D, dtype=object) #vector for holding covariance per dimension + Qs = np.zeros(D, dtype=object) #vector for holding eigenvectors of covariance per dimension + QTs = np.zeros(D, dtype=object) #vector for holding transposed eigenvectors of covariance per dimension + V_kron = 1 # kronecker product of eigenvalues + + # retrieve the one-dimensional variation of the designated kernel + oneDkernel = kern.getOneDimensionalKernel(D) + + for d in xrange(D): + xg = list(set(X[:,d])) #extract unique values for a dimension + xg = np.reshape(xg, (len(xg), 1)) + oneDkernel.lengthscale = kern.lengthscale[d] + Kds[d] = oneDkernel.K(xg) + [V, Q] = np.linalg.eig(Kds[d]) + V_kron = np.kron(V_kron, V) + Qs[d] = Q + QTs[d] = Q.T + + noise = likelihood.variance + 1e-8 + + alpha_kron = self.kron_mvprod(QTs, Y) + V_kron = V_kron.reshape(-1, 1) + alpha_kron = alpha_kron / (V_kron + noise) + alpha_kron = self.kron_mvprod(Qs, alpha_kron) + + log_likelihood = -0.5 * (np.dot(Y.T, alpha_kron) + np.sum((np.log(V_kron + noise))) + N*log_2_pi) + + # compute derivatives wrt parameters Thete + derivs = np.zeros(D+2, dtype='object') + for t in xrange(len(derivs)): + dKd_dTheta = np.zeros(D, dtype='object') + gamma = np.zeros(D, dtype='object') + gam = 1 + for d in xrange(D): + xg = list(set(X[:,d])) + xg = np.reshape(xg, (len(xg), 1)) + oneDkernel.lengthscale = kern.lengthscale[d] + if t < D: + dKd_dTheta[d] = oneDkernel.dKd_dLen(xg, (t==d), lengthscale=kern.lengthscale[t]) #derivative wrt lengthscale + elif (t == D): + dKd_dTheta[d] = oneDkernel.dKd_dVar(xg) #derivative wrt variance + else: + dKd_dTheta[d] = np.identity(len(xg)) #derivative wrt noise + gamma[d] = np.diag(np.dot(np.dot(QTs[d], dKd_dTheta[d].T), Qs[d])) + gam = np.kron(gam, gamma[d]) + + gam = gam.reshape(-1,1) + kappa = self.kron_mvprod(dKd_dTheta, alpha_kron) + derivs[t] = 0.5*np.dot(alpha_kron.T,kappa) - 0.5*np.sum(gam / (V_kron + noise)) + + # separate derivatives + dL_dLen = derivs[:D] + dL_dVar = derivs[D] + dL_dThetaL = derivs[D+1] + + return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL} diff --git a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py new file mode 100644 index 00000000..4eb441cb --- /dev/null +++ b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py @@ -0,0 +1,140 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +# This implementation of converting GPs to state space models is based on the article: + +#@article{Gilboa:2015, +# title={Scaling multidimensional inference for structured Gaussian processes}, +# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, +# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, +# volume={37}, +# number={2}, +# pages={424--436}, +# year={2015}, +# publisher={IEEE} +#} + +from ssm_posterior import SsmPosterior +from ...util.linalg import pdinv, dpotrs, tdot +from ...util import diag +import numpy as np +import math as mt +from . import LatentFunctionInference +log_2_pi = np.log(2*np.pi) + + +class GaussianSSMInference(LatentFunctionInference): + """ + An object for inference when the likelihood is Gaussian. + + The function self.inference returns a Posterior object, which summarizes + the posterior. + + """ + def __init__(self): + pass#self._YYTfactor_cache = caching.cache() + + def get_YYTfactor(self, Y): + """ + find a matrix L which satisfies LL^T = YY^T. + + Note that L may have fewer columns than Y, else L=Y. + """ + N, D = Y.shape + if (N>D): + return Y + else: + #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. + #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" + return Y + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + """ + Returns a Posterior class containing essential quantities of the posterior + """ + order = kern.order + K = X.shape[0] + log_likelihood = 0 + results = np.zeros((K,4),dtype=object) + H = np.zeros((1,order)) + H[0][0] = 1 + v_0 = kern.Phi_of_r(-1) + mu_0 = np.zeros((order, 1)) + noise_var = likelihood.variance + 1e-8 + + # carry out forward filtering + for t in range(K): + if (t == 0): + prior_m = np.dot(H,mu_0) + prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var + + log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v) + + kalman_gain = np.dot(v_0, H.T) / prior_v + mu = mu_0 + kalman_gain*(Y[0] - prior_m) + + + V = np.dot(np.eye(order) - np.dot(kalman_gain,H), v_0) + results[0][0] = mu + results[0][1] = V + else: + delta = X[t] - X[t-1] + Q = kern.Q_of_r(delta) + Phi = kern.Phi_of_r(delta) + P = np.dot(np.dot(Phi, V), Phi.T) + Q + PhiMu = np.dot(Phi, mu) + prior_m = np.dot(H, PhiMu) + prior_v = np.dot(np.dot(H, P), H.T) + noise_var + + log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v) + log_likelihood += log_likelihood_i + + kalman_gain = np.dot(P, H.T)/prior_v + mu = PhiMu + kalman_gain*(Y[t] - prior_m) + V = np.dot((np.eye(order) - np.dot(kalman_gain, H)), P) + + results[t-1][2] = Phi + results[t-1][3] = P + results[t][0] = mu + results[t][1] = V + + # carry out backwards smoothing + + W = np.dot((np.eye(order) - np.dot(kalman_gain,H)),(np.dot(Phi,results[K-2][1]))) + + mu_s = results[K-1][0] + V_s = results[K-1][1] + + posterior_mean = np.zeros((K,1)) + posterior_var = np.zeros((K,1)) + E = np.zeros((K,4), dtype='object') + + posterior_mean[K-1] = np.dot(H, mu_s) + posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T) + E[K-1][0] = mu_s + E[K-1][1] = V_s + + for t in range(K-2, -1, -1): + mu = results[t][0] + V = results[t][1] + Phi = results[t][2] + P = results[t][3] + + L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order))) # forward substitution + mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) + V_s = V + np.dot(np.dot(L, V_s - P), L.T) + posterior_mean[t] 
= np.dot(H, mu_s)
+ posterior_var[t] = np.dot(np.dot(H, V_s), H.T)
+
+ if (t < K-2):
+ W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T))
+
+ E[t][0] = mu_s
+ E[t][1] = V_s
+ E[t][2] = L
+ E[t][3] = W
+
+ return SsmPosterior(mu_f=results[:,0], V_f=results[:,1], mu_s=E[:,0], V_s=E[:,1], expectations=E), log_likelihood
diff --git a/GPy/inference/latent_function_inference/grid_posterior.py b/GPy/inference/latent_function_inference/grid_posterior.py
new file mode 100644
index 00000000..da86dd84
--- /dev/null
+++ b/GPy/inference/latent_function_inference/grid_posterior.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+
+class GridPosterior(object):
+ """
+ Specially intended for the Grid Regression case
+ An object to represent a Gaussian posterior over latent function values, p(f|D).
+
+ The purpose of this class is to serve as an interface between the inference
+ schemes and the model classes.
+
+ """
+ def __init__(self, alpha_kron=None, QTs=None, Qs=None, V_kron=None):
+ """
+ alpha_kron : predictive weights, i.e. the solution of (K + sigma^2 I) alpha = Y, computed via the Kronecker eigendecomposition
+ QTs : transposed eigenvectors resulting from the decomposition of the single-dimension covariance matrices
+ Qs : eigenvectors resulting from the decomposition of the single-dimension covariance matrices
+ V_kron : kronecker product of the eigenvalues resulting from the decomposition of the single-dimension covariance matrices
+ """
+
+ if ((alpha_kron is not None) and (QTs is not None)
+ and (Qs is not None) and (V_kron is not None)):
+ pass # we have sufficient information to compute the posterior
+ else:
+ raise ValueError("insufficient information for predictions")
+
+ self._alpha_kron = alpha_kron
+ self._qTs = QTs
+ self._qs = Qs
+ self._v_kron = V_kron
+
+ @property
+ def alpha(self):
+ """
+ predictive weights, the solution of (K + sigma^2 I) alpha = Y
+ """
+ return self._alpha_kron
+
+ @property
+ def QTs(self):
+ """
+ array of transposed eigenvectors of the single-dimension covariance matrices
+ """
+ return self._qTs
+
+ @property
+ def Qs(self):
+ """
+ array of eigenvectors of the single-dimension covariance matrices
+ """
+ return self._qs
+
+ @property
+ def V_kron(self):
+ """
+ kronecker product of the eigenvalues of the single-dimension covariance matrices
+ """
+ return self._v_kron
+
diff --git a/GPy/inference/latent_function_inference/ssm_posterior.py b/GPy/inference/latent_function_inference/ssm_posterior.py
new file mode 100644
index 00000000..1b87ddb3
--- /dev/null
+++ b/GPy/inference/latent_function_inference/ssm_posterior.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+
+class SsmPosterior(object):
+ """
+ Specially intended for the SSM Regression case
+ An object to represent a Gaussian posterior over latent function values, p(f|D).
+
+ The purpose of this class is to serve as an interface between the inference
+ schemes and the model classes.
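Tying the GridPosterior quantities above together: with each per-dimension Gram matrix decomposed as K = Q diag(V) Qᵀ, the weight vector alpha solves (K + sigma^2 I) alpha = Y. A one-dimensional sketch of that identity (NumPy only, toy numbers):

import numpy as np

K = np.array([[1.0, 0.5], [0.5, 1.0]])   # toy 1-D Gram matrix
Y = np.array([[0.3], [-1.2]])
noise = 0.1
V, Q = np.linalg.eigh(K)                 # K = Q diag(V) Q^T
alpha = np.dot(Q, np.dot(Q.T, Y) / (V.reshape(-1, 1) + noise))
assert np.allclose(alpha, np.linalg.solve(K + noise * np.eye(2), Y))

In the multi-dimensional case the same rescaling happens between the two kron_mvprod calls, with V_kron playing the role of V.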
+ + """ + def __init__(self, mu_f = None, V_f=None, mu_s=None, V_s=None, expectations=None): + """ + mu_f : mean values predicted during kalman filtering step + var_f : variance predicted during the kalman filtering step + mu_s : mean values predicted during backwards smoothing step + var_s : variance predicted during backwards smoothing step + expectations : posterior expectations + """ + + if ((mu_f is not None) and (V_f is not None) and + (mu_s is not None) and (V_s is not None) and + (expectations is not None)): + pass # we have sufficient to compute the posterior + else: + raise ValueError("insufficient information to compute predictions") + + self._mu_f = mu_f + self._V_f = V_f + self._mu_s = mu_s + self._V_s = V_s + self._expectations = expectations + + @property + def mu_f(self): + """ + Mean values predicted during kalman filtering step mean + """ + return self._mu_f + + @property + def V_f(self): + """ + Variance predicted during the kalman filtering step + """ + return self._V_f + + @property + def mu_s(self): + """ + Mean values predicted during kalman backwards smoothin mean + """ + return self._mu_s + + @property + def V_s(self): + """ + Variance predicted during backwards smoothing step + """ + return self._V_s + + @property + def expectations(self): + """ + Posterior expectations + """ + return self._expectations + diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 7f44b6a9..041159f6 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -29,7 +29,6 @@ from .src.splitKern import SplitKern,DEtime from .src.splitKern import DEtime as DiffGenomeKern from .src.spline import Spline from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel - from .src.sde_matern import sde_Matern32 from .src.sde_matern import sde_Matern52 from .src.sde_linear import sde_Linear @@ -37,3 +36,5 @@ from .src.sde_standard_periodic import sde_StdPeriodic from .src.sde_static import sde_White, sde_Bias from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_brownian import sde_Brownian +from .src.ssmKerns import Matern32_SSM +from .src.gridKerns import GridRBF diff --git a/GPy/kern/src/gridKerns.py b/GPy/kern/src/gridKerns.py new file mode 100644 index 00000000..98804544 --- /dev/null +++ b/GPy/kern/src/gridKerns.py @@ -0,0 +1,76 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +import numpy as np +from stationary import Stationary +from ...util.config import * +from ...util.caching import Cache_this + +class GridKern(Stationary): + + def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=False): + super(GridKern, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU) + self.originalDimensions = originalDimensions + + @Cache_this(limit=3, ignore_args=()) + def dKd_dVar(self, X, X2=None): + """ + Derivative of Kernel function wrt variance applied on inputs X and X2. + In the stationary case there is an inner function depending on the + distances from X to X2, called r. + + dKd_dVar(X, X2) = dKdVar_of_r((X-X2)**2) + """ + r = self._scaled_dist(X, X2) + return self.dKdVar_of_r(r) + + @Cache_this(limit=3, ignore_args=()) + def dKd_dLen(self, X, dimension, lengthscale, X2=None): + """ + Derivate of Kernel function wrt lengthscale applied on inputs X and X2. 
+ In the stationary case there is an inner function depending on the + distances from X to X2, called r. + + dKd_dLen(X, X2) = dKdLen_of_r((X-X2)**2) + """ + r = self._scaled_dist(X, X2) + return self.dKdLen_of_r(r, dimension, lengthscale) + +class GridRBF(GridKern): + """ + Similar to regular RBF but supplemented with methods required for Gaussian grid regression + Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: + + .. math:: + + k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) + + """ + _support_GPU = True + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='gridRBF', originalDimensions=1, useGPU=False): + super(GridRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=useGPU) + + def K_of_r(self, r): + return (self.variance**(float(1)/self.originalDimensions)) * np.exp(-0.5 * r**2) + + def dKdVar_of_r(self, r): + """ + Compute derivative of kernel wrt variance + """ + return np.exp(-0.5 * r**2) + + def dKdLen_of_r(self, r, dimCheck, lengthscale): + """ + Compute derivative of kernel for dimension wrt lengthscale + Computation of derivative changes when lengthscale corresponds to + the dimension of the kernel whose derivate is being computed. + """ + if (dimCheck == True): + return (self.variance**(float(1)/self.originalDimensions)) * np.exp(-0.5 * r**2) * (r**2) / (lengthscale**(float(1)/self.originalDimensions)) + else: + return (self.variance**(float(1)/self.originalDimensions)) * np.exp(-0.5 * r**2) / (lengthscale**(float(1)/self.originalDimensions)) + + def dK_dr(self, r): + return -r*self.K_of_r(r) \ No newline at end of file diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index ff86561d..557366ae 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -58,6 +58,14 @@ class RBF(Stationary): if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200) super(RBF,self).parameters_changed() + + def get_one_dimensional_kernel(self, dim): + """ + Specially intended for Grid regression. + """ + oneDkernel = GridRBF(input_dim=1, variance=self.variance.copy(), originalDimensions=dim) + return oneDkernel + #---------------------------------------# # PSI statistics # #---------------------------------------# diff --git a/GPy/kern/src/ssm_kerns.py b/GPy/kern/src/ssm_kerns.py new file mode 100644 index 00000000..aa988e52 --- /dev/null +++ b/GPy/kern/src/ssm_kerns.py @@ -0,0 +1,148 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +# +# Kurt Cutajar + +from kern import Kern +from ... 
import util +import numpy as np +from scipy import integrate, weave +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp + +class SsmKerns(Kern): + """ + Kernels suitable for GP SSM regression with one-dimensional inputs + + """ + + def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False): + + super(SsmKerns, self).__init__(input_dim, active_dims, name,useGPU=useGPU) + self.lengthscale = np.abs(lengthscale) + self.variance = Param('variance', variance, Logexp()) + + def K_of_r(self, r): + raise NotImplementedError("implement the covariance function as a fn of r to use this class") + + def dK_dr(self, r): + raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") + + def Q_of_r(self, r): + raise NotImplementedError("implement covariance function of SDE wrt r to use this class") + + def Phi_of_r(self, r): + raise NotImplementedError("implement transition function of SDE wrt r to use this class") + + def noise(self): + raise NotImplementedError("implement noise function of SDE to use this class") + + def dPhidLam(self, r): + raise NotImplementedError("implement derivative of the transition function wrt to lambda to use this class") + + def dQ(self, r): + raise NotImplementedError("implement detivatives of Q wrt to lambda and variance to use this class") + + +class Matern32_SSM(SsmKerns): + """ + Matern 3/2 kernel: + + .. math:: + + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } + + """ + + def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'): + super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name) + lambd = np.sqrt(2*1.5)/lengthscale # additional paramter of model (derived from lengthscale) + self.lam = Param('lambda', lambd, Logexp()) + self.link_parameters(self.lam, self.variance) + self.order = 2 + + def noise(self): + """ + Compute noise for the kernel + """ + p = 1 + lp_fact = np.sum(np.log(range(1,p+1))) + l2p_fact = np.sum(np.log(range(1,2*p +1))) + logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact + q = np.exp(logq) + return q + + def K_of_r(self, r): + lengthscale = np.sqrt(3.) / self.lam + r = r / lengthscale + return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) 
* r) + + def dK_dr(self,r): + return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r) + + def Q_of_r(self, r): + """ + Compute process variance (Q) + """ + q = self.noise() + Q = np.zeros((2,2)) + Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2) + + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam)) + Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam)) + Q[1][0] = Q[0][1] + Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2) + - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam)) + return q*Q + + def Phi_of_r(self, r): + """ + Compute transition function (Phi) + """ + if r < 0: + phi = np.zeros((2,2)) + phi[0][0] = self.variance + phi[0][1] = 0 + phi[1][0] = 0 + phi[1][1] = np.power(self.lam,2)*self.variance + return phi + else: + mult = np.exp(-self.lam*r) + phi = np.zeros((2, 2)) + phi[0][0] = (1 + self.lam*r)*mult + phi[0][1] = mult*r + phi[1][0] = -r*mult*self.lam**2 + phi[1][1] = mult*(1 - self.lam*r) + return phi + + def dPhidLam(self, r): + """ + Compute derivative of transition function (Phi) wrt lambda + """ + mult = np.exp(self.lam*r) + dPhi = np.zeros((2, 2)) + dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult + dPhi[0][1] = (-1 * (r**2)) / mult + dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult + dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult + return dPhi + + def dQ(self, r): + """ + Compute derivatives of Q with respect to lambda and variance + """ + if (r == -1): + dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]]) + dQdVar = None + else: + q = self.noise() + Q = self.Q_of_r(r) + dQdLam = np.zeros((2, 2)) + mult = np.exp(2*self.lam*r) + dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult) + dQdLam[0][1] = -(r**3)/mult + dQdLam[1][0] = dQdLam[0][1] + dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult) + dq = (q*(2+1))/self.lam + dQdLam = q*dQdLam + dq*(Q/q) + dQdVar = Q / self.variance + return [dQdLam, dQdVar] \ No newline at end of file diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 5e137abb..a8ae3ac4 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -189,6 +189,16 @@ class Stationary(Kern): self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale + def update_gradients_direct(self, dL_dVar, dL_dLen): + """ + Specially intended for the Grid regression case. + Given the computed log likelihood derivates, update the corresponding + kernel and likelihood gradients. + Useful for when gradients have been computed a priori. + """ + self.variance.gradient = dL_dVar + self.lengthscale.gradient = dL_dLen + def _inv_dist(self, X, X2=None): """ Compute the elementwise inverse of the distance matrix, expecpt on the @@ -307,6 +317,15 @@ class Stationary(Kern): def input_sensitivity(self, summarize=True): return self.variance*np.ones(self.input_dim)/self.lengthscale**2 + def get_one_dimensional_kernel(self, dimensions): + """ + Specially intended for the grid regression case + For a given covariance kernel, this method returns the corresponding kernel for + a single dimension. The resulting values can then be used in the algorithm for + reconstructing the full covariance matrix. 
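Concretely, for the RBF case the Gram matrix over a full Cartesian grid factorizes into a Kronecker product of one-dimensional Gram matrices, which is why GridRBF above scales its variance by 1/originalDimensions. A sketch of the factorization under that convention (NumPy only; the helper rbf_1d is illustrative, not part of the patch):

import numpy as np

def rbf_1d(x, var, ls):
    # one-dimensional squared-exponential Gram matrix on column vector x
    r2 = (x - x.T) ** 2 / ls ** 2
    return var * np.exp(-0.5 * r2)

xg = [np.array([[0.0], [1.0]]), np.array([[0.0], [0.5], [2.0]])]  # per-axis points
var, ls = 2.0, [1.5, 0.7]
D = len(xg)
Ks = [rbf_1d(xg[d], var ** (1.0 / D), ls[d]) for d in range(D)]
K_full = np.kron(Ks[0], Ks[1])   # Gram matrix over the 6-point product grid

grid = np.array([[a, b] for a in xg[0][:, 0] for b in xg[1][:, 0]])
r2 = ((grid[:, None, :] - grid[None, :, :]) ** 2 / np.array(ls) ** 2).sum(-1)
assert np.allclose(K_full, var * np.exp(-0.5 * r2))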
+ """ + raise NotImplementedError("implement one dimensional variation of kernel") + From 83bd94185d959387f6a0069bfed49e6f90d9c19f Mon Sep 17 00:00:00 2001 From: kcutajar Date: Sun, 1 May 2016 21:50:34 +0200 Subject: [PATCH 07/13] Removed SSM functionality - updated Kronecker grid case --- GPy/core/__init__.py | 3 +- GPy/core/gp_ssm.py | 253 ------------------ .../latent_function_inference/__init__.py | 1 - .../gaussian_grid_inference.py | 2 +- .../gaussian_ssm_inference.py | 140 ---------- .../ssm_posterior.py | 73 ----- GPy/kern/__init__.py | 3 +- GPy/kern/src/{gridKerns.py => grid_kerns.py} | 4 +- GPy/kern/src/rbf.py | 1 + GPy/kern/src/ssm_kerns.py | 148 ---------- GPy/kern/src/stationary.py | 2 +- GPy/models/__init__.py | 2 +- GPy/models/gp_grid_regression.py | 36 +++ GPy/testing/grid_tests.py | 51 ++++ doc/source/GPy.core.rst | 93 +++++++ ...Py.inference.latent_function_inference.rst | 118 ++++++++ doc/source/GPy.kern.src.rst | 245 +++++++++++++++++ doc/source/GPy.testing.rst | 214 +++++++++++++++ 18 files changed, 765 insertions(+), 624 deletions(-) delete mode 100644 GPy/core/gp_ssm.py delete mode 100644 GPy/inference/latent_function_inference/gaussian_ssm_inference.py delete mode 100644 GPy/inference/latent_function_inference/ssm_posterior.py rename GPy/kern/src/{gridKerns.py => grid_kerns.py} (96%) delete mode 100644 GPy/kern/src/ssm_kerns.py create mode 100644 GPy/models/gp_grid_regression.py create mode 100644 GPy/testing/grid_tests.py create mode 100644 doc/source/GPy.core.rst create mode 100644 doc/source/GPy.inference.latent_function_inference.rst create mode 100644 doc/source/GPy.kern.src.rst create mode 100644 doc/source/GPy.testing.rst diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 76d8bb90..a6abfb06 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -3,12 +3,11 @@ from GPy.core.model import Model from .parameterization import Param, Parameterized -from . import parameterization +#from . import parameterization from .gp import GP from .svgp import SVGP from .sparse_gp import SparseGP -from .gp_ssm import GpSSM from .gp_grid import GpGrid from .mapping import * diff --git a/GPy/core/gp_ssm.py b/GPy/core/gp_ssm.py deleted file mode 100644 index 5426d010..00000000 --- a/GPy/core/gp_ssm.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Kurt Cutajar - -# This implementation of converting GPs to state space models is based on the article: - -#@article{Gilboa:2015, -# title={Scaling multidimensional inference for structured Gaussian processes}, -# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, -# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, -# volume={37}, -# number={2}, -# pages={424--436}, -# year={2015}, -# publisher={IEEE} -#} - -import numpy as np -import scipy.linalg as sp -from gp import GP -from parameterization.param import Param -from ..inference.latent_function_inference import gaussian_ssm_inference -from .. import likelihoods -from ..inference import optimization -from parameterization.transformations import Logexp -from GPy.inference.latent_function_inference.posterior import Posterior - -class GpSSM(GP): - """ - A GP model for sorted one-dimensional inputs - - This model allows the representation of a Gaussian Process as a Gauss-Markov State Machine. 
- - :param X: inputs - :type X: np.ndarray (num_data x input_dim) - :param likelihood: a likelihood instance, containing the observed data - :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) - :param kernel: the kernel (covariance function). See link kernels - :type kernel: a GPy.kern.kern instance - - """ - - def __init__(self, X, Y, kernel, likelihood, inference_method=None, - name='gp ssm', Y_metadata=None, normalizer=False): - #pick a sensible inference method - - inference_method = gaussian_ssm_inference.GaussianSSMInference() - - GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) - - self.posterior = None - - def optimize(self, optimizer=None, start=None, **kwargs): - prevLikelihood = 0 - count = 0 - change = 1 - while ((change > 0.001) and (count < 50)): - posterior, likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata) - expectations = posterior.expectations - self.optimize_params(expectations=expectations) - change = np.abs(likelihood - prevLikelihood) - prevLikelihood = likelihood - count = count + 1 - - def optimize_params(self, optimizer=None, start=None, expectations=None, **kwargs): - if self.is_fixed: - print("nothing to optimize") - if self.size == 0: - print("nothing to optimize") - - if not self.update_model(): - print("Updates were off, setting updates on again") - self.update_model(True) - - if start == None: - start = self.optimizer_array - - if optimizer is None: - optimizer = self.preferred_optimizer - - if isinstance(optimizer, optimization.Optimizer): - opt = optimizer - opt.model = self - else: - optimizer = optimization.get_optimizer(optimizer) - opt = optimizer(start, model=self, **kwargs) - - opt.max_iters = 1 - - opt.run(f_fp=self.param_maximisation_step, args=(self.X, self.Y, expectations)) - self.optimization_runs.append(opt) - self.optimizer_array = opt.x_opt - - def param_maximisation_step(self, loghyper, X, Y, E, *args): - - loghyper = np.log(np.exp(loghyper) + 1) - 1e-20 - lam = loghyper[0] - sig = loghyper[1] - noise = loghyper[2] - - kern = self.kern - order = kern.order - - K = len(X) - mu_0 = np.zeros((order, 1)) - v_0 = kern.Phi_of_r(-1) - dvSig = v_0/sig - dvLam = kern.dQ(-1)[0] - - mu = E[0][0] - V = E[0][1] - E11 = V + np.dot(mu, mu.T) - V0_inv = np.linalg.solve(v_0, np.eye(len(mu))) - Ub = np.log(np.linalg.det(v_0)) + np.trace(np.dot(V0_inv, E11)) - dUb_lam = np.trace(np.dot(V0_inv, dvLam.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvLam.T),V0_inv), E11)) - dUb_sig = np.trace(np.dot(V0_inv, dvSig.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvSig.T),V0_inv), E11)) - - for t in range(1, K): - delta = X[t] - X[t-1] - Q = kern.Q_of_r(delta) - Phi = kern.Phi_of_r(delta) - dPhi = kern.dPhidLam(delta) - [dLam, dSig] = kern.dQ(delta) - mu_prev = E[t-1][0] - V_prev = E[t-1][1] - mu = E[t][0] - V = E[t][1] - Ett_prev = V_prev + np.dot(mu_prev, mu_prev.T) - Eadj = E[t-1][3] + np.dot(mu, mu_prev.T) - Ett = V + np.dot(mu, mu.T) - CC = sp.cholesky(Q, lower=True) - Q_inv = np.linalg.solve(CC.T, np.linalg.solve(CC, np.eye(len(mu)))) - - Ub = Ub + np.log(np.linalg.det(Q)) + np.trace(np.dot(Q_inv, Ett)) - 2*np.trace(np.dot(np.dot(Phi.T, Q_inv), Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), Phi), Ett_prev)) - A = np.dot(dPhi.T, Q_inv) - np.dot(Phi.T, np.dot(np.dot(Q_inv, dLam), Q_inv)) - dUb_lam = dUb_lam + np.trace(np.dot(Q_inv, dLam.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dLam.T), Q_inv), Ett)) - 
2*np.trace(np.dot(A,Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), dPhi) + np.dot(A, Phi), Ett_prev)) - A = -1 * np.dot(Phi.T, np.dot(np.dot(Q_inv, dSig), Q_inv)) - dUb_sig = dUb_sig + np.trace(np.dot(Q_inv, dSig.T)) - np . trace(np.dot(np.dot(np.dot(Q_inv, dSig.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(A, Phi), Ett_prev)) - - dUb_noise = 0 - for t in xrange(K): - mu = E[t][0] - V = E[t][1] - Ett = V + np.dot(mu, mu.T) - Ub = Ub + np.log(noise) + (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/noise - dUb_noise = dUb_noise + 1/noise - (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/(noise**2) - - dUb = np.array([lam, sig, noise]) * np.array([dUb_lam.item(), dUb_sig.item(), dUb_noise.item()]) - - return Ub.item(), dUb - - - def parameters_changed(self): - """ - Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model. - In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood - - .. warning:: - This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call - this method yourself, there may be unexpected consequences. - - We override the method in the parent class since we do not handle updates to the standard gradients. - """ - self.posterior, self._log_marginal_likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) - - def _raw_predict(self, Xnew, full_cov=False, kern=None): - """ - Make a prediction for the latent function values - N.B. It is assumed that input points are sorted - """ - if kern is None: - kern = self.kern - - X = self.X - K = X.shape[0] - K_new = Xnew.shape[0] - order = kern.order - mean_pred = np.zeros(K_new) - var_pred = np.zeros(K_new) - H = np.zeros((1,order)) - H[0][0] = 1 - - count = 0 - for t in xrange(K_new): - while ((count < K) and (Xnew[t] > X[count])): - count += 1 - - if (count == 0): - mu = np.zeros((order, 1)) - V = kern.Phi_of_r(-1) - - delta = np.abs(Xnew[t] - X[count]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - P = np.dot(np.dot(Phi, V), Phi.T) + Q - - mu_s = self.posterior.mu_s[count] - V_s = self.posterior.V_s[count] - L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) - mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) - V_s = V + np.dot(np.dot(L, V_s - P), L.T) - - mean_pred[t] = np.dot(H, mu_s) - var_pred[t] = np.dot(np.dot(H, V_s), H.T) - - elif (count == K): - # forwards phase - delta = np.abs(Xnew[t] - X[count-1]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - mu_f = self.posterior.mu_f[count-1] - V_f = self.posterior.V_f[count-1] - - mu = np.dot(Phi, mu_f) - P = np.dot(np.dot(Phi, V_f), Phi.T) + Q - V = P - - mean_pred[t] = np.dot(H,mu) - var_pred[t] = np.dot(np.dot(H, V), H.T) - - else: - # forwards phase - delta = np.abs(Xnew[t] - X[count-1]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - mu_f = self.posterior.mu_f[count-1] - V_f = self.posterior.V_f[count-1] - mu = np.dot(Phi, mu_f) - P = np.dot(np.dot(Phi, V_f), Phi.T) + Q - V = P - - delta = np.abs(Xnew[t] - X[count]) - Phi = kern.Phi_of_r(delta) - Q = kern.Q_of_r(delta) - P = np.dot(np.dot(Phi, V), Phi.T) + Q - - # backwards phase - mu_s = self.posterior.mu_s[count] - V_s = self.posterior.V_s[count] - - L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P)))) - mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) - V_s = V + np.dot(np.dot(L, V_s - P), L.T) - - 
mean_pred[t] = np.dot(H, mu_s) - var_pred[t] = np.dot(np.dot(H, V_s), H.T) - - - mean_pred = mean_pred.reshape(-1, 1) - var_pred = var_pred.reshape(-1, 1) - - return mean_pred, var_pred diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index ba9099b6..90fbf0f1 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -69,7 +69,6 @@ from .dtc import DTC from .fitc import FITC from .var_dtc_parallel import VarDTC_minibatch from .var_gauss import VarGauss -from .gaussian_ssm_inference import GaussianSSMInference from .gaussian_grid_inference import GaussianGridInference diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py index 8917abea..6b2315b5 100644 --- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py +++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py @@ -61,7 +61,7 @@ class GaussianGridInference(LatentFunctionInference): V_kron = 1 # kronecker product of eigenvalues # retrieve the one-dimensional variation of the designated kernel - oneDkernel = kern.getOneDimensionalKernel(D) + oneDkernel = kern.get_one_dimensional_kernel(D) for d in xrange(D): xg = list(set(X[:,d])) #extract unique values for a dimension diff --git a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py deleted file mode 100644 index 4eb441cb..00000000 --- a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Kurt Cutajar - -# This implementation of converting GPs to state space models is based on the article: - -#@article{Gilboa:2015, -# title={Scaling multidimensional inference for structured Gaussian processes}, -# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P}, -# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, -# volume={37}, -# number={2}, -# pages={424--436}, -# year={2015}, -# publisher={IEEE} -#} - -from ssm_posterior import SsmPosterior -from ...util.linalg import pdinv, dpotrs, tdot -from ...util import diag -import numpy as np -import math as mt -from . import LatentFunctionInference -log_2_pi = np.log(2*np.pi) - - -class GaussianSSMInference(LatentFunctionInference): - """ - An object for inference when the likelihood is Gaussian. - - The function self.inference returns a Posterior object, which summarizes - the posterior. - - """ - def __init__(self): - pass#self._YYTfactor_cache = caching.cache() - - def get_YYTfactor(self, Y): - """ - find a matrix L which satisfies LL^T = YY^T. - - Note that L may have fewer columns than Y, else L=Y. - """ - N, D = Y.shape - if (N>D): - return Y - else: - #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. - #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" 
- return Y - - def inference(self, kern, X, likelihood, Y, Y_metadata=None): - - """ - Returns a Posterior class containing essential quantities of the posterior - """ - order = kern.order - K = X.shape[0] - log_likelihood = 0 - results = np.zeros((K,4),dtype=object) - H = np.zeros((1,order)) - H[0][0] = 1 - v_0 = kern.Phi_of_r(-1) - mu_0 = np.zeros((order, 1)) - noise_var = likelihood.variance + 1e-8 - - # carry out forward filtering - for t in range(K): - if (t == 0): - prior_m = np.dot(H,mu_0) - prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var - - log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v) - - kalman_gain = np.dot(v_0, H.T) / prior_v - mu = mu_0 + kalman_gain*(Y[0] - prior_m) - - - V = np.dot(np.eye(order) - np.dot(kalman_gain,H), v_0) - results[0][0] = mu - results[0][1] = V - else: - delta = X[t] - X[t-1] - Q = kern.Q_of_r(delta) - Phi = kern.Phi_of_r(delta) - P = np.dot(np.dot(Phi, V), Phi.T) + Q - PhiMu = np.dot(Phi, mu) - prior_m = np.dot(H, PhiMu) - prior_v = np.dot(np.dot(H, P), H.T) + noise_var - - log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v) - log_likelihood += log_likelihood_i - - kalman_gain = np.dot(P, H.T)/prior_v - mu = PhiMu + kalman_gain*(Y[t] - prior_m) - V = np.dot((np.eye(order) - np.dot(kalman_gain, H)), P) - - results[t-1][2] = Phi - results[t-1][3] = P - results[t][0] = mu - results[t][1] = V - - # carry out backwards smoothing - - W = np.dot((np.eye(order) - np.dot(kalman_gain,H)),(np.dot(Phi,results[K-2][1]))) - - mu_s = results[K-1][0] - V_s = results[K-1][1] - - posterior_mean = np.zeros((K,1)) - posterior_var = np.zeros((K,1)) - E = np.zeros((K,4), dtype='object') - - posterior_mean[K-1] = np.dot(H, mu_s) - posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T) - E[K-1][0] = mu_s - E[K-1][1] = V_s - - for t in range(K-2, -1, -1): - mu = results[t][0] - V = results[t][1] - Phi = results[t][2] - P = results[t][3] - - L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order))) # forward substitution - mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu)) - V_s = V + np.dot(np.dot(L, V_s - P), L.T) - posterior_mean[t] = np.dot(H, mu_s) - posterior_var[t] = np.dot(np.dot(H, V_s), H.T) - - if (t < K-2): - W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T)) - - E[t][0] = mu_s - E[t][1] = V_s - E[t][2] = L - E[t][3] = W - - return SsmPosterior(mu_f=results[:,0], V_f=results[:,1], mu_s=E[:,0], V_s=E[:,1], expectations=E), log_likelihood diff --git a/GPy/inference/latent_function_inference/ssm_posterior.py b/GPy/inference/latent_function_inference/ssm_posterior.py deleted file mode 100644 index 1b87ddb3..00000000 --- a/GPy/inference/latent_function_inference/ssm_posterior.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Kurt Cutajar - -import numpy as np - -class SsmPosterior(object): - """ - Specially intended for the SSM Regression case - An object to represent a Gaussian posterior over latent function values, p(f|D). - - The purpose of this class is to serve as an interface between the inference - schemes and the model classes. 
- - """ - def __init__(self, mu_f = None, V_f=None, mu_s=None, V_s=None, expectations=None): - """ - mu_f : mean values predicted during kalman filtering step - var_f : variance predicted during the kalman filtering step - mu_s : mean values predicted during backwards smoothing step - var_s : variance predicted during backwards smoothing step - expectations : posterior expectations - """ - - if ((mu_f is not None) and (V_f is not None) and - (mu_s is not None) and (V_s is not None) and - (expectations is not None)): - pass # we have sufficient to compute the posterior - else: - raise ValueError("insufficient information to compute predictions") - - self._mu_f = mu_f - self._V_f = V_f - self._mu_s = mu_s - self._V_s = V_s - self._expectations = expectations - - @property - def mu_f(self): - """ - Mean values predicted during kalman filtering step mean - """ - return self._mu_f - - @property - def V_f(self): - """ - Variance predicted during the kalman filtering step - """ - return self._V_f - - @property - def mu_s(self): - """ - Mean values predicted during kalman backwards smoothin mean - """ - return self._mu_s - - @property - def V_s(self): - """ - Variance predicted during backwards smoothing step - """ - return self._V_s - - @property - def expectations(self): - """ - Posterior expectations - """ - return self._expectations - diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 041159f6..b304cc7f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -36,5 +36,4 @@ from .src.sde_standard_periodic import sde_StdPeriodic from .src.sde_static import sde_White, sde_Bias from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_brownian import sde_Brownian -from .src.ssmKerns import Matern32_SSM -from .src.gridKerns import GridRBF +from .src.grid_kerns import GridRBF diff --git a/GPy/kern/src/gridKerns.py b/GPy/kern/src/grid_kerns.py similarity index 96% rename from GPy/kern/src/gridKerns.py rename to GPy/kern/src/grid_kerns.py index 98804544..a043e159 100644 --- a/GPy/kern/src/gridKerns.py +++ b/GPy/kern/src/grid_kerns.py @@ -5,8 +5,8 @@ import numpy as np from stationary import Stationary -from ...util.config import * -from ...util.caching import Cache_this +from paramz.caching import Cache_this + class GridKern(Stationary): diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index 557366ae..a51a4ba4 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -7,6 +7,7 @@ from .stationary import Stationary from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU from ...core import Param from paramz.transformations import Logexp +from .gridKerns import GridRBF class RBF(Stationary): """ diff --git a/GPy/kern/src/ssm_kerns.py b/GPy/kern/src/ssm_kerns.py deleted file mode 100644 index aa988e52..00000000 --- a/GPy/kern/src/ssm_kerns.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) -# -# Kurt Cutajar - -from kern import Kern -from ... 
import util -import numpy as np -from scipy import integrate, weave -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp - -class SsmKerns(Kern): - """ - Kernels suitable for GP SSM regression with one-dimensional inputs - - """ - - def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False): - - super(SsmKerns, self).__init__(input_dim, active_dims, name,useGPU=useGPU) - self.lengthscale = np.abs(lengthscale) - self.variance = Param('variance', variance, Logexp()) - - def K_of_r(self, r): - raise NotImplementedError("implement the covariance function as a fn of r to use this class") - - def dK_dr(self, r): - raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") - - def Q_of_r(self, r): - raise NotImplementedError("implement covariance function of SDE wrt r to use this class") - - def Phi_of_r(self, r): - raise NotImplementedError("implement transition function of SDE wrt r to use this class") - - def noise(self): - raise NotImplementedError("implement noise function of SDE to use this class") - - def dPhidLam(self, r): - raise NotImplementedError("implement derivative of the transition function wrt to lambda to use this class") - - def dQ(self, r): - raise NotImplementedError("implement detivatives of Q wrt to lambda and variance to use this class") - - -class Matern32_SSM(SsmKerns): - """ - Matern 3/2 kernel: - - .. math:: - - k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - - def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'): - super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name) - lambd = np.sqrt(2*1.5)/lengthscale # additional paramter of model (derived from lengthscale) - self.lam = Param('lambda', lambd, Logexp()) - self.link_parameters(self.lam, self.variance) - self.order = 2 - - def noise(self): - """ - Compute noise for the kernel - """ - p = 1 - lp_fact = np.sum(np.log(range(1,p+1))) - l2p_fact = np.sum(np.log(range(1,2*p +1))) - logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact - q = np.exp(logq) - return q - - def K_of_r(self, r): - lengthscale = np.sqrt(3.) / self.lam - r = r / lengthscale - return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) 
* r) - - def dK_dr(self,r): - return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r) - - def Q_of_r(self, r): - """ - Compute process variance (Q) - """ - q = self.noise() - Q = np.zeros((2,2)) - Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2) - + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam)) - Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam)) - Q[1][0] = Q[0][1] - Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2) - - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam)) - return q*Q - - def Phi_of_r(self, r): - """ - Compute transition function (Phi) - """ - if r < 0: - phi = np.zeros((2,2)) - phi[0][0] = self.variance - phi[0][1] = 0 - phi[1][0] = 0 - phi[1][1] = np.power(self.lam,2)*self.variance - return phi - else: - mult = np.exp(-self.lam*r) - phi = np.zeros((2, 2)) - phi[0][0] = (1 + self.lam*r)*mult - phi[0][1] = mult*r - phi[1][0] = -r*mult*self.lam**2 - phi[1][1] = mult*(1 - self.lam*r) - return phi - - def dPhidLam(self, r): - """ - Compute derivative of transition function (Phi) wrt lambda - """ - mult = np.exp(self.lam*r) - dPhi = np.zeros((2, 2)) - dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult - dPhi[0][1] = (-1 * (r**2)) / mult - dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult - dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult - return dPhi - - def dQ(self, r): - """ - Compute derivatives of Q with respect to lambda and variance - """ - if (r == -1): - dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]]) - dQdVar = None - else: - q = self.noise() - Q = self.Q_of_r(r) - dQdLam = np.zeros((2, 2)) - mult = np.exp(2*self.lam*r) - dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult) - dQdLam[0][1] = -(r**3)/mult - dQdLam[1][0] = dQdLam[0][1] - dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult) - dq = (q*(2+1))/self.lam - dQdLam = q*dQdLam + dq*(Q/q) - dQdVar = Q / self.variance - return [dQdLam, dQdVar] \ No newline at end of file diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index a8ae3ac4..db07f00d 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -317,7 +317,7 @@ class Stationary(Kern): def input_sensitivity(self, summarize=True): return self.variance*np.ones(self.input_dim)/self.lengthscale**2 - def get_one_dimensional_kernel(self, dimensions): + def get_one_dimensional_kernel(self, dimensions): """ Specially intended for the grid regression case For a given covariance kernel, this method returns the corresponding kernel for diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index c31d68dd..1c1b336a 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -22,5 +22,5 @@ from .gp_var_gauss import GPVariationalGaussianApproximation from .one_vs_all_classification import OneVsAllClassification from .one_vs_all_sparse_classification import OneVsAllSparseClassification from .dpgplvm import DPBayesianGPLVM - from .state_space_model import StateSpace +from .gp_grid_regression import GPRegressionGrid diff --git a/GPy/models/gp_grid_regression.py b/GPy/models/gp_grid_regression.py new file mode 100644 index 00000000..fa4838fd --- /dev/null +++ b/GPy/models/gp_grid_regression.py @@ -0,0 +1,36 @@ +# Copyright (c) 2012-2014, GPy authors (see 
AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +from ..core import GpGrid +from .. import likelihoods +from .. import kern + +class GPRegressionGrid(GpGrid): + """ + Gaussian Process model for grid inputs using Kronecker products + + This is a thin wrapper around the models.GpGrid class, with a set of sensible defaults + + :param X: input observations + :param Y: observed values + :param kernel: a GPy kernel, defaults to the kron variation of SqExp + :param Norm normalizer: [False] + + Normalize Y with the norm given. + If normalizer is False, no normalization will be done + If it is None, we use GaussianNorm(alization) + + .. Note:: Multiple independent outputs are allowed using columns of Y + + """ + + def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None): + + if kernel is None: + kernel = kern.RBF(1) # no other kernels implemented so far + + likelihood = likelihoods.Gaussian() + super(GPRegressionGrid, self).__init__(X, Y, kernel, likelihood, name='GP Grid regression', Y_metadata=Y_metadata, normalizer=normalizer) + diff --git a/GPy/testing/grid_tests.py b/GPy/testing/grid_tests.py new file mode 100644 index 00000000..e55efb18 --- /dev/null +++ b/GPy/testing/grid_tests.py @@ -0,0 +1,51 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +import unittest +import numpy as np +import GPy + +class GridModelTest(unittest.TestCase): + def setUp(self): + ###################################### + # # 3 dimensional example + + # sample inputs and outputs + self.X = np.array([[0,0,0],[0,0,1],[0,1,0],[0,1,1],[1,0,0],[1,0,1],[1,1,0],[1,1,1]]) + self.Y = np.random.randn(8, 1) * 100 + self.dim = self.X.shape[1] + + def test_alpha_match(self): + kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel) + + kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m2 = GPy.models.GPRegression(self.X, self.Y, kernel2) + + np.testing.assert_almost_equal(m.posterior.alpha, m2.posterior.woodbury_vector) + + def test_gradient_match(self): + kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel) + + kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m2 = GPy.models.GPRegression(self.X, self.Y, kernel2) + + np.testing.assert_almost_equal(kernel.variance.gradient, kernel2.variance.gradient) + np.testing.assert_almost_equal(kernel.lengthscale.gradient, kernel2.lengthscale.gradient) + np.testing.assert_almost_equal(m.likelihood.variance.gradient, m2.likelihood.variance.gradient) + + + def test_prediction_match(self): + kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel) + + kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True) + m2 = GPy.models.GPRegression(self.X, self.Y, kernel2) + + test = np.array([[0,0,2],[-1,3,-4]]) + + np.testing.assert_almost_equal(m.predict(test), m2.predict(test)) + diff --git a/doc/source/GPy.core.rst b/doc/source/GPy.core.rst new file mode 100644 index 00000000..a42acd9c --- /dev/null +++ b/doc/source/GPy.core.rst @@ -0,0 +1,93 @@ +GPy.core package +================ + +Subpackages +----------- + +.. toctree:: + + GPy.core.parameterization + +Submodules +---------- + +GPy.core.gp module +------------------ + +.. 
automodule:: GPy.core.gp + :members: + :undoc-members: + :show-inheritance: + +GPy.core.mapping module +----------------------- + +.. automodule:: GPy.core.mapping + :members: + :undoc-members: + :show-inheritance: + +GPy.core.model module +--------------------- + +.. automodule:: GPy.core.model + :members: + :undoc-members: + :show-inheritance: + +GPy.core.sparse_gp module +------------------------- + +.. automodule:: GPy.core.sparse_gp + :members: + :undoc-members: + :show-inheritance: + +GPy.core.sparse_gp_mpi module +----------------------------- + +.. automodule:: GPy.core.sparse_gp_mpi + :members: + :undoc-members: + :show-inheritance: + +GPy.core.svgp module +-------------------- + +.. automodule:: GPy.core.svgp + :members: + :undoc-members: + :show-inheritance: + +GPy.core.gp_grid module +------------------------ + +.. automodule:: GPy.core.gp_grid + :members: + :undoc-members: + :show-inheritance: + +GPy.core.symbolic module +------------------------ + +.. automodule:: GPy.core.symbolic + :members: + :undoc-members: + :show-inheritance: + +GPy.core.verbose_optimization module +------------------------------------ + +.. automodule:: GPy.core.verbose_optimization + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.core + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/GPy.inference.latent_function_inference.rst b/doc/source/GPy.inference.latent_function_inference.rst new file mode 100644 index 00000000..f170003c --- /dev/null +++ b/doc/source/GPy.inference.latent_function_inference.rst @@ -0,0 +1,118 @@ +GPy.inference.latent_function_inference package +=============================================== + +Submodules +---------- + +GPy.inference.latent_function_inference.dtc module +-------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.dtc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.exact_gaussian_inference module +----------------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.exact_gaussian_inference + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.expectation_propagation module +---------------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.expectation_propagation + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.fitc module +--------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.fitc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.inferenceX module +--------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.inferenceX + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.laplace module +------------------------------------------------------ + +.. automodule:: GPy.inference.latent_function_inference.laplace + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.posterior module +-------------------------------------------------------- + +.. 
automodule:: GPy.inference.latent_function_inference.posterior + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.grid_posterior module +-------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.grid_posterior + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.svgp module +--------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.svgp + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.var_dtc module +------------------------------------------------------ + +.. automodule:: GPy.inference.latent_function_inference.var_dtc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.var_dtc_parallel module +--------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.var_dtc_parallel + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.var_gauss module +-------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.var_gauss + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.gaussian_grid_inference module +-------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.gaussian_grid_inference + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.inference.latent_function_inference + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/GPy.kern.src.rst b/doc/source/GPy.kern.src.rst new file mode 100644 index 00000000..166b3cba --- /dev/null +++ b/doc/source/GPy.kern.src.rst @@ -0,0 +1,245 @@ +GPy.kern.src package +==================== + +Subpackages +----------- + +.. toctree:: + + GPy.kern.src.psi_comp + +Submodules +---------- + +GPy.kern.src.ODE_UY module +-------------------------- + +.. automodule:: GPy.kern.src.ODE_UY + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.ODE_UYC module +--------------------------- + +.. automodule:: GPy.kern.src.ODE_UYC + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.ODE_st module +-------------------------- + +.. automodule:: GPy.kern.src.ODE_st + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.ODE_t module +------------------------- + +.. automodule:: GPy.kern.src.ODE_t + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.add module +----------------------- + +.. automodule:: GPy.kern.src.add + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.basis_funcs module +------------------------------- + +.. automodule:: GPy.kern.src.basis_funcs + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.brownian module +---------------------------- + +.. automodule:: GPy.kern.src.brownian + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.coregionalize module +--------------------------------- + +.. automodule:: GPy.kern.src.coregionalize + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.coregionalize_cython module +---------------------------------------- + +.. automodule:: GPy.kern.src.coregionalize_cython + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.eq_ode2 module +--------------------------- + +.. 
automodule:: GPy.kern.src.eq_ode2 + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.independent_outputs module +--------------------------------------- + +.. automodule:: GPy.kern.src.independent_outputs + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.kern module +------------------------ + +.. automodule:: GPy.kern.src.kern + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.kernel_slice_operations module +------------------------------------------- + +.. automodule:: GPy.kern.src.kernel_slice_operations + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.linear module +-------------------------- + +.. automodule:: GPy.kern.src.linear + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.mlp module +----------------------- + +.. automodule:: GPy.kern.src.mlp + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.periodic module +---------------------------- + +.. automodule:: GPy.kern.src.periodic + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.poly module +------------------------ + +.. automodule:: GPy.kern.src.poly + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.prod module +------------------------ + +.. automodule:: GPy.kern.src.prod + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.rbf module +----------------------- + +.. automodule:: GPy.kern.src.rbf + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.spline module +-------------------------- + +.. automodule:: GPy.kern.src.spline + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.grid_kerns module +----------------------------- + +.. automodule:: GPy.kern.src.grid_kerns + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.splitKern module +----------------------------- + +.. automodule:: GPy.kern.src.splitKern + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.standard_periodic module +------------------------------------- + +.. automodule:: GPy.kern.src.standard_periodic + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.static module +-------------------------- + +.. automodule:: GPy.kern.src.static + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.stationary module +------------------------------ + +.. automodule:: GPy.kern.src.stationary + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.stationary_cython module +------------------------------------- + +.. automodule:: GPy.kern.src.stationary_cython + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.symbolic module +---------------------------- + +.. automodule:: GPy.kern.src.symbolic + :members: + :undoc-members: + :show-inheritance: + +GPy.kern.src.trunclinear module +------------------------------- + +.. automodule:: GPy.kern.src.trunclinear + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.kern.src + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/GPy.testing.rst b/doc/source/GPy.testing.rst new file mode 100644 index 00000000..5cd144cb --- /dev/null +++ b/doc/source/GPy.testing.rst @@ -0,0 +1,214 @@ +GPy.testing package +=================== + +Submodules +---------- + +GPy.testing.bgplvm_minibatch_tests module +----------------------------------------- + +.. 
automodule:: GPy.testing.bgplvm_minibatch_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.cacher_tests module +------------------------------- + +.. automodule:: GPy.testing.cacher_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.cython_tests module +------------------------------- + +.. automodule:: GPy.testing.cython_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.examples_tests module +--------------------------------- + +.. automodule:: GPy.testing.examples_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.fitc module +----------------------- + +.. automodule:: GPy.testing.fitc + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.gp_tests module +--------------------------- + +.. automodule:: GPy.testing.gp_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.index_operations_tests module +----------------------------------------- + +.. automodule:: GPy.testing.index_operations_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.inference_tests module +---------------------------------- + +.. automodule:: GPy.testing.inference_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.kernel_tests module +------------------------------- + +.. automodule:: GPy.testing.kernel_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.likelihood_tests module +----------------------------------- + +.. automodule:: GPy.testing.likelihood_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.linalg_test module +------------------------------ + +.. automodule:: GPy.testing.linalg_test + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.link_function_tests module +-------------------------------------- + +.. automodule:: GPy.testing.link_function_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.mapping_tests module +-------------------------------- + +.. automodule:: GPy.testing.mapping_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.meanfunc_tests module +--------------------------------- + +.. automodule:: GPy.testing.meanfunc_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.misc_tests module +----------------------------- + +.. automodule:: GPy.testing.misc_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.model_tests module +------------------------------ + +.. automodule:: GPy.testing.model_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.mpi_tests module +---------------------------- + +.. automodule:: GPy.testing.mpi_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.observable_tests module +----------------------------------- + +.. automodule:: GPy.testing.observable_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.parameterized_tests module +-------------------------------------- + +.. automodule:: GPy.testing.parameterized_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.pickle_tests module +------------------------------- + +.. automodule:: GPy.testing.pickle_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.plotting_tests module +--------------------------------- + +.. automodule:: GPy.testing.plotting_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.prior_tests module +------------------------------ + +.. 
automodule:: GPy.testing.prior_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.rv_transformation_tests module +------------------------------------------ + +.. automodule:: GPy.testing.rv_transformation_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.svgp_tests module +----------------------------- + +.. automodule:: GPy.testing.svgp_tests + :members: + :undoc-members: + :show-inheritance: + + +GPy.testing.grid_tests module +----------------------------- + +.. automodule:: GPy.testing.grid_tests + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: GPy.testing + :members: + :undoc-members: + :show-inheritance: From 46b467ae44a17250040d42147d6a5b3f4d99864c Mon Sep 17 00:00:00 2001 From: kcutajar Date: Sun, 1 May 2016 22:20:26 +0200 Subject: [PATCH 08/13] Minor fix --- GPy/core/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index a6abfb06..49e9c3c7 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -3,7 +3,7 @@ from GPy.core.model import Model from .parameterization import Param, Parameterized -#from . import parameterization +from . import parameterization from .gp import GP from .svgp import SVGP From bcbc58e065538d354a60a8d132ff691da2902325 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Mon, 9 May 2016 15:40:35 +0200 Subject: [PATCH 09/13] Added fixes to repo + rebased --- GPy/core/gp_grid.py | 4 ++-- .../gaussian_grid_inference.py | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/GPy/core/gp_grid.py b/GPy/core/gp_grid.py index 3d061bb0..64815016 100644 --- a/GPy/core/gp_grid.py +++ b/GPy/core/gp_grid.py @@ -69,10 +69,10 @@ class GpGrid(GP): x = b N = 1 G = np.zeros(D) - for d in xrange(D): + for d in range(D): G[d] = len(A[d]) N = np.prod(G) - for d in xrange(D-1, -1, -1): + for d in range(D-1, -1, -1): X = np.reshape(x, (G[d], round(N/G[d])), order='F') Z = np.dot(A[d], X) Z = Z.T diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py index 6b2315b5..aeefa8e7 100644 --- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py +++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py @@ -37,10 +37,10 @@ class GaussianGridInference(LatentFunctionInference): N = 1 D = len(A) G = np.zeros((D,1)) - for d in xrange(0, D): + for d in range(0, D): G[d] = len(A[d]) N = np.prod(G) - for d in xrange(D-1, -1, -1): + for d in range(D-1, -1, -1): X = np.reshape(x, (G[d], round(N/G[d])), order='F') Z = np.dot(A[d], X) Z = Z.T @@ -63,7 +63,7 @@ class GaussianGridInference(LatentFunctionInference): # retrieve the one-dimensional variation of the designated kernel oneDkernel = kern.get_one_dimensional_kernel(D) - for d in xrange(D): + for d in range(D): xg = list(set(X[:,d])) #extract unique values for a dimension xg = np.reshape(xg, (len(xg), 1)) oneDkernel.lengthscale = kern.lengthscale[d] @@ -84,11 +84,11 @@ class GaussianGridInference(LatentFunctionInference): # compute derivatives wrt parameters Thete derivs = np.zeros(D+2, dtype='object') - for t in xrange(len(derivs)): + for t in range(len(derivs)): dKd_dTheta = np.zeros(D, dtype='object') gamma = np.zeros(D, dtype='object') gam = 1 - for d in xrange(D): + for d in range(D): xg = list(set(X[:,d])) xg = np.reshape(xg, (len(xg), 1)) oneDkernel.lengthscale = kern.lengthscale[d] @@ -110,4 +110,5 @@ class 
GaussianGridInference(LatentFunctionInference): dL_dVar = derivs[D] dL_dThetaL = derivs[D+1] - return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL} + return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), \ + log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL} From 5b7566dfac89eaf3ac3d9c214c5feda1dda032a6 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Mon, 9 May 2016 16:21:10 +0200 Subject: [PATCH 10/13] Removed erreneous lines indicating merge conflicts --- GPy/core/gp_grid.py | 7 ------- .../latent_function_inference/gaussian_grid_inference.py | 9 +-------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/GPy/core/gp_grid.py b/GPy/core/gp_grid.py index adf611b8..64815016 100644 --- a/GPy/core/gp_grid.py +++ b/GPy/core/gp_grid.py @@ -69,17 +69,10 @@ class GpGrid(GP): x = b N = 1 G = np.zeros(D) -<<<<<<< HEAD for d in range(D): G[d] = len(A[d]) N = np.prod(G) for d in range(D-1, -1, -1): -======= - for d in xrange(D): - G[d] = len(A[d]) - N = np.prod(G) - for d in xrange(D-1, -1, -1): ->>>>>>> 1fc93236c46ddd1b7bd7f73ef26dc51af4cd2181 X = np.reshape(x, (G[d], round(N/G[d])), order='F') Z = np.dot(A[d], X) Z = Z.T diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py index b9702b92..aeefa8e7 100644 --- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py +++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py @@ -37,17 +37,10 @@ class GaussianGridInference(LatentFunctionInference): N = 1 D = len(A) G = np.zeros((D,1)) -<<<<<<< HEAD for d in range(0, D): G[d] = len(A[d]) N = np.prod(G) for d in range(D-1, -1, -1): -======= - for d in xrange(0, D): - G[d] = len(A[d]) - N = np.prod(G) - for d in xrange(D-1, -1, -1): ->>>>>>> 1fc93236c46ddd1b7bd7f73ef26dc51af4cd2181 X = np.reshape(x, (G[d], round(N/G[d])), order='F') Z = np.dot(A[d], X) Z = Z.T @@ -116,6 +109,6 @@ class GaussianGridInference(LatentFunctionInference): dL_dLen = derivs[:D] dL_dVar = derivs[D] dL_dThetaL = derivs[D+1] - + return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), \ log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL} From 5c4ce10a53aca782fd110c29fb159ebb5f9767d4 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Mon, 9 May 2016 16:38:09 +0200 Subject: [PATCH 11/13] Fixed incorrect import --- GPy/kern/src/rbf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index a51a4ba4..256149b8 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -7,7 +7,7 @@ from .stationary import Stationary from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU from ...core import Param from paramz.transformations import Logexp -from .gridKerns import GridRBF +from .grid_kerns import GridRBF class RBF(Stationary): """ From 3320146eecf681d817c5ee0d33700b863df8b635 Mon Sep 17 00:00:00 2001 From: kcutajar Date: Mon, 9 May 2016 23:30:36 +0200 Subject: [PATCH 12/13] Fixed incorrect import --- GPy/core/gp_grid.py | 4 ++-- .../latent_function_inference/gaussian_grid_inference.py | 2 +- GPy/kern/src/grid_kerns.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPy/core/gp_grid.py b/GPy/core/gp_grid.py index 64815016..89b6cdec 100644 --- a/GPy/core/gp_grid.py +++ b/GPy/core/gp_grid.py @@ -18,8 +18,8 @@ import numpy as np import scipy.linalg 
as sp
 
-from gp import GP
-from parameterization.param import Param
+from .gp import GP
+from .parameterization.param import Param
 from ..inference.latent_function_inference import gaussian_grid_inference
 from .. import likelihoods
diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
index aeefa8e7..04337e04 100644
--- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py
+++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
@@ -16,7 +16,7 @@
 #	publisher={IEEE}
 #}
 
-from grid_posterior import GridPosterior
+from .grid_posterior import GridPosterior
 import numpy as np
 from . import LatentFunctionInference
 log_2_pi = np.log(2*np.pi)
diff --git a/GPy/kern/src/grid_kerns.py b/GPy/kern/src/grid_kerns.py
index a043e159..ef18f1a9 100644
--- a/GPy/kern/src/grid_kerns.py
+++ b/GPy/kern/src/grid_kerns.py
@@ -4,7 +4,7 @@
 # Kurt Cutajar
 
 import numpy as np
-from stationary import Stationary
+from .stationary import Stationary
 from paramz.caching import Cache_this
 
 

From 1810549b602c4236ab76c78252d36cbdcab94659 Mon Sep 17 00:00:00 2001
From: kcutajar
Date: Tue, 10 May 2016 01:56:59 +0200
Subject: [PATCH 13/13] Making consistent with python 3

---
 GPy/core/gp_grid.py                                        | 2 +-
 .../latent_function_inference/gaussian_grid_inference.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/core/gp_grid.py b/GPy/core/gp_grid.py
index 89b6cdec..bdb1b614 100644
--- a/GPy/core/gp_grid.py
+++ b/GPy/core/gp_grid.py
@@ -73,7 +73,7 @@ class GpGrid(GP):
                 G[d] = len(A[d])
             N = np.prod(G)
             for d in range(D-1, -1, -1):
-                X = np.reshape(x, (G[d], round(N/G[d])), order='F')
+                X = np.reshape(x, (int(G[d]), int(round(N/G[d]))), order='F')
                 Z = np.dot(A[d], X)
                 Z = Z.T
                 x = np.reshape(Z, (-1, 1), order='F')
diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
index 04337e04..abdd78a3 100644
--- a/GPy/inference/latent_function_inference/gaussian_grid_inference.py
+++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
@@ -41,7 +41,7 @@ class GaussianGridInference(LatentFunctionInference):
             G[d] = len(A[d])
         N = np.prod(G)
         for d in range(D-1, -1, -1):
-            X = np.reshape(x, (G[d], round(N/G[d])), order='F')
+            X = np.reshape(x, (int(G[d]), int(round(N/G[d]))), order='F')
             Z = np.dot(A[d], X)
             Z = Z.T
             x = np.reshape(Z, (-1, 1), order='F')
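
A few closing notes on the series. First, the reshape fix in PATCH 13: NumPy shape entries must be integers, but G is allocated with np.zeros(D) and is therefore float64, N = np.prod(G) is a float, and np.round of a float64 is still a float64. 2016-era NumPy only issued a deprecation warning for non-integer shapes, while newer releases raise a TypeError, which is why the explicit int(...) casts in the two hunks above are the robust spelling. A minimal illustration (variable names mirror kron_mmprod):

import numpy as np

G = np.zeros(3)                # float64 by default, exactly as in kron_mmprod
G[:] = [2, 3, 4]
N = np.prod(G)                 # 24.0 -- also a float
x = np.arange(24)

# A float shape entry such as np.round(N / G[0]) only triggered a
# deprecation warning on older NumPy but is a TypeError today:
#     np.reshape(x, (G[0], np.round(N / G[0])), order='F')
X = np.reshape(x, (int(G[0]), int(round(N / G[0]))), order='F')   # always valid
print(X.shape)                 # (2, 12)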
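
Second, the kron_mmprod routine that several of these patches rework is the standard Kronecker matrix-vector product (Algorithm 16 in Saatci's thesis, on which Gilboa et al. (2015) build): it applies (A_1 kron ... kron A_D) to a vector without ever materialising the N x N Kronecker product. A self-contained sketch with my own names, checked against an explicit np.kron construction:

import numpy as np
from functools import reduce

def kron_mvprod(As, x):
    """Compute (As[0] kron As[1] kron ... kron As[-1]) @ x implicitly.

    As : list of D small square matrices with sizes g_1, ..., g_D
    x  : 1-D vector of length N = g_1 * ... * g_D
    Runs in O(N * (g_1 + ... + g_D)) rather than the O(N^2) dense product.
    """
    N = x.size
    for A in reversed(As):                    # mirrors the d = D-1, ..., 0 loop
        g = A.shape[0]
        X = x.reshape(g, N // g, order='F')   # fold, apply the small factor,
        x = (A @ X).T.reshape(-1, order='F')  # transpose, and unfold again
    return x

# Sanity check against the dense Kronecker product on a tiny example:
rng = np.random.default_rng(0)
As = [rng.standard_normal((g, g)) for g in (2, 3, 4)]
x = rng.standard_normal(24)
assert np.allclose(kron_mvprod(As, x), reduce(np.kron, As) @ x)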
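
GaussianGridInference builds on the companion fact that a product kernel evaluated on a Cartesian grid factorises as K = K_1 kron ... kron K_D, so the eigendecomposition K = Q Lambda Q^T is assembled from D small per-axis eigendecompositions; these are exactly the alpha_kron, Qs, QTs and V_kron quantities stored in GridPosterior. A sketch of the resulting exact solve (function names are mine, not from the patch):

import numpy as np
from functools import reduce

def kron_mvprod(As, x):                    # same routine as in the sketch above
    for A in reversed(As):
        X = x.reshape(A.shape[0], -1, order='F')
        x = (A @ X).T.reshape(-1, order='F')
    return x

def grid_gp_solve(Ks, y, noise_var):
    """Exact GP regression when the kernel matrix is K = Ks[0] kron ... kron Ks[-1].

    Returns alpha = (K + noise_var * I)^{-1} y and the log marginal likelihood,
    built from per-axis eigendecompositions only.
    """
    Qs, lams = [], []
    for Kd in Ks:
        lam, Q = np.linalg.eigh(Kd)        # K_d = Q diag(lam) Q^T
        Qs.append(Q)
        lams.append(lam)
    V = reduce(np.kron, lams)              # eigenvalues of the full K
    QT_y = kron_mvprod([Q.T for Q in Qs], y)
    alpha = kron_mvprod(Qs, QT_y / (V + noise_var))
    log_lik = -0.5 * (y @ alpha
                      + np.sum(np.log(V + noise_var))
                      + y.size * np.log(2 * np.pi))
    return alpha, log_lik

# Dense cross-check on a small 3 x 4 grid:
def rbf_gram(xg, ls=0.5):
    d = xg[:, None] - xg[None, :]
    return np.exp(-0.5 * (d / ls) ** 2)

Ks = [rbf_gram(np.linspace(0, 1, 3)), rbf_gram(np.linspace(0, 1, 4))]
y = np.random.default_rng(1).standard_normal(12)
alpha, _ = grid_gp_solve(Ks, y, noise_var=0.1)
assert np.allclose(alpha, np.linalg.solve(reduce(np.kron, Ks) + 0.1 * np.eye(12), y))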
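
One detail of gaussian_grid_inference.py worth flagging: the per-axis grid values are recovered with xg = list(set(X[:,d])), and a Python set carries no ordering guarantee. np.unique returns the values sorted and deterministically, which is the safer spelling if the axis ordering ever matters downstream; for example:

import numpy as np

X = np.array([[0., 5.], [0., 1.], [2., 5.], [2., 1.]])
xg = np.unique(X[:, 1])        # deterministic and sorted: array([1., 5.])
xg = xg.reshape(-1, 1)         # column vector, as the inference code expects
# versus list(set(X[:, 1])), whose iteration order is an implementation detail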
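
End to end, the new model class is used exactly like GPRegression, as grid_tests.py demonstrates. A usage sketch, assuming this branch of GPy is installed (the inputs must form a complete Cartesian grid):

import numpy as np
import GPy

# The eight corners of the unit cube: a complete 2 x 2 x 2 grid,
# matching the setup in GPy/testing/grid_tests.py.
X = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
              [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]], dtype=float)
Y = np.random.randn(8, 1)

kernel = GPy.kern.RBF(input_dim=3, variance=1., ARD=True)
m = GPy.models.GPRegressionGrid(X, Y, kernel)
m.optimize()                       # uses the structured gradients from the patch
mu, var = m.predict(np.array([[0.5, 0.5, 0.5]]))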
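
Finally, for the record: the state-space code removed in PATCH 07 implemented the other half of Gilboa et al. (2015). On sorted one-dimensional inputs, a Matern-3/2 GP is equivalent to a two-state linear-Gaussian model, and a single Kalman-filter sweep yields the marginal likelihood in O(n). A compact sketch of that filtering pass (my names throughout; the closed-form Phi and Q in the deleted Matern32_SSM correspond to the matrix exponential and the stationarity identity used here):

import numpy as np
from scipy.linalg import expm

def matern32_ssm_loglik(X, y, variance, lengthscale, noise_var):
    """O(n) log marginal likelihood of a Matern-3/2 GP on sorted 1-D inputs X,
    via the equivalent SDE with state (f, f')."""
    lam = np.sqrt(3.0) / lengthscale
    F = np.array([[0.0, 1.0], [-lam ** 2, -2.0 * lam]])
    P_inf = np.diag([variance, lam ** 2 * variance])  # stationary state covariance
    H = np.array([[1.0, 0.0]])                        # we observe f, not f'

    mu, V = np.zeros((2, 1)), P_inf.copy()
    ll, prev = 0.0, None
    for x, yt in zip(X, y):
        if prev is not None:
            Phi = expm(F * (x - prev))                # transition across the gap
            Q = P_inf - Phi @ P_inf @ Phi.T           # process noise, by stationarity
            mu = Phi @ mu
            V = Phi @ V @ Phi.T + Q
        prev = x
        s = (H @ V @ H.T).item() + noise_var          # innovation variance
        r = yt - (H @ mu).item()                      # innovation
        ll += -0.5 * (np.log(2.0 * np.pi * s) + r * r / s)
        k = V @ H.T / s                               # Kalman gain
        mu = mu + k * r
        V = V - k @ (H @ V)
    return ll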
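
The backward pass that followed the filter, in both gaussian_ssm_inference.py and GpSSM._raw_predict, is the Rauch-Tung-Striebel smoother. Isolated from the bookkeeping, the recursion the removed code loops over is the following (a hypothetical helper, assuming the filter also stored its per-step transition matrix Phi and predicted covariance P):

import numpy as np

def rts_backward_pass(mus_f, Vs_f, Phis, Ps):
    """Backward Rauch-Tung-Striebel pass over stored Kalman-filter output.

    mus_f, Vs_f : filtered state means and covariances, one per input
    Phis, Ps    : Phis[t], Ps[t] bridge input t to t+1 (transition matrix
                  and predicted covariance), so there are K-1 of each
    Returns the smoothed (mean, covariance) pairs, oldest first.
    """
    mu_s, V_s = mus_f[-1], Vs_f[-1]
    out = [(mu_s, V_s)]
    for t in range(len(mus_f) - 2, -1, -1):
        L = Vs_f[t] @ Phis[t].T @ np.linalg.inv(Ps[t])    # smoother gain
        mu_s = mus_f[t] + L @ (mu_s - Phis[t] @ mus_f[t])
        V_s = Vs_f[t] + L @ (V_s - Ps[t]) @ L.T
        out.append((mu_s, V_s))
    return out[::-1]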