From 06a7fedd22100ad070e95488908c993d8c0c95dd Mon Sep 17 00:00:00 2001
From: Alexander Grigorievskiy <alex.grigorievskiy@gmail.com>
Date: Tue, 19 May 2015 14:03:12 +0300
Subject: [PATCH] ENH: Adding SDE representation of addition, sumation and
 standard periodic kernel.

All changes have been tested tests are added in later commits.
---
 GPy/examples/state_space.py            |  16 +--
 GPy/kern/_src/sde_matern.py            | 134 ++++++++++++++++++++--
 GPy/kern/_src/sde_standard_periodic.py | 152 +++++++++++++++++++++++--
 GPy/kern/src/add.py                    |  78 +++++++++++++
 GPy/kern/src/kern.py                   |  45 --------
 GPy/kern/src/prod.py                   | 107 +++++++++++++++++
 GPy/models/state_space_main.py         |   6 +-
 GPy/models/state_space_new.py          |  21 ++--
 8 files changed, 477 insertions(+), 82 deletions(-)

diff --git a/GPy/examples/state_space.py b/GPy/examples/state_space.py
index 071fb628..a9d770e6 100644
--- a/GPy/examples/state_space.py
+++ b/GPy/examples/state_space.py
@@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
 
 import GPy.models.state_space_new as SS_new
 
-X = np.linspace(0, 10, 4000)[:, None]
+X = np.linspace(0, 10, 2000)[:, None]
 Y = np.sin(X) + np.random.randn(*X.shape)*0.1
 
 # Need to run these lines when X and Y are imported ->
@@ -23,14 +23,14 @@ Y = np.sin(X) + np.random.randn(*X.shape)*0.1
 #plt.plot( X, Y)
 #plt.show()
 
-#kernel = GPy.kern.Matern32(X.shape[1])
-#m = GPy.models.StateSpace(X,Y, kernel)
+kernel = GPy.kern.Matern32(X.shape[1])
+m = GPy.models.StateSpace(X,Y, kernel)
+
+print m
 #
-#print m
-##
-#m.optimize(optimizer='bfgs',messages=True)
-##
-#print m
+m.optimize(optimizer='bfgs',messages=True)
+#
+print m
 
 kernel1 = GPy.kern.Matern32(X.shape[1])
 m1  = GPy.models.GPRegression(X,Y, kernel1)
diff --git a/GPy/kern/_src/sde_matern.py b/GPy/kern/_src/sde_matern.py
index 102691b4..0016c5b8 100644
--- a/GPy/kern/_src/sde_matern.py
+++ b/GPy/kern/_src/sde_matern.py
@@ -20,7 +20,15 @@ class sde_Matern32(Matern32):
        k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
 
     """
-
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.lengthscale.gradient = gradients[1]
+        
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
@@ -28,6 +36,7 @@ class sde_Matern32(Matern32):
         
         variance = float(self.variance.values)
         lengthscale = float(self.lengthscale.values)
+        
         foo  = np.sqrt(3.)/lengthscale 
         F    = np.array([[0, 1], [-foo**2, -2*foo]]) 
         L    = np.array([[0], [1]]) 
@@ -71,16 +80,125 @@ class sde_Matern52(Matern52):
        k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
 
     """
-
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.lengthscale.gradient = gradients[1]
+        
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
         """ 
         
-        # Arno, insert your code here
+        variance = float(self.variance.values)
+        lengthscale = float(self.lengthscale.values)
 
-        # Params to use:
-        # self.lengthscale
-        # self.variance
-
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
\ No newline at end of file
+        lamda = np.sqrt(5.0)/lengthscale
+        kappa = 5.0/3.0*variance/lengthscale**2        
+        
+        F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
+        L = np.array(((0,),(0,),(1,)))
+        Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
+        H = np.array(((1,0,0),))        
+        
+        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
+        
+        # Allocate space for the derivatives         
+        dF = np.empty((3,3,2))        
+        dQc = np.empty((1,1,2))        
+        dPinf = np.empty((3,3,2))
+        
+         # The partial derivatives 
+        dFvariance = np.zeros((3,3))
+        dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, 
+                                   30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
+        dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
+        dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))        
+        
+        dPinf_variance = Pinf/variance
+        kappa2 = -2.0*kappa/lengthscale
+        dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, 
+                                    0,-100*variance/lengthscale**5)))        
+        # Combine the derivatives 
+        dF[:,:,0] = dFvariance
+        dF[:,:,1] = dFlengthscale        
+        dQc[:,:,0] = dQcvariance         
+        dQc[:,:,1] = dQclengthscale        
+        dPinf[:,:,0] = dPinf_variance
+        dPinf[:,:,1] = dPinf_lengthscale
+        
+#        % Derivative of F w.r.t. parameter magnSigma2
+#    dFmagnSigma2    =  [0,  0,  0;
+#                        0,  0,  0;
+#                        0,  0,  0];
+#    
+#    % Derivative of F w.r.t parameter lengthScale
+#    dFlengthScale   =  [0,                          0,                  0;
+#                        0,                          0,                  0;
+#                        15*sqrt(5)/lengthScale^4,    30/lengthScale^3,   3*sqrt(5)/lengthScale^2];
+#    
+#    % Derivative of Qc w.r.t. parameter magnSigma2
+#    dQcmagnSigma2   =   400*sqrt(5)/3/lengthScale^5;
+#    
+#    % Derivative of Qc w.r.t. parameter lengthScale
+#    dQclengthScale  =   -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
+#    
+#    % Derivative of Pinf w.r.t. parameter magnSigma2    
+#    dPinfmagnSigma2 = Pinf/magnSigma2;
+#    
+#    % Derivative of Pinf w.r.t. parameter lengthScale
+#    kappa2 = -2*kappa/lengthScale;
+#    dPinflengthScale = [0,          0,       -kappa2;
+#                        0,          kappa2,  0;
+#                        -kappa2,    0,       -100*magnSigma2/lengthScale^5];
+#  
+#    % Stack all derivatives
+#    dF = zeros(3,3,2);  
+#    dQc = zeros(1,1,2); 
+#    dPinf = zeros(3,3,2);
+#  
+#    dF(:,:,1) = dFmagnSigma2;
+#    dF(:,:,2) = dFlengthScale;
+#    dQc(:,:,1) = dQcmagnSigma2;
+#    dQc(:,:,2) = dQclengthScale;
+#    dPinf(:,:,1) = dPinfmagnSigma2;
+#    dPinf(:,:,2) = dPinflengthScale; 
+  
+#        % Derived constants
+#          lambda = sqrt(5)/lengthScale;
+#        
+#          % Feedback matrix
+#          F = [ 0,          1,          0;
+#                0,          0,          1;
+#               -lambda^3, -3*lambda^2, -3*lambda];
+#        
+#          % Noise effect matrix
+#          L = [0; 0; 1];
+#        
+#          % Spectral density
+#          Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
+#        
+#          % Observation model
+#          H = [1, 0, 0];
+  
+  
+#        %% Stationary covariance
+#          
+#          % Calculate Pinf only if requested
+#          if nargout > 4,
+#              
+#            % Derived constant
+#            kappa = 5/3*magnSigma2/lengthScale^2;
+#            
+#            % Stationary covariance
+#            Pinf = [magnSigma2, 0,      -kappa;
+#                    0,          kappa,  0;
+#                    -kappa,     0,      25*magnSigma2/lengthScale^4];
+#                
+#          end
+        
+        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
\ No newline at end of file
diff --git a/GPy/kern/_src/sde_standard_periodic.py b/GPy/kern/_src/sde_standard_periodic.py
index d72c22af..3829b816 100644
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ b/GPy/kern/_src/sde_standard_periodic.py
@@ -6,6 +6,9 @@ Stochastic Differential Equation (SDE) functionality.
 from .standard_periodic import StdPeriodic
 
 import numpy as np
+import scipy as sp
+
+from scipy import special as special
 
 class sde_StdPeriodic(StdPeriodic):
     """
@@ -21,20 +24,153 @@ class sde_StdPeriodic(StdPeriodic):
        \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
 
     """
-
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.wavelengths.gradient = gradients[1]
+        self.lengthscales.gradient = gradients[2]
+        
     def sde(self): 
         """ 
-        Return the state space representation of the covariance. 
+        Return the state space representation of the covariance.
+        
+        
+        ! Note: one must constrain lengthscale not to drop below 0.25.
+        After this bessel functions of the first kind grows to very high.
+        
+        ! Note: one must keep wevelength also not very low. Because then
+        the gradients wrt wavelength become ustable. 
+        However this might depend on the data. For test example with
+        300 data points the low limit is 0.15.
         """ 
         
-        # Arno, insert your code here
-
-        # Params to use:
+        # Params to use: (in that order)
         #self.variance
         #self.wavelengths
         #self.lengthscales
+        N = 7 # approximation order        
         
-        # Arno, you could visualize the Latex version of the kernel formula
-        # and assume inputs are 1D, so no ARD is used. Then use parameters aboove.        
         
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
\ No newline at end of file
+        w0 = 2*np.pi/self.wavelengths # frequency
+        
+        [q2,dq2l] = seriescoeff(N,2*self.lengthscales,self.variance)        
+        # lengthscale is multiplied by 2 because of slightly different
+        # formula for periodic covariance function.
+        # For the same reason:
+        
+        dq2l = 2*dq2l
+        
+        if np.any( np.isnan(q2)):
+            raise ValueError("SDE periodic covariance error1")
+        
+        if np.any( np.isnan(dq2l)):
+            raise ValueError("SDE periodic covariance error1")
+        
+        F    = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
+        L    = np.eye(2*(N+1))
+        Qc   = np.zeros((2*(N+1), 2*(N+1)))
+        P_inf = np.kron(np.diag(q2),np.eye(2))
+        H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
+        
+        
+        # Derivatives
+        dF = np.empty((F.shape[0], F.shape[1], 3))
+        dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
+        dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))         
+        
+        # Derivatives wrt self.variance
+        dF[:,:,0] = np.zeros(F.shape)
+        dQc[:,:,0] = np.zeros(Qc.shape)
+        dP_inf[:,:,0] = P_inf / self.variance
+
+        # Derivatives self.wavelengths
+        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / self.wavelengths );
+        dQc[:,:,1] = np.zeros(Qc.shape)
+        dP_inf[:,:,1] = np.zeros(P_inf.shape)      
+        
+        # Derivatives self.lengthscales        
+        dF[:,:,2] = np.zeros(F.shape)
+        dQc[:,:,2] = np.zeros(Qc.shape)
+        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
+        
+
+        return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
+        
+        
+        
+        
+def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
+    """
+    Calculate the coefficients q_j^2 for the covariance function 
+    approximation:
+    
+        k(\tau) =  \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
+    
+    Reference is:
+
+    [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic 
+        covariance functions and state space models. In Proceedings of the 
+        Seventeenth International Conference on Artifcial Intelligence and 
+        Statistics (AISTATS 2014). JMLR: W&CP, volume 33.    
+    
+    Note! Only the infinite approximation (through Bessel function) 
+          is currently implemented.
+
+    Input:
+    ----------------
+    
+    m: int
+        Degree of approximation. Default 6.
+    lengthScale: float
+        Length scale parameter in the kerenl
+    magnSigma2:float
+        Multiplier in front of the kernel.
+        
+    
+    Output:
+    -----------------
+    
+    coeffs: array(m+1)
+        Covariance series coefficients
+    
+    coeffs_dl: array(m+1)
+        Derivatives of the coefficients with respect to lengthscale.
+    
+    """
+    
+    if true_covariance:
+        
+        bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
+            sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
+            np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
+                
+        M,J = np.meshgrid(range(0,m+1),range(0,m+1))
+        
+        coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
+             (lengthScale**(-2))**J  *magnSigma2
+        
+        coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)         
+        
+        coeffs = np.sum(coeffs,0)
+        
+    else:
+        coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
+        if np.any( np.isnan(coeffs)):
+            pass
+        coeffs[0] = 0.5*coeffs[0]
+        
+        # Derivatives wrt (lengthScale)
+        coeffs_dl = np.zeros(m+1)
+        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
+        (-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )    
+            
+        # The first element
+        coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
+            (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )     
+        
+
+    return coeffs, coeffs_dl
diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py
index 8f91d639..46e86e13 100644
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@@ -263,4 +263,82 @@ class Add(CombinationKernel):
                 i_s[k._all_dims_active] += k.input_sensitivity(summarize)
             return i_s
         else:
+
             return super(Add, self).input_sensitivity(summarize)
+            
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+        part_start_param_index = 0
+        for p in self.parts:
+            if not p.is_fixed:
+                part_param_num = len(p.param_array) # number of parameters in the part
+                p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
+                part_start_param_index += part_param_num
+    
+    def sde(self):
+        """
+        Support adding kernels for sde representation
+        """
+        
+        import scipy.linalg as la
+
+        F     = None
+        L     = None
+        Qc    = None
+        H     = None
+        Pinf  = None
+        dF    = None
+        dQc   = None
+        dPinf = None    
+        n = 0
+        nq = 0
+        nd = 0
+
+         # Assign models
+        for p in self.parts:
+            (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
+            F = la.block_diag(F,Ft) if (F is not None) else Ft
+            L = la.block_diag(L,Lt) if (L is not None) else Lt
+            Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
+            H = np.hstack((H,Ht)) if (H is not None) else Ht
+             
+            Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
+             
+            if dF is not None:
+                dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
+                        'constant', constant_values=0)
+                dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
+            else:
+                dF = dFt
+             
+            if dQc is not None:
+                dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
+                        'constant', constant_values=0)
+                dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
+            else:
+                dQc = dQct
+             
+            if dPinf is not None:
+                dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
+                        'constant', constant_values=0)
+                dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
+            else:
+                dPinf = dPinft
+             
+            n += Ft.shape[0]
+            nq += Qct.shape[0]
+            nd += dFt.shape[2]
+        
+        assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
+        assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
+        assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
+        assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
+        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"        
+        assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
+        assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
+        assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
+        
+        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py
index e690dfc4..f170ec1b 100644
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@@ -305,51 +305,6 @@ class Kern(Parameterized):
     def _check_active_dims(self, X):
         assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
 
-    def sde(self):
-         # TODO: should support adding kernels together
-
-         #raise NameError('Problem')
-
-         # Find out state dimensions
-         n = 0
-         nq = 0
-         nd = 0
-         for p in self.parts:
-             (F,L,Qc,H,Pinf,dF,dQc,dPinf) = p.sde()
-             n  += F.shape[0]
-             nq += Qc.shape[0]
-             nd += dF.shape[2]
-
-         # Allocate space for the matrices
-         F     = np.zeros((n,n))
-         L     = np.zeros((n,nq))
-         Qc    = np.zeros((nq,nq))
-         H     = np.zeros((1,n))
-         Pinf  = np.zeros((n,n))
-         dF    = np.zeros((n,n,nd))
-         dQc   = np.zeros((nq,nq,nd))
-         dPinf = np.zeros((n,n,nd))
-         n = 0
-         nq = 0
-         nd = 0
-
-         # Assign models
-         for p in self.parts:
-             (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
-             F[n:n+Ft.shape[0],n:n+Ft.shape[1]] = Ft
-             L[n:n+Lt.shape[0],nq:nq+Lt.shape[1]] = Lt
-             Qc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1]] = Qct
-             H[0,n:n+Ht.shape[1]] = Ht
-             Pinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1]] = Pinft
-             dF[n:n+Ft.shape[0],n:n+Ft.shape[1],nd:nd+dFt.shape[2]] = dFt
-             dQc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1],nd:nd+dQct.shape[2]] = dQct
-             dPinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1],nd:nd+dPinft.shape[2]] = dPinft
-             n += Ft.shape[0]
-             nq += Qct.shape[0]
-             nd += dFt.shape[2]
-
-         return (F,L,Qc,H,Pinf,dF,dQc,dPinf) 
-
 class CombinationKernel(Kern):
     """
     Abstract super class for combination kernels.
diff --git a/GPy/kern/src/prod.py b/GPy/kern/src/prod.py
index 1e18b405..2b2588a0 100644
--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@@ -105,3 +105,110 @@ class Prod(CombinationKernel):
             return i_s
         else:
             return super(Prod, self).input_sensitivity(summarize)
+
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+        part_start_param_index = 0
+        for p in self.parts:
+            if not p.is_fixed:
+                part_param_num = len(p.param_array) # number of parameters in the part
+                p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
+                part_start_param_index += part_param_num
+                
+    def sde(self):
+        """
+        """
+        F      = np.array((0,), ndmin=2)
+        L      = np.array((1,), ndmin=2)
+        Qc     = np.array((1,), ndmin=2)
+        H      = np.array((1,), ndmin=2)
+        Pinf   = np.array((1,), ndmin=2)
+        dF     = None
+        dQc    = None
+        dPinf  = None
+          
+         # Assign models
+        for p in self.parts:
+            (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
+            
+            # check derivative dimensions ->
+            number_of_parameters = len(p.param_array)            
+            assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
+            assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
+            assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
+            # check derivative dimensions <-
+            
+            # exception for periodic kernel
+            if (p.name == 'std_periodic'):
+                Qct = P_inft  
+                dQct = dP_inft                 
+            
+            dF    = dkron(F,dF,Ft,dFt,'sum')
+            dQc   = dkron(Qc,dQc,Qct,dQct,'prod')
+            dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
+             
+            F    = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
+            L    = np.kron(L,Lt)
+            Qc   = np.kron(Qc,Qct)
+            Pinf = np.kron(Pinf,P_inft)
+            H    = np.kron(H,Ht)
+            
+        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+
+def dkron(A,dA,B,dB, operation='prod'):
+    """
+    Function computes the derivative of Kronecker product A*B 
+    (or Kronecker sum A+B).
+    
+    Input:
+    -----------------------
+    
+    A: 2D matrix
+        Some matrix 
+    dA: 3D (or 2D matrix)
+        Derivarives of A
+    B: 2D matrix
+        Some matrix 
+    dB: 3D (or 2D matrix)
+        Derivarives of B    
+    
+    operation: str 'prod' or 'sum'
+        Which operation is considered. If the operation is 'sum' it is assumed
+        that A and are square matrices.s
+    
+    Output:
+        dC: 3D matrix
+        Derivative of Kronecker product A*B (or Kronecker sum A+B)
+    """
+    
+    if dA is None:
+        dA_param_num = 0
+        dA = np.zeros((A.shape[0], A.shape[1],1))
+    else:
+        dA_param_num = dA.shape[2]
+    
+    if dB is None:
+        dB_param_num = 0
+        dB = np.zeros((B.shape[0], B.shape[1],1))    
+    else:
+        dB_param_num = dB.shape[2]
+
+    # Space allocation for derivative matrix
+    dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num +  dB_param_num))    
+    
+    for k in range(dA_param_num):
+        if operation == 'prod':
+            dC[:,:,k] = np.kron(dA[:,:,k],B);
+        else:
+            dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] ))
+            
+    for k in range(dB_param_num):
+        if operation == 'prod':
+            dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k])
+        else:
+            dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k])
+            
+    return dC
diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py
index b903ef34..6273423b 100644
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@@ -723,7 +723,7 @@ class DescreteStateSpace(object):
                                          v*v / S)
                      log_likelihood_update = log_likelihood_update[0,0] # to make int
                      if np.isnan(log_likelihood_update):
-                         pass
+                         raise ValueError("Errrrr 1")
                  LL = None; islower = None
              else:
                  LL,islower = linalg.cho_factor(S)
@@ -806,7 +806,7 @@ class DescreteStateSpace(object):
              P_upd = K.dot(S).dot(K.T)
              P_upd = 0.5*(P_upd + P_upd.T)
              P_upd =  P_pred - P_upd# this update matrix is symmetric
-        
+         
              return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
     
     @staticmethod
@@ -1278,7 +1278,7 @@ class ContDescrStateSpace(DescreteStateSpace):
             dQc = None
             dH = None
             dR = None
-        
+            
             
         # TODO: Defaults for m_init, P_init, dm_init, dP_init. !!!
         # Also for dH, dR and probably for all derivatives
diff --git a/GPy/models/state_space_new.py b/GPy/models/state_space_new.py
index 567ee434..d3d17e66 100644
--- a/GPy/models/state_space_new.py
+++ b/GPy/models/state_space_new.py
@@ -25,6 +25,7 @@ import pylab as pb
 from GPy.core.parameterization.param import Param
 
 import GPy
+from .. import likelihoods
 import GPy.models.state_space_main as ssm
 #import state_space_main as ssm
 reload(ssm)
@@ -45,17 +46,16 @@ class StateSpace(Model):
         self.Y = Y[sort_index]
 
         # Noise variance
-        self.sigma2 = Param('Gaussian_noise', sigma2)
-        self.link_parameter(self.sigma2)
-
+        self.likelihood = likelihoods.Gaussian()
+        
         # Default kernel
         if kernel is None:
             self.kern = kern.Matern32(1)
         else:
             self.kern = kernel
         self.link_parameter(self.kern)
-
-        self.sigma2.constrain_positive()
+        self.link_parameter(self.likelihood)
+        self.posterior = None
 
         # Assert that the kernel is supported
         if not hasattr(self.kern, 'sde'):
@@ -98,7 +98,7 @@ class StateSpace(Model):
         grad_calc_params['dR'] = dR
         
         (filter_means, filter_covs, log_likelihood, 
-         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
                                       P_init=None, calc_log_likelihood=True, 
                                       calc_grad_log_likelihood=True, 
                                       grad_params_no=grad_params_no, 
@@ -106,9 +106,10 @@ class StateSpace(Model):
                       
         self._log_marginal_likelihood = log_likelihood
         #gradients  = self.compute_gradients()
-        self.sigma2.gradient_full[:] = grad_log_likelihood[-1,0]
-        self.kern.gradient_full[:] = grad_log_likelihood[:-1,0]
-
+        self.likelihood.update_gradients(grad_log_likelihood[-1,0])
+        
+        self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
+        
     def log_likelihood(self):
         return self._log_marginal_likelihood
 
@@ -319,4 +320,4 @@ class StateSpace(Model):
             f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
 
         # Return values
-        return f
\ No newline at end of file
+        return f