ENH: Adding SDE representation of addition, sumation and standard periodic kernel.

All changes have been tested tests are added in later commits.
2026-05-08 11:32:39 +02:00 · 2015-05-19 14:03:12 +03:00 · 2015-05-19 14:03:12 +03:00 · 06a7fedd22
commit 06a7fedd22
parent 00e95f957d
8 changed files with 477 additions and 82 deletions
--- a/GPy/examples/state_space.py
+++ b/GPy/examples/state_space.py
@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
 import GPy.models.state_space_new as SS_new
-X = np.linspace(0, 10, 4000)[:, None]
+X = np.linspace(0, 10, 2000)[:, None]
 Y = np.sin(X) + np.random.randn(*X.shape)*0.1
 # Need to run these lines when X and Y are imported ->
@ -23,14 +23,14 @@ Y = np.sin(X) + np.random.randn(*X.shape)*0.1
 #plt.plot( X, Y)
 #plt.show()
-#kernel = GPy.kern.Matern32(X.shape[1])
+kernel = GPy.kern.Matern32(X.shape[1])
-#m = GPy.models.StateSpace(X,Y, kernel)
+m = GPy.models.StateSpace(X,Y, kernel)
 print m
 #
-#print m
+m.optimize(optimizer='bfgs',messages=True)
-##
+#
-#m.optimize(optimizer='bfgs',messages=True)
+print m
 ##
 #print m
 kernel1 = GPy.kern.Matern32(X.shape[1])
 m1  = GPy.models.GPRegression(X,Y, kernel1)
--- a/GPy/kern/_src/sde_matern.py
+++ b/GPy/kern/_src/sde_matern.py
@ -20,6 +20,14 @@ class sde_Matern32(Matern32):
       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
    """
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
        self.variance.gradient = gradients[0]
        self.lengthscale.gradient = gradients[1]
    def sde(self): 
        """ 
@ -28,6 +36,7 @@ class sde_Matern32(Matern32):
        variance = float(self.variance.values)
        lengthscale = float(self.lengthscale.values)
        foo  = np.sqrt(3.)/lengthscale 
        F    = np.array([[0, 1], [-foo**2, -2*foo]]) 
        L    = np.array([[0], [1]]) 
@ -71,16 +80,125 @@ class sde_Matern52(Matern52):
       k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
    """
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
        self.variance.gradient = gradients[0]
        self.lengthscale.gradient = gradients[1]
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
-        # Arno, insert your code here
+        variance = float(self.variance.values)
        lengthscale = float(self.lengthscale.values)
-        # Params to use:
+        lamda = np.sqrt(5.0)/lengthscale
-        # self.lengthscale
+        kappa = 5.0/3.0*variance/lengthscale**2        
        # self.variance
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
+        F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
        L = np.array(((0,),(0,),(1,)))
        Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
        H = np.array(((1,0,0),))        
        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
        # Allocate space for the derivatives         
        dF = np.empty((3,3,2))        
        dQc = np.empty((1,1,2))        
        dPinf = np.empty((3,3,2))
         # The partial derivatives 
        dFvariance = np.zeros((3,3))
        dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, 
                                   30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
        dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
        dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))        
        dPinf_variance = Pinf/variance
        kappa2 = -2.0*kappa/lengthscale
        dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, 
                                    0,-100*variance/lengthscale**5)))        
        # Combine the derivatives 
        dF[:,:,0] = dFvariance
        dF[:,:,1] = dFlengthscale        
        dQc[:,:,0] = dQcvariance         
        dQc[:,:,1] = dQclengthscale        
        dPinf[:,:,0] = dPinf_variance
        dPinf[:,:,1] = dPinf_lengthscale
 #        % Derivative of F w.r.t. parameter magnSigma2
 #    dFmagnSigma2    =  [0,  0,  0;
 #                        0,  0,  0;
 #                        0,  0,  0];
 #    
 #    % Derivative of F w.r.t parameter lengthScale
 #    dFlengthScale   =  [0,                          0,                  0;
 #                        0,                          0,                  0;
 #                        15*sqrt(5)/lengthScale^4,    30/lengthScale^3,   3*sqrt(5)/lengthScale^2];
 #    
 #    % Derivative of Qc w.r.t. parameter magnSigma2
 #    dQcmagnSigma2   =   400*sqrt(5)/3/lengthScale^5;
 #    
 #    % Derivative of Qc w.r.t. parameter lengthScale
 #    dQclengthScale  =   -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
 #    
 #    % Derivative of Pinf w.r.t. parameter magnSigma2    
 #    dPinfmagnSigma2 = Pinf/magnSigma2;
 #    
 #    % Derivative of Pinf w.r.t. parameter lengthScale
 #    kappa2 = -2*kappa/lengthScale;
 #    dPinflengthScale = [0,          0,       -kappa2;
 #                        0,          kappa2,  0;
 #                        -kappa2,    0,       -100*magnSigma2/lengthScale^5];
 #  
 #    % Stack all derivatives
 #    dF = zeros(3,3,2);  
 #    dQc = zeros(1,1,2); 
 #    dPinf = zeros(3,3,2);
 #  
 #    dF(:,:,1) = dFmagnSigma2;
 #    dF(:,:,2) = dFlengthScale;
 #    dQc(:,:,1) = dQcmagnSigma2;
 #    dQc(:,:,2) = dQclengthScale;
 #    dPinf(:,:,1) = dPinfmagnSigma2;
 #    dPinf(:,:,2) = dPinflengthScale; 
 #        % Derived constants
 #          lambda = sqrt(5)/lengthScale;
 #        
 #          % Feedback matrix
 #          F = [ 0,          1,          0;
 #                0,          0,          1;
 #               -lambda^3, -3*lambda^2, -3*lambda];
 #        
 #          % Noise effect matrix
 #          L = [0; 0; 1];
 #        
 #          % Spectral density
 #          Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
 #        
 #          % Observation model
 #          H = [1, 0, 0];
 #        %% Stationary covariance
 #          
 #          % Calculate Pinf only if requested
 #          if nargout > 4,
 #              
 #            % Derived constant
 #            kappa = 5/3*magnSigma2/lengthScale^2;
 #            
 #            % Stationary covariance
 #            Pinf = [magnSigma2, 0,      -kappa;
 #                    0,          kappa,  0;
 #                    -kappa,     0,      25*magnSigma2/lengthScale^4];
 #                
 #          end
        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ b/GPy/kern/_src/sde_standard_periodic.py
@ -6,6 +6,9 @@ Stochastic Differential Equation (SDE) functionality.
 from .standard_periodic import StdPeriodic
 import numpy as np
 import scipy as sp
 from scipy import special as special
 class sde_StdPeriodic(StdPeriodic):
    """
@ -21,20 +24,153 @@ class sde_StdPeriodic(StdPeriodic):
       \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
    """
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
        self.variance.gradient = gradients[0]
        self.wavelengths.gradient = gradients[1]
        self.lengthscales.gradient = gradients[2]
    def sde(self): 
        """ 
        Return the state space representation of the covariance.
        ! Note: one must constrain lengthscale not to drop below 0.25.
        After this bessel functions of the first kind grows to very high.
        ! Note: one must keep wevelength also not very low. Because then
        the gradients wrt wavelength become ustable. 
        However this might depend on the data. For test example with
        300 data points the low limit is 0.15.
        """ 
-        # Arno, insert your code here
+        # Params to use: (in that order)
        # Params to use:
        #self.variance
        #self.wavelengths
        #self.lengthscales
        N = 7 # approximation order        
        # Arno, you could visualize the Latex version of the kernel formula
        # and assume inputs are 1D, so no ARD is used. Then use parameters aboove.        
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        w0 = 2*np.pi/self.wavelengths # frequency
        [q2,dq2l] = seriescoeff(N,2*self.lengthscales,self.variance)        
        # lengthscale is multiplied by 2 because of slightly different
        # formula for periodic covariance function.
        # For the same reason:
        dq2l = 2*dq2l
        if np.any( np.isnan(q2)):
            raise ValueError("SDE periodic covariance error1")
        if np.any( np.isnan(dq2l)):
            raise ValueError("SDE periodic covariance error1")
        F    = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
        L    = np.eye(2*(N+1))
        Qc   = np.zeros((2*(N+1), 2*(N+1)))
        P_inf = np.kron(np.diag(q2),np.eye(2))
        H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
        # Derivatives
        dF = np.empty((F.shape[0], F.shape[1], 3))
        dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
        dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))         
        # Derivatives wrt self.variance
        dF[:,:,0] = np.zeros(F.shape)
        dQc[:,:,0] = np.zeros(Qc.shape)
        dP_inf[:,:,0] = P_inf / self.variance
        # Derivatives self.wavelengths
        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / self.wavelengths );
        dQc[:,:,1] = np.zeros(Qc.shape)
        dP_inf[:,:,1] = np.zeros(P_inf.shape)      
        # Derivatives self.lengthscales        
        dF[:,:,2] = np.zeros(F.shape)
        dQc[:,:,2] = np.zeros(Qc.shape)
        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
        return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
 def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
    """
    Calculate the coefficients q_j^2 for the covariance function 
    approximation:
        k(\tau) =  \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
    Reference is:
    [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic 
        covariance functions and state space models. In Proceedings of the 
        Seventeenth International Conference on Artifcial Intelligence and 
        Statistics (AISTATS 2014). JMLR: W&CP, volume 33.    
    Note! Only the infinite approximation (through Bessel function) 
          is currently implemented.
    Input:
    ----------------
    m: int
        Degree of approximation. Default 6.
    lengthScale: float
        Length scale parameter in the kerenl
    magnSigma2:float
        Multiplier in front of the kernel.
    Output:
    -----------------
    coeffs: array(m+1)
        Covariance series coefficients
    coeffs_dl: array(m+1)
        Derivatives of the coefficients with respect to lengthscale.
    """
    if true_covariance:
        bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
            sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
            np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
        M,J = np.meshgrid(range(0,m+1),range(0,m+1))
        coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
             (lengthScale**(-2))**J  *magnSigma2
        coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)         
        coeffs = np.sum(coeffs,0)
    else:
        coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
        if np.any( np.isnan(coeffs)):
            pass
        coeffs[0] = 0.5*coeffs[0]
        # Derivatives wrt (lengthScale)
        coeffs_dl = np.zeros(m+1)
        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
        (-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )    
        # The first element
        coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
            (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )     
    return coeffs, coeffs_dl
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@ -263,4 +263,82 @@ class Add(CombinationKernel):
                i_s[k._all_dims_active] += k.input_sensitivity(summarize)
            return i_s
        else:
            return super(Add, self).input_sensitivity(summarize)
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
        part_start_param_index = 0
        for p in self.parts:
            if not p.is_fixed:
                part_param_num = len(p.param_array) # number of parameters in the part
                p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
                part_start_param_index += part_param_num
    def sde(self):
        """
        Support adding kernels for sde representation
        """
        import scipy.linalg as la
        F     = None
        L     = None
        Qc    = None
        H     = None
        Pinf  = None
        dF    = None
        dQc   = None
        dPinf = None    
        n = 0
        nq = 0
        nd = 0
         # Assign models
        for p in self.parts:
            (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
            F = la.block_diag(F,Ft) if (F is not None) else Ft
            L = la.block_diag(L,Lt) if (L is not None) else Lt
            Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
            H = np.hstack((H,Ht)) if (H is not None) else Ht
            Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
            if dF is not None:
                dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
                        'constant', constant_values=0)
                dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
            else:
                dF = dFt
            if dQc is not None:
                dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
                        'constant', constant_values=0)
                dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
            else:
                dQc = dQct
            if dPinf is not None:
                dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
                        'constant', constant_values=0)
                dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
            else:
                dPinf = dPinft
            n += Ft.shape[0]
            nq += Qct.shape[0]
            nd += dFt.shape[2]
        assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
        assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
        assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
        assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"        
        assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
        assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
        assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@ -305,51 +305,6 @@ class Kern(Parameterized):
    def _check_active_dims(self, X):
        assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
    def sde(self):
         # TODO: should support adding kernels together
         #raise NameError('Problem')
         # Find out state dimensions
         n = 0
         nq = 0
         nd = 0
         for p in self.parts:
             (F,L,Qc,H,Pinf,dF,dQc,dPinf) = p.sde()
             n  += F.shape[0]
             nq += Qc.shape[0]
             nd += dF.shape[2]
         # Allocate space for the matrices
         F     = np.zeros((n,n))
         L     = np.zeros((n,nq))
         Qc    = np.zeros((nq,nq))
         H     = np.zeros((1,n))
         Pinf  = np.zeros((n,n))
         dF    = np.zeros((n,n,nd))
         dQc   = np.zeros((nq,nq,nd))
         dPinf = np.zeros((n,n,nd))
         n = 0
         nq = 0
         nd = 0
         # Assign models
         for p in self.parts:
             (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
             F[n:n+Ft.shape[0],n:n+Ft.shape[1]] = Ft
             L[n:n+Lt.shape[0],nq:nq+Lt.shape[1]] = Lt
             Qc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1]] = Qct
             H[0,n:n+Ht.shape[1]] = Ht
             Pinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1]] = Pinft
             dF[n:n+Ft.shape[0],n:n+Ft.shape[1],nd:nd+dFt.shape[2]] = dFt
             dQc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1],nd:nd+dQct.shape[2]] = dQct
             dPinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1],nd:nd+dPinft.shape[2]] = dPinft
             n += Ft.shape[0]
             nq += Qct.shape[0]
             nd += dFt.shape[2]
         return (F,L,Qc,H,Pinf,dF,dQc,dPinf) 
 class CombinationKernel(Kern):
    """
    Abstract super class for combination kernels.
--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@ -105,3 +105,110 @@ class Prod(CombinationKernel):
            return i_s
        else:
            return super(Prod, self).input_sensitivity(summarize)
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
        part_start_param_index = 0
        for p in self.parts:
            if not p.is_fixed:
                part_param_num = len(p.param_array) # number of parameters in the part
                p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
                part_start_param_index += part_param_num
    def sde(self):
        """
        """
        F      = np.array((0,), ndmin=2)
        L      = np.array((1,), ndmin=2)
        Qc     = np.array((1,), ndmin=2)
        H      = np.array((1,), ndmin=2)
        Pinf   = np.array((1,), ndmin=2)
        dF     = None
        dQc    = None
        dPinf  = None
         # Assign models
        for p in self.parts:
            (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
            # check derivative dimensions ->
            number_of_parameters = len(p.param_array)            
            assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
            assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
            assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
            # check derivative dimensions <-
            # exception for periodic kernel
            if (p.name == 'std_periodic'):
                Qct = P_inft  
                dQct = dP_inft                 
            dF    = dkron(F,dF,Ft,dFt,'sum')
            dQc   = dkron(Qc,dQc,Qct,dQct,'prod')
            dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
            F    = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
            L    = np.kron(L,Lt)
            Qc   = np.kron(Qc,Qct)
            Pinf = np.kron(Pinf,P_inft)
            H    = np.kron(H,Ht)
        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
 def dkron(A,dA,B,dB, operation='prod'):
    """
    Function computes the derivative of Kronecker product A*B 
    (or Kronecker sum A+B).
    Input:
    -----------------------
    A: 2D matrix
        Some matrix 
    dA: 3D (or 2D matrix)
        Derivarives of A
    B: 2D matrix
        Some matrix 
    dB: 3D (or 2D matrix)
        Derivarives of B    
    operation: str 'prod' or 'sum'
        Which operation is considered. If the operation is 'sum' it is assumed
        that A and are square matrices.s
    Output:
        dC: 3D matrix
        Derivative of Kronecker product A*B (or Kronecker sum A+B)
    """
    if dA is None:
        dA_param_num = 0
        dA = np.zeros((A.shape[0], A.shape[1],1))
    else:
        dA_param_num = dA.shape[2]
    if dB is None:
        dB_param_num = 0
        dB = np.zeros((B.shape[0], B.shape[1],1))    
    else:
        dB_param_num = dB.shape[2]
    # Space allocation for derivative matrix
    dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num +  dB_param_num))    
    for k in range(dA_param_num):
        if operation == 'prod':
            dC[:,:,k] = np.kron(dA[:,:,k],B);
        else:
            dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] ))
    for k in range(dB_param_num):
        if operation == 'prod':
            dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k])
        else:
            dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k])
    return dC
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@ -723,7 +723,7 @@ class DescreteStateSpace(object):
                                         v*v / S)
                     log_likelihood_update = log_likelihood_update[0,0] # to make int
                     if np.isnan(log_likelihood_update):
-                         pass
+                         raise ValueError("Errrrr 1")
                 LL = None; islower = None
             else:
                 LL,islower = linalg.cho_factor(S)
--- a/GPy/models/state_space_new.py
+++ b/GPy/models/state_space_new.py
@ -25,6 +25,7 @@ import pylab as pb
 from GPy.core.parameterization.param import Param
 import GPy
 from .. import likelihoods
 import GPy.models.state_space_main as ssm
 #import state_space_main as ssm
 reload(ssm)
@ -45,8 +46,7 @@ class StateSpace(Model):
        self.Y = Y[sort_index]
        # Noise variance
-        self.sigma2 = Param('Gaussian_noise', sigma2)
+        self.likelihood = likelihoods.Gaussian()
        self.link_parameter(self.sigma2)
        # Default kernel
        if kernel is None:
@ -54,8 +54,8 @@ class StateSpace(Model):
        else:
            self.kern = kernel
        self.link_parameter(self.kern)
-
+        self.link_parameter(self.likelihood)
-        self.sigma2.constrain_positive()
+        self.posterior = None
        # Assert that the kernel is supported
        if not hasattr(self.kern, 'sde'):
@ -98,7 +98,7 @@ class StateSpace(Model):
        grad_calc_params['dR'] = dR
        (filter_means, filter_covs, log_likelihood, 
-         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
                                      P_init=None, calc_log_likelihood=True, 
                                      calc_grad_log_likelihood=True, 
                                      grad_params_no=grad_params_no, 
@ -106,8 +106,9 @@ class StateSpace(Model):
        self._log_marginal_likelihood = log_likelihood
        #gradients  = self.compute_gradients()
-        self.sigma2.gradient_full[:] = grad_log_likelihood[-1,0]
+        self.likelihood.update_gradients(grad_log_likelihood[-1,0])
-        self.kern.gradient_full[:] = grad_log_likelihood[:-1,0]
+        
        self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
    def log_likelihood(self):
        return self._log_marginal_likelihood