ENH: Added SDE for all basic kernels except Rational Quadratic.

Some necessary modifications for the previous code are performed.
2026-05-08 19:42:39 +02:00 · 2015-07-14 16:44:21 +03:00 · 2015-07-14 16:44:21 +03:00 · 82cb626cd6
commit 82cb626cd6
parent 06a7fedd22
10 changed files with 1740 additions and 777 deletions
--- a/GPy/kern/_src/sde_brownian.py
+++ b/GPy/kern/_src/sde_brownian.py
@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+Classes in this module enhance Brownian motion covariance function with the
+Stochastic Differential Equation (SDE) functionality.
+"""
+
+from .brownian import Brownian
+
+import numpy as np
+
+class sde_Brownian(Brownian):
+    """
+    
+    This class provides extra functionality to transform this covariance
+    function into SDE form.
+    
+    Brownian motion kernel:
+
+    .. math::
+
+       k(x,y) = \sigma^2 \min(x,y)
+
+    """
+    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        
+    def sde(self): 
+        """ 
+        Return the state space representation of the covariance. 
+        """ 
+        
+        variance = float(self.variance.values) # Brownian motion variance; used below as the spectral density Qc
+        
+        F = np.array( ((0,1.0),(0,0) ))
+        L = np.array( ((1.0,),(0,)) )
+        Qc = np.array( ((variance,),) )
+        H = np.array( ((1.0,0),) )
+        
+        Pinf   = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
+        #P0 = Pinf.copy() 
+        P0 = np.zeros((2,2))   
+        #Pinf   = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
+        dF = np.zeros((2,2,1))
+        dQc    = np.ones( (1,1,1) )
+        
+        dPinf = np.zeros((2,2,1))
+        dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
+        #dP0 = dPinf.copy() 
+        dP0 = np.zeros((2,2,1))
+  
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_linear.py
+++ b/GPy/kern/_src/sde_linear.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Classes in this module enhance Matern covariance functions with the
+Classes in this module enhance Linear covariance function with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .linear import Linear
@ -20,16 +20,45 @@ class sde_Linear(Linear):
       k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i

    """
+    def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
+        """
+        Modify the init method, because one extra parameter is required. X - points
+        on the X axis.
+        """
+        
+        super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
+        
+        self.t0 = np.min(X)
+        
+    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variances.gradient = gradients[0]
        
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
        
-        # Arno, insert your code here
+        variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
+        t0 = float(self.t0)
        
-        # Params to use:
+        F = np.array( ((0,1.0),(0,0) ))
+        L = np.array( ((0,),(1.0,)) )
+        Qc = np.zeros((1,1))
+        H = np.array( ((1.0,0),) )
        
-        # self.variances
+        Pinf   = np.zeros((2,2))
+        P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance        
+        dF = np.zeros((2,2,1))
+        dQc    = np.zeros( (1,1,1) )
        
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        dPinf = np.zeros((2,2,1))
+        dP0 = np.zeros((2,2,1))
+        dP0[:,:,0]  = P0 / variance
+  
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_matern.py
+++ b/GPy/kern/_src/sde_matern.py
@ -38,25 +38,24 @@ class sde_Matern32(Matern32):
        lengthscale = float(self.lengthscale.values)
        
        foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array([[0, 1], [-foo**2, -2*foo]]) 
-        L    = np.array([[0], [1]]) 
-        Qc   = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]]) 
-        H    = np.array([[1, 0]]) 
-        Pinf = np.array([[variance, 0],  
-        [0,              3.*variance/(lengthscale**2)]]) 
+        F    = np.array(((0, 1), (-foo**2, -2*foo))) 
+        L    = np.array(( (0,), (1,) ))
+        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) 
+        H    = np.array(((1, 0),)) 
+        Pinf = np.array(((variance, 0), (0, 3.*variance/(lengthscale**2))))
+        P0 = Pinf.copy()
+        
        # Allocate space for the derivatives 
        dF    = np.empty([F.shape[0],F.shape[1],2])
        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
        # The partial derivatives 
-        dFvariance       = np.zeros([2,2]) 
-        dFlengthscale    = np.array([[0,0], 
-        [6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]]) 
-        dQcvariance      = np.array([12.*np.sqrt(3)/lengthscale**3]) 
-        dQclengthscale   = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance]) 
-        dPinfvariance    = np.array([[1,0],[0,3./lengthscale**2]]) 
-        dPinflengthscale = np.array([[0,0], 
-        [0,-6*variance/lengthscale**3]]) 
+        dFvariance       = np.zeros((2,2)) 
+        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) 
+        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3)) 
+        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) 
+        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2))) 
+        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) 
        # Combine the derivatives 
        dF[:,:,0]    = dFvariance 
        dF[:,:,1]    = dFlengthscale 
@ -64,8 +63,9 @@ class sde_Matern32(Matern32):
        dQc[:,:,1]   = dQclengthscale 
        dPinf[:,:,0] = dPinfvariance 
        dPinf[:,:,1] = dPinflengthscale 
+        dP0 = dPinf.copy()
        
-        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

 class sde_Matern52(Matern52):
    """
@ -106,7 +106,7 @@ class sde_Matern52(Matern52):
        H = np.array(((1,0,0),))        
        
        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
-        
+        P0 = Pinf.copy()
        # Allocate space for the derivatives         
        dF = np.empty((3,3,2))        
        dQc = np.empty((1,1,2))        
@ -130,75 +130,6 @@ class sde_Matern52(Matern52):
        dQc[:,:,1] = dQclengthscale        
        dPinf[:,:,0] = dPinf_variance
        dPinf[:,:,1] = dPinf_lengthscale
+        dP0 = dPinf.copy()
        
-#        % Derivative of F w.r.t. parameter magnSigma2
-#    dFmagnSigma2    =  [0,  0,  0;
-#                        0,  0,  0;
-#                        0,  0,  0];
-#    
-#    % Derivative of F w.r.t parameter lengthScale
-#    dFlengthScale   =  [0,                          0,                  0;
-#                        0,                          0,                  0;
-#                        15*sqrt(5)/lengthScale^4,    30/lengthScale^3,   3*sqrt(5)/lengthScale^2];
-#    
-#    % Derivative of Qc w.r.t. parameter magnSigma2
-#    dQcmagnSigma2   =   400*sqrt(5)/3/lengthScale^5;
-#    
-#    % Derivative of Qc w.r.t. parameter lengthScale
-#    dQclengthScale  =   -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
-#    
-#    % Derivative of Pinf w.r.t. parameter magnSigma2    
-#    dPinfmagnSigma2 = Pinf/magnSigma2;
-#    
-#    % Derivative of Pinf w.r.t. parameter lengthScale
-#    kappa2 = -2*kappa/lengthScale;
-#    dPinflengthScale = [0,          0,       -kappa2;
-#                        0,          kappa2,  0;
-#                        -kappa2,    0,       -100*magnSigma2/lengthScale^5];
-#  
-#    % Stack all derivatives
-#    dF = zeros(3,3,2);  
-#    dQc = zeros(1,1,2); 
-#    dPinf = zeros(3,3,2);
-#  
-#    dF(:,:,1) = dFmagnSigma2;
-#    dF(:,:,2) = dFlengthScale;
-#    dQc(:,:,1) = dQcmagnSigma2;
-#    dQc(:,:,2) = dQclengthScale;
-#    dPinf(:,:,1) = dPinfmagnSigma2;
-#    dPinf(:,:,2) = dPinflengthScale; 
-  
-#        % Derived constants
-#          lambda = sqrt(5)/lengthScale;
-#        
-#          % Feedback matrix
-#          F = [ 0,          1,          0;
-#                0,          0,          1;
-#               -lambda^3, -3*lambda^2, -3*lambda];
-#        
-#          % Noise effect matrix
-#          L = [0; 0; 1];
-#        
-#          % Spectral density
-#          Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
-#        
-#          % Observation model
-#          H = [1, 0, 0];
-  
-  
-#        %% Stationary covariance
-#          
-#          % Calculate Pinf only if requested
-#          if nargout > 4,
-#              
-#            % Derived constant
-#            kappa = 5/3*magnSigma2/lengthScale^2;
-#            
-#            % Stationary covariance
-#            Pinf = [magnSigma2, 0,      -kappa;
-#                    0,          kappa,  0;
-#                    -kappa,     0,      25*magnSigma2/lengthScale^4];
-#                
-#          end
-        
-        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)  
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ b/GPy/kern/_src/sde_standard_periodic.py
@ -75,7 +75,7 @@ class sde_StdPeriodic(StdPeriodic):
        Qc   = np.zeros((2*(N+1), 2*(N+1)))
        P_inf = np.kron(np.diag(q2),np.eye(2))
        H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
-        
+        P0 = P_inf.copy()
        
        # Derivatives
        dF = np.empty((F.shape[0], F.shape[1], 3))
@ -96,9 +96,9 @@ class sde_StdPeriodic(StdPeriodic):
        dF[:,:,2] = np.zeros(F.shape)
        dQc[:,:,2] = np.zeros(Qc.shape)
        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
+        dP0 = dP_inf.copy()

-
-        return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
+        return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
        
        
        
--- a/GPy/kern/_src/sde_static.py
+++ b/GPy/kern/_src/sde_static.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Classes in this module enhance Matern covariance functions with the
+Classes in this module enhance Static covariance functions with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .static import White
@ -14,33 +14,7 @@ class sde_White(White):
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
    
-    Linear kernel:
-
-    .. math::
-
-       k(x,y) = \alpha
-
-    """
-
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        # Arno, insert your code here
-
-        # Params to use:
-        # self.variance
-
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
-
-class sde_Bias(Bias):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Linear kernel:
+    White kernel:

    .. math::

@ -48,14 +22,80 @@ class sde_Bias(Bias):

    """
    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
        
-        # Arno, insert your code here
+        variance = float(self.variance.values) 
        
-        # Params to use:
-        # self.variance
+        F = np.array( ((-np.inf,),) )
+        L = np.array( ((1.0,),)  )
+        Qc = np.array( ((variance,),)  )
+        H = np.array( ((1.0,),) )
        
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        Pinf   = np.array( ((variance,),)  )
+        P0 = Pinf.copy()     
+        
+        dF = np.zeros((1,1,1))
+        dQc = np.zeros((1,1,1))
+        dQc[:,:,0]    = np.array( ((1.0,),) )
+        
+        dPinf = np.zeros((1,1,1))
+        dPinf[:,:,0] = np.array( ((1.0,),) )
+        dP0 = dPinf.copy()
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
+
+
+class sde_Bias(Bias):
+    """
+    
+    This class provides extra functionality to transform this covariance
+    function into SDE form.
+    
+    Bias kernel:
+
+    .. math::
+
+       k(x,y) = \alpha
+
+    """
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        
+    def sde(self): 
+        """ 
+        Return the state space representation of the covariance. 
+        """ 
+        variance = float(self.variance.values) 
+        
+        F = np.array( ((0.0,),))
+        L = np.array( ((1.0,),))
+        Qc = np.zeros((1,1))
+        H = np.array( ((1.0,),))
+        
+        Pinf   = np.zeros((1,1))
+        P0 = np.array( ((variance,),) )      
+        
+        dF = np.zeros((1,1,1))
+        dQc    = np.zeros((1,1,1))
+        
+        dPinf = np.zeros((1,1,1))
+        dP0 = np.zeros((1,1,1))
+        dP0[:,:,0] = np.array( ((1.0,),) )
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_stationary.py
+++ b/GPy/kern/_src/sde_stationary.py
@ -8,6 +8,7 @@ from .stationary import Exponential
 from .stationary import RatQuad

 import numpy as np
+import scipy as sp

 class sde_RBF(RBF):
    """
@ -22,20 +23,87 @@ class sde_RBF(RBF):
        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }

    """
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.lengthscale.gradient = gradients[1]

    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
        
-        # Arno, insert your code here
+        N = 10 # approximation order (number of terms in the exponential series expansion)
+        roots_rounding_decimals = 6
        
-        # Params to use:
+        fn = np.math.factorial(N)        
        
-        # self.lengthscale
-        # self.variance
+        kappa = 1.0/2.0/self.lengthscale**2
       
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
+       
+        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
+        
+        for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
+            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n 
+        
+        pp = sp.poly1d(pp)
+        roots = sp.roots(pp)        
+        
+        neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
+        aa = sp.poly1d(neg_real_part_roots, r=True).coeffs        
+        
+        F = np.diag(np.ones((N-1,)),1)
+        F[-1,:] = -aa[-1:0:-1]
+        
+        L= np.zeros((N,1))
+        L[N-1,0] = 1
+        
+        H = np.zeros((1,N))
+        H[0,0] = 1
+        
+        # Infinite covariance:
+        Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
+        
+        # Allocating space for derivatives        
+        dF    = np.empty([F.shape[0],F.shape[1],2])
+        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
+        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
+        
+        # Derivatives:
+        dFvariance = np.zeros(F.shape)
+        dFlengthscale = np.zeros(F.shape)
+        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
+
+        dQcvariance = Qc/self.variance
+        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))         
+        
+        dPinf_variance = Pinf/self.variance
+        
+        lp = Pinf.shape[0]
+        coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
+        coeff[np.mod(coeff,2) != 0] = 0
+        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
+        
+        dF[:,:,0]    = dFvariance 
+        dF[:,:,1]    = dFlengthscale 
+        dQc[:,:,0]   = dQcvariance 
+        dQc[:,:,1]   = dQclengthscale 
+        dPinf[:,:,0] = dPinf_variance 
+        dPinf[:,:,1] = dPinf_lengthscale
+        
+        # Benefits of this are unjustified
+        #import GPy.models.state_space_main as ssm
+        #(F, L, Qc, H, Pinf, dF, dQc, dPinf,T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        
+        P0 = Pinf.copy()
+        dP0 = dPinf.copy()
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

 class sde_Exponential(Exponential):
    """
@ -51,27 +119,45 @@ class sde_Exponential(Exponential):

    """
    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.lengthscale.gradient = gradients[1]
+        
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
-        F  = np.array([[-1/self.lengthscale]]) 
-        L  = np.array([[1]]) 
-        Qc = np.array([[2*self.variance/self.lengthscale]]) 
-        H = np.array([[1]]) 
-        Pinf = np.array([[self.variance]]) 
-        # TODO: return the derivatives as well 
+        variance = float(self.variance.values)
+        lengthscale = float(self.lengthscale)        
        
-        return (F, L, Qc, H, Pinf)
+        F  = np.array(((-1.0/lengthscale,),))
+        L  = np.array(((1.0,),)) 
+        Qc = np.array( ((2.0*variance/lengthscale,),) ) 
+        H = np.array(((1,),)) 
+        Pinf = np.array(((variance,),)) 
+        P0 = Pinf.copy()        
        
-        # Arno, insert your code here
+        dF = np.zeros((1,1,2));  
+        dQc = np.zeros((1,1,2)); 
+        dPinf = np.zeros((1,1,2));
        
-        # Params to use:
+        dF[:,:,0] = 0.0        
+        dF[:,:,1] = 1.0/lengthscale**2
        
-        # self.lengthscale
-        # self.variance
+        dQc[:,:,0] = 2.0/lengthscale       
+        dQc[:,:,1] = -2.0*variance/lengthscale**2
        
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf) 
+        dPinf[:,:,0] = 1.0
+        dPinf[:,:,1] = 0.0
+        
+        dP0 = dPinf.copy()        
+
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
        
 class sde_RatQuad(RatQuad):
    """
@ -92,7 +178,7 @@ class sde_RatQuad(RatQuad):
        Return the state space representation of the covariance. 
        """ 
        
-        # Arno, insert your code here
+        assert False, 'Not Implemented'
        
        # Params to use:

--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@ -290,22 +290,25 @@ class Add(CombinationKernel):
        Qc    = None
        H     = None
        Pinf  = None
+        P0    = None
        dF    = None
        dQc   = None
        dPinf = None
+        dP0   = None
        n = 0
        nq = 0
        nd = 0

         # Assign models
        for p in self.parts:
-            (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
+            (Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
            F = la.block_diag(F,Ft) if (F is not None) else Ft
            L = la.block_diag(L,Lt) if (L is not None) else Lt
            Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
            H = np.hstack((H,Ht)) if (H is not None) else Ht
             
            Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
+            P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
            
            if dF is not None:
                dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
@ -328,6 +331,13 @@ class Add(CombinationKernel):
            else:
                dPinf = dPinft
                
+            if dP0 is not None:
+                dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
+                        'constant', constant_values=0)
+                dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
+            else:
+                dP0 = dP0t
+                
            n += Ft.shape[0]
            nq += Qct.shape[0]
            nd += dFt.shape[2]
@ -337,8 +347,10 @@ class Add(CombinationKernel):
        assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
        assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
+        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"        
        assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
        assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
        assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
+        assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
        
-        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@ -126,13 +126,15 @@ class Prod(CombinationKernel):
        Qc     = np.array((1,), ndmin=2)
        H      = np.array((1,), ndmin=2)
        Pinf   = np.array((1,), ndmin=2)
+        P0   = np.array((1,), ndmin=2)
        dF     = None
        dQc    = None
        dPinf  = None
+        dP0  = None
        
         # Assign models
        for p in self.parts:
-            (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
+            (Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
            
            # check derivative dimensions ->
            number_of_parameters = len(p.param_array)            
@ -149,14 +151,16 @@ class Prod(CombinationKernel):
            dF    = dkron(F,dF,Ft,dFt,'sum')
            dQc   = dkron(Qc,dQc,Qct,dQct,'prod')
            dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
+            dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
            
            F    = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
            L    = np.kron(L,Lt)
            Qc   = np.kron(Qc,Qct)
            Pinf = np.kron(Pinf,P_inft)
+            P0 = np.kron(P0,P0t)  # compose from each part's P0 (not Pinf), consistent with dP0 = dkron(P0,dP0,P0t,dP0t) above
            H    = np.kron(H,Ht)
            
-        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)

 def dkron(A,dA,B,dB, operation='prod'):
    """
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
--- a/GPy/models/state_space_new.py
+++ b/GPy/models/state_space_new.py
@ -16,6 +16,7 @@

 import numpy as np
 from scipy import linalg
+from scipy import stats
 from ..core import Model
 from .. import kern
 from GPy.plotting.matplot_dep.models_plots import gpplot
@ -26,17 +27,18 @@ from GPy.core.parameterization.param import Param

 import GPy
 from .. import likelihoods
-import GPy.models.state_space_main as ssm
-#import state_space_main as ssm
-reload(ssm)
-print ssm.__file__
+
+from . import state_space_main as ssm

 class StateSpace(Model):
    def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
        super(StateSpace, self).__init__(name=name)
        self.num_data, input_dim = X.shape
        assert input_dim==1, "State space methods for time only"
-        num_data_Y, self.output_dim = Y.shape
+        if len(Y.shape) ==2: # TODO make this nice
+            num_data_Y, self.output_dim = Y.shape
+        elif len(Y.shape) ==3:
+            num_data_Y, self.output_dim, ts_number = Y.shape
        assert num_data_Y == self.num_data, "X and Y data don't match"
        assert self.output_dim == 1, "State space methods for single outputs only"

@ -68,7 +70,7 @@ class StateSpace(Model):
        """
        
        # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde()
+        (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
        
        # necessary parameters
        measurement_dim = self.output_dim
@ -78,17 +80,19 @@ class StateSpace(Model):
        dF    = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
        dQc   = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
        dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
+        dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
        
        # Assign the values for the kernel function
        dF[:,:,:-1] = dFt
        dQc[:,:,:-1] = dQct
        dP_inf[:,:,:-1] = dP_inft
+        dP0[:,:,:-1] = dP0t
        
        # The sigma2 derivative
        dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
        dR[:,:,-1] = np.eye(measurement_dim)

-
+        #(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
        # Use the Kalman filter to evaluate the likelihood
        
        grad_calc_params = {}
@ -96,26 +100,53 @@ class StateSpace(Model):
        grad_calc_params['dF'] = dF
        grad_calc_params['dQc'] = dQc
        grad_calc_params['dR'] = dR
+        grad_calc_params['dP_init'] = dP0
        
        (filter_means, filter_covs, log_likelihood, 
-         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
-                                      P_init=None, calc_log_likelihood=True, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
+                                      float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
+                                      P_init=P0, calc_log_likelihood=True, 
                                      calc_grad_log_likelihood=True, 
                                      grad_params_no=grad_params_no, 
                                      grad_calc_params=grad_calc_params)
        
-        self._log_marginal_likelihood = log_likelihood
-        #gradients  = self.compute_gradients()
-        self.likelihood.update_gradients(grad_log_likelihood[-1,0])
+        grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
+        grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
+        self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
+        self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
        
-        self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
+        self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
        
    def log_likelihood(self):
        return self._log_marginal_likelihood

-    def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
+    def _raw_predict(self, Xnew, Ynew=None, filteronly=False):
        """
-        Inner function. It is called only from inside this class
+        Performs the actual prediction for new X points.
+        Inner function. It is called only from inside this class.
+        
+        Input:
+        ---------------------
+        
+        Xnew: vector or (n_points,1) matrix
+            New time points where to evaluate predictions.
+            
+        Ynew: (n_train_points, ts_no) matrix
+            This matrix can substitute for the original training points (in
+            order to use only the parameters of the model).
+            
+        filteronly: bool
+            Use only Kalman Filter for prediction. In this case the output does
+            not coincide with corresponding Gaussian process.
+            
+        Output:
+        --------------------
+        
+        m: vector
+            Mean prediction
+        
+        V: vector
+            Variance in every point
        """
        
        # Set defaults
@ -128,41 +159,44 @@ class StateSpace(Model):

        # Sort the matrix (save the order)
        _, return_index, return_inverse = np.unique(X,True,True)
-        X = X[return_index]
+        X = X[return_index] # TODO they are not used
        Y = Y[return_index]

        # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
+        (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
        state_dim = F.shape[0]        
        
+        #import pdb; pdb.set_trace()
+        #Y = self.Y[:, 0,0]
        # Run the Kalman filter
-        (M, P, tmp_log_likelihood, 
-         tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
-                                      P_init=None, calc_log_likelihood=False, 
+        #import pdb; pdb.set_trace()
+        (M, P, log_likelihood, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
+                                      F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
+                                      P_init=P0, calc_log_likelihood=False, 
                                      calc_grad_log_likelihood=False)                              
-                                      
        # Run the Rauch-Tung-Striebel smoother
        if not filteronly:
            (M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P, 
                                AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
        
        # remove initial values        
-        M = M[:,1:]
-        P = P[:,:,1:]        
+        M = M[1:,:]
+        P = P[1:,:,:]        
        
        # Put the data back in the original order
-        M = M[:,return_inverse]
-        P = P[:,:,return_inverse]
+        M = M[return_inverse,:]
+        P = P[return_inverse,:,:]

        # Only return the values for Xnew
-        M = M[:,self.num_data:]
-        P = P[:,:,self.num_data:]
+        M = M[self.num_data:,:]
+        P = P[self.num_data:,:,:]

        # Calculate the mean and variance
-        m = H.dot(M).T
-        V = np.tensordot(H[0],P,(0,0))
-        V = np.tensordot(V,H[0],(0,0))
-        V = V[:,None]
+        m = np.dot(M,H.T)
+        V = np.einsum('ij,ajk,kl', H, P, H.T)
+        
+        V.shape = (V.shape[0], V.shape[1]) # remove the third dimension

        # Return the posterior of the state
        return (m, V)
@ -170,10 +204,10 @@ class StateSpace(Model):
    def predict(self, Xnew, filteronly=False):

        # Run the Kalman filter to get the state
-        (m, V) = self._predict_raw(Xnew,filteronly=filteronly)
+        (m, V) = self._raw_predict(Xnew,filteronly=filteronly)

        # Add the noise variance to the state variance
-        V += self.sigma2
+        V += float(self.Gaussian_noise.variance)

        # Lower and upper bounds
        lower = m - 2*np.sqrt(V)
@ -182,142 +216,148 @@ class StateSpace(Model):
        # Return mean and variance
        return (m, V, lower, upper)
        
-    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
-            ax=None, resolution=None, plot_raw=False, plot_filter=False,
-            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
-
-        # Deal with optional parameters
-        if ax is None:
-            fig = pb.figure(num=fignum)
-            ax = fig.add_subplot(111)
-
-        # Define the frame on which to plot
-        resolution = resolution or 200
-        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-
-        # Make a prediction on the frame and plot it
-        if plot_raw:
-            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
-            lower = m - 2*np.sqrt(v)
-            upper = m + 2*np.sqrt(v)
-            Y = self.Y
-        else:
-            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
-            Y = self.Y
-
-        # Plot the values
-        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
-        ax.plot(self.X, self.Y, 'kx', mew=1.5)
-
-        # Optionally plot some samples
-        if samples:
-            if plot_raw:
-                Ysim = self.posterior_samples_f(Xgrid, samples)
-            else:
-                Ysim = self.posterior_samples(Xgrid, samples)
-            for yi in Ysim.T:
-                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
-
-        # Set the limits of the plot to some sensible values
-        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
-        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-        ax.set_xlim(xmin, xmax)
-        ax.set_ylim(ymin, ymax)
-
-    def prior_samples_f(self,X,size=10):
-
-        # Sort the matrix (save the order)
-        (_, return_index, return_inverse) = np.unique(X,True,True)
-        X = X[return_index]
-
-        # Get the model matrices from the kernel
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
-
-        # Allocate space for results
-        Y = np.empty((size,X.shape[0]))
-
-        # Simulate random draws
-        #for j in range(0,size):
-        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
-        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
-
-        # Only observations
-        Y = np.tensordot(H[0],Y,(0,0))
-
-        # Reorder simulated values
-        Y = Y[:,return_inverse]
-
-        # Return trajectory
-        return Y.T
-
-    def posterior_samples_f(self,X,size=10):
-
-        # Sort the matrix (save the order)
-        (_, return_index, return_inverse) = np.unique(X,True,True)
-        X = X[return_index]
-
-        # Get the model matrices from the kernel
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
-
-        # Run smoother on original data
-        (m,V) = self.predict_raw(X)
-
-        # Simulate random draws from the GP prior
-        y = self.prior_samples_f(np.vstack((self.X, X)),size)
-
-        # Allocate space for sample trajectories
-        Y = np.empty((size,X.shape[0]))
-
-        # Run the RTS smoother on each of these values
-        for j in range(0,size):
-            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
-            (m2,V2) = self.predict_raw(X,Ynew=yobs)
-            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
-
-        # Reorder simulated values
-        Y = Y[:,return_inverse]
-
-        # Return posterior sample trajectories
-        return Y.T
-
-    def posterior_samples(self, X, size=10):
-
-        # Make samples of f
-        Y = self.posterior_samples_f(X,size)
-
-        # Add noise
-        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
-
-        # Return trajectory
-        return Y
+    def predict_quantiles(self, Xnew, quantiles=(2.5, 97.5)):
+        """Return a list with, for each percent level in `quantiles`, the Gaussian quantile of the noisy output at Xnew."""
+        mu, var = self._raw_predict(Xnew)  # mean and variance before the observation noise variance is added
+        return  [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
        
        
-    def simulate(self,F,L,Qc,Pinf,X,size=1):
-        # Simulate a trajectory using the state space model
-
-        # Allocate space for results
-        f = np.zeros((F.shape[0],size,X.shape[1]))
-
-        # Initial state
-        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
-
-        # Time step lengths
-        dt = np.empty(X.shape)
-        dt[:,0] = X[:,1]-X[:,0]
-        dt[:,1:] = np.diff(X)
-
-        # Solve the LTI SDE for these time steps
-        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
-
-        # Sweep through remaining time points
-        for k in range(1,X.shape[1]):
-
-            # Form discrete-time model
-            A = As[:,:,index[1-k]]
-            Q = Qs[:,:,index[1-k]]
-
-            # Draw the state
-            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
-
-        # Return values
-        return f
+#    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
+#            ax=None, resolution=None, plot_raw=False, plot_filter=False,
+#            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+#
+#        # Deal with optional parameters
+#        if ax is None:
+#            fig = pb.figure(num=fignum)
+#            ax = fig.add_subplot(111)
+#
+#        # Define the frame on which to plot
+#        resolution = resolution or 200
+#        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+#
+#        # Make a prediction on the frame and plot it
+#        if plot_raw:
+#            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
+#            lower = m - 2*np.sqrt(v)
+#            upper = m + 2*np.sqrt(v)
+#            Y = self.Y
+#        else:
+#            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
+#            Y = self.Y
+#
+#        # Plot the values
+#        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
+#        ax.plot(self.X, self.Y, 'kx', mew=1.5)
+#
+#        # Optionally plot some samples
+#        if samples:
+#            if plot_raw:
+#                Ysim = self.posterior_samples_f(Xgrid, samples)
+#            else:
+#                Ysim = self.posterior_samples(Xgrid, samples)
+#            for yi in Ysim.T:
+#                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
+#
+#        # Set the limits of the plot to some sensible values
+#        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
+#        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+#        ax.set_xlim(xmin, xmax)
+#        ax.set_ylim(ymin, ymax)
+#
+#    def prior_samples_f(self,X,size=10):
+#
+#        # Sort the matrix (save the order)
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        X = X[return_index]
+#
+#        # Get the model matrices from the kernel
+#        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#
+#        # Allocate space for results
+#        Y = np.empty((size,X.shape[0]))
+#
+#        # Simulate random draws
+#        #for j in range(0,size):
+#        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
+#        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
+#
+#        # Only observations
+#        Y = np.tensordot(H[0],Y,(0,0))
+#
+#        # Reorder simulated values
+#        Y = Y[:,return_inverse]
+#
+#        # Return trajectory
+#        return Y.T
+#
+#    def posterior_samples_f(self,X,size=10):
+#
+#        # Sort the matrix (save the order)
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        X = X[return_index]
+#
+#        # Get the model matrices from the kernel
+#        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#
+#        # Run smoother on original data
+#        (m,V) = self.predict_raw(X)
+#
+#        # Simulate random draws from the GP prior
+#        y = self.prior_samples_f(np.vstack((self.X, X)),size)
+#
+#        # Allocate space for sample trajectories
+#        Y = np.empty((size,X.shape[0]))
+#
+#        # Run the RTS smoother on each of these values
+#        for j in range(0,size):
+#            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
+#            (m2,V2) = self.predict_raw(X,Ynew=yobs)
+#            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
+#
+#        # Reorder simulated values
+#        Y = Y[:,return_inverse]
+#
+#        # Return posterior sample trajectories
+#        return Y.T
+#
+#    def posterior_samples(self, X, size=10):
+#
+#        # Make samples of f
+#        Y = self.posterior_samples_f(X,size)
+#
+#        # Add noise
+#        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
+#
+#        # Return trajectory
+#        return Y
+#        
+#        
+#    def simulate(self,F,L,Qc,Pinf,X,size=1):
+#        # Simulate a trajectory using the state space model
+#
+#        # Allocate space for results
+#        f = np.zeros((F.shape[0],size,X.shape[1]))
+#
+#        # Initial state
+#        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
+#
+#        # Time step lengths
+#        dt = np.empty(X.shape)
+#        dt[:,0] = X[:,1]-X[:,0]
+#        dt[:,1:] = np.diff(X)
+#
+#        # Solve the LTI SDE for these time steps
+#        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
+#
+#        # Sweep through remaining time points
+#        for k in range(1,X.shape[1]):
+#
+#            # Form discrete-time model
+#            A = As[:,:,index[1-k]]
+#            Q = Qs[:,:,index[1-k]]
+#
+#            # Draw the state
+#            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
+#
+#        # Return values
+#        return f