diff --git a/GPy/kern/_src/sde_brownian.py b/GPy/kern/_src/sde_brownian.py
new file mode 100644
index 00000000..55950143
--- /dev/null
+++ b/GPy/kern/_src/sde_brownian.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+Classes in this module enhance Brownian motion covariance function with the
+Stochastic Differential Equation (SDE) functionality.
+"""
+
+from .brownian import Brownian
+
+import numpy as np
+
+class sde_Brownian(Brownian):
+    """
+    
+    Class provide extra functionality to transfer this covariance function into
+    SDE form.
+    
+    Linear kernel:
+
+    .. math::
+
+       k(x,y) = \sigma^2 min(x,y)
+
+    """
+    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        
+    def sde(self): 
+        """ 
+        Return the state space representation of the covariance. 
+        """ 
+        
+        variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression
+        
+        F = np.array( ((0,1.0),(0,0) ))
+        L = np.array( ((1.0,),(0,)) )
+        Qc = np.array( ((variance,),) )
+        H = np.array( ((1.0,0),) )
+        
+        Pinf   = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
+        #P0 = Pinf.copy() 
+        P0 = np.zeros((2,2))   
+        #Pinf   = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
+        dF = np.zeros((2,2,1))
+        dQc    = np.ones( (1,1,1) )
+        
+        dPinf = np.zeros((2,2,1))
+        dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
+        #dP0 = dPinf.copy() 
+        dP0 = np.zeros((2,2,1))
+  
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
diff --git a/GPy/kern/_src/sde_linear.py b/GPy/kern/_src/sde_linear.py
index 48c4a169..031f0f5f 100644
--- a/GPy/kern/_src/sde_linear.py
+++ b/GPy/kern/_src/sde_linear.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Classes in this module enhance Matern covariance functions with the
+Classes in this module enhance Linear covariance function with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .linear import Linear
@@ -20,16 +20,45 @@ class sde_Linear(Linear):
        k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
 
     """
-
+    def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
+        """
+        Modify the init method, because one extra parameter is required. X - points
+        on the X axis.
+        """
+        
+        super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
+        
+        self.t0 = np.min(X)
+        
+    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variances.gradient = gradients[0]
+        
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
         """ 
         
-        # Arno, insert your code here
-
-        # Params to use:
-
-        # self.variances
-
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
+        t0 = float(self.t0)
+        
+        F = np.array( ((0,1.0),(0,0) ))
+        L = np.array( ((0,),(1.0,)) )
+        Qc = np.zeros((1,1))
+        H = np.array( ((1.0,0),) )
+        
+        Pinf   = np.zeros((2,2))
+        P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance        
+        dF = np.zeros((2,2,1))
+        dQc    = np.zeros( (1,1,1) )
+        
+        dPinf = np.zeros((2,2,1))
+        dP0 = np.zeros((2,2,1))
+        dP0[:,:,0]  = P0 / variance
+  
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
diff --git a/GPy/kern/_src/sde_matern.py b/GPy/kern/_src/sde_matern.py
index 0016c5b8..102a9200 100644
--- a/GPy/kern/_src/sde_matern.py
+++ b/GPy/kern/_src/sde_matern.py
@@ -38,25 +38,24 @@ class sde_Matern32(Matern32):
         lengthscale = float(self.lengthscale.values)
         
         foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array([[0, 1], [-foo**2, -2*foo]]) 
-        L    = np.array([[0], [1]]) 
-        Qc   = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]]) 
-        H    = np.array([[1, 0]]) 
-        Pinf = np.array([[variance, 0],  
-        [0,              3.*variance/(lengthscale**2)]]) 
+        F    = np.array(((0, 1), (-foo**2, -2*foo))) 
+        L    = np.array(( (0,), (1,) ))
+        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) 
+        H    = np.array(((1, 0),)) 
+        Pinf = np.array(((variance, 0), (0, 3.*variance/(lengthscale**2))))
+        P0 = Pinf.copy()
+        
         # Allocate space for the derivatives 
         dF    = np.empty([F.shape[0],F.shape[1],2])
         dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
         dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
         # The partial derivatives 
-        dFvariance       = np.zeros([2,2]) 
-        dFlengthscale    = np.array([[0,0], 
-        [6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]]) 
-        dQcvariance      = np.array([12.*np.sqrt(3)/lengthscale**3]) 
-        dQclengthscale   = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance]) 
-        dPinfvariance    = np.array([[1,0],[0,3./lengthscale**2]]) 
-        dPinflengthscale = np.array([[0,0], 
-        [0,-6*variance/lengthscale**3]]) 
+        dFvariance       = np.zeros((2,2)) 
+        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) 
+        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3)) 
+        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) 
+        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2))) 
+        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) 
         # Combine the derivatives 
         dF[:,:,0]    = dFvariance 
         dF[:,:,1]    = dFlengthscale 
@@ -64,8 +63,9 @@ class sde_Matern32(Matern32):
         dQc[:,:,1]   = dQclengthscale 
         dPinf[:,:,0] = dPinfvariance 
         dPinf[:,:,1] = dPinflengthscale 
-
-        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        dP0 = dPinf.copy()
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 
 class sde_Matern52(Matern52):
     """
@@ -106,7 +106,7 @@ class sde_Matern52(Matern52):
         H = np.array(((1,0,0),))        
         
         Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
-        
+        P0 = Pinf.copy()
         # Allocate space for the derivatives         
         dF = np.empty((3,3,2))        
         dQc = np.empty((1,1,2))        
@@ -130,75 +130,6 @@ class sde_Matern52(Matern52):
         dQc[:,:,1] = dQclengthscale        
         dPinf[:,:,0] = dPinf_variance
         dPinf[:,:,1] = dPinf_lengthscale
+        dP0 = dPinf.copy()
         
-#        % Derivative of F w.r.t. parameter magnSigma2
-#    dFmagnSigma2    =  [0,  0,  0;
-#                        0,  0,  0;
-#                        0,  0,  0];
-#    
-#    % Derivative of F w.r.t parameter lengthScale
-#    dFlengthScale   =  [0,                          0,                  0;
-#                        0,                          0,                  0;
-#                        15*sqrt(5)/lengthScale^4,    30/lengthScale^3,   3*sqrt(5)/lengthScale^2];
-#    
-#    % Derivative of Qc w.r.t. parameter magnSigma2
-#    dQcmagnSigma2   =   400*sqrt(5)/3/lengthScale^5;
-#    
-#    % Derivative of Qc w.r.t. parameter lengthScale
-#    dQclengthScale  =   -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
-#    
-#    % Derivative of Pinf w.r.t. parameter magnSigma2    
-#    dPinfmagnSigma2 = Pinf/magnSigma2;
-#    
-#    % Derivative of Pinf w.r.t. parameter lengthScale
-#    kappa2 = -2*kappa/lengthScale;
-#    dPinflengthScale = [0,          0,       -kappa2;
-#                        0,          kappa2,  0;
-#                        -kappa2,    0,       -100*magnSigma2/lengthScale^5];
-#  
-#    % Stack all derivatives
-#    dF = zeros(3,3,2);  
-#    dQc = zeros(1,1,2); 
-#    dPinf = zeros(3,3,2);
-#  
-#    dF(:,:,1) = dFmagnSigma2;
-#    dF(:,:,2) = dFlengthScale;
-#    dQc(:,:,1) = dQcmagnSigma2;
-#    dQc(:,:,2) = dQclengthScale;
-#    dPinf(:,:,1) = dPinfmagnSigma2;
-#    dPinf(:,:,2) = dPinflengthScale; 
-  
-#        % Derived constants
-#          lambda = sqrt(5)/lengthScale;
-#        
-#          % Feedback matrix
-#          F = [ 0,          1,          0;
-#                0,          0,          1;
-#               -lambda^3, -3*lambda^2, -3*lambda];
-#        
-#          % Noise effect matrix
-#          L = [0; 0; 1];
-#        
-#          % Spectral density
-#          Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
-#        
-#          % Observation model
-#          H = [1, 0, 0];
-  
-  
-#        %% Stationary covariance
-#          
-#          % Calculate Pinf only if requested
-#          if nargout > 4,
-#              
-#            % Derived constant
-#            kappa = 5/3*magnSigma2/lengthScale^2;
-#            
-#            % Stationary covariance
-#            Pinf = [magnSigma2, 0,      -kappa;
-#                    0,          kappa,  0;
-#                    -kappa,     0,      25*magnSigma2/lengthScale^4];
-#                
-#          end
-        
-        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
\ No newline at end of file
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)  
\ No newline at end of file
diff --git a/GPy/kern/_src/sde_standard_periodic.py b/GPy/kern/_src/sde_standard_periodic.py
index 3829b816..3434cc74 100644
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ b/GPy/kern/_src/sde_standard_periodic.py
@@ -75,7 +75,7 @@ class sde_StdPeriodic(StdPeriodic):
         Qc   = np.zeros((2*(N+1), 2*(N+1)))
         P_inf = np.kron(np.diag(q2),np.eye(2))
         H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
-        
+        P0 = P_inf.copy()
         
         # Derivatives
         dF = np.empty((F.shape[0], F.shape[1], 3))
@@ -96,9 +96,9 @@ class sde_StdPeriodic(StdPeriodic):
         dF[:,:,2] = np.zeros(F.shape)
         dQc[:,:,2] = np.zeros(Qc.shape)
         dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
-        
+        dP0 = dP_inf.copy()
 
-        return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
+        return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
         
         
         
diff --git a/GPy/kern/_src/sde_static.py b/GPy/kern/_src/sde_static.py
index 8d50e5eb..ae8ed194 100644
--- a/GPy/kern/_src/sde_static.py
+++ b/GPy/kern/_src/sde_static.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Classes in this module enhance Matern covariance functions with the
+Classes in this module enhance Static covariance functions with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .static import White
@@ -14,25 +14,47 @@ class sde_White(White):
     Class provide extra functionality to transfer this covariance function into
     SDE forrm.
     
-    Linear kernel:
+    White kernel:
 
     .. math::
 
-       k(x,y) = \alpha
+       k(x,y) = \alpha*\delta(x-y)
 
     """
-
+    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
         """ 
         
-        # Arno, insert your code here
+        variance = float(self.variance.values) 
+        
+        F = np.array( ((-np.inf,),) )
+        L = np.array( ((1.0,),)  )
+        Qc = np.array( ((variance,),)  )
+        H = np.array( ((1.0,),) )
+        
+        Pinf   = np.array( ((variance,),)  )
+        P0 = Pinf.copy()     
+        
+        dF = np.zeros((1,1,1))
+        dQc = np.zeros((1,1,1))
+        dQc[:,:,0]    = np.array( ((1.0,),) )
+        
+        dPinf = np.zeros((1,1,1))
+        dPinf[:,:,0] = np.array( ((1.0,),) )
+        dP0 = dPinf.copy()
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 
-        # Params to use:
-        # self.variance
-
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
 
 class sde_Bias(Bias):
     """
@@ -40,22 +62,40 @@ class sde_Bias(Bias):
     Class provide extra functionality to transfer this covariance function into
     SDE forrm.
     
-    Linear kernel:
+    Bias kernel:
 
     .. math::
 
-       k(x,y) = \alpha*\delta(x-y)
+       k(x,y) = \alpha
 
     """
-
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
         """ 
+        variance = float(self.variance.values) 
         
-        # Arno, insert your code here
+        F = np.array( ((0.0,),))
+        L = np.array( ((1.0,),))
+        Qc = np.zeros((1,1))
+        H = np.array( ((1.0,),))
         
-        # Params to use:
-        # self.variance
+        Pinf   = np.zeros((1,1))
+        P0 = np.array( ((variance,),) )      
         
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
\ No newline at end of file
+        dF = np.zeros((1,1,1))
+        dQc    = np.zeros((1,1,1))
+        
+        dPinf = np.zeros((1,1,1))
+        dP0 = np.zeros((1,1,1))
+        dP0[:,:,0] = np.array( ((1.0,),) )
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
\ No newline at end of file
diff --git a/GPy/kern/_src/sde_stationary.py b/GPy/kern/_src/sde_stationary.py
index 0194ffb5..1dfa97a2 100644
--- a/GPy/kern/_src/sde_stationary.py
+++ b/GPy/kern/_src/sde_stationary.py
@@ -8,6 +8,7 @@ from .stationary import Exponential
 from .stationary import RatQuad
 
 import numpy as np
+import scipy as sp
 
 class sde_RBF(RBF):
     """
@@ -22,20 +23,87 @@ class sde_RBF(RBF):
         k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
 
     """
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.lengthscale.gradient = gradients[1]
 
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
         """ 
         
-        # Arno, insert your code here
-
-        # Params to use:
-
-        # self.lengthscale
-        # self.variance
+        N = 10# approximation order ( number of terms in exponent series expansion)
+        roots_rounding_decimals = 6
         
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        fn = np.math.factorial(N)        
+        
+        kappa = 1.0/2.0/self.lengthscale**2
+       
+        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
+       
+        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
+        
+        for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
+            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n 
+        
+        pp = sp.poly1d(pp)
+        roots = sp.roots(pp)        
+        
+        neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
+        aa = sp.poly1d(neg_real_part_roots, r=True).coeffs        
+        
+        F = np.diag(np.ones((N-1,)),1)
+        F[-1,:] = -aa[-1:0:-1]
+        
+        L= np.zeros((N,1))
+        L[N-1,0] = 1
+        
+        H = np.zeros((1,N))
+        H[0,0] = 1
+        
+        # Infinite covariance:
+        Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
+        
+        # Allocating space for derivatives        
+        dF    = np.empty([F.shape[0],F.shape[1],2])
+        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
+        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
+        
+        # Derivatives:
+        dFvariance = np.zeros(F.shape)
+        dFlengthscale = np.zeros(F.shape)
+        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
+
+        dQcvariance = Qc/self.variance
+        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))         
+        
+        dPinf_variance = Pinf/self.variance
+        
+        lp = Pinf.shape[0]
+        coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
+        coeff[np.mod(coeff,2) != 0] = 0
+        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
+        
+        dF[:,:,0]    = dFvariance 
+        dF[:,:,1]    = dFlengthscale 
+        dQc[:,:,0]   = dQcvariance 
+        dQc[:,:,1]   = dQclengthscale 
+        dPinf[:,:,0] = dPinf_variance 
+        dPinf[:,:,1] = dPinf_lengthscale
+        
+        # Benefits of this are unjustified
+        #import GPy.models.state_space_main as ssm
+        #(F, L, Qc, H, Pinf, dF, dQc, dPinf,T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        
+        P0 = Pinf.copy()
+        dP0 = dPinf.copy()
+        
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 
 class sde_Exponential(Exponential):
     """
@@ -50,29 +118,47 @@ class sde_Exponential(Exponential):
        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
 
     """
-
+    
+    def sde_update_gradient_full(self, gradients):
+        """
+        Update gradient in the order in which parameters are represented in the
+        kernel
+        """
+    
+        self.variance.gradient = gradients[0]
+        self.lengthscale.gradient = gradients[1]
+        
     def sde(self): 
         """ 
         Return the state space representation of the covariance. 
         """ 
-        F  = np.array([[-1/self.lengthscale]]) 
-        L  = np.array([[1]]) 
-        Qc = np.array([[2*self.variance/self.lengthscale]]) 
-        H = np.array([[1]]) 
-        Pinf = np.array([[self.variance]]) 
-        # TODO: return the derivatives as well 
+        variance = float(self.variance.values)
+        lengthscale = float(self.lengthscale)        
         
-        return (F, L, Qc, H, Pinf)
+        F  = np.array(((-1.0/lengthscale,),))
+        L  = np.array(((1.0,),)) 
+        Qc = np.array( ((2.0*variance/lengthscale,),) ) 
+        H = np.array(((1,),)) 
+        Pinf = np.array(((variance,),)) 
+        P0 = Pinf.copy()        
         
-        # Arno, insert your code here
+        dF = np.zeros((1,1,2));  
+        dQc = np.zeros((1,1,2)); 
+        dPinf = np.zeros((1,1,2));
         
-        # Params to use:
-
-        # self.lengthscale
-        # self.variance
-
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf) 
+        dF[:,:,0] = 0.0        
+        dF[:,:,1] = 1.0/lengthscale**2
+        
+        dQc[:,:,0] = 2.0/lengthscale       
+        dQc[:,:,1] = -2.0*variance/lengthscale**2
+        
+        dPinf[:,:,0] = 1.0
+        dPinf[:,:,1] = 0.0
+        
+        dP0 = dPinf.copy()        
 
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
+        
 class sde_RatQuad(RatQuad):
     """
     
@@ -92,7 +178,7 @@ class sde_RatQuad(RatQuad):
         Return the state space representation of the covariance. 
         """ 
         
-        # Arno, insert your code here
+        assert False, 'Not Implemented'
         
         # Params to use:
 
@@ -100,4 +186,4 @@ class sde_RatQuad(RatQuad):
         # self.variance
         #self.power
         
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
\ No newline at end of file
+        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py
index 46e86e13..ec09f157 100644
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@@ -290,23 +290,26 @@ class Add(CombinationKernel):
         Qc    = None
         H     = None
         Pinf  = None
+        P0    = None
         dF    = None
         dQc   = None
-        dPinf = None    
+        dPinf = None
+        dP0   = None
         n = 0
         nq = 0
         nd = 0
 
          # Assign models
         for p in self.parts:
-            (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
+            (Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
             F = la.block_diag(F,Ft) if (F is not None) else Ft
             L = la.block_diag(L,Lt) if (L is not None) else Lt
             Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
             H = np.hstack((H,Ht)) if (H is not None) else Ht
              
             Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
-             
+            P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
+            
             if dF is not None:
                 dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
                         'constant', constant_values=0)
@@ -327,7 +330,14 @@ class Add(CombinationKernel):
                 dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
             else:
                 dPinf = dPinft
-             
+                
+            if dP0 is not None:
+                dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
+                        'constant', constant_values=0)
+                dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
+            else:
+                dP0 = dP0t
+                
             n += Ft.shape[0]
             nq += Qct.shape[0]
             nd += dFt.shape[2]
@@ -336,9 +346,11 @@ class Add(CombinationKernel):
         assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
         assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
         assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
-        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"        
+        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
+        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"        
         assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
         assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
         assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
+        assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
         
-        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
diff --git a/GPy/kern/src/prod.py b/GPy/kern/src/prod.py
index 2b2588a0..674365ba 100644
--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@@ -126,13 +126,15 @@ class Prod(CombinationKernel):
         Qc     = np.array((1,), ndmin=2)
         H      = np.array((1,), ndmin=2)
         Pinf   = np.array((1,), ndmin=2)
+        P0   = np.array((1,), ndmin=2)
         dF     = None
         dQc    = None
         dPinf  = None
-          
+        dP0  = None
+        
          # Assign models
         for p in self.parts:
-            (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
+            (Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
             
             # check derivative dimensions ->
             number_of_parameters = len(p.param_array)            
@@ -149,14 +151,16 @@ class Prod(CombinationKernel):
             dF    = dkron(F,dF,Ft,dFt,'sum')
             dQc   = dkron(Qc,dQc,Qct,dQct,'prod')
             dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
-             
+            dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
+            
             F    = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
             L    = np.kron(L,Lt)
             Qc   = np.kron(Qc,Qct)
             Pinf = np.kron(Pinf,P_inft)
+            P0 = np.kron(P0,P_inft)
             H    = np.kron(H,Ht)
             
-        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
 
 def dkron(A,dA,B,dB, operation='prod'):
     """
diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py
index 6273423b..066a2aa3 100644
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@@ -12,6 +12,9 @@ import numpy as np
 import scipy as sp
 import scipy.linalg as linalg
 
+
+print_verbose = False
+
 class DescreteStateSpace(object):
     """
     This class implents state-space inference for linear and non-linear
@@ -36,176 +39,231 @@ class DescreteStateSpace(object):
     """
     
     @staticmethod
-    def reshape_input_data(shape):
+    def _reshape_input_data(shape):
         """
-        Function returns the column-wise shape for for an input shape.
+        Static function returns the column-wise shape for for an input shape.
         
-        Inputs:
+        Input:
         --------------
             shape: tuple
                 Shape of an input array, so that it is always a column.
+            
+        Output:
+        --------------
+            new_shape: tuple
+                New shape of the measurements array. Idea is that samples are 
+                along dimension 0.
             old_shape: tuple or None
                 If the shape has been modified, return old shape, otherwise None
         """
     
     
-        if (len(shape) > 2):
-            raise ValueError("Input array is not supposed to be 3 or more dimensional")            
+        if (len(shape) > 3):
+            raise ValueError("Input array is not supposed to be more than 3 dimensional")            
         elif len(shape) == 1:
             return ((shape[0],1), shape)
-        else: # len(shape) == 2
-            return ((shape[1],1), shape) if (shape[0] == 1) else (shape,None)
+        elif len(shape) == 2:
+            return ((shape[1],1), shape) if (shape[0] == 1) else (shape,None)  # convert to column vector
+        else: # len(shape) == 3
+            return (shape,None) # do nothing
             
-    
-#    def __init__(self, X, Y, kernel):
-#        """
-#        Inputs:
-#        --------------
-#        
-#            X: double array (N_samples,1)
-#                Time points of the observations
-#            Y: double array (N_samples,N_dim)
-#                Observations
-#        """        
-#        
-#        # If X is 1D array make it column (x.shape[0],1)
-#        X.shape, old_Y_shape  =  DescreteStateSpace.reshape_input_data(X.shape)         
-#        Y.shape, old_X_shape  =  DescreteStateSpace.reshape_input_data(Y.shape)  
-#        
-#        self.num_data, input_dim = X.shape
-#        assert input_dim==1, "State space methods for time only"
-#        num_data_Y, self.output_dim = Y.shape
-#        assert num_data_Y == self.num_data, "Number of samples in X and Y don't match"
-#        assert self.output_dim == 1, "State space methods for single outputs only"
-#
-#        # Make sure the observations are ordered in time
-#        sort_index = np.argsort(X[:,0])
-#        self.X = X[sort_index]
-#        self.Y = Y[sort_index]
-#
-#        # Noise variance ??? - TODO: figure out
-#        self.sigma2 = 1 
-#        
-#        self.kern = kernel
-#
-#        # Make sure all parameters are positive
-#        #self.ensure_default_constraints()
-#
-#        # Assert that the kernel is supported
-#        #assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
-
     @classmethod
     def kalman_filter(cls,p_A, p_Q, p_H, p_R, Y, index = None, m_init=None,
                       P_init=None, calc_log_likelihood=False, 
                       calc_grad_log_likelihood=False, grad_params_no=None,
                       grad_calc_params=None):
         """
-        This function implements the basic (raw) Kalman Filter algorithm
-        These notations are assumed:
+        This function implements the basic Kalman Filter algorithm
+        These notations for the State-Space model are assumed:
             x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
             y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
             
         Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))            
             
-        Currently, H_{k} and R_{k} are not supposed to change over time.
-        I. e. They are assumed to be constant. Later they may be done time
-        dependent by analogy with A_{k} and Q_{k-1}.
+        Current Features:
+        ----------------------------------------
+        1) The function generaly do not modify the passed parameters. If 
+        it happens then it is an error. There are several exeprions: scalars
+        can be modified into a matrix, in some rare cases shapes of 
+        the derivatives matrices may be changed, it is ignored for now.
+        
+        2) Copies of p_A, p_Q, index are created in memory to be used later
+        in smoother. References to copies are kept in "matrs_for_smoother"
+        return parameter.
+        
+        3) Function support "multiple time series mode" which means that exactly
+        the same State-Space model is used to filter several sets of measurements.
+        In this case third dimension of Y should include these state-space measurements
+        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
 
+        4) Calculation of Grad_log_likelihood is not supported if matrices A,Q,
+        H, or R changes overf time. (later may be changed)
 
+        5) Measurement may include missing values. In this case update step is
+        not done for this measurement. (later may be changed)
+        
         Input:            
         -----------------
         
         p_A: scalar, square matrix, 3D array
             A_{k} in the model. If matrix then A_{k} = A - constant.
-            If it is 3D array then A_{k} = p_A[:,:, index[k]]            
+            If it is 3D array then A_{k} = p_A[:,:, index[0,k]]            
             
         p_Q: scalar, square symmetric matrix, 3D array
             Q_{k-1} in the model. If matrix then Q_{k-1} = Q - constant.
-            If it is 3D array then Q_{k-1} = p_Q[:,:, index[k]]
-        p_H: scalar, matrix
-            H_{k} = p_H. Assumed to be constant.
-        
-        p_R: scalar, square symmetric matrix
-            H_{k} = p_H. Assumed to be constant.
+            If it is 3D array then Q_{k-1} = p_Q[:,:, index[1,k]]
             
-        Y: matrix or vector
+        p_H: scalar, matrix (measurement_dim, state_dim) , 3D array
+            H_{k} in the model. If matrix then H_{k} = H - constant.
+            If it is 3D array then H_{k} = p_Q[:,:, index[2,k]]
+        
+        p_R: scalar, square symmetric matrix, 3D array
+            R_{k} in the model. If matrix then R_{k} = R - constant.
+            If it is 3D array then R_{k} = p_R[:,:, index[3,k]]
+            
+        Y: matrix or vector or 3D array
             Data. If Y is matrix then samples are along 0-th dimension and
-            features along the 1-st. May have missing values.
+            features along the 1-st. If 3D array then third dimension 
+            correspond to "multiple time series mode".
             
         index: vector
-            Which indices (on 3-rd dimension) from arrays p_A and p_Q to use
+            Which indices (on 3-rd dimension) from arrays p_A, p_Q,p_H, p_R to use
             on every time step. If this parameter is None then it is assumed 
-            that p_A and p_Q are matrices and indices are not needed.
+            that p_A, p_Q, p_H, p_R do not change over time and indices are not needed.
+            index[0,:] - correspond to A, index[1,:] - correspond to Q
+            index[2,:] - correspond to H, index[3,:] - correspond to R.
+            If index.shape[0] == 1, it is assumed that indides for all matrices
+            are the same.
             
-        p_mean: vector
-            Initial distribution mean. If None it is assumed to be zero
+        m_init: vector or matrix
+            Initial distribution mean. If None it is assumed to be zero.
+            For "multiple time series mode" it is matrix, second dimension of
+            which correspond to different time series. In regular case ("one
+            time series mode") it is a vector.
             
         P_init: square symmetric matrix or scalar
             Initial covariance of the states. If the parameter is scalar
             then it is assumed that initial covariance matrix is unit matrix 
             multiplied by this scalar. If None the unit matrix is used instead.
+            "multiple time series mode" does not affect it, since it does not
+            affect anything related to state variaces.
             
         calc_log_likelihood: boolean
             Whether to calculate marginal likelihood of the state-space model.
             
         calc_grad_log_likelihood: boolean
             Whether to calculate gradient of the marginal likelihood 
-            of the state-space model. If true then the next parameter must 
+            of the state-space model. If true then "grad_calc_params" parameter must 
             provide the extra parameters for gradient calculation.
             
         grad_params_no: int
-            Number of parameters 
+            If previous parameter is true, then this parameters gives the 
+            total number of parameters in the gradient. 
+        
+        grad_calc_params: dictionary
+            Dictionary with derivatives of model matrices with respect 
+            to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init".
+            They can be None, in this case zero matrices (no dependence on parameters)
+            is assumed. If there is only one parameter then third dimension is
+            automatically added.
         
         Output:
         --------------
         
-        M: (state_dim, no_steps) matrix 
-            Filter estimates of the state means
+        M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
+            Filter estimates of the state means. In the extra step the initial
+            value is included. In the "multiple time series mode" third dimension
+            correspond to different timeseries.
             
-        P: (state_dim, state_dim, no_steps) 3D array
-            Filter estimates of the state covariances
+        P: (no_steps+1, state_dim, state_dim) 3D array
+            Filter estimates of the state covariances. In the extra step the initial
+            value is included.
         
-        log_likelihood: double
+        log_likelihood: double or (1, time_series_no) 3D array.
             If the parameter calc_log_likelihood was set to true, return
             logarithm of marginal likelihood of the state-space model. If
-            the parameter was false, return None
-            
+            the parameter was false, return None. In the "multiple time series mode" it is a vector
+            providing log_likelihood for each time series.
+        
+        grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix
+            If calc_grad_log_likelihood is true, return gradient of log likelihood
+            with respect to parameters. It returns it column wise, so in 
+            "multiple time series mode" gradients for each time series is in the 
+            corresponding column.
+        
+        matrs_for_smoother: dict
+            Dictionary with model functions for smoother. The intrinsic model
+            functions are computed in this functions and they are returned to
+            use in smoother for convenience. They are: 'p_a', 'p_f_A', 'p_f_Q'
+            The dictionary contains the same fields.
         """
     
         # Parameters checking ->
         # index        
         p_A = np.atleast_1d(p_A)
         p_Q = np.atleast_1d(p_Q)
-        if ((len(p_A.shape) >= 3) and (p_A.shape[2] != 1)) or \
-            ((len(p_Q.shape) >= 3) and (p_Q.shape[2] != 1)):
-            if index is None:
-                raise ValueError("A and Q can not change over time if indices are not given")
-            else:
-                A_or_Q_change_over_time = True
-        else:
-            index = np.zeros((Y.shape[0],) )
-            A_or_Q_change_over_time = False
-            
-        # p_A
-        (p_A, old_A_shape) = cls._check_A_matrix(p_A)
-
-        # p_Q
-        (p_Q,old_Q_shape) = cls._check_Q_matrix(p_Q)
-                    
-        # p_H
         p_H = np.atleast_1d(p_H)
-        (p_H,old_H_shape) = cls._check_H_matrix(p_H)
-        
-        # p_R
         p_R = np.atleast_1d(p_R)
-        (p_R,old_R_shape) = cls._check_R_matrix(p_R)
+        
+        # Reshape and check measurements:
+        Y.shape, old_Y_shape  = cls._reshape_input_data(Y.shape)
+        measurement_dim = Y.shape[1]
+        if len(Y.shape) == 2: 
+            time_series_no = 1 # regular case
+        elif len(Y.shape) == 3:
+            time_series_no = Y.shape[2] # multiple time series mode
+        
+        if ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) or\
+            ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) or\
+            ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\
+            ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)):
+            model_matrices_chage_with_time = True
+        else:
+            model_matrices_chage_with_time = False
+            
+        # Check index
+        old_index_shape = None
+        if index is None:
+            if (len(p_A.shape) == 3) or (len(p_Q.shape) == 3) or\
+                (len(p_H.shape) == 3) or (len(p_R.shape) == 3):
+                raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)")
+            else: # matrices do not change in time, so form dummy zero indices.
+                index = np.zeros((1,Y.shape[0]))
+        else:
+            if len(index.shape) == 1:
+                index.shape = (1,index.shape[0])
+                old_index_shape = (index.shape[0],)
+                
+            if (index.shape[1] != Y.shape[0]):   
+                raise ValueError("Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}")
+                
+        if (index.shape[0] == 1):         
+            A_time_var_index = 0; Q_time_var_index = 0 
+            H_time_var_index = 0; R_time_var_index = 0 
+        elif (index.shape[0] == 4):
+            A_time_var_index = 0; Q_time_var_index = 1 
+            H_time_var_index = 2; R_time_var_index = 3
+        else:
+            raise ValueError("First Dimension of index must be either 1 or 4.")
         
         state_dim = p_A.shape[0]
+        # Check and make right shape for model matrices. On exit they all are 3 dimensional. Last dimension
+        # correspond to change in time.
+        (p_A, old_A_shape) = cls._check_SS_matrix(p_A, state_dim, measurement_dim, which='A')
+        (p_Q, old_Q_shape) = cls._check_SS_matrix(p_Q, state_dim, measurement_dim, which='Q')
+        (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H')
+        (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R')
+        
         # m_init
         if m_init is None:
-            m_init = np.zeros((state_dim,1))
+            if (time_series_no == 1):
+                m_init = m_init = np.zeros((state_dim,1))
+            else:
+                # multiple time series mode.
+                m_init = np.zeros((state_dim, time_series_no))
         else:
-            m_init = np.atleast_2d(m_init).T
+            if (time_series_no == 1):
+                m_init = np.atleast_2d(m_init).T
         
         # P_init
         if P_init is None:
@@ -213,44 +271,44 @@ class DescreteStateSpace(object):
         elif not isinstance(P_init, collections.Iterable): #scalar
             P_init = P_init*np.eye(state_dim)
             
-        # Y
-        Y.shape, old_Y_shape  = cls.reshape_input_data(Y.shape)
-        if (Y.shape[0] != len(index)):
-            raise ValueError("Number of measurements must be equal the number of A_{k} and Q_{k}")
-                        
-        measurement_dim = Y.shape[1]
         # Functions to pass to the kalman_filter algorithm:
-        # PArameters:
+        # Parameters:
         # k - number of Kalman filter iteration
         # m - vector for calculating matrices. Required for EKF. Not used here.
+            
+        c_p_A = p_A.copy() # create a copy because this object is passed to the smoother
+        c_p_Q = p_A.copy() # create a copy because this object is passed to the smoother
+        c_index = index.copy() # create a copy because this object is passed to the smoother
+        
         fa = lambda k,m,A: np.dot(A, m)
-        f_A = lambda k,m,P: p_A[:,:, index[k]]
-        f_Q = lambda k: p_Q[:,:, index[k]]
+        f_A = lambda k,m,P: c_p_A[:,:, c_index[A_time_var_index, k]]
+        f_Q = lambda k: c_p_Q[:,:, c_index[Q_time_var_index, k]]
         fh = lambda k,m,H: np.dot(H, m)
-        f_H = lambda k,m,P: p_H
-        f_R = lambda k: p_R
+        f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]]
+        f_R = lambda k: p_R[:,:, index[R_time_var_index, k]]
         
         
         grad_calc_params_pass_further = None
         if calc_grad_log_likelihood:
-            if A_or_Q_change_over_time:
+            if model_matrices_chage_with_time:
                 raise ValueError("When computing likelihood gradient A and Q can not change over time.")    
             
             f_dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA')
-            
             f_dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ')
-            
             f_dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
-             
-            f_dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dR')
+            f_dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
             
             dm_init = grad_calc_params.get('dm_init')
             if dm_init is None:
-                dm_init = np.zeros( (state_dim,grad_params_no) )
-            
+                if (time_series_no == 1):
+                    dm_init = np.zeros((state_dim,grad_params_no))
+                else:
+                     # multiple time series mode. Keep grad_params always as a last dimension
+                    dm_init = np.zeros((state_dim, time_series_no, grad_params_no))
+                
             dP_init = grad_calc_params.get('dP_init')
             if dP_init is None:
-                dP_init = np.zeros( (state_dim,state_dim,grad_params_no) )
+                dP_init = np.zeros((state_dim,state_dim,grad_params_no))
             
             grad_calc_params_pass_further = {}
             grad_calc_params_pass_further['f_dA'] = f_dA
@@ -264,7 +322,14 @@ class DescreteStateSpace(object):
                           P_init, calc_log_likelihood=calc_log_likelihood, 
                           calc_grad_log_likelihood=calc_grad_log_likelihood, 
                           grad_calc_params=grad_calc_params_pass_further)
+                          
         # restore shapes so that input parameters are unchenged
+        if old_index_shape is not None:
+            index.shape = old_index_shape
+            
+        if old_Y_shape is not None:
+            Y.shape = old_Y_shape
+            
         if old_A_shape is not None:
             p_A.shape = old_A_shape
             
@@ -276,11 +341,14 @@ class DescreteStateSpace(object):
             
         if old_R_shape is not None:
             p_R.shape = old_R_shape
-            
-        if old_Y_shape is not None:
-            Y.shape = old_Y_shape
         # Return values
-        return (M, P,log_likelihood, grad_log_likelihood)
+            
+        matrs_for_smoother = {}
+        matrs_for_smoother['p_a'] = fa
+        matrs_for_smoother['p_f_A'] = f_A
+        matrs_for_smoother['p_f_Q'] = f_Q
+        
+        return (M, P,log_likelihood, grad_log_likelihood, matrs_for_smoother)
         
     @classmethod        
     def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None,
@@ -355,7 +423,7 @@ class DescreteStateSpace(object):
         """
         
         # Y
-        Y.shape, old_Y_shape  =  cls.reshape_input_data(Y.shape)
+        Y.shape, old_Y_shape  =  cls._reshape_input_data(Y.shape)
         
          # m_init
         if m_init is None:
@@ -449,12 +517,26 @@ class DescreteStateSpace(object):
         
         Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))            
         
+        Current Features:
+        ----------------------------------------
+        
+        1) Function support "multiple time series mode" which means that exactly
+        the same State-Space model is used to filter several sets of measurements.
+        In this case third dimension of Y should include these state-space measurements
+        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
+
+        2) Measurement may include missing values. In this case update step is
+        not done for this measurement. (later may be changed)
+        
         Input:            
         -----------------
+        state_dim: int
+            Demensionality of the states
+            
         p_a: function (k, x_{k-1}, A_{k}). Dynamic function.        
             k (iteration number), 
             x_{k-1} 
-            x_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
+            A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
             
         p_f_A: function (k, m, P) return Jacobian of dynamic function, it is
             passed into p_a.
@@ -480,15 +562,21 @@ class DescreteStateSpace(object):
             on iteration k.
             k (iteration number). 
             
-        Y: matrix or vector
+        Y: matrix or vector or 3D array
             Data. If Y is matrix then samples are along 0-th dimension and
-            features along the 1-st. May have missing values.
+            features along the 1-st. If 3D array then third dimension 
+            correspond to "multiple time series mode".
             
-        m_init: vector
-            Initial distribution mean. Must be not None
+        m_init: vector or matrix
+            Initial distribution mean. For "multiple time series mode" 
+            it is matrix, second dimension of which correspond to different
+            time series. In regular case ("one time series mode") it is a 
+            vector.
             
         P_init: matrix or scalar
             Initial covariance of the states. Must be not None
+            "multiple time series mode" does not affect it, since it does not
+            affect anything related to state variaces.
             
         calc_log_likelihood: boolean
             Whether to calculate marginal likelihood of the state-space model.
@@ -498,25 +586,42 @@ class DescreteStateSpace(object):
             of the state-space model. If true then the next parameter must 
             provide the extra parameters for gradient calculation.
         
-        
+        grad_calc_params: dictionary
+            Dictionary with derivatives of model matrices with respect 
+            to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init".
+            
         Output:
         --------------
         
-        M: (state_dim, no_steps) matrix 
-            Filter estimates of the state means
+        M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
+            Filter estimates of the state means. In the extra step the initial
+            value is included. In the "multiple time series mode" third dimension
+            correspond to different timeseries.
             
-        P: (state_dim, state_dim, no_steps) 3D array
-            Filter estimates of the state covariances
+        P: (no_steps+1, state_dim, state_dim) 3D array
+            Filter estimates of the state covariances. In the extra step the initial
+            value is included.
         
-        log_likelihood: double
+        log_likelihood: double or (1, time_series_no) 3D array.
             If the parameter calc_log_likelihood was set to true, return
             logarithm of marginal likelihood of the state-space model. If
-            the parameter was false, return None
+            the parameter was false, return None. In the "multiple time series mode" it is a vector
+            providing log_likelihood for each time series.
+        
+        grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix
+            If calc_grad_log_likelihood is true, return gradient of log likelihood
+            with respect to parameters. It returns it column wise, so in 
+            "multiple time series mode" gradients for each time series is in the 
+            corresponding column.
             
         """
             
         steps_no = Y.shape[0] # number of steps in the Kalman Filter
-        
+        if len(Y.shape) == 2: 
+            time_series_no = 1 # regular case
+        elif len(Y.shape) == 3:
+            time_series_no = Y.shape[2] # multiple time series mode
+            
         if calc_grad_log_likelihood:
             grad_calc_params_1 = (grad_calc_params['f_dA'], grad_calc_params['f_dQ'])
             grad_calc_params_2 = (grad_calc_params['f_dH'],grad_calc_params['f_dR'])
@@ -530,39 +635,39 @@ class DescreteStateSpace(object):
                     
         # Allocate space for results
         # Mean estimations. Initial values will be included
-        M = np.empty((state_dim,(steps_no+1)))
+        if (time_series_no == 1): # one time series mode
+            M = np.empty(((steps_no+1),state_dim))
+            M[0,:] = np.squeeze(m_init) # Initialize mean values
+        else:  # multiple time series mode
+            M = np.empty(((steps_no+1),state_dim,time_series_no))
+            M[0,:,:] = m_init # Initialize mean values
         # Variance estimations. Initial values will be included
-        P = np.empty((state_dim,state_dim,(steps_no+1)))
-
-        # Initialize
-        M[:,0] = m_init
-        P[:,:,0] = P_init
+        P = np.empty(((steps_no+1),state_dim,state_dim))
+        P[0,:,:] = P_init # Initialize initial covariance matrix
         
-        if calc_log_likelihood:
-            log_likelihood = 0
-        else:
-            log_likelihood = None
-            
-        if calc_grad_log_likelihood:
-            grad_log_likelihood = 0
-        else:
-            grad_log_likelihood = None
+        log_likelihood = 0 if calc_log_likelihood else None
+        grad_log_likelihood = 0 if calc_grad_log_likelihood else None
+        
+        #setting initial values for derivatives update
+        dm_upd = dm_init
+        dP_upd = dP_init
         # Main loop of the Kalman filter
         for k in range(0,steps_no):
             # In this loop index for new estimations is (k+1), old - (k)
-            # This happened because initial values are stored at 0-th index.                 
+            # This happened because initial values are stored at 0-th index.
+
+            if (time_series_no == 1): # single time series mode
+                k_measurment = Y[k,:].T # measurement as column
+            else: # multiple time series mode
+                k_measurment = Y[k,:,:]
             
-            if k == 0: #settinginitial values
-                dm_upd = dm_init
-                dP_upd = dP_init
-                
             m_pred, P_pred, dm_pred, dP_pred = \
-            cls._kalman_prediction_step(k, M[:,k] ,P[:,:,k], p_a, p_f_A, p_f_Q, 
+            cls._kalman_prediction_step(k, M[k,:] ,P[k,:,:], p_a, p_f_A, p_f_Q, 
                 calc_grad_log_likelihood=calc_grad_log_likelihood, 
                 p_dm = dm_upd, p_dP = dP_upd, grad_calc_params_1 = grad_calc_params_1)
             
             m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-            cls._kalman_update_step(k,  m_pred , P_pred, p_h, p_f_H, p_f_R, Y, 
+            cls._kalman_update_step(k,  m_pred , P_pred, p_h, p_f_H, p_f_R, k_measurment, 
                         calc_log_likelihood=calc_log_likelihood, 
                         calc_grad_log_likelihood=calc_grad_log_likelihood, 
                         p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = grad_calc_params_2)                
@@ -572,11 +677,16 @@ class DescreteStateSpace(object):
             
             if calc_grad_log_likelihood:
                 grad_log_likelihood += d_log_likelihood_update
-            
-            M[:,k+1] = m_upd
-            P[:,:,k+1] = P_upd
-        # Return values, get rid of initial values
-        #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood)
+                
+            if (time_series_no == 1): # single time series mode
+                M[k+1,:] = np.squeeze(m_upd)
+            else: # multiple time series mode
+                M[k+1,:,:] = m_upd # separate mean value for each time series
+                
+            P[k+1,:,:] = P_upd
+        
+        # !!!Print statistics! Print sizes of matrices
+        # !!!Print statistics! Print iteration time base on another boolean variable
         return (M, P, log_likelihood, grad_log_likelihood)
         
     @staticmethod    
@@ -591,7 +701,10 @@ class DescreteStateSpace(object):
                 number of measurements.
               
             p_m: matrix of size (state_dim, time_series_no)
-                Mean value from the previous step                
+                Mean value from the previous step. For "multiple time series mode" 
+                it is matrix, second dimension of which correspond to different
+                time series.
+                
             p_P:
                 Covariance matrix from the previous step.
                 
@@ -613,13 +726,28 @@ class DescreteStateSpace(object):
                 Whether to calculate gradient of the marginal likelihood 
                 of the state-space model. If true then the next parameter must 
                 provide the extra parameters for gradient calculation.
+                
             p_dm: 3D array (state_dim, time_series_no, parameters_no)
-                Mean derivatives from the previous step
+                Mean derivatives from the previous step. For "multiple time series mode" 
+                it is 3D array, second dimension of which correspond to different
+                time series.
                 
             p_dP: 3D array (state_dim, state_dim, parameters_no)
                 Mean derivatives from the previous step
+                
+            grad_calc_params_1: List or None
+            List with derivatives. The first component is 'f_dA' - function(k)
+            which returns the derivative of H. The second element is 'f_dQ'
+             - function(k). Function which returns the derivative of Q.
+             
+        Output:
+        ----------------------------
+        m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects
+            Results of the prediction steps.        
             
         """
+        if len(p_m.shape)<2:
+            p_m.shape = (p_m.shape[0],1)
         
         # index correspond to values from previous iteration.
         A = p_f_A(k,p_m,p_P) # state transition matrix (or Jacobian)
@@ -631,28 +759,35 @@ class DescreteStateSpace(object):
         P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance
         # Prediction step <-
         
+        if (p_m.shape[1] > 1):
+            multiple_ts_mode = True
+        else:
+            multiple_ts_mode = False
         
         if calc_grad_log_likelihood:
             p_f_dA = grad_calc_params_1[0]; dA_all_params = p_f_dA(k) # derivatives of A wrt parameters 
             p_f_dQ = grad_calc_params_1[1]; dQ_all_params = p_f_dQ(k) # derivatives of Q wrt parameters
-            dm_all_params = p_dm # derivarites from the previous step 
-            dP_all_params = p_dP 
             
             param_number = p_dP.shape[2]
             
-            dm_pred = np.empty(dm_all_params.shape)
-            dP_pred = np.empty(dP_all_params.shape)
+            # p_dm, p_dP - derivatives form the previoius step
+            dm_pred = np.empty(p_dm.shape)
+            dP_pred = np.empty(p_dP.shape)
             
             for j in range(param_number):
                 dA = dA_all_params[:,:,j]
                 dQ = dQ_all_params[:,:,j]
                 
+                dP = p_dP[:,:,j]
+                if (multiple_ts_mode == False):
+                    dm = p_dm[:,j]; dm.shape = (dm.shape[0],1)
+                    dm_pred[:,j] =  np.squeeze(np.dot(dA, p_m) + np.dot(A, dm)) # dm can 3-dim (dim,ts,variable)
+                elif (multiple_ts_mode == True): # modification for several time series
+                    dm = p_dm[:,:,j]
+                    dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, dm)
+                # prediction step derivatives for current parameter:
                 
-                dm = dm_all_params[:,j]                 
-                dP = dP_all_params[:,:,j]
                 
-                # prediction step derivatives for current parameter:            
-                dm_pred[:,j] =  np.dot(dA, p_m) + np.dot(A, dm)
                 dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T))
                 dP_pred[:,:,j] += dP_pred[:,:,j].T            
                 dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ
@@ -666,7 +801,7 @@ class DescreteStateSpace(object):
     
     
     @staticmethod
-    def _kalman_update_step(k,   p_m , p_P, p_h, p_f_H, p_f_R, Y, calc_log_likelihood= False, 
+    def _kalman_update_step(k,   p_m , p_P, p_h, p_f_H, p_f_R, measurement, calc_log_likelihood= False, 
                             calc_grad_log_likelihood=False, p_dm = None, p_dP = None, grad_calc_params_2 = None):
         """
         Input:
@@ -676,7 +811,9 @@ class DescreteStateSpace(object):
               number of measurements.
           
         m_P: matrix of size (state_dim, time_series_no)
-             Mean value from the previous step.
+             Mean value from the previous step. For "multiple time series mode" 
+                it is matrix, second dimension of which correspond to different
+                time series.
                 
         p_P:
              Covariance matrix from the previous step.
@@ -695,35 +832,73 @@ class DescreteStateSpace(object):
         p_f_R: function (k). Returns noise matrix of measurement equation 
             on iteration k.
             k (iteration number). starts at 0
-        calc_grad_log_likelihood: int
+            
+        measurement: (measurement_dim, time_series_no) matrix
+            One measurement used on the current update step. For 
+            "multiple time series mode" it is matrix, second dimension of 
+            which correspond to different time series.
+            
+        calc_log_likelihood: boolean
+            Whether to calculate marginal likelihood of the state-space model.
         
-        p_dm: array
-            Mean derivatives from the prediction step
+        calc_grad_log_likelihood: boolean
+                Whether to calculate gradient of the marginal likelihood 
+                of the state-space model. If true then the next parameter must 
+                provide the extra parameters for gradient calculation.
+        
+        p_dm: 3D array (state_dim, time_series_no, parameters_no)
+                Mean derivatives from the prediction step. For "multiple time series mode" 
+                it is 3D array, second dimension of which correspond to different
+                time series.
         
         p_dP: array
-            Covariance derivatives from the prediction step
+            Covariance derivatives from the prediction step.
+        
+        grad_calc_params_2: List or None
+            List with derivatives. The first component is 'f_dH' - function(k)
+            which returns the derivative of H. The second element is 'f_dR'
+             - function(k). Function which returns the derivative of R.
+        
+        Output:
+        ----------------------------
+        m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects
+            Results of the prediction steps.
+            
+        log_likelihood_update: double or 1D array
+            Update to the log_likelihood from this step        
+        
+        d_log_likelihood_update: (grad_params_no, time_series_no) matrix
+            Update to the gradient of log_likelihood, "multiple time series mode"
+            adds extra columns to the gradient.
+        
         """        
         
-        m_pred = p_m
-        P_pred = p_P        
+        m_pred = p_m # from prediction step
+        P_pred = p_P # from prediction step      
         
         H = p_f_H(k, m_pred, P_pred)
         R = p_f_R(k)
-        measurement = Y[k,:].T # measurement as column
+        
+        if (p_m.shape[1] > 1):
+            multiple_ts_mode = True
+            time_series_no = p_m.shape[1] # number of time serieses
+        else:
+            time_series_no = 1
+            multiple_ts_mode = False
         
         log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None
         # Update step (only if there is data)
         if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
              v = measurement-p_h(k, m_pred, H)
              S = H.dot(P_pred).dot(H.T) + R
-             if Y.shape[1]==1: # measurements are one dimensional
+             if measurement.shape[0]==1: # measurements are one dimensional
                  K = P_pred.dot(H.T) / S
                  if calc_log_likelihood:
                      log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
                                          v*v / S)
-                     log_likelihood_update = log_likelihood_update[0,0] # to make int
-                     if np.isnan(log_likelihood_update):
-                         raise ValueError("Errrrr 1")
+                     #log_likelihood_update = log_likelihood_update[0,0] # to make int
+                     if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
+                         raise ValueError("Nan values in likelihood update!")
                  LL = None; islower = None
              else:
                  LL,islower = linalg.cho_factor(S)
@@ -731,9 +906,9 @@ class DescreteStateSpace(object):
                  
                  if calc_log_likelihood:
                      log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + 
-                         2*np.sum( np.log(np.diag(LL)) ) + 
-                         np.dot(v.T, linalg.cho_solve((LL,islower),v)) )
-                     log_likelihood_update = log_likelihood_update[0,0] # to make int  
+                         2*np.sum( np.log(np.diag(LL)) ) +\
+                             np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v
+                     #log_likelihood_update = log_likelihood_update[0,0] # to make int  
                  
             
              if calc_grad_log_likelihood:
@@ -748,13 +923,19 @@ class DescreteStateSpace(object):
                  dm_upd = np.empty(dm_pred_all_params.shape)
                  dP_upd = np.empty(dP_pred_all_params.shape)
                  
-                 d_log_likelihood_update = np.empty((param_number,1))
+                 # firts dimension parameter_no, second - time series number
+                 d_log_likelihood_update = np.empty((param_number,time_series_no))
                  for param in range(param_number):
                 
                     dH = dH_all_params[:,:,param]
                     dR = dR_all_params[:,:,param]
                     
-                    dm_pred = dm_pred_all_params[:,param]
+                    if (multiple_ts_mode == False):    
+                        dm_pred = dm_pred_all_params[:,param]
+                    else:
+                        dm_pred = dm_pred_all_params[:,:,param]
+                        
+                        
                     dP_pred = dP_pred_all_params[:,:,param]
                 
                     # Terms in the likelihood derivatives
@@ -779,7 +960,11 @@ class DescreteStateSpace(object):
                          np.dot( P_pred, np.dot( tmp1, tmp3 ) )
                     
                     # terms required for the next step, save this for each parameter
-                    dm_upd[:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
+                    if (multiple_ts_mode == False):
+                        dm_upd[:,param] = dm_pred + np.squeeze(np.dot(dK, v) + np.dot(K, dv))
+                    else:
+                        dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
+                        
                     dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))      
                     dP_upd[:,:,param] += dP_upd[:,:,param].T
                     dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
@@ -793,9 +978,13 @@ class DescreteStateSpace(object):
                         #tmp4 = dv / S
                         tmp5 = v / S
                         
+                    
+                    d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
+                        np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) 
                         
-                    d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
-                    np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) 
+                    # Before  
+                    #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
+                    #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) 
                  
             
             
@@ -806,15 +995,17 @@ class DescreteStateSpace(object):
              P_upd = K.dot(S).dot(K.T)
              P_upd = 0.5*(P_upd + P_upd.T)
              P_upd =  P_pred - P_upd# this update matrix is symmetric
-         
+        
              return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
     
     @staticmethod
     def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step, 
                                   p_P_prev_step, p_f_A):
         """
-        Input:
+        Rauch–Tung–Striebel(RTS) update step
         
+        Input:
+        -----------------------------
         k: int
               Iteration No. Starts at 0. Total number of iterations equal to the 
               number of measurements.
@@ -822,14 +1013,14 @@ class DescreteStateSpace(object):
         p_m: matrix of size (state_dim, time_series_no)
              Filter mean on step k
                 
-        p_P:
+        p_P:  matrix of size (state_dim,state_dim)
              Filter Covariance on step k
         
         p_m_pred: matrix of size (state_dim, time_series_no)
-             Prediction mean
+             Means from the smoother prediction step.
                 
         p_P_pred:
-             Prediction covariance:
+             Covariance from the smoother prediction step.
              
         p_m_prev_step
             Smoother mean from the previous step.            
@@ -844,15 +1035,30 @@ class DescreteStateSpace(object):
             P: parameter for Jacobian, usually covariance matrix.
 
         """        
-        
+        if len(p_m.shape)<2:
+            p_m.shape = (p_m.shape[0],1)
+            
+        if len(p_m_prev_step.shape)<2:
+            p_m_prev_step.shape = (p_m_prev_step.shape[0],1)
+            
         A = p_f_A(k,p_m,p_P) # state transition matrix (or Jacobian)
         
         tmp = np.dot( A, p_P.T)
         if A.shape[0] == 1: # 1D states
             G = tmp.T / p_P_pred # P[:,:,k] is symmetric
         else:
-            LL,islower = linalg.cho_factor(p_P_pred)
-            G = linalg.cho_solve((LL,islower),tmp).T
+            try:
+                LL,islower = linalg.cho_factor(p_P_pred)
+                G = linalg.cho_solve((LL,islower),tmp).T
+            except:
+                # It happende that p_P_pred has several near zero eigenvalues
+                # hence the Cholessky method does not work.
+                res = sp.linalg.lstsq(p_P_pred, tmp)
+                G = res[0].T
+                #import pdb; pdb.set_trace()
+                #pass
+            
+            
         
         m_upd = p_m + G.dot( p_m_prev_step-p_m_pred )
         P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T)
@@ -862,7 +1068,7 @@ class DescreteStateSpace(object):
         return m_upd, P_upd
              
     @classmethod  
-    def _rts_smoother_raw(cls,state_dim, p_a, p_f_A, p_f_Q, filter_means, 
+    def rts_smoother(cls,state_dim, p_a, p_f_A, p_f_Q, filter_means, 
                           filter_covars):
         """
         This function implements Rauch–Tung–Striebel(RTS) smoother algorithm
@@ -890,130 +1096,118 @@ class DescreteStateSpace(object):
         p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k.
             k (iteration number). starts at 0
             
-        filter_means:
-            Include initial values
+        filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 
+            Results of the Kalman Filter means estimation.
             
-        filter_covars
-            Include initial values
+        filter_covars: (no_steps+1, state_dim, state_dim) 3D array
+            Results of the Kalman Filter covariance estimation.
             
         Output:
         -------------
         
-        M: (state_dim, no_steps) matrix 
+        M: (no_steps+1, state_dim) matrix 
             Smoothed estimates of the state means
             
-        P: (state_dim, state_dim, no_steps) 3D array
+        P: (no_steps+1, state_dim, state_dim) 3D array
             Smoothed estimates of the state covariances
         """
 
-        no_steps = filter_covars.shape[-1] # number 
+        no_steps = filter_covars.shape[0] # number of elements in covariance matrix
       
         M = np.empty(filter_means.shape) # smoothed means
         P = np.empty(filter_covars.shape) # smoothed covars
         
-        M[:,-1] = filter_means[:,-1]
-        P[:,:,-1] = filter_covars[:,:,-1]
+        M[-1,:] = filter_means[-1,:]
+        P[-1,:,:] = filter_covars[-1,:,:]
         for k in range(no_steps-2,-1,-1): 
             
             m_pred, P_pred, tmp1, tmp2 = \
-                    cls._kalman_prediction_step(k, filter_means[:,k], 
-                                                filter_covars[:,:,k], p_a, p_f_A, p_f_Q, 
+                    cls._kalman_prediction_step(k, filter_means[k,:], 
+                                                filter_covars[k,:,:], p_a, p_f_A, p_f_Q, 
                                                 calc_grad_log_likelihood=False) 
             
             m_upd, P_upd = cls._rts_smoother_update_step(k, 
-                            filter_means[:,k] ,filter_covars[:,:,k], 
-                            m_pred, P_pred, M[:,k+1] ,P[:,:,k+1], p_f_A)
+                            filter_means[k,:] ,filter_covars[k,:,:], 
+                            m_pred, P_pred, M[k+1,:] ,P[k+1,:,:], p_f_A)
                       
-            M[:,k] = m_upd
-            P[:,:,k] = P_upd
+            M[k,:] = np.squeeze(m_upd)
+            P[k,:,:] = P_upd
         # Return values
             
         return (M, P)
     
     @staticmethod
-    def _check_A_matrix(p_A):
+    def _check_SS_matrix(p_M, state_dim, measurement_dim, which='A'):
         """
-        Check A matrix so that it has right shape
+        Veryfy that on exit the matrix has appropriate shape for the KF algorithm.
+        
+        Input:
+            p_M: matrix 
+                As it is given for the user
+            state_dim: int
+                State dimensioanlity
+            measurement_dim: int
+                Measurement dimensionality
+            which: string
+                One of: 'A', 'Q', 'H', 'R'
+        Output:
+        ---------------
+            p_M: matrix of the right shape
+            
+            old_M_shape: tuple
+                Old Shape
         """
         
-        old_A_shape = None
-        if len(p_A.shape) < 3:         
-            old_A_shape = p_A.shape # save shape to restore it on exit
-            if len(p_A.shape) == 2: # matrix
-                p_A.shape = (p_A.shape[0],p_A.shape[1],1)
-                if p_A.shape[0] != p_A.shape[1]:
-                    raise ValueError("p_A must be a square matrix")
-                
-            elif len(p_A.shape) == 1: # scalar but in array already 
-                if (p_A.shape[0] != 1):
-                    raise ValueError("Parameter p_A is an 1D array, while it must be matrix or scalar")
+        old_M_shape = None
+        if len(p_M.shape) < 3: # new shape is 3 dimensional         
+            old_M_shape = p_M.shape # save shape to restore it on exit
+            if len(p_M.shape) == 2: # matrix
+                p_M.shape = (p_M.shape[0],p_M.shape[1],1)
+            elif len(p_M.shape) == 1: # scalar but in array already 
+                if (p_M.shape[0] != 1):
+                    raise ValueError("Matrix %s is an 1D array, while it must be a matrix or scalar", which)
                 else:
-                    p_A.shape = (1,1,1)
+                    p_M.shape = (1,1,1)
                     
-        return (p_A,old_A_shape)
-        
-    @staticmethod
-    def _check_Q_matrix(p_Q):
-        """
-        Check Q matrix
-        """
-        
-        old_Q_shape = None
-        if len(p_Q.shape) < 3:         
-            old_Q_shape = p_Q.shape # save shape to restore it on exit
-            if len(p_Q.shape) == 2: # matrix
-                p_Q.shape = (p_Q.shape[0],p_Q.shape[1],1)
-                if p_Q.shape[0] != p_Q.shape[1]:
-                    raise ValueError("p_Q must be a square matrix")
-                    
-            elif len(p_Q.shape) == 1: # scalar but in array already 
-                if (p_Q.shape[0] != 1):
-                    raise ValueError("Parameter p_Q is an 1D array, while it must be matrix or scalar")
-                else:
-                    p_Q.shape = (1,1,1)
-                    
-        return (p_Q,old_Q_shape)
-        
-    @staticmethod 
-    def _check_H_matrix(p_H):
-        """
-        Check H matrix
-        """
-
-        old_H_shape = None
-        if (len(p_H.shape) == 1):
-            if p_H.shape[0] != 1:
-                raise ValueError("Ambiguity in the shape of p_H")
-            else:
-                old_H_shape = p_H.shape
-                p_H.shape = (1,1)
+        if (which == 'A') or (which == 'Q'):
+            if (p_M.shape[0] != state_dim) or (p_M.shape[1] != state_dim):
+                raise ValueError("%s must be a square matrix of size (%i,%i)" % (which, state_dim, state_dim))
+        if (which == 'H'):
+            if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != state_dim):
+                raise ValueError("H must be of shape (measurement_dim, state_dim) (%i,%i)" % (measurement_dim, state_dim))
+        if (which == 'R'):
+            if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != measurement_dim):
+                raise ValueError("R must be of shape (measurement_dim, measurement_dim) (%i,%i)" % (measurement_dim, measurement_dim))
                 
-        return (p_H, old_H_shape)
-        
-    @staticmethod        
-    def _check_R_matrix(p_R):
-        """
-        Check R matrix
-        """        
-        
-        old_R_shape = None
-        if (len(p_R.shape) == 1):
-            if p_R.shape[0] != 1:
-                raise ValueError("Ambiguity in the shape of p_H")
-            else:
-                old_R_shape = p_R.shape
-                p_R.shape = (1,1)
-                
-        return (p_R, old_R_shape)
+        return (p_M,old_M_shape)
         
     @staticmethod        
     def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'):
         """
-        Check dA, dQ matrices
+        Function checks (mostly check dimensions) matrices for marginal likelihood
+        gradient parameters calculation. It check dA, dQ matrices.
         
         Input:
+        -------------
+            dM: None, scaler or 3D matrix        
+                It is supposed to be (state_dim,state_dim,grad_params_no) matrix.
+                If None then zero matrix is assumed. If scalar then the function
+                checks consistency with "state_dim" and "grad_params_no".
+                                      
+            state_dim: int
+                State dimensionality
+                
+            grad_params_no: int
+                How many parrameters of likelihood gradient in total.
+                
+            which: string
+                'dA' or 'dQ'
             
-            which: string 'dA' or 'dQ'
+            
+        Output:
+        --------------
+            function of (k) which returns the parameters matrix.        
+        
         """        
         
         
@@ -1031,6 +1225,7 @@ class DescreteStateSpace(object):
                 raise ValueError("When computing likelihood gradient wrong %s dimension." % which)
             else:
                 dM = np.ones((1,1,1)) * dM
+                
         if not isinstance(dM, types.FunctionType):
             f_dM = lambda k: dM
         else:
@@ -1042,11 +1237,35 @@ class DescreteStateSpace(object):
     @staticmethod        
     def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'):
         """
-        Check dA, dQ matrices
+        Function checks (mostly check dimensions) matrices for marginal likelihood
+        gradient parameters calculation. It check dH, dR matrices.
         
         Input:
+        -------------
+            dM: None, scaler or 3D matrix        
+                It is supposed to be 
+                (measurement_dim ,state_dim,grad_params_no) for "dH" matrix.
+                (measurement_dim,measurement_dim,grad_params_no) for "dR"
+                
+                If None then zero matrix is assumed. If scalar then the function
+                checks consistency with "state_dim" and "grad_params_no".
+                                      
+            state_dim: int
+                State dimensionality
+                
+            grad_params_no: int
+                How many parrameters of likelihood gradient in total.
             
-            which: string 'dH' or 'dR'
+            measurement_dim: int
+                Dimensionality of measurements.
+                
+            which: string
+                'dH' or 'dR'
+            
+            
+        Output:
+        --------------
+            function of (k) which returns the parameters matrix. 
         """  
         
         if dM is None:
@@ -1093,22 +1312,48 @@ class ContDescrStateSpace(DescreteStateSpace):
     
     class AQcompute_once(object):
         """
-        Class for providing the functions for A and Q dA and dQ calculation
-        and for caching the result of the call
+        Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter
+        from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state
+        equation. dt - time steps.
+        
+        It has the same interface as AQcompute_batch.
+        
+        It computes matrices for only one time step. This object is used when
+        there are many different time steps and storing matrices for each of them
+        would take too much memory.
         """
         
         def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
             """
+            Constructor. All necessary parameters are passed here and stored 
+            in the opject.            
+            
             Input:
-            
+            -------------------
+                F, L, Qc, P_inf : matrices
+                    Parameters of corresponding continuous state model
                 dt: array
-                    Array of dt steps
-            """
-            self.dt = dt                    
-            self.F = F
-            self.L = L
-            self.Qc = Qc
+                    All time steps
+                compute_derivatives: bool
+                    Whether to calculate derivatives
+                    
+                dP_inf, dF, dQc: 3D array
+                    Derivatives if they are required
             
+            Output:
+            -------------------
+            Nothing
+            """
+            # Copies are done because this object is used later in smoother
+            # and these parameters must not change.
+            self.F = F.copy()
+            self.L = L.copy() 
+            self.Qc = Qc.copy()
+            
+            self.dt = dt # copy is not taken because dt is internal parameter
+            
+            # Parameters are used to calculate derivatives but derivatives 
+            # are not used in the smoother. Therefore copies are not taken.
             self.P_inf = P_inf            
             self.dP_inf = dP_inf
             self.dF = dF
@@ -1122,27 +1367,25 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.last_k_computed = False
             self.Ak = None
             self.Qk = None
-        
-        def recompute_for_new_k(self,k):
-            """
-            """
+            self.dAk = None
+            self.dQk = None
             
-            if self.last_k == k:
-                if (self.last_k_computed == False):        
-                    Ak,Qk, tmp, dAk, dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F,
-                        self.L,self.Qc,self.dt[k],self.compute_derivatives, 
-                        grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc)
-                    self.Ak = Ak
-                    self.Qk = Qk
-                    self.dAk = dAk
-                    self.dQk = dQk
-                    self.last_k_computed = True
-                else:
-                    Ak = self.Ak
-                    Qk = self.Qk
-                    dAk = self.dAk
-                    dQk = self.dQk
-            else:
+            # !!!Print statistics! Which object is created
+        def _recompute_for_new_k(self,k):
+            """
+            Computes the necessary matrices for an index k and store the results.
+            
+            Input:
+            ----------------------
+            k: int
+                Index in the time differences array dt where to compute matrices            
+            
+            Output:
+            ----------------------
+                Ak,Qk, dAk, dQk: matrices and/or 3D arrays
+                    A, Q, dA dQ on step k
+            """
+            if (self.last_k != k) or (self.last_k_computed == False):
                 Ak,Qk, tmp, dAk, dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F,
                         self.L,self.Qc,self.dt[k],self.compute_derivatives, 
                         grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc)
@@ -1153,11 +1396,22 @@ class ContDescrStateSpace(DescreteStateSpace):
                 self.Qk = Qk
                 self.dAk = dAk
                 self.dQk = dQk
+            else:
+                Ak = self.Ak
+                Qk = self.Qk
+                dAk = self.dAk
+                dQk = self.dQk
+            
+            # !!!Print statistics! Print sizes of matrices
+            
             return Ak,Qk, dAk, dQk 
         
         def reset(self, compute_derivatives):
             """
-            For reusing this object e.g. in smoother computation
+            For reusing this object e.g. in smoother computation. Actually,
+            this object can not be reused because it computes the matrices on
+            every iteration. But this method is written for keeping the same 
+            interface with the class AQcompute_batch.
             """
             
             self.last_k = 0
@@ -1167,38 +1421,57 @@ class ContDescrStateSpace(DescreteStateSpace):
             return self
             
         def f_A(self,k,m,P):
-            
-            Ak,Qk, dAk, dQk = self.recompute_for_new_k(k)
+            Ak,Qk, dAk, dQk = self._recompute_for_new_k(k)
             return Ak
             
         def f_Q(self,k):
-            
-            Ak,Qk, dAk, dQk = self.recompute_for_new_k(k)
-            
+            Ak,Qk, dAk, dQk = self._recompute_for_new_k(k)
             return Qk
             
         def f_dA(self, k):
-            
-            Ak,Qk, dAk, dQk = self.recompute_for_new_k(k)
-            
+            Ak,Qk, dAk, dQk = self._recompute_for_new_k(k)
             return dAk
         
         def f_dQ(self, k):
-            
-            Ak,Qk, dAk, dQk = self.recompute_for_new_k(k)
-            
+            Ak,Qk, dAk, dQk = self._recompute_for_new_k(k)
             return dQk
             
             
     class AQcompute_batch(object):
         """
+        Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter
+        from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state
+        equation. dt - time steps.       
+        
+        It has the same interface as AQcompute_once.
+        
+        It computes matrices for all time steps. This object is used when
+        there are not so many (controlled by internal variable) 
+        different time steps and storing all the matrices do not take too much memory.
+        
+        Since all the matrices are computed all together, this object can be used
+        in smoother without repeating the computations.        
         """
         def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
             """
-            Input:
+            Constructor. All necessary parameters are passed here and stored 
+            in the opject.            
             
+            Input:
+            -------------------
+                F, L, Qc, P_inf : matrices
+                    Parameters of corresponding continuous state model
                 dt: array
-                    Array of dt steps
+                    All time steps
+                compute_derivatives: bool
+                    Whether to calculate derivatives
+                    
+                dP_inf, dF, dQc: 3D array
+                    Derivatives if they are required
+            
+            Output:
+            -------------------
+            Nothing
             """
             As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F,
                         L,Qc,dt,compute_derivatives, 
@@ -1209,65 +1482,253 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.dAs = dAs
             self.dQs = dQs
             self.reconstruct_indices = reconstruct_indices
-        
+            self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\
+                            (self.dAs.nbytes if (self.dAs is not None) else 0) +\
+                            (self.dQs.nbytes if (self.dQs is not None) else 0) +\
+                            (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
+             # !!!Print statistics! Which object is created
+            # !!!Print statistics! Print sizes of matrices
         def reset(self, compute_derivatives):
             """
-            For reusing this object e.g. in smoother computation
+            For reusing this object e.g. in smoother computation. It makes sence
+            because necessary matrices have been already computed for all
+            time steps.
             """
-            
             return self
             
-        def f_A(self,k,m,P): 
-            """
-            """
+        def f_A(self,k,m,P):
             return self.As[:,:, self.reconstruct_indices[k]]
             
         def f_Q(self,k):
-            """
-            """
             return self.Qs[:,:, self.reconstruct_indices[k]]
         
         def f_dA(self,k): 
-            """
-            """
             return self.dAs[:,:, :, self.reconstruct_indices[k]]
         
-        def f_dQ(self,k): 
-            """
-            """
+        def f_dQ(self,k):
             return self.dQs[:,:, :, self.reconstruct_indices[k]]
     
     @classmethod
-    def cont_discr_kalman_filter(cls,F,L,Qc,H,R,P_inf,X,Y,m_init=None,
-                      P_init=None, calc_log_likelihood=False, 
+    def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None, 
+                                 m_init=None, P_init=None, calc_log_likelihood=False, 
                       calc_grad_log_likelihood=False, grad_params_no=None, grad_calc_params=None):
         """
+        This function implements the continuous-discrete Kalman Filter algorithm
+        These notations for the State-Space model are assumed:
+            d/dt x(t) =  F * x(t) + L * w(t);         w(t) ~ N(0, Qc)
+            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
+            
+        Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))            
+            
+        Current Features:
+        ----------------------------------------
+        1) The function generaly do not modify the passed parameters. If 
+        it happens then it is an error. There are several exeprions: scalars
+        can be modified into a matrix, in some rare cases shapes of 
+        the derivatives matrices may be changed, it is ignored for now.
         
+        2) Copies of F,L,Qc are created in memory because they may be used later
+        in smoother. References to copies are kept in "AQcomp" object
+        return parameter.
+        
+        3) Function support "multiple time series mode" which means that exactly
+        the same State-Space model is used to filter several sets of measurements.
+        In this case third dimension of Y should include these state-space measurements
+        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
+
+        4) Calculation of Grad_log_likelihood is not supported if matrices 
+        H, or R changes overf time (with index k). (later may be changed)
+
+        5) Measurement may include missing values. In this case update step is
+        not done for this measurement. (later may be changed)
+        
+        Input:            
+        -----------------
+        
+        F: (state_dim, state_dim) matrix
+            F in the model.
+            
+        L: (state_dim, noise_dim) matrix
+            L in the model.
+            
+        Qc: (noise_dim, noise_dim) matrix
+            Q_c in the model.
+            
+        p_H: scalar, matrix (measurement_dim, state_dim) , 3D array
+            H_{k} in the model. If matrix then H_{k} = H - constant.
+            If it is 3D array then H_{k} = p_Q[:,:, index[2,k]]
+        
+        p_R: scalar, square symmetric matrix, 3D array
+            R_{k} in the model. If matrix then R_{k} = R - constant.
+            If it is 3D array then R_{k} = p_R[:,:, index[3,k]]
+        
+        P_inf: (state_dim, state_dim) matrix
+            State varince matrix on infinity.
+        
+        X: 1D array
+            Time points of measurements. Needed for converting continuos
+            problem to the discrete one.
+            
+        Y: matrix or vector or 3D array
+            Data. If Y is matrix then samples are along 0-th dimension and
+            features along the 1-st. If 3D array then third dimension 
+            correspond to "multiple time series mode".
+            
+        index: vector
+            Which indices (on 3-rd dimension) from arrays p_H, p_R to use
+            on every time step. If this parameter is None then it is assumed 
+            that p_H, p_R do not change over time and indices are not needed.
+            index[0,:] - correspond to H, index[1,:] - correspond to R
+            If index.shape[0] == 1, it is assumed that indides for all matrices
+            are the same.
+            
+        m_init: vector or matrix
+            Initial distribution mean. If None it is assumed to be zero.
+            For "multiple time series mode" it is matrix, second dimension of
+            which correspond to different time series. In regular case ("one
+            time series mode") it is a vector.
+            
+        P_init: square symmetric matrix or scalar
+            Initial covariance of the states. If the parameter is scalar
+            then it is assumed that initial covariance matrix is unit matrix 
+            multiplied by this scalar. If None the unit matrix is used instead.
+            "multiple time series mode" does not affect it, since it does not
+            affect anything related to state variaces.
+            
+        calc_log_likelihood: boolean
+            Whether to calculate marginal likelihood of the state-space model.
+            
+        calc_grad_log_likelihood: boolean
+            Whether to calculate gradient of the marginal likelihood 
+            of the state-space model. If true then "grad_calc_params" parameter must 
+            provide the extra parameters for gradient calculation.
+            
+        grad_params_no: int
+            If previous parameter is true, then this parameters gives the 
+            total number of parameters in the gradient. 
+        
+        grad_calc_params: dictionary
+            Dictionary with derivatives of model matrices with respect 
+            to parameters "dF", "dL", "dQc", "dH", "dR", "dm_init", "dP_init".
+            They can be None, in this case zero matrices (no dependence on parameters)
+            is assumed. If there is only one parameter then third dimension is
+            automatically added.
+        
+        Output:
+        --------------
+        
+        M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
+            Filter estimates of the state means. In the extra step the initial
+            value is included. In the "multiple time series mode" third dimension
+            correspond to different timeseries.
+            
+        P: (no_steps+1, state_dim, state_dim) 3D array
+            Filter estimates of the state covariances. In the extra step the initial
+            value is included.
+        
+        log_likelihood: double or (1, time_series_no) 3D array.
+        
+            If the parameter calc_log_likelihood was set to true, return
+            logarithm of marginal likelihood of the state-space model. If
+            the parameter was false, return None. In the "multiple time series mode" it is a vector
+            providing log_likelihood for each time series.
+        
+        grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix
+            If calc_grad_log_likelihood is true, return gradient of log likelihood
+            with respect to parameters. It returns it column wise, so in 
+            "multiple time series mode" gradients for each time series is in the 
+            corresponding column.
+            
+        AQcomp: object 
+            Contains some pre-computed values for converting continuos model into
+            discrete one. It can be used later in the smoothing pahse.
         """
         
-        # Time step lengths
+        p_H = np.atleast_1d(p_H)
+        p_R = np.atleast_1d(p_R)
+        
+        X.shape, old_X_shape  = cls._reshape_input_data(X.shape) # represent as column
+        if (X.shape[1] != 1):
+            raise ValueError("Only one dimensional X data is supported.")
+        
+        Y.shape, old_Y_shape  = cls._reshape_input_data(Y.shape) # represent as column
         state_dim = F.shape[0]
         measurement_dim = Y.shape[1]
         
+        if len(Y.shape) == 2: 
+            time_series_no = 1 # regular case
+        elif len(Y.shape) == 3:
+            time_series_no = Y.shape[2] # multiple time series mode
+            
+        if  ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\
+            ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)):
+            model_matrices_chage_with_time = True
+        else:
+            model_matrices_chage_with_time = False
+        
+        # Check index
+        old_index_shape = None
+        if index is None:
+            if (len(p_H.shape) == 3) or (len(p_R.shape) == 3):
+                raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)")
+            else: # matrices do not change in time, so form dummy zero indices.
+                index = np.zeros((1,Y.shape[0]))
+        else:
+            if len(index.shape) == 1:
+                index.shape = (1,index.shape[0])
+                old_index_shape = (index.shape[0],)
+                
+            if (index.shape[1] != Y.shape[0]):   
+                raise ValueError("Number of measurements must be equal the number of H_{k}, R_{k}")
+                
+        if (index.shape[0] == 1):         
+            H_time_var_index = 0; R_time_var_index = 0 
+        elif (index.shape[0] == 4):
+            H_time_var_index = 0; R_time_var_index = 1
+        else:
+            raise ValueError("First Dimension of index must be either 1 or 2.")
+        
+        (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H')
+        (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R')
+        
         if m_init is None:
-            m_init = np.zeros((state_dim,1))
+            if (time_series_no == 1):
+                m_init = m_init = np.zeros((state_dim,1))
+            else:
+                # multiple time series mode.
+                m_init = np.zeros((state_dim, time_series_no))
+        else:
+            if (time_series_no == 1):
+                m_init = np.atleast_2d(m_init).T
         
         if P_init is None:
             P_init = P_inf.copy()
         
+        # Functions to pass to the kalman_filter algorithm:
+        # Parameters:
+        # k - number of Kalman filter iteration
+        # m - vector for calculating matrices. Required for EKF. Not used here.
+        f_h = lambda k,m,H: np.dot(H, m)
+        f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]]
+        f_R = lambda k: p_R[:,:, index[R_time_var_index, k]]
+        
         if calc_grad_log_likelihood:
             
             dF = cls._check_grad_state_matrices( grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA') 
             dQc = cls._check_grad_state_matrices( grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ')
-            dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dQ')
+            dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dA')
             
             dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
             dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
             
             dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter
             if dm_init is None:
-                dm_init = np.zeros( (state_dim,grad_params_no) )
-            
+                if time_series_no == 1:
+                    dm_init = np.zeros( (state_dim,grad_params_no) )
+                else:
+                     # multiple time series mode. Keep grad_params always as a last dimension
+                    dm_init = np.zeros( (state_dim, time_series_no, grad_params_no) )
+                    
             dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter
             if dP_init is None:
                 dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function
@@ -1278,75 +1739,173 @@ class ContDescrStateSpace(DescreteStateSpace):
             dQc = None
             dH = None
             dR = None
-            
-            
-        # TODO: Defaults for m_init, P_init, dm_init, dP_init. !!!
+            dm_init = None
+            dP_init = None
+        
+        if print_verbose:
+            print("General: run Continuos-Discrete Kalman Filter")
         # Also for dH, dR and probably for all derivatives
-        (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim,F,L,Qc,H,R,P_inf,X,Y,m_init=m_init,
-                      P_init=P_init, calc_log_likelihood=calc_log_likelihood, 
+        (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim,F, L, Qc, P_inf,
+                        f_h, f_H, f_R, X, Y, m_init=m_init, P_init=P_init, calc_log_likelihood=calc_log_likelihood, 
                       calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, dP_inf=dP_inf, 
                       dF = dF, dQc=dQc, dH=dH, dR=dR, dm_init=dm_init, dP_init=dP_init)
-                      
-        return (M, P, log_likelihood, grad_log_likelihood,AQcomp)
+        
+        if old_index_shape is not None:
+            index.shape = old_index_shape
+            
+        if old_X_shape is not None:
+            X.shape = old_X_shape
+            
+        if old_Y_shape is not None:
+            Y.shape = old_Y_shape
+            
+        if old_H_shape is not None:
+            p_H.shape = old_H_shape
+            
+        if old_R_shape is not None:
+            p_R.shape = old_R_shape
+             
+        return (M, P, log_likelihood, grad_log_likelihood, AQcomp)
         
     @classmethod
-    def _cont_discr_kalman_filter_raw(cls,state_dim,F,L,Qc,H,R,P_inf,X,Y,m_init=None,
-                      P_init=None, calc_log_likelihood=False, 
+    def _cont_discr_kalman_filter_raw(cls,state_dim,F,L,Qc, P_inf, f_h, f_H, f_R, X, Y, 
+                                      m_init=None, P_init=None, calc_log_likelihood=False, 
                       calc_grad_log_likelihood=False, grad_params_no=None, dP_inf=None, 
                       dF = None, dQc=None, dH=None, dR=None, dm_init=None, dP_init=None):
         """
-        Kalman filter for continuos dynamic model.
+        General filtering algorithm for inference in the continuos-discrete 
+        state-space model:
+
+            d/dt x(t) =  F * x(t) + L * w(t);         w(t) ~ N(0, Qc)
+            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
         
-        Input:
-            F:
-                Dynamic model matrix
+        Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))            
+        
+        Current Features:
+        ----------------------------------------
+        
+        1) Function support "multiple time series mode" which means that exactly
+        the same State-Space model is used to filter several sets of measurements.
+        In this case third dimension of Y should include these state-space measurements
+        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
+
+        2) Measurement may include missing values. In this case update step is
+        not done for this measurement. (later may be changed)
+        
+        Input:            
+        -----------------
+        state_dim: int
+            Demensionality of the states
             
-            X:
-                Time steps
+        F: (state_dim, state_dim) matrix
+            F in the model.
+            
+        L: (state_dim, noise_dim) matrix
+            L in the model.
+            
+        Qc: (noise_dim, noise_dim) matrix
+            Q_c in the model.
+            
+        P_inf: (state_dim, state_dim) matrix
+            State varince matrix on infinity.
+            
+        p_h: function (k, x_{k}, H_{k}). Measurement function.
+            k (iteration number),
+            x_{k}
+            H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
+        
+        f_H: function (k, m, P) return Jacobian of dynamic function, it is
+            passed into p_h.
+            k (iteration number),
+            m: point where Jacobian is evaluated,
+            P: parameter for Jacobian, usually covariance matrix.
+            
+         p_f_R: function (k). Returns noise matrix of measurement equation 
+            on iteration k.
+            k (iteration number).
+            
+        m_init: vector or matrix
+            Initial distribution mean. For "multiple time series mode" 
+            it is matrix, second dimension of which correspond to different
+            time series. In regular case ("one time series mode") it is a 
+            vector.
+            
+        P_init: matrix or scalar
+            Initial covariance of the states. Must be not None
+            "multiple time series mode" does not affect it, since it does not
+            affect anything related to state variaces.
+            
+        calc_log_likelihood: boolean
+            Whether to calculate marginal likelihood of the state-space model.
+            
+        calc_grad_log_likelihood: boolean
+            Whether to calculate gradient of the marginal likelihood 
+            of the state-space model. If true then the next parameter must 
+            provide the extra parameters for gradient calculation.
+        
+        grad_params_no: int
+            Number of gradient parameters
+            
+        dP_inf, dF, dQc, dH, dR, dm_init, dP_init: matrices or 3D arrays.
+            Necessary parameters for derivatives calculation.
+        
         """
 
-        steps_no = X.shape[0] # number of steps in the Kalman Filter
-        
+        steps_no = Y.shape[0] # number of steps in the Kalman Filter
+        if len(Y.shape) == 2: 
+            time_series_no = 1 # regular case
+        elif len(Y.shape) == 3:
+            time_series_no = Y.shape[2] # multiple time series mode
+            
         # Return object which computes A, Q and possibly derivatives on the way, pass the derivative matrices not functions
         AQcomp = cls._cont_to_discrete_object(X, F,L,Qc,compute_derivatives=calc_grad_log_likelihood, 
                                               grad_params_no=grad_params_no, 
-                                              P_inf=P_inf, dP_inf=dP_inf(0), 
-                                              dF = dF(0), dQc=dQc(0))
-        
-        # Functions required for Kalman filter
+                                              P_inf=P_inf, dP_inf=(dP_inf(0) if calc_grad_log_likelihood else None), 
+                                              dF = (dF(0) if calc_grad_log_likelihood else None), 
+                                              dQc=(dQc(0) if calc_grad_log_likelihood else None))
+                                              
+        # Functions required for discrete Kalman filter. Other requirements
+        # are:  fh, p_H, p_R - provided as parameters
+        # p_A, p_Q - provided by the object AQcomp.                                    
         f_a = lambda k,m,A: np.dot(A, m) # state dynamic model
-        f_h = lambda k,m,H: np.dot(H, m) # measurement model
-        f_H = lambda k,m,P: H
-        f_R = lambda k: R
-        
+  
         # Allocate space for results
         # Mean estimations. Initial values will be included
-        M = np.empty((state_dim,(steps_no+1)))
+        if (time_series_no == 1): # one time series mode
+            M = np.empty(((steps_no+1),state_dim))
+            M[0,:] = np.squeeze(m_init) # Initialize mean values # ??? why here squeze and in discrete case no squeeze
+        else: # multiple time series mode
+            M = np.empty(((steps_no+1),state_dim,time_series_no))
+            M[0,:,:] = m_init # Initialize mean values
         # Variance estimations. Initial values will be included
-        P = np.empty((state_dim,state_dim,(steps_no+1)))
-
-        # Initialize
-        M[:,0] = m_init[:,0]
-        P[:,:,0] = P_init
+        P = np.empty(((steps_no+1),state_dim,state_dim))
+        P[0,:,:] = P_init # Initialize initial covariance matrix
         
-        log_likelihood = 0
-        grad_log_likelihood = np.zeros((grad_params_no,1))
+        #log_likelihood = 0
+        #grad_log_likelihood = np.zeros((grad_params_no,1))
+        log_likelihood = 0 if calc_log_likelihood else None
+        grad_log_likelihood = 0 if calc_grad_log_likelihood else None
+        
+        #setting initial values for derivatives update
+        dm_upd = dm_init
+        dP_upd = dP_init
         # Main loop of the Kalman filter
         for k in range(0,steps_no):
             # In this loop index for new estimations is (k+1), old - (k)
             # This happened because initial values are stored at 0-th index.                 
             
-            if k == 0: #setting initial values
-                dm_upd = dm_init
-                dP_upd = dP_init
+            if (time_series_no == 1): # single time series mode
+                k_measurment = Y[k,:].T # measurement as column
+            else: # multiple time series mode
+                k_measurment = Y[k,:,:]
                 
             m_pred, P_pred, dm_pred, dP_pred = \
-            cls._kalman_prediction_step(k, M[:,k] ,P[:,:,k], f_a, AQcomp.f_A, AQcomp.f_Q, 
+            cls._kalman_prediction_step(k, M[k,:] ,P[k,:,:], f_a, AQcomp.f_A, AQcomp.f_Q, 
                 calc_grad_log_likelihood=calc_grad_log_likelihood, 
                 p_dm = dm_upd, p_dP = dP_upd, grad_calc_params_1 = (AQcomp.f_dA, AQcomp.f_dQ) )
             
             m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-            cls._kalman_update_step(k,  m_pred , P_pred, f_h, f_H, f_R, Y, 
+            cls._kalman_update_step(k,  m_pred , P_pred, f_h, f_H, f_R, k_measurment, 
                         calc_log_likelihood=calc_log_likelihood, 
                         calc_grad_log_likelihood=calc_grad_log_likelihood, 
                         p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))                
@@ -1357,12 +1916,15 @@ class ContDescrStateSpace(DescreteStateSpace):
             if calc_grad_log_likelihood:
                 grad_log_likelihood += d_log_likelihood_update
             
-            M[:,k+1] = m_upd
-            P[:,:,k+1] = P_upd
-        # Return values, get rid of initial values
-        #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood)
-        # Return values, get rid of initial values
-        #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood)
+            if (time_series_no == 1):
+                M[k+1,:] = np.squeeze(m_upd)
+            else:
+                M[k+1,:,:] = m_upd # separate mean value for each time series
+                
+            P[k+1,:,:] = P_upd
+            #print("kf it: %i" % k)
+            # !!!Print statistics! Print sizes of matrices
+            # !!!Print statistics! Print iteration time base on another boolean variable
         return (M, P, log_likelihood, grad_log_likelihood, AQcomp.reset(False))
     
     @classmethod  
@@ -1370,12 +1932,13 @@ class ContDescrStateSpace(DescreteStateSpace):
                                 AQcomp=None, X=None, F=None,L=None,Qc=None):
         """
         
-        UPDATE Description!
+        Continuos-discrete Rauch–Tung–Striebel(RTS) smoother.
         
         This function implements Rauch–Tung–Striebel(RTS) smoother algorithm
-        based on the results of kalman_filter_raw.
-        These notations are the same:
-            x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
+        based on the results of _cont_discr_kalman_filter_raw.
+        
+        Model:
+            d/dt x(t) =  F * x(t) + L * w(t);         w(t) ~ N(0, Qc)
             y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
             
         Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k))            
@@ -1383,73 +1946,119 @@ class ContDescrStateSpace(DescreteStateSpace):
         Input:
         --------------
         
-        filter_means:
-            Include initial values
-            
-        filter_covars
-            Include initial values
+        filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 
+            Results of the Kalman Filter means estimation.
             
+        filter_covars: (no_steps+1, state_dim, state_dim) 3D array
+            Results of the Kalman Filter covariance estimation.
+        
+        AQcomp: object or None
+            Object form the filter phase which provides functions for computing
+            A, Q, dA, dQ fro  discrete model from the continuos model.
+        
+         X, F, L, Qc: matrices
+             If AQcomp thiese matrices are used to create this object from scratch.
+        
         Output:
         -------------
         
-        M: (state_dim, no_steps) matrix 
+        M: (no_steps+1,state_dim) matrix 
             Smoothed estimates of the state means
             
-        P: (state_dim, state_dim, no_steps) 3D array
+        P: (no_steps+1,state_dim, state_dim) 3D array
             Smoothed estimates of the state covariances
         """
-        
+    
         if AQcomp is None: # make this object from scratch
             AQcomp = cls._cont_to_discrete_object(cls, X, F,L,Qc,compute_derivatives=False, 
                                                   grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None)
         
         f_a = lambda k,m,A: np.dot(A, m) # state dynamic model
         
-        no_steps = filter_covars.shape[-1] # number 
+        no_steps = filter_covars.shape[0] # number 
       
         M = np.empty(filter_means.shape) # smoothed means
         P = np.empty(filter_covars.shape) # smoothed covars
         
-        M[:,-1] = filter_means[:,-1]
-        P[:,:,-1] = filter_covars[:,:,-1]
+        if print_verbose:
+            print("General: run Continuos-Discrete Kalman Smoother")
+            
+        M[-1,:] = filter_means[-1,:]
+        P[-1,:,:] = filter_covars[-1,:,:]
         for k in range(no_steps-2,-1,-1): 
             
             m_pred, P_pred, tmp1, tmp2 = \
-                    cls._kalman_prediction_step(k, filter_means[:,k], 
-                                                filter_covars[:,:,k], f_a, AQcomp.f_A, AQcomp.f_Q, 
+                    cls._kalman_prediction_step(k, filter_means[k,:], 
+                                                filter_covars[k,:,:], f_a, AQcomp.f_A, AQcomp.f_Q, 
                                                 calc_grad_log_likelihood=False) 
             
             m_upd, P_upd = cls._rts_smoother_update_step(k, 
-                            filter_means[:,k] ,filter_covars[:,:,k], 
-                            m_pred, P_pred, M[:,k+1] ,P[:,:,k+1], AQcomp.f_A)
+                            filter_means[k,:] ,filter_covars[k,:,:], 
+                            m_pred, P_pred, M[(k+1),:] ,P[(k+1),:,:], AQcomp.f_A)
                       
-            M[:,k] = m_upd
-            P[:,:,k] = P_upd
+            M[k,:] = np.squeeze(m_upd)
+            P[k,:,:] = P_upd
         # Return values
             
         return (M, P)
     
     @classmethod
-    def _cont_to_discrete_object(cls, X, F,L,Qc,compute_derivatives=False,grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
+    def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
+                                 grad_params_no=None, 
+                                 P_inf=None, dP_inf=None, dF = None, dQc=None):
         """
-        Function return the object which can be used in Kalman filter and/or
-        smoother to obtain matrices A and Q as well as necessary derivatives(clarify).
+        Function return the object which is used in Kalman filter and/or
+        smoother to obtain matrices A, Q and their derivatives for discrete model
+        from the continuous model.
+        
+        There are 2 objects AQcompute_once and AQcompute_batch and the function 
+        returs the appropriate one based on the number of different time steps.
+        
+        Input:
+        ----------------------
+        X, F, L, Qc: matrices
+            Continuous model matrices
+        
+        compute_derivatives: boolean
+            Whether to compute derivatives
+            
+        grad_params_no: int
+            Number of parameters in the gradient
+        
+        P_inf, dP_inf, dF, dQ: matrices and 3D objects
+            Data necessary to compute derivatives.
+        
+        Output:
+        --------------------------
+        AQcomp: object
+            Its methods return matrices (and optionally derivatives) for the
+            discrete state-space model.
         
         """        
         
-        unique_round_decimals = 8
+        unique_round_decimals = 10
+        threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
         dt = np.empty((X.shape[0],))
         dt[1:] = np.diff(X[:,0],axis=0)
         dt[0]  = 0#dt[1]
         unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
-    
-        if len(unique_indices) > 20:        
+        number_unique_indices = len(unique_indices)
+        
+        if number_unique_indices > threshold_number_of_unique_time_steps:        
             AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives,
                                     grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+            if print_verbose:
+                print("CDO:  Continue-to-discrete INSTANTANEOUS object is created.")
+                print("CDO:  Number of different time steps: %i" % (number_unique_indices,) )                                    
+            
         else:
             AQcomp = cls.AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives,
-                                    grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) 
-    
+                                    grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+            if print_verbose:                        
+                print("CDO:  Continue-to-discrete BATCH object is created.")
+                print("CDO:  Number of different time steps: %i" % (number_unique_indices,) )
+                print("CDO:  Total size if its data: %i" % (AQcomp.total_size_of_data,) )                           
+                                       
         return AQcomp
         
     @staticmethod
@@ -1484,26 +2093,24 @@ class ContDescrStateSpace(DescreteStateSpace):
             If dt is iterable, then A and Q_noise are computed for every
             unique dt            
         
-        compute_derivatives
+        compute_derivatives: boolean
+            Whether derivatives of A and Q are required.
         
-        param_num: int
-            Number of parameters
+        grad_params_no: int
+            Number of gradient parameters
         
-        P_inf
+        P_inf: (state_dim. state_dim) matrix
         
         dP_inf
         
-        dF
-        
-        dQc
-        
-        dR
-        
-        dm_prev: 
-            Mean derivatives from the previos step
-        dP_prev:
-            Variance derivatives from the previous step
+        dF: 3D array
+            Derivatives of F
         
+        dQc: 3D array
+            Derivatives of Qc
+            
+        dR: 3D array
+            Derivatives of R
         
         Output:
         --------------
@@ -1522,16 +2129,20 @@ class ContDescrStateSpace(DescreteStateSpace):
             This reconstruct_index contain indices of the original dt's 
             in the uninue dt sequence. A[:,:, reconstruct_index[5]]
             is matrix A of 6-th(indices start from zero) dt in the original 
-            sequence. 
+            sequence.
+        dA: 3D array
+            Derivatives of A
+        
+        dQ: 3D array
+            Derivatives of Q
         """         
-         
         # Dimensionality
         n = F.shape[0]
         
         if not isinstance(dt, collections.Iterable): # not iterable, scalar
  
             # The dynamical model
-            A  = linalg.expm(F*dt) 
+            A  = matrix_exponent(F*dt) 
             if np.any( np.isnan(A)):
                 A  = linalg.expm3(F*dt) 
             
@@ -1540,9 +2151,7 @@ class ContDescrStateSpace(DescreteStateSpace):
             Phi[:n,:n] = F
             Phi[:n,n:] = L.dot(Qc).dot(L.T)
             Phi[n:,n:] = -F.T
-            AB = linalg.expm(Phi*dt)
-            if np.any(np.isnan(AB)):
-                AB = linalg.expm3(Phi*dt)
+            AB = matrix_exponent(Phi*dt)
             AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
             
             Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
@@ -1563,7 +2172,7 @@ class ContDescrStateSpace(DescreteStateSpace):
                     FF[n:,n:] = F
 
                     # Solve the matrix exponential
-                    AA[:,:,p] = linalg.expm(FF*dt)
+                    AA[:,:,p] = matrix_exponent(FF*dt)
 
                     # Solve the differential equation
                     #foo         = AA[:,:,p].dot(np.vstack([m, dm[:,p]]))
@@ -1582,15 +2191,12 @@ class ContDescrStateSpace(DescreteStateSpace):
                     dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \
                        - A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times
     
-                    # The derivatives of P
-                    #dP[:,:,p] = dA.dot(P).dot(A.T) + A.dot(dP_prev[:,:,p]).dot(A.T) \
-                    #   + A.dot(P).dot(dA.T) + dQ
-                    #Q_noise = Q_noise_2
             else: 
               dA = None
               dQ = None
               Q_noise = Q_noise_1
-
+              
+            #Q_noise = Q_noise_1
 
             # Return
             return A, Q_noise,None, dA, dQ
@@ -1608,11 +2214,169 @@ class ContDescrStateSpace(DescreteStateSpace):
             if compute_derivatives:
                 dA = np.empty((n,n,grad_params_no,dt_unique.shape[0]))                 
                 dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0])) 
-                
+            else:
+                dA = None
+                dQ = None
             # Call this function for each unique dt
             for j in range(0,dt_unique.shape[0]):
-                A[:,:,j], Q_noise[:,:,j], tmp1, dA[:,:,:,j], dQ[:,:,:,j] = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j],
+                A[:,:,j], Q_noise[:,:,j], tmp1, dA_t, dQ_t = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j],
                     compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc)
+                if compute_derivatives:
+                    dA[:,:,:,j] = dA_t     
+                    dQ[:,:,:,j] = dQ_t
 
             # Return
-            return A, Q_noise, reconstruct_index, dA, dQ
\ No newline at end of file
+            return A, Q_noise, reconstruct_index, dA, dQ
+
+def matrix_exponent(M):
+    """
+    The function computes matrix exponent and handles some special cases
+    """
+
+    if (M.shape[0] == 1): # 1*1 matrix
+        Mexp = np.array( ((np.exp(M[0,0]) ,),) )  
+    
+    else: # matrix is larger
+        Mexp = linalg.expm(M)      
+        if np.any(np.isnan(Mexp)):
+            Mexp = linalg.expm3(M)               
+            
+    return Mexp
+
+def balance_matrix(A):
+    """
+    Balance matrix, i.e. finds such similarity transformation of the original
+    matrix A:  A = T * bA * T^{-1}, where norms of columns of bA and of rows of bA
+    are as close as possible. It is usually used as a preprocessing step in
+    eigenvalue calculation routine. It is useful also for State-Space models.
+    
+    See also:
+        [1] Beresford N. Parlett and Christian Reinsch (1969). Balancing 
+            a matrix for calculation of eigenvalues and eigenvectors. 
+            Numerische Mathematik, 13(4): 293-304.
+    
+    Input:
+    ----------------------
+    A: square matrix
+        Matrix to be balanced
+    
+    Output:
+    ----------------
+        bA: matrix
+            Balanced matrix
+            
+        T: matrix
+            Left part of the similarity transformation
+            
+        T_inv: matrix 
+            Right part of the similarity transformation.
+    """
+    
+    if len(A.shape) != 2 or (A.shape[0] != A.shape[1]):
+        raise ValueError('balance_matrix: Expecting square matrix')    
+    
+    N = A.shape[0] # matrix size    
+    
+    gebal = sp.linalg.lapack.get_lapack_funcs('gebal',(A,))
+    bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True,overwrite_a=False)
+    if info < 0:
+        raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info)
+    #import pdb; pdb.set_trace()                                                
+    # calculating the similarity transforamtion:
+    def perm_matr(D, c1,c2):
+        """
+        Function creates the permutation matrix which swaps columns c1 and c2.
+        
+        Input:
+        --------------
+        D: int
+            Size of the permutation matrix
+        c1: int
+            Column 1. Numeration starts from 1...D
+        c2: int
+            Column 2. Numeration starts from 1...D
+        """
+        i1 = c1-1; i2 = c2-1 # indices
+        P = np.eye(D); 
+        P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements                    
+        P[i1,i2] = 1.0; P[i2,i1] = 1.0
+
+        return P
+                                
+    P = np.eye(N) # permutation matrix                                                
+    if (hi != N-1): # there are row permutations
+        for k in range(N-1,hi,-1):                                                     
+            new_perm = perm_matr(N, k+1, pivscale[k])                                            
+            P = np.dot(P,new_perm)
+    if (lo != 0):
+        for k in range(0,lo,1):
+            new_perm = perm_matr(N, k+1, pivscale[k])
+            P = np.dot(P,new_perm)
+    D = pivscale.copy()
+    D[0:lo] = 1.0; D[hi+1:N] = 1.0 # thesee scaling factors must be set to one.
+    #D = np.diag(D) # make a diagonal matrix
+    
+    T = np.dot(P,np.diag(D)) # similarity transformation in question
+    T_inv = np.dot(np.diag(D**(-1)),P.T)
+    
+    #print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) )
+    return bA.copy(), T, T_inv
+    
+def balance_ss_model(F,L,Qc,H,Pinf,dF=None,dQc=None,dPinf=None):
+    """
+    Balances State-Space model for more numerical stability
+
+    This is based on the following:
+   
+     dx/dt = F x + L w
+         y = H x
+  
+     Let T z = x, which gives
+   
+     dz/dt = inv(T) F T z + inv(T) L w
+         y = H T z    
+    """
+    
+    bF,T,T_inv = balance_matrix(F)
+    
+    bL = np.dot( T_inv, L)
+    bQc = Qc # not affected
+    
+    bH = np.dot(H, T)
+    
+    bPinf = np.dot(T_inv, np.dot(Pinf, T_inv.T))
+    
+    #import pdb; pdb.set_trace() 
+#    LL,islower = linalg.cho_factor(Pinf)
+#    inds = np.triu_indices(Pinf.shape[0],k=1)
+#    LL[inds] = 0.0
+#    bLL = np.dot(T_inv, LL)
+#    bPinf = np.dot( bLL, bLL.T)
+
+    
+    if dF is not None:
+        bdF = np.zeros(dF.shape)
+        for i in range(dF.shape[2]):
+            bdF[:,:,i] = np.dot( T_inv, np.dot( dF[:,:,i], T))
+            
+    else:
+        bdF = None
+    
+    if dPinf is not None:
+        bdPinf = np.zeros(dPinf.shape)
+        for i in range(dPinf.shape[2]):
+            bdPinf[:,:,i] = np.dot( T_inv, np.dot( dPinf[:,:,i], T_inv.T))
+            
+#            LL,islower = linalg.cho_factor(dPinf[:,:,i])
+#            inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1)
+#            LL[inds] = 0.0
+#            bLL = np.dot(T_inv, LL)
+#            bdPinf[:,:,i] = np.dot( bLL, bLL.T)
+            
+            
+    else:
+        bdPinf = None
+        
+    bdQc = dQc # not affected
+    
+    return bF, bL, bQc, bH, bPinf, bdF, bdQc, bdPinf,T
\ No newline at end of file
diff --git a/GPy/models/state_space_new.py b/GPy/models/state_space_new.py
index d3d17e66..ca77f6b9 100644
--- a/GPy/models/state_space_new.py
+++ b/GPy/models/state_space_new.py
@@ -16,6 +16,7 @@
 
 import numpy as np
 from scipy import linalg
+from scipy import stats
 from ..core import Model
 from .. import kern
 from GPy.plotting.matplot_dep.models_plots import gpplot
@@ -26,17 +27,18 @@ from GPy.core.parameterization.param import Param
 
 import GPy
 from .. import likelihoods
-import GPy.models.state_space_main as ssm
-#import state_space_main as ssm
-reload(ssm)
-print ssm.__file__
+
+from . import state_space_main as ssm
 
 class StateSpace(Model):
     def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
         super(StateSpace, self).__init__(name=name)
         self.num_data, input_dim = X.shape
         assert input_dim==1, "State space methods for time only"
-        num_data_Y, self.output_dim = Y.shape
+        if len(Y.shape) ==2: # TODO make this nice
+            num_data_Y, self.output_dim = Y.shape
+        elif len(Y.shape) ==3:
+            num_data_Y, self.output_dim, ts_number = Y.shape
         assert num_data_Y == self.num_data, "X and Y data don't match"
         assert self.output_dim == 1, "State space methods for single outputs only"
 
@@ -68,8 +70,8 @@ class StateSpace(Model):
         """
         
         # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde()
-
+        (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
+        
         # necessary parameters
         measurement_dim = self.output_dim
         grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@@ -78,17 +80,19 @@ class StateSpace(Model):
         dF    = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
         dQc   = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
         dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
-
+        dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
+        
         # Assign the values for the kernel function
         dF[:,:,:-1] = dFt
         dQc[:,:,:-1] = dQct
         dP_inf[:,:,:-1] = dP_inft
-
+        dP0[:,:,:-1] = dP0t
+        
         # The sigma2 derivative
         dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
         dR[:,:,-1] = np.eye(measurement_dim)
 
-
+        #(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
         # Use the Kalman filter to evaluate the likelihood
         
         grad_calc_params = {}
@@ -96,26 +100,53 @@ class StateSpace(Model):
         grad_calc_params['dF'] = dF
         grad_calc_params['dQc'] = dQc
         grad_calc_params['dR'] = dR
+        grad_calc_params['dP_init'] = dP0
         
         (filter_means, filter_covs, log_likelihood, 
-         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
-                                      P_init=None, calc_log_likelihood=True, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
+                                      float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
+                                      P_init=P0, calc_log_likelihood=True, 
                                       calc_grad_log_likelihood=True, 
                                       grad_params_no=grad_params_no, 
                                       grad_calc_params=grad_calc_params)
-                      
-        self._log_marginal_likelihood = log_likelihood
-        #gradients  = self.compute_gradients()
-        self.likelihood.update_gradients(grad_log_likelihood[-1,0])
         
-        self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
+        grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
+        grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
+        self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
+        self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
+        
+        self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
         
     def log_likelihood(self):
         return self._log_marginal_likelihood
 
-    def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
+    def _raw_predict(self, Xnew, Ynew=None, filteronly=False):
         """
-        Inner function. It is called only from inside this class
+        Performs the actual prediction for new X points.
+        Inner function. It is called only from inside this class.
+        
+        Input:
+        ---------------------
+        
+        Xnews: vector or (n_points,1) matrix
+            New time points where to evaluate predictions.
+            
+        Ynews: (n_train_points, ts_no) matrix
+            This matrix can substitude the original training points (in order 
+            to use only the parameters of the model).
+            
+        filteronly: bool
+            Use only Kalman Filter for prediction. In this case the output does
+            not coincide with corresponding Gaussian process.
+            
+        Output:
+        --------------------
+        
+        m: vector
+            Mean prediction
+        
+        V: vector
+            Variance in every point
         """
         
         # Set defaults
@@ -128,41 +159,44 @@ class StateSpace(Model):
 
         # Sort the matrix (save the order)
         _, return_index, return_inverse = np.unique(X,True,True)
-        X = X[return_index]
+        X = X[return_index] # TODO they are not used
         Y = Y[return_index]
 
         # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
+        (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
         state_dim = F.shape[0]        
         
+        #import pdb; pdb.set_trace()
+        #Y = self.Y[:, 0,0]
         # Run the Kalman filter
-        (M, P, tmp_log_likelihood, 
-         tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
-                                      P_init=None, calc_log_likelihood=False, 
-                                      calc_grad_log_likelihood=False) 
-                                      
+        #import pdb; pdb.set_trace()
+        (M, P, log_likelihood, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
+                                      F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
+                                      P_init=P0, calc_log_likelihood=False, 
+                                      calc_grad_log_likelihood=False)                              
         # Run the Rauch-Tung-Striebel smoother
         if not filteronly:
             (M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P, 
                                 AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
         
         # remove initial values        
-        M = M[:,1:]
-        P = P[:,:,1:]        
+        M = M[1:,:]
+        P = P[1:,:,:]        
         
         # Put the data back in the original order
-        M = M[:,return_inverse]
-        P = P[:,:,return_inverse]
+        M = M[return_inverse,:]
+        P = P[return_inverse,:,:]
 
         # Only return the values for Xnew
-        M = M[:,self.num_data:]
-        P = P[:,:,self.num_data:]
+        M = M[self.num_data:,:]
+        P = P[self.num_data:,:,:]
 
         # Calculate the mean and variance
-        m = H.dot(M).T
-        V = np.tensordot(H[0],P,(0,0))
-        V = np.tensordot(V,H[0],(0,0))
-        V = V[:,None]
+        m = np.dot(M,H.T)
+        V = np.einsum('ij,ajk,kl', H, P, H.T)
+        
+        V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
 
         # Return the posterior of the state
         return (m, V)
@@ -170,10 +204,10 @@ class StateSpace(Model):
     def predict(self, Xnew, filteronly=False):
 
         # Run the Kalman filter to get the state
-        (m, V) = self._predict_raw(Xnew,filteronly=filteronly)
+        (m, V) = self._raw_predict(Xnew,filteronly=filteronly)
 
         # Add the noise variance to the state variance
-        V += self.sigma2
+        V += float(self.Gaussian_noise.variance)
 
         # Lower and upper bounds
         lower = m - 2*np.sqrt(V)
@@ -181,143 +215,149 @@ class StateSpace(Model):
 
         # Return mean and variance
         return (m, V, lower, upper)
-
-    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
-            ax=None, resolution=None, plot_raw=False, plot_filter=False,
-            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
-
-        # Deal with optional parameters
-        if ax is None:
-            fig = pb.figure(num=fignum)
-            ax = fig.add_subplot(111)
-
-        # Define the frame on which to plot
-        resolution = resolution or 200
-        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-
-        # Make a prediction on the frame and plot it
-        if plot_raw:
-            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
-            lower = m - 2*np.sqrt(v)
-            upper = m + 2*np.sqrt(v)
-            Y = self.Y
-        else:
-            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
-            Y = self.Y
-
-        # Plot the values
-        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
-        ax.plot(self.X, self.Y, 'kx', mew=1.5)
-
-        # Optionally plot some samples
-        if samples:
-            if plot_raw:
-                Ysim = self.posterior_samples_f(Xgrid, samples)
-            else:
-                Ysim = self.posterior_samples(Xgrid, samples)
-            for yi in Ysim.T:
-                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
-
-        # Set the limits of the plot to some sensible values
-        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
-        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-        ax.set_xlim(xmin, xmax)
-        ax.set_ylim(ymin, ymax)
-
-    def prior_samples_f(self,X,size=10):
-
-        # Sort the matrix (save the order)
-        (_, return_index, return_inverse) = np.unique(X,True,True)
-        X = X[return_index]
-
-        # Get the model matrices from the kernel
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
-
-        # Allocate space for results
-        Y = np.empty((size,X.shape[0]))
-
-        # Simulate random draws
-        #for j in range(0,size):
-        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
-        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
-
-        # Only observations
-        Y = np.tensordot(H[0],Y,(0,0))
-
-        # Reorder simulated values
-        Y = Y[:,return_inverse]
-
-        # Return trajectory
-        return Y.T
-
-    def posterior_samples_f(self,X,size=10):
-
-        # Sort the matrix (save the order)
-        (_, return_index, return_inverse) = np.unique(X,True,True)
-        X = X[return_index]
-
-        # Get the model matrices from the kernel
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
-
-        # Run smoother on original data
-        (m,V) = self.predict_raw(X)
-
-        # Simulate random draws from the GP prior
-        y = self.prior_samples_f(np.vstack((self.X, X)),size)
-
-        # Allocate space for sample trajectories
-        Y = np.empty((size,X.shape[0]))
-
-        # Run the RTS smoother on each of these values
-        for j in range(0,size):
-            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
-            (m2,V2) = self.predict_raw(X,Ynew=yobs)
-            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
-
-        # Reorder simulated values
-        Y = Y[:,return_inverse]
-
-        # Return posterior sample trajectories
-        return Y.T
-
-    def posterior_samples(self, X, size=10):
-
-        # Make samples of f
-        Y = self.posterior_samples_f(X,size)
-
-        # Add noise
-        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
-
-        # Return trajectory
-        return Y
+        
+    def predict_quantiles(self, Xnew, quantiles=(2.5, 97.5)):
+        mu, var = self._raw_predict(Xnew)
+        #import pdb; pdb.set_trace()
+        return  [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
         
         
-    def simulate(self,F,L,Qc,Pinf,X,size=1):
-        # Simulate a trajectory using the state space model
-
-        # Allocate space for results
-        f = np.zeros((F.shape[0],size,X.shape[1]))
-
-        # Initial state
-        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
-
-        # Time step lengths
-        dt = np.empty(X.shape)
-        dt[:,0] = X[:,1]-X[:,0]
-        dt[:,1:] = np.diff(X)
-
-        # Solve the LTI SDE for these time steps
-        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
-
-        # Sweep through remaining time points
-        for k in range(1,X.shape[1]):
-
-            # Form discrete-time model
-            A = As[:,:,index[1-k]]
-            Q = Qs[:,:,index[1-k]]
-
-            # Draw the state
-            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
-
-        # Return values
-        return f
+#    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
+#            ax=None, resolution=None, plot_raw=False, plot_filter=False,
+#            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+#
+#        # Deal with optional parameters
+#        if ax is None:
+#            fig = pb.figure(num=fignum)
+#            ax = fig.add_subplot(111)
+#
+#        # Define the frame on which to plot
+#        resolution = resolution or 200
+#        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+#
+#        # Make a prediction on the frame and plot it
+#        if plot_raw:
+#            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
+#            lower = m - 2*np.sqrt(v)
+#            upper = m + 2*np.sqrt(v)
+#            Y = self.Y
+#        else:
+#            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
+#            Y = self.Y
+#
+#        # Plot the values
+#        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
+#        ax.plot(self.X, self.Y, 'kx', mew=1.5)
+#
+#        # Optionally plot some samples
+#        if samples:
+#            if plot_raw:
+#                Ysim = self.posterior_samples_f(Xgrid, samples)
+#            else:
+#                Ysim = self.posterior_samples(Xgrid, samples)
+#            for yi in Ysim.T:
+#                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
+#
+#        # Set the limits of the plot to some sensible values
+#        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
+#        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+#        ax.set_xlim(xmin, xmax)
+#        ax.set_ylim(ymin, ymax)
+#
+#    def prior_samples_f(self,X,size=10):
+#
+#        # Sort the matrix (save the order)
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        X = X[return_index]
+#
+#        # Get the model matrices from the kernel
+#        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#
+#        # Allocate space for results
+#        Y = np.empty((size,X.shape[0]))
+#
+#        # Simulate random draws
+#        #for j in range(0,size):
+#        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
+#        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
+#
+#        # Only observations
+#        Y = np.tensordot(H[0],Y,(0,0))
+#
+#        # Reorder simulated values
+#        Y = Y[:,return_inverse]
+#
+#        # Return trajectory
+#        return Y.T
+#
+#    def posterior_samples_f(self,X,size=10):
+#
+#        # Sort the matrix (save the order)
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        X = X[return_index]
+#
+#        # Get the model matrices from the kernel
+#        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#
+#        # Run smoother on original data
+#        (m,V) = self.predict_raw(X)
+#
+#        # Simulate random draws from the GP prior
+#        y = self.prior_samples_f(np.vstack((self.X, X)),size)
+#
+#        # Allocate space for sample trajectories
+#        Y = np.empty((size,X.shape[0]))
+#
+#        # Run the RTS smoother on each of these values
+#        for j in range(0,size):
+#            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
+#            (m2,V2) = self.predict_raw(X,Ynew=yobs)
+#            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
+#
+#        # Reorder simulated values
+#        Y = Y[:,return_inverse]
+#
+#        # Return posterior sample trajectories
+#        return Y.T
+#
+#    def posterior_samples(self, X, size=10):
+#
+#        # Make samples of f
+#        Y = self.posterior_samples_f(X,size)
+#
+#        # Add noise
+#        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
+#
+#        # Return trajectory
+#        return Y
+#        
+#        
+#    def simulate(self,F,L,Qc,Pinf,X,size=1):
+#        # Simulate a trajectory using the state space model
+#
+#        # Allocate space for results
+#        f = np.zeros((F.shape[0],size,X.shape[1]))
+#
+#        # Initial state
+#        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
+#
+#        # Time step lengths
+#        dt = np.empty(X.shape)
+#        dt[:,0] = X[:,1]-X[:,0]
+#        dt[:,1:] = np.diff(X)
+#
+#        # Solve the LTI SDE for these time steps
+#        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
+#
+#        # Sweep through remaining time points
+#        for k in range(1,X.shape[1]):
+#
+#            # Form discrete-time model
+#            A = As[:,:,index[1-k]]
+#            Q = Qs[:,:,index[1-k]]
+#
+#            # Draw the state
+#            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
+#
+#        # Return values
+#        return f