STATE-SPACE: Recent modifications to state-space inference, including bug fixes in state-space kernels.

This commit is contained in:
Alex Grigorievskiy 2017-04-06 11:59:13 +03:00
parent 5ce5c5d8ce
commit dfe32266b6
6 changed files with 533 additions and 91 deletions

View file

@ -9,6 +9,7 @@ from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
import warnings
from scipy import special as special
@ -26,6 +27,38 @@ class sde_StdPeriodic(StdPeriodic):
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
"""
# TODO: write comment to the constructor arguments
def __init__(self, *args, **kwargs):
"""
Init constructor.
Two optional extra parameters are added in addition to the ones in
StdPeriodic kernel.
:param approx_order: approximation order for the RBF covariance. (Default 7)
:type approx_order: int
:param balance: Whether to balance this kernel separately. (Default False). Model has a separate parameter for balancing.
:type balance: bool
"""
#import pdb; pdb.set_trace()
if 'approx_order' in kwargs:
self.approx_order = kwargs.get('approx_order')
del kwargs['approx_order']
else:
self.approx_order = 7
if 'balance' in kwargs:
self.balance = bool( kwargs.get('balance') )
del kwargs['balance']
else:
self.balance = False
super(sde_StdPeriodic, self).__init__(*args, **kwargs)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
@ -38,40 +71,47 @@ class sde_StdPeriodic(StdPeriodic):
def sde(self):
"""
Return the state space representation of the covariance.
Return the state space representation of the standard periodic covariance.
! Note: one must constrain lengthscale not to drop below 0.25.
After this bessel functions of the first kind grows to very high.
! Note: one must constrain lengthscale not to drop below 0.2. (independently of approximation order)
After this Bessel functions of the first kind become NaN. Rescaling
time variable might help.
! Note: one must keep wevelength also not very low. Because then
! Note: one must keep period also not very low. Because then
the gradients wrt wavelength become unstable.
However this might depend on the data. For test example with
300 data points the low limit is 0.15.
"""
#import pdb; pdb.set_trace()
# Params to use: (in that order)
#self.variance
#self.period
#self.lengthscale
N = 7 # approximation order
if self.approx_order is not None:
N = int(self.approx_order)
else:
N = 7 # approximation order
p_period = float(self.period)
p_lengthscale = 2*float(self.lengthscale)
p_variance = float(self.variance)
w0 = 2*np.pi/self.period # frequency
lengthscale = 2*self.lengthscale
w0 = 2*np.pi/p_period # frequency
# lengthscale is multiplied by 2 because of different definition of lengthscale
[q2,dq2l] = seriescoeff(N,lengthscale,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
[q2,dq2l] = seriescoeff(N, p_lengthscale, p_variance)
dq2l = 2*dq2l
dq2l = 2*dq2l # This is because the lengthscale is multiplied by 2.
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
eps = 1e-12
if np.any( np.isfinite(q2) == False) or np.any( np.abs(q2) > 1.0/eps) or np.any( np.abs(q2) < eps):
warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in q2 :".format(eps) + q2.__format__("") )
if np.any( np.isfinite(dq2l) == False) or np.any( np.abs(dq2l) > 1.0/eps) or np.any( np.abs(dq2l) < eps):
warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in dq2l :".format(eps) + q2.__format__("") )
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
@ -88,10 +128,10 @@ class sde_StdPeriodic(StdPeriodic):
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
dP_inf[:,:,0] = P_inf / p_variance
# Derivatives self.period
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period );
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / p_period );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
@ -101,6 +141,11 @@ class sde_StdPeriodic(StdPeriodic):
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
if self.balance:
# Benefits of this are not very sound.
import GPy.models.state_space_main as ssm
(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf,dP0) = ssm.balance_ss_model(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0 )
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
@ -164,9 +209,9 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
#print(coeffs)
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
@ -177,4 +222,4 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
return coeffs.squeeze(), coeffs_dl.squeeze()

View file

@ -11,6 +11,7 @@ from .stationary import RatQuad
import numpy as np
import scipy as sp
import warnings
class sde_RBF(RBF):
"""
@ -25,6 +26,37 @@ class sde_RBF(RBF):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, *args, **kwargs):
"""
Init constructor.
Two optional extra parameters are added in addition to the ones in
RBF kernel.
:param approx_order: approximation order for the RBF covariance. (Default 10)
:type approx_order: int
:param balance: Whether to balance this kernel separately. (Default True). Model has a separate parameter for balancing.
:type balance: bool
"""
if 'balance' in kwargs:
self.balance = bool( kwargs.get('balance') )
del kwargs['balance']
else:
self.balance = True
if 'approx_order' in kwargs:
self.approx_order = kwargs.get('approx_order')
del kwargs['approx_order']
else:
self.approx_order = 6
super(sde_RBF, self).__init__(*args, **kwargs)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
@ -37,23 +69,43 @@ class sde_RBF(RBF):
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10# approximation order ( number of terms in exponent series expansion)
Note! For Sparse GP inference too small or too high values of lengthscale
lead to instabilities. This is because Qc are too high or too low
and P_inf are not full rank. This effect depends on approximation order.
For N = 10. lengthscale must be in (0.8,8). For other N tests must be conducted.
N=6: (0.06,31)
Variance should be within reasonable bounds as well, but its dependence is linear.
The above facts do not take into account regularization.
"""
#import pdb; pdb.set_trace()
if self.approx_order is not None:
N = self.approx_order
else:
N = 10# approximation order ( number of terms in exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
p_lengthscale = float( self.lengthscale )
p_variance = float(self.variance)
kappa = 1.0/2.0/p_lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
Qc = np.array( ((p_variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) )
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
eps = 1e-12
if (float(Qc) > 1.0/eps) or (float(Qc) < eps):
warnings.warn("""sde_RBF kernel: the noise variance Qc is either very large or very small.
It influences conditioning of P_inf: {0:e}""".format(float(Qc)) )
pp1 = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp1[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
pp = sp.poly1d(pp1)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
@ -69,6 +121,7 @@ class sde_RBF(RBF):
H[0,0] = 1
# Infinite covariance:
#import pdb; pdb.set_trace()
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
@ -79,17 +132,17 @@ class sde_RBF(RBF):
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dFlengthscale[-1,:] = -aa[-1:0:-1]/p_lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
dQcvariance = Qc/p_variance
dQclengthscale = np.array(( (p_variance*np.sqrt(2*np.pi)*fn*2**N*p_lengthscale**(-2*N)*(1-2*N),),))
dPinf_variance = Pinf/self.variance
dPinf_variance = Pinf/p_variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dPinf_lengthscale = -1/p_lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
@ -101,10 +154,11 @@ class sde_RBF(RBF):
P0 = Pinf.copy()
dP0 = dPinf.copy()
# Benefits of this are not very sound. Helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
if self.balance:
# Benefits of this are not very sound. Helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -432,6 +432,8 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.Q_square_root_dict = {}
self.Q_inverse_dict = {}
self.last_k = 0
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
@ -478,18 +480,53 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
if matrix_index in self.Q_square_root_dict:
square_root = self.Q_square_root_dict[matrix_index]
else:
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
if matrix_index not in self.Q_svd_dict
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
else:
U,S,Vh = self.Q_svd_dict[matrix_index]
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
self.Q_suqare_root_dict[matrix_index] = square_root
return square_root
cpdef Q_inverse(self, int k, float jitter=0.0):
"""
Square root of the noise matrix Q
"""
cdef int matrix_index = <int>self.reconstruct_indices[k]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_inverse_dict:
Q_inverse = self.Q_inverse_dict[matrix_index]
else:
if matrix_index not in self.Q_svd_dict
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
else:
U,S,Vh = self.Q_svd_dict[matrix_index]
Q_inverse = Q_inverse = np.dot( Vh.T * ( 1.0/(S + jitter)) , U.T )
self.Q_inverse_dict[matrix_index] = Q_inverse
return Q_inverse
# def return_last(self):
# """
# Function returns last available matrices.

View file

@ -12,6 +12,8 @@ import numpy as np
import scipy as sp
import scipy.linalg as linalg
import warnings
try:
from . import state_space_setup
setup_available = True
@ -41,6 +43,10 @@ if print_verbose:
else:
print("state_space: cython is NOT used")
# When debugging external module can set some value to this variable (e.g.)
# 'model' and in this module this variable can be seen.
tmp_buffer = None
class Dynamic_Callables_Python(object):
@ -227,7 +233,7 @@ class R_handling_Python(Measurement_Callables_Class):
self.R_square_root = {}
def Rk(self, k):
return self.R[:, :, self.index[self.R_time_var_index, k]]
return self.R[:, :, int(self.index[self.R_time_var_index, k])]
def dRk(self, k):
if self.dR is None:
@ -305,7 +311,7 @@ class Std_Measurement_Callables_Python(R_handling_Class):
P: parameter for Jacobian, usually covariance matrix.
"""
return self.H[:, :, self.index[self.H_time_var_index, k]]
return self.H[:, :, int(self.index[self.H_time_var_index, k])]
def dHk(self, k):
if self.dH is None:
@ -2303,6 +2309,8 @@ class ContDescrStateSpace(DescreteStateSpace):
self.v_dQk = None
self.square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
# !!!Print statistics! Which object is created
def f_a(self, k,m,A):
@ -2337,7 +2345,10 @@ class ContDescrStateSpace(DescreteStateSpace):
self.v_Qk = v_Qk
self.v_dAk = v_dAk
self.v_dQk = v_dQk
self.Q_square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
else:
v_Ak = self.v_Ak
v_Qk = self.v_Qk
@ -2359,8 +2370,11 @@ class ContDescrStateSpace(DescreteStateSpace):
self.last_k = 0
self.last_k_computed = False
self.compute_derivatives = compute_derivatives
self.Q_square_root_computed = False
self.Q_square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
self.Q_eigen_computed = False
return self
def Ak(self,k,m,P):
@ -2381,12 +2395,19 @@ class ContDescrStateSpace(DescreteStateSpace):
def Q_srk(self,k):
"""
Check square root, maybe rewriting for Spectral decomposition is needed.
Square root of the noise matrix Q
"""
if ((self.last_k == k) and (self.last_k_computed == True)):
if not self.Q_square_root_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
if not self.Q_svd_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
self.Q_svd = (U, S, Vh)
self.Q_svd_computed = True
else:
(U, S, Vh) = self.Q_svd
square_root = U * np.sqrt(S)
self.square_root_computed = True
self.Q_square_root = square_root
@ -2397,6 +2418,55 @@ class ContDescrStateSpace(DescreteStateSpace):
return square_root
def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
"""
Function inverts Q matrix and regularizes the inverse.
Regularization is useful when original matrix is badly conditioned.
Function is currently used only in SparseGP code.
Inputs:
------------------------------
k: int
Iteration number.
p_largest_cond_num: float
Largest condition value for the inverted matrix. If cond. number is smaller than that
no regularization happens.
regularization_type: 1 or 2
Regularization type.
regularization_type: int (1 or 2)
type 1: 1/(S[k] + regularizer) regularizer is computed
type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
"""
#import pdb; pdb.set_trace()
if ((self.last_k == k) and (self.last_k_computed == True)):
if not self.Q_inverse_computed:
if not self.Q_svd_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
self.Q_svd = (U, S, Vh)
self.Q_svd_computed = True
else:
(U, S, Vh) = self.Q_svd
Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
self.Q_inverse_computed = True
self.Q_inverse_r = Q_inverse_r
else:
Q_inverse_r = self.Q_inverse_r
else:
raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
This requires some programming""")
return Q_inverse_r
def return_last(self):
"""
Function returns last computed matrices.
@ -2463,6 +2533,9 @@ class ContDescrStateSpace(DescreteStateSpace):
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.Q_square_root_dict = {}
self.Q_inverse_dict = {}
self.last_k = None
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
@ -2503,17 +2576,66 @@ class ContDescrStateSpace(DescreteStateSpace):
Square root of the noise matrix Q
"""
matrix_index = self.reconstruct_indices[k]
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
if matrix_index in self.Q_square_root_dict:
square_root = self.Q_square_root_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
if matrix_index in self.Q_svd_dict:
(U, S, Vh) = self.Q_svd_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
self.Q_square_root_dict[matrix_index] = square_root
return square_root
def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
"""
Function inverts Q matrix and regularizes the inverse.
Regularization is useful when original matrix is badly conditioned.
Function is currently used only in SparseGP code.
Inputs:
------------------------------
k: int
Iteration number.
p_largest_cond_num: float
Largest condition value for the inverted matrix. If cond. number is smaller than that
no regularization happens.
regularization_type: 1 or 2
Regularization type.
regularization_type: int (1 or 2)
type 1: 1/(S[k] + regularizer) regularizer is computed
type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
"""
#import pdb; pdb.set_trace()
matrix_index = self.reconstruct_indices[k]
if matrix_index in self.Q_inverse_dict:
Q_inverse_r = self.Q_inverse_dict[matrix_index]
else:
if matrix_index in self.Q_svd_dict:
(U, S, Vh) = self.Q_svd_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
self.Q_inverse_dict[matrix_index] = Q_inverse_r
return Q_inverse_r
def return_last(self):
"""
Function returns last available matrices.
@ -3073,7 +3195,8 @@ class ContDescrStateSpace(DescreteStateSpace):
@classmethod
def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
grad_params_no=None,
P_inf=None, dP_inf=None, dF = None, dQc=None):
P_inf=None, dP_inf=None, dF = None, dQc=None,
dt0=None):
"""
Function return the object which is used in Kalman filter and/or
smoother to obtain matrices A, Q and their derivatives for discrete model
@ -3110,7 +3233,14 @@ class ContDescrStateSpace(DescreteStateSpace):
threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
dt = np.empty((X.shape[0],))
dt[1:] = np.diff(X[:,0],axis=0)
dt[0] = 0#dt[1]
if dt0 is None:
dt[0] = 0#dt[1]
else:
if isinstance(dt0,str):
dt = dt[1:]
else:
dt[0] = dt0
unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
number_unique_indices = len(unique_indices)
@ -3161,6 +3291,9 @@ class ContDescrStateSpace(DescreteStateSpace):
x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1})
TODO: this function can be redone to "preprocess dataset", when
close time points are handled properly (with rounding parameter) and
values are averaged accordingly.
Input:
--------------
@ -3222,11 +3355,9 @@ class ContDescrStateSpace(DescreteStateSpace):
n = F.shape[0]
if not isinstance(dt, collections.Iterable): # not iterable, scalar
#import pdb; pdb.set_trace()
# The dynamical model
A = matrix_exponent(F*dt)
if np.any( np.isnan(A)):
A = linalg.expm3(F*dt)
# The covariance matrix Q by matrix fraction decomposition ->
Phi = np.zeros((2*n,2*n))
@ -3265,15 +3396,17 @@ class ContDescrStateSpace(DescreteStateSpace):
# The discrete-time dynamical model*
if p==0:
A = AA[:n,:n,p]
Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
Q_noise = Q_noise_2
Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T)
Q_noise = Q_noise_3
#PP = A.dot(P).dot(A.T) + Q_noise_2
# The derivatives of A and Q
dA[:,:,p] = AA[n:,:n,p]
dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \
- A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times
tmp = dA[:,:,p].dot(P_inf).dot(A.T)
dQ[:,:,p] = dP_inf[:,:,p] - tmp \
- A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
else:
dA = None
dQ = None
@ -3283,6 +3416,10 @@ class ContDescrStateSpace(DescreteStateSpace):
#Q_noise = Q_noise_1
# Return
#import pdb; pdb.set_trace()
#if dt != 0:
# Q_noise = Q_noise + np.eye(Q_noise.shape[0])*1e-8
Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
return A, Q_noise,None, dA, dQ
else: # iterable, array
@ -3486,4 +3623,124 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
# (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0
#def psd_matrix_inverse(k,Q, U=None,S=None, p_largest_cond_num=None, regularization_type=2):
# """
# Function inverts positive definite matrix and regularizes the inverse.
# Regularization is useful when original matrix is badly conditioned.
# Function is currently used only in SparseGP code.
#
# Inputs:
# ------------------------------
# k: int
# Iteration number. Used for information only. Value -1 corresponds to P_inf_inv.
#
# Q: matrix
# To be inverted
#
# U,S: matrix. vector
# SVD components of Q
#
# p_largest_cond_num: float
# Largest condition value for the inverted matrix. If cond. number is smaller than that
# no regularization happen.
#
# regularization_type: 1 or 2
# Regularization type.
# """
# #import pdb; pdb.set_trace()
## if (k == 0) or (k == -1): # -1 - P_inf_inv computation
## import pdb; pdb.set_trace()
#
# if p_largest_cond_num is None:
# raise ValueError("psd_matrix_inverse: None p_largest_cond_num")
#
# if U is None or S is None:
# (U, S, Vh) = sp.linalg.svd( Q, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
# if S[0] < (1e-4):
# #import pdb; pdb.set_trace()
# warnings.warn("""state_space_main psd_matrix_inverse: largest singular value is too small {0:e}.
# condition number is {1:e} Maybe something is wrong
# """.format(S[0], S[0]/S[-1]))
# S = S + (1e-4 - S[0]) # make the S[0] at least 1e-4
#
# current_conditional_number = S[0]/S[-1]
# if (current_conditional_number > p_largest_cond_num):
# if (regularization_type == 1):
# regularizer = S[0] / p_largest_cond_num
# # the second computation of SVD is done to compute more precisely singular
# # vectors of small singular values, since small singular values become large.
# # It is not very clear how this step is useful but test is here.
# (U, S, Vh) = sp.linalg.svd( Q + regularizer*np.eye(Q.shape[0]),
# full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
#
# Q_inverse_r = np.dot( U * 1.0/S , U.T ) # Assume Q_inv is positive definite
#
# # In this case, RBF kernel we get complx eigenvalues. Probably
# # for small eigenvalue corresponding eigenvectors are not very orthogonal.
# ##########Q_inverse = np.dot( Vh.T * ( 1.0/(S + regularizer)) , U.T )
# elif (regularization_type == 2):
#
# new_border_value = np.sqrt(current_conditional_number)/2
# if p_largest_cond_num >= new_border_value: # this type of regularization works
# regularizer = ( S[0] / p_largest_cond_num / 2.0 )**2
#
# Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
# else:
#
# better_curr_cond_num = new_border_value
# warnings.warn("""state_space_main psd_matrix_inverse: reg_type = 2 can't be done completely.
# Current conditional number {0:e} is reduced to {1:e} by reg_type = 1""".format(current_conditional_number, better_curr_cond_num))
#
# regularizer = S[0] / better_curr_cond_num
# # the second computation of SVD is done to compute more precisely singular
# # vectors of small singular values, since small singular values become large.
# # It is not very clear how this step is useful but test is here.
# (U, S, Vh) = sp.linalg.svd( Q + regularizer*np.eye(Q.shape[0]),
# full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
#
# regularizer = ( S[0] / p_largest_cond_num / 2.0 )**2
#
# Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
#
# assert regularizer*10 < S[0], "regularizer is not << S[0]"
# assert regularizer > 10*S[-1], "regularizer is not >> S[-1]"
#
## Old version ->
## lamda_star = np.sqrt(current_conditional_number)
## if 2*p_largest_cond_num >= lamda_star:
## lamda = current_conditional_number / 2 / p_largest_cond_num
##
## regularizer = (S[-1] * lamda)**2
##
## Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
## else:
## better_curr_cond_num = (2*p_largest_cond_num)**2 / 2 # division by 2 just in case here
## warnings.warn("""state_space_main psd_matrix_inverse: reg_type = 2 can't be done completely.
## Current conditional number {0:e} is reduced to {1:e} by reg_type = 1""".format(current_conditional_number, better_curr_cond_num))
##
## regularizer = S[0] / better_curr_cond_num
## # the second computation of SVD is done to compute more precisely singular
## # vectors of small singular values, since small singular values become large.
## # It is not very clear how this step is useful but test is here.
## (U, S, Vh) = sp.linalg.svd( Q + regularizer*np.eye(Q.shape[0]),
## full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
##
## lamda = better_curr_cond_num / 2 / p_largest_cond_num
##
## regularizer = (S[-1] * lamda)**2
##
## Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
##
## assert lamda > 10, "Some assumptions are incorrect if this is not satisfied."
## Old version <-
# else:
# raise ValueError("AQcompute_batch_Python:Q_inverse: Invalid regularization type")
#
# else:
# Q_inverse_r = np.dot( U * 1.0/S , U.T ) # Assume Q_inv is positive definite
# # When checking conditional number 2 times difference is ok.
# Q_inverse_r = 0.5*(Q_inverse_r + Q_inverse_r.T)
#
# return Q_inverse_r

View file

@ -23,7 +23,16 @@ from . import state_space_main as ssm
from . import state_space_setup as ss_setup
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, balance=False, name='StateSpace'):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1:
@ -51,6 +60,7 @@ class StateSpace(Model):
ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace()
self.balance = balance
global ssm
#from . import state_space_main as ssm
@ -58,8 +68,8 @@ class StateSpace(Model):
reload(ssm)
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
self.X = X[sort_index,:]
self.Y = Y[sort_index,:]
# Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var)
@ -86,11 +96,12 @@ class StateSpace(Model):
#np.set_printoptions(16)
#print(self.param_array)
#import pdb; pdb.set_trace()
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
#Qc = Qc + np.eye(Qc.shape[0]) * 1e-8
#import pdb; pdb.set_trace()
# necessary parameters
measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -112,8 +123,9 @@ class StateSpace(Model):
dR[:,:,-1] = np.eye(measurement_dim)
# Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
if self.balance:
(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
print("SSM parameters_changed balancing!")
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf
@ -125,7 +137,7 @@ class StateSpace(Model):
kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shapes of self.Y
# becomes 3D even though is must be 2D. The reason is undescovered.
# becomes 3D even though is must be 2D. The reason is undiscovered.
Y = self.Y
if self.ts_number is None:
Y.shape = (self.num_data,1)
@ -146,7 +158,7 @@ class StateSpace(Model):
if np.any( np.isfinite(grad_log_likelihood) == False):
#import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the grad_log_likelihood")
print("State-Space: NaN values in the grad_log_likelihood")
#print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
@ -159,7 +171,7 @@ class StateSpace(Model):
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, p_balance=False, **kw):
"""
Performs the actual prediction for new X points.
Inner function. It is called only from inside this class.
@ -178,6 +190,9 @@ class StateSpace(Model):
Use only Kalman Filter for prediction. In this case the output does
not coincide with corresponding Gaussian process.
balance: bool
Whether to balance or not the model as a whole
Output:
--------------------
@ -211,6 +226,11 @@ class StateSpace(Model):
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
# Balancing
if (p_balance==True):
(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
print("SSM _raw_predict balancing!")
#Y = self.Y[:, 0,0]
# Run the Kalman filter
#import pdb; pdb.set_trace()
@ -261,10 +281,23 @@ class StateSpace(Model):
# Return the posterior of the state
return (m, V)
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, balance=None, **kw):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
if balance is None:
p_balance = self.balance
else:
p_balance = balance
# Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
(m, V) = self._raw_predict(Xnew,filteronly=filteronly, p_balance=p_balance)
# Add the noise variance to the state variance
if include_likelihood:
@ -277,8 +310,22 @@ class StateSpace(Model):
# Return mean and variance
return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
mu, var = self._raw_predict(Xnew)
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), balance=None, **kw):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
if balance is None:
p_balance = self.balance
else:
p_balance = balance
mu, var = self._raw_predict(Xnew, p_balance=p_balance)
#import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]

View file

@ -91,12 +91,14 @@ class StateSpaceKernelsTests(np.testing.TestCase):
mean_compare_decimal=5, var_compare_decimal=5)
def test_RBF_kernel(self,):
#import pdb;pdb.set_trace()
np.random.seed(234) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,])
ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,], balance=True, approx_order=10)
gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
@ -375,7 +377,7 @@ if __name__ == "__main__":
print("Running state-space inference tests...")
unittest.main()
#tt = StateSpaceKernelsTests('test_periodic_kernel')
#tt = StateSpaceKernelsTests('test_RBF_kernel')
#import pdb; pdb.set_trace()
#tt.test_Matern32_kernel()
#tt.test_Matern52_kernel()