diff --git a/GPy/kern/_src/sde_brownian.py b/GPy/kern/_src/sde_brownian.py
deleted file mode 100644
index 55950143..00000000
--- a/GPy/kern/_src/sde_brownian.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Brownian motion covariance function with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-
-from .brownian import Brownian
-
-import numpy as np
-
-class sde_Brownian(Brownian):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Linear kernel:
-
-    .. math::
-
-       k(x,y) = \sigma^2 min(x,y)
-
-    """
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression
-        
-        F = np.array( ((0,1.0),(0,0) ))
-        L = np.array( ((1.0,),(0,)) )
-        Qc = np.array( ((variance,),) )
-        H = np.array( ((1.0,0),) )
-        
-        Pinf   = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
-        #P0 = Pinf.copy() 
-        P0 = np.zeros((2,2))   
-        #Pinf   = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
-        dF = np.zeros((2,2,1))
-        dQc    = np.ones( (1,1,1) )
-        
-        dPinf = np.zeros((2,2,1))
-        dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
-        #dP0 = dPinf.copy() 
-        dP0 = np.zeros((2,2,1))
-  
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
diff --git a/GPy/kern/_src/sde_linear.py b/GPy/kern/_src/sde_linear.py
deleted file mode 100644
index 031f0f5f..00000000
--- a/GPy/kern/_src/sde_linear.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Linear covariance function with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .linear import Linear
-
-import numpy as np
-
-class sde_Linear(Linear):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Linear kernel:
-
-    .. math::
-
-       k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
-
-    """
-    def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
-        """
-        Modify the init method, because one extra parameter is required. X - points
-        on the X axis.
-        """
-        
-        super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
-        
-        self.t0 = np.min(X)
-        
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variances.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
-        t0 = float(self.t0)
-        
-        F = np.array( ((0,1.0),(0,0) ))
-        L = np.array( ((0,),(1.0,)) )
-        Qc = np.zeros((1,1))
-        H = np.array( ((1.0,0),) )
-        
-        Pinf   = np.zeros((2,2))
-        P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance        
-        dF = np.zeros((2,2,1))
-        dQc    = np.zeros( (1,1,1) )
-        
-        dPinf = np.zeros((2,2,1))
-        dP0 = np.zeros((2,2,1))
-        dP0[:,:,0]  = P0 / variance
-  
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
diff --git a/GPy/kern/_src/sde_matern.py b/GPy/kern/_src/sde_matern.py
deleted file mode 100644
index 0ce1cf98..00000000
--- a/GPy/kern/_src/sde_matern.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Matern covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .stationary import Matern32
-from .stationary import Matern52
-import numpy as np
-
-class sde_Matern32(Matern32):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Matern 3/2 kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values)
-        lengthscale = float(self.lengthscale.values)
-        
-        foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array(((0, 1.0), (-foo**2, -2*foo))) 
-        L    = np.array(( (0,), (1.0,) ))
-        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) 
-        H    = np.array(((1.0, 0),)) 
-        Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
-        P0 = Pinf.copy()
-        
-        # Allocate space for the derivatives 
-        dF    = np.empty([F.shape[0],F.shape[1],2])
-        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
-        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
-        # The partial derivatives 
-        dFvariance       = np.zeros((2,2)) 
-        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) 
-        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3)) 
-        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) 
-        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2))) 
-        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) 
-        # Combine the derivatives 
-        dF[:,:,0]    = dFvariance 
-        dF[:,:,1]    = dFlengthscale 
-        dQc[:,:,0]   = dQcvariance 
-        dQc[:,:,1]   = dQclengthscale 
-        dPinf[:,:,0] = dPinfvariance 
-        dPinf[:,:,1] = dPinflengthscale 
-        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-
-class sde_Matern52(Matern52):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Matern 5/2 kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values)
-        lengthscale = float(self.lengthscale.values)
-
-        lamda = np.sqrt(5.0)/lengthscale
-        kappa = 5.0/3.0*variance/lengthscale**2        
-        
-        F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
-        L = np.array(((0,),(0,),(1,)))
-        Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
-        H = np.array(((1,0,0),))        
-        
-        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
-        P0 = Pinf.copy()
-        # Allocate space for the derivatives         
-        dF = np.empty((3,3,2))        
-        dQc = np.empty((1,1,2))        
-        dPinf = np.empty((3,3,2))
-        
-         # The partial derivatives 
-        dFvariance = np.zeros((3,3))
-        dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, 
-                                   30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
-        dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
-        dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))        
-        
-        dPinf_variance = Pinf/variance
-        kappa2 = -2.0*kappa/lengthscale
-        dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, 
-                                    0,-100*variance/lengthscale**5)))        
-        # Combine the derivatives 
-        dF[:,:,0] = dFvariance
-        dF[:,:,1] = dFlengthscale        
-        dQc[:,:,0] = dQcvariance         
-        dQc[:,:,1] = dQclengthscale        
-        dPinf[:,:,0] = dPinf_variance
-        dPinf[:,:,1] = dPinf_lengthscale
-        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)  
\ No newline at end of file
diff --git a/GPy/kern/_src/sde_standard_periodic.py b/GPy/kern/_src/sde_standard_periodic.py
deleted file mode 100644
index c3df7d92..00000000
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Matern covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .standard_periodic import StdPeriodic
-
-import numpy as np
-import scipy as sp
-
-from scipy import special as special
-
-class sde_StdPeriodic(StdPeriodic):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Standard Periodic kernel:
-
-    .. math::
-
-       k(x,y) = \theta_1 \exp \left[  - \frac{1}{2} {}\sum_{i=1}^{input\_dim}  
-       \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.wavelengths.gradient = gradients[1]
-        self.lengthscales.gradient = gradients[2]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance.
-        
-        
-        ! Note: one must constrain lengthscale not to drop below 0.25.
-        After this bessel functions of the first kind grows to very high.
-        
-        ! Note: one must keep wevelength also not very low. Because then
-        the gradients wrt wavelength become ustable. 
-        However this might depend on the data. For test example with
-        300 data points the low limit is 0.15.
-        """ 
-        
-        # Params to use: (in that order)
-        #self.variance
-        #self.wavelengths
-        #self.lengthscales
-        N = 7 # approximation order        
-        
-        
-        w0 = 2*np.pi/self.wavelengths # frequency
-        lengthscales = 2*self.lengthscales         
-        
-        [q2,dq2l] = seriescoeff(N,lengthscales,self.variance)        
-        # lengthscale is multiplied by 2 because of slightly different
-        # formula for periodic covariance function.
-        # For the same reason:
-        
-        dq2l = 2*dq2l
-        
-        if np.any( np.isfinite(q2) == False):
-            raise ValueError("SDE periodic covariance error 1")
-        
-        if np.any( np.isfinite(dq2l) == False):
-            raise ValueError("SDE periodic covariance error 2")
-        
-        F    = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
-        L    = np.eye(2*(N+1))
-        Qc   = np.zeros((2*(N+1), 2*(N+1)))
-        P_inf = np.kron(np.diag(q2),np.eye(2))
-        H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
-        P0 = P_inf.copy()
-        
-        # Derivatives
-        dF = np.empty((F.shape[0], F.shape[1], 3))
-        dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
-        dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))         
-        
-        # Derivatives wrt self.variance
-        dF[:,:,0] = np.zeros(F.shape)
-        dQc[:,:,0] = np.zeros(Qc.shape)
-        dP_inf[:,:,0] = P_inf / self.variance
-
-        # Derivatives self.wavelengths
-        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / self.wavelengths );
-        dQc[:,:,1] = np.zeros(Qc.shape)
-        dP_inf[:,:,1] = np.zeros(P_inf.shape)      
-        
-        # Derivatives self.lengthscales        
-        dF[:,:,2] = np.zeros(F.shape)
-        dQc[:,:,2] = np.zeros(Qc.shape)
-        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
-        dP0 = dP_inf.copy()
-
-        return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
-        
-        
-        
-        
-def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
-    """
-    Calculate the coefficients q_j^2 for the covariance function 
-    approximation:
-    
-        k(\tau) =  \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
-    
-    Reference is:
-
-    [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic 
-        covariance functions and state space models. In Proceedings of the 
-        Seventeenth International Conference on Artifcial Intelligence and 
-        Statistics (AISTATS 2014). JMLR: W&CP, volume 33.    
-    
-    Note! Only the infinite approximation (through Bessel function) 
-          is currently implemented.
-
-    Input:
-    ----------------
-    
-    m: int
-        Degree of approximation. Default 6.
-    lengthScale: float
-        Length scale parameter in the kerenl
-    magnSigma2:float
-        Multiplier in front of the kernel.
-        
-    
-    Output:
-    -----------------
-    
-    coeffs: array(m+1)
-        Covariance series coefficients
-    
-    coeffs_dl: array(m+1)
-        Derivatives of the coefficients with respect to lengthscale.
-    
-    """
-    
-    if true_covariance:
-        
-        bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
-            sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
-            np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
-                
-        M,J = np.meshgrid(range(0,m+1),range(0,m+1))
-        
-        coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
-             (lengthScale**(-2))**J  *magnSigma2
-        
-        coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)         
-        
-        coeffs = np.sum(coeffs,0)
-        
-    else:
-        coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
-        if np.any( np.isfinite(coeffs) == False):
-            raise ValueError("sde_standard_periodic: Coefficients are not finite!")
-            #import pdb; pdb.set_trace()
-        coeffs[0] = 0.5*coeffs[0]
-        
-        # Derivatives wrt (lengthScale)
-        coeffs_dl = np.zeros(m+1)
-        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
-        (-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )    
-            
-        # The first element
-        coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
-            (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )     
-        
-
-    return coeffs, coeffs_dl
diff --git a/GPy/kern/_src/sde_static.py b/GPy/kern/_src/sde_static.py
deleted file mode 100644
index ae8ed194..00000000
--- a/GPy/kern/_src/sde_static.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Static covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .static import White
-from .static import Bias
-
-import numpy as np
-
-class sde_White(White):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    White kernel:
-
-    .. math::
-
-       k(x,y) = \alpha*\delta(x-y)
-
-    """
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values) 
-        
-        F = np.array( ((-np.inf,),) )
-        L = np.array( ((1.0,),)  )
-        Qc = np.array( ((variance,),)  )
-        H = np.array( ((1.0,),) )
-        
-        Pinf   = np.array( ((variance,),)  )
-        P0 = Pinf.copy()     
-        
-        dF = np.zeros((1,1,1))
-        dQc = np.zeros((1,1,1))
-        dQc[:,:,0]    = np.array( ((1.0,),) )
-        
-        dPinf = np.zeros((1,1,1))
-        dPinf[:,:,0] = np.array( ((1.0,),) )
-        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-
-
-class sde_Bias(Bias):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Bias kernel:
-
-    .. math::
-
-       k(x,y) = \alpha
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        variance = float(self.variance.values) 
-        
-        F = np.array( ((0.0,),))
-        L = np.array( ((1.0,),))
-        Qc = np.zeros((1,1))
-        H = np.array( ((1.0,),))
-        
-        Pinf   = np.zeros((1,1))
-        P0 = np.array( ((variance,),) )      
-        
-        dF = np.zeros((1,1,1))
-        dQc    = np.zeros((1,1,1))
-        
-        dPinf = np.zeros((1,1,1))
-        dP0 = np.zeros((1,1,1))
-        dP0[:,:,0] = np.array( ((1.0,),) )
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
\ No newline at end of file
diff --git a/GPy/kern/_src/sde_stationary.py b/GPy/kern/_src/sde_stationary.py
deleted file mode 100644
index aeb77010..00000000
--- a/GPy/kern/_src/sde_stationary.py
+++ /dev/null
@@ -1,194 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance several stationary covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .rbf import RBF
-from .stationary import Exponential
-from .stationary import RatQuad
-
-import numpy as np
-import scipy as sp
-try:
-    from scipy.linalg import solve_continuous_lyapunov as lyap
-except ImportError:
-    from scipy.linalg import solve_lyapunov as lyap
-
-class sde_RBF(RBF):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Radial Basis Function kernel:
-
-    .. math::
-
-        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        N = 10# approximation order ( number of terms in exponent series expansion)
-        roots_rounding_decimals = 6
-        
-        fn = np.math.factorial(N)        
-        
-        kappa = 1.0/2.0/self.lengthscale**2
-       
-        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
-       
-        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
-        
-        for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
-            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n 
-        
-        pp = sp.poly1d(pp)
-        roots = sp.roots(pp)        
-        
-        neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
-        aa = sp.poly1d(neg_real_part_roots, r=True).coeffs        
-        
-        F = np.diag(np.ones((N-1,)),1)
-        F[-1,:] = -aa[-1:0:-1]
-        
-        L= np.zeros((N,1))
-        L[N-1,0] = 1
-        
-        H = np.zeros((1,N))
-        H[0,0] = 1
-        
-        # Infinite covariance:
-        Pinf = lyap(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
-        Pinf = 0.5*(Pinf + Pinf.T)
-        # Allocating space for derivatives        
-        dF    = np.empty([F.shape[0],F.shape[1],2])
-        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
-        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
-        
-        # Derivatives:
-        dFvariance = np.zeros(F.shape)
-        dFlengthscale = np.zeros(F.shape)
-        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
-
-        dQcvariance = Qc/self.variance
-        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))         
-        
-        dPinf_variance = Pinf/self.variance
-        
-        lp = Pinf.shape[0]
-        coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
-        coeff[np.mod(coeff,2) != 0] = 0
-        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
-        
-        dF[:,:,0]    = dFvariance 
-        dF[:,:,1]    = dFlengthscale 
-        dQc[:,:,0]   = dQcvariance 
-        dQc[:,:,1]   = dQclengthscale 
-        dPinf[:,:,0] = dPinf_variance 
-        dPinf[:,:,1] = dPinf_lengthscale
-        
-        P0 = Pinf.copy()
-        dP0 = dPinf.copy()
-        
-        # Benefits of this are not very sound. Helps only in one case:
-        # SVD Kalman + RBF kernel
-        import GPy.models.state_space_main as ssm
-        (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-
-class sde_Exponential(Exponential):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Exponential kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        variance = float(self.variance.values)
-        lengthscale = float(self.lengthscale)        
-        
-        F  = np.array(((-1.0/lengthscale,),))
-        L  = np.array(((1.0,),)) 
-        Qc = np.array( ((2.0*variance/lengthscale,),) ) 
-        H = np.array(((1.0,),)) 
-        Pinf = np.array(((variance,),)) 
-        P0 = Pinf.copy()        
-        
-        dF = np.zeros((1,1,2));  
-        dQc = np.zeros((1,1,2)); 
-        dPinf = np.zeros((1,1,2));
-        
-        dF[:,:,0] = 0.0        
-        dF[:,:,1] = 1.0/lengthscale**2
-        
-        dQc[:,:,0] = 2.0/lengthscale       
-        dQc[:,:,1] = -2.0*variance/lengthscale**2
-        
-        dPinf[:,:,0] = 1.0
-        dPinf[:,:,1] = 0.0
-        
-        dP0 = dPinf.copy()        
-
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-        
-class sde_RatQuad(RatQuad):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Rational Quadratic kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-
-    def sde(self):
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        assert False, 'Not Implemented'
-        
-        # Params to use:
-
-        # self.lengthscale
-        # self.variance
-        #self.power
-        
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
diff --git a/GPy/kern/src/sde_standard_periodic.py b/GPy/kern/src/sde_standard_periodic.py
index 3729bf57..be32f7b2 100644
--- a/GPy/kern/src/sde_standard_periodic.py
+++ b/GPy/kern/src/sde_standard_periodic.py
@@ -9,6 +9,7 @@ from .standard_periodic import StdPeriodic
 
 import numpy as np
 import scipy as sp
+import warnings
 
 from scipy import special as special
 
@@ -26,6 +27,38 @@ class sde_StdPeriodic(StdPeriodic):
        \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
 
     """
+    # TODO: write comment to the constructor arguments
+    def __init__(self, *args, **kwargs):
+        """
+        Init constructior.
+        
+        Two optinal extra parameters are added in addition to the ones in 
+        StdPeriodic kernel.
+        
+        :param approx_order: approximation order for the RBF covariance. (Default 7)
+        :type approx_order: int
+        
+        :param balance: Whether to balance this kernel separately. (Defaulf False). Model has a separate parameter for balancing.
+        :type balance: bool
+        """
+        
+        #import pdb; pdb.set_trace()
+        
+        if 'approx_order' in kwargs:
+            self.approx_order = kwargs.get('approx_order')
+            del kwargs['approx_order']
+        else:
+            self.approx_order = 7
+        
+        
+        if 'balance' in kwargs:
+            self.balance = bool( kwargs.get('balance') )
+            del kwargs['balance']
+        else:
+            self.balance = False
+        
+        super(sde_StdPeriodic, self).__init__(*args, **kwargs)
+        
     def sde_update_gradient_full(self, gradients):
         """
         Update gradient in the order in which parameters are represented in the
@@ -38,41 +71,48 @@ class sde_StdPeriodic(StdPeriodic):
         
     def sde(self): 
         """ 
-        Return the state space representation of the covariance.
+        Return the state space representation of the standard periodic covariance.
         
         
-        ! Note: one must constrain lengthscale not to drop below 0.25.
-        After this bessel functions of the first kind grows to very high.
+        ! Note: one must constrain lengthscale not to drop below 0.2. (independently of approximation order)
+        After this Bessel functions of the first becomes NaN. Rescaling
+        time variable might help.
         
-        ! Note: one must keep wevelength also not very low. Because then
+        ! Note: one must keep period also not very low. Because then
         the gradients wrt wavelength become ustable. 
         However this might depend on the data. For test example with
-        300 data points the low limit is 0.15.
+        300 data points the low limit is 0.15. 
         """ 
         
+        #import pdb; pdb.set_trace()
         # Params to use: (in that order)
         #self.variance
         #self.period
         #self.lengthscale
-        N = 7 # approximation order        
+        if self.approx_order is not None:
+            N = int(self.approx_order)
+        else:
+            N = 7 # approximation order        
         
+        p_period = float(self.period)        
+        p_lengthscale = 2*float(self.lengthscale)
+        p_variance = float(self.variance)        
         
-        w0 = 2*np.pi/self.period # frequency
-        lengthscale = 2*self.lengthscale         
+        w0 = 2*np.pi/p_period # frequency
+        # lengthscale is multiplied by 2 because of different definition of lengthscale
         
-        [q2,dq2l] = seriescoeff(N,lengthscale,self.variance)        
-        # lengthscale is multiplied by 2 because of slightly different
-        # formula for periodic covariance function.
-        # For the same reason:
+        [q2,dq2l] = seriescoeff(N, p_lengthscale, p_variance)        
         
-        dq2l = 2*dq2l
-        
-        if np.any( np.isfinite(q2) == False):
-            raise ValueError("SDE periodic covariance error 1")
-        
-        if np.any( np.isfinite(dq2l) == False):
-            raise ValueError("SDE periodic covariance error 2")
+        dq2l = 2*dq2l  # This is because the lengthscale if multiplied by 2.
         
+        eps = 1e-12
+        if np.any( np.isfinite(q2) == False) or np.any( np.abs(q2) > 1.0/eps) or np.any( np.abs(q2) < eps):
+            warnings.warn("sde_Periodic:  Infinite, too small, or too large (eps={0:e}) values in q2 :".format(eps) + q2.__format__("") )
+                                
+        if np.any( np.isfinite(dq2l) == False) or np.any( np.abs(dq2l) > 1.0/eps) or np.any( np.abs(dq2l) < eps):
+            warnings.warn("sde_Periodic:  Infinite, too small, or too large (eps={0:e}) values in dq2l :".format(eps) + q2.__format__("") )
+                 
+                 
         F    = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
         L    = np.eye(2*(N+1))
         Qc   = np.zeros((2*(N+1), 2*(N+1)))
@@ -88,10 +128,10 @@ class sde_StdPeriodic(StdPeriodic):
         # Derivatives wrt self.variance
         dF[:,:,0] = np.zeros(F.shape)
         dQc[:,:,0] = np.zeros(Qc.shape)
-        dP_inf[:,:,0] = P_inf / self.variance
+        dP_inf[:,:,0] = P_inf / p_variance
 
         # Derivatives self.period
-        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / self.period );
+        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / p_period );
         dQc[:,:,1] = np.zeros(Qc.shape)
         dP_inf[:,:,1] = np.zeros(P_inf.shape)      
         
@@ -100,7 +140,12 @@ class sde_StdPeriodic(StdPeriodic):
         dQc[:,:,2] = np.zeros(Qc.shape)
         dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
         dP0 = dP_inf.copy()
-
+        
+        if self.balance:
+            # Benefits of this are not very sound.
+            import GPy.models.state_space_main as ssm
+            (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf,dP0) = ssm.balance_ss_model(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0 )
+            
         return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
         
         
@@ -164,9 +209,9 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
         coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
         if np.any( np.isfinite(coeffs) == False):
             raise ValueError("sde_standard_periodic: Coefficients are not finite!")
-            #import pdb; pdb.set_trace()
+        #import pdb; pdb.set_trace()
         coeffs[0] = 0.5*coeffs[0]
-        
+        #print(coeffs)
         # Derivatives wrt (lengthScale)
         coeffs_dl = np.zeros(m+1)
         coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
@@ -177,4 +222,4 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
             (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )     
         
 
-    return coeffs, coeffs_dl
+    return coeffs.squeeze(), coeffs_dl.squeeze()
diff --git a/GPy/kern/src/sde_stationary.py b/GPy/kern/src/sde_stationary.py
index ae3dd89c..9802905c 100644
--- a/GPy/kern/src/sde_stationary.py
+++ b/GPy/kern/src/sde_stationary.py
@@ -15,6 +15,7 @@ try:
     from scipy.linalg import solve_continuous_lyapunov as lyap
 except ImportError:
     from scipy.linalg import solve_lyapunov as lyap
+import warnings
 
 class sde_RBF(RBF):
     """
@@ -29,6 +30,37 @@ class sde_RBF(RBF):
         k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
 
     """
+    def __init__(self, *args, **kwargs):
+        """
+        Init constructior.
+        
+        Two optinal extra parameters are added in addition to the ones in 
+        RBF kernel.
+        
+        :param approx_order: approximation order for the RBF covariance. (Default 10)
+        :type approx_order: int
+        
+        :param balance: Whether to balance this kernel separately. (Defaulf True). Model has a separate parameter for balancing.
+        :type balance: bool
+        """
+        
+        if 'balance' in kwargs:
+            self.balance = bool( kwargs.get('balance') )
+            del kwargs['balance']
+        else:
+            self.balance = True
+        
+        
+        if 'approx_order' in kwargs:
+            self.approx_order = kwargs.get('approx_order')
+            del kwargs['approx_order']
+        else:
+            self.approx_order = 6
+        
+        
+        
+        super(sde_RBF, self).__init__(*args, **kwargs)
+        
     def sde_update_gradient_full(self, gradients):
         """
         Update gradient in the order in which parameters are represented in the
@@ -41,23 +73,43 @@ class sde_RBF(RBF):
     def sde(self):
         """
         Return the state space representation of the covariance.
+        
+        Note! For Sparse GP inference too small or two high values of lengthscale
+        lead to instabilities. This is because Qc are too high or too low
+        and P_inf are not full rank. This effect depends on approximatio order.
+        For N = 10. lengthscale must be in (0.8,8). For other N tests must be conducted.
+        N=6: (0.06,31)
+        Variance should be within reasonable bounds as well, but its dependence is linear.
+        
+        The above facts do not take into accout regularization.
         """
-
-        N = 10# approximation order ( number of terms in exponent series expansion)
+        #import pdb; pdb.set_trace()
+        if self.approx_order is not None:
+            N = self.approx_order
+        else:
+            N = 10# approximation order ( number of terms in exponent series expansion)
+            
         roots_rounding_decimals = 6
 
         fn = np.math.factorial(N)
 
-        kappa = 1.0/2.0/self.lengthscale**2
+        p_lengthscale = float( self.lengthscale )
+        p_variance = float(self.variance)
+        kappa = 1.0/2.0/p_lengthscale**2
 
-        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
+        Qc = np.array( ((p_variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) )
+        
+        eps = 1e-12
+        if (float(Qc) > 1.0/eps) or (float(Qc) < eps):
+            warnings.warn("""sde_RBF kernel: the noise variance Qc is either very large or very small. 
+                                It influece conditioning of P_inf: {0:e}""".format(float(Qc)) )
 
-        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
+        pp1 = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
 
         for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
-            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
-
-        pp = sp.poly1d(pp)
+            pp1[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
+            
+        pp = sp.poly1d(pp1)
         roots = sp.roots(pp)
 
         neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
@@ -83,17 +135,17 @@ class sde_RBF(RBF):
         # Derivatives:
         dFvariance = np.zeros(F.shape)
         dFlengthscale = np.zeros(F.shape)
-        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
+        dFlengthscale[-1,:] = -aa[-1:0:-1]/p_lengthscale * np.arange(-N,0,1)
 
-        dQcvariance = Qc/self.variance
-        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
-
-        dPinf_variance = Pinf/self.variance
+        dQcvariance = Qc/p_variance
+        dQclengthscale = np.array(( (p_variance*np.sqrt(2*np.pi)*fn*2**N*p_lengthscale**(-2*N)*(1-2*N),),))
+        
+        dPinf_variance = Pinf/p_variance
 
         lp = Pinf.shape[0]
         coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
         coeff[np.mod(coeff,2) != 0] = 0
-        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
+        dPinf_lengthscale = -1/p_lengthscale*Pinf*coeff
 
         dF[:,:,0]    = dFvariance
         dF[:,:,1]    = dFlengthscale
@@ -105,10 +157,11 @@ class sde_RBF(RBF):
         P0 = Pinf.copy()
         dP0 = dPinf.copy()
 
-        # Benefits of this are not very sound. Helps only in one case:
-        # SVD Kalman + RBF kernel
-        import GPy.models.state_space_main as ssm
-        (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
+        if self.balance:
+            # Benefits of this are not very sound. Helps only in one case:
+            # SVD Kalman + RBF kernel
+            import GPy.models.state_space_main as ssm
+            (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
 
         return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 
diff --git a/GPy/models/state_space_cython.pyx b/GPy/models/state_space_cython.pyx
index ae09d1cd..2626b9e7 100644
--- a/GPy/models/state_space_cython.pyx
+++ b/GPy/models/state_space_cython.pyx
@@ -432,6 +432,8 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
                             (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
                             
             self.Q_svd_dict = {}
+            self.Q_square_root_dict = {}
+            self.Q_inverse_dict = {}
             self.last_k = 0
              # !!!Print statistics! Which object is created
             # !!!Print statistics! Print sizes of matrices
@@ -477,19 +479,54 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
             cdef np.ndarray[DTYPE_t, ndim=2] U
             cdef np.ndarray[DTYPE_t, ndim=1] S           
             cdef np.ndarray[DTYPE_t, ndim=2] Vh
+            
         
-            if matrix_index in self.Q_svd_dict:
-                square_root = self.Q_svd_dict[matrix_index]
+            if matrix_index in self.Q_square_root_dict:
+                square_root = self.Q_square_root_dict[matrix_index]
             else:
-                U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], 
+                if matrix_index not in self.Q_svd_dict
+                    U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], 
                                         full_matrices=False, compute_uv=True, 
-                                        overwrite_a=False, check_finite=False)                     
-                                        
+                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                else:
+                    U,S,Vh = self.Q_svd_dict[matrix_index]
+                       
                 square_root = U * np.sqrt(S)
-                self.Q_svd_dict[matrix_index] = square_root
+                self.Q_suqare_root_dict[matrix_index] = square_root
             
             return square_root
-
+            
+            
+        cpdef Q_inverse(self, int k, float jitter=0.0):
+            """
+            Square root of the noise matrix Q
+            """
+            
+            cdef int matrix_index = <int>self.reconstruct_indices[k]
+            cdef np.ndarray[DTYPE_t, ndim=2] square_root
+            
+            cdef np.ndarray[DTYPE_t, ndim=2] U
+            cdef np.ndarray[DTYPE_t, ndim=1] S           
+            cdef np.ndarray[DTYPE_t, ndim=2] Vh
+            
+        
+            if matrix_index in self.Q_inverse_dict:
+                Q_inverse = self.Q_inverse_dict[matrix_index]
+            else:
+                if matrix_index not in self.Q_svd_dict
+                    U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], 
+                                        full_matrices=False, compute_uv=True, 
+                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                else:
+                    U,S,Vh = self.Q_svd_dict[matrix_index]
+                       
+               Q_inverse = Q_inverse = np.dot( Vh.T * ( 1.0/(S + jitter)) , U.T )
+                self.Q_inverse_dict[matrix_index] = Q_inverse
+            
+            return Q_inverse
+            
 #        def return_last(self):
 #            """
 #            Function returns last available matrices.
diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py
index 65763a05..6ed2fbeb 100644
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@@ -12,6 +12,8 @@ import numpy as np
 import scipy as sp
 import scipy.linalg as linalg
 
+import warnings
+
 try:
     from . import state_space_setup
     setup_available = True
@@ -41,6 +43,10 @@ if print_verbose:
     else:
         print("state_space: cython is NOT used")
 
+# When debugging external module can set some value to this variable (e.g.)
+# 'model' and in this module this variable can be seen.s
+tmp_buffer = None
+
 
 class Dynamic_Callables_Python(object):
 
@@ -227,7 +233,7 @@ class R_handling_Python(Measurement_Callables_Class):
         self.R_square_root = {}
 
     def Rk(self, k):
-        return self.R[:, :, self.index[self.R_time_var_index, k]]
+        return self.R[:, :, int(self.index[self.R_time_var_index, k])]
 
     def dRk(self, k):
         if self.dR is None:
@@ -305,7 +311,7 @@ class Std_Measurement_Callables_Python(R_handling_Class):
             P: parameter for Jacobian, usually covariance matrix.
         """
 
-        return self.H[:, :, self.index[self.H_time_var_index, k]]
+        return self.H[:, :, int(self.index[self.H_time_var_index, k])]
 
     def dHk(self, k):
         if self.dH is None:
@@ -2303,6 +2309,8 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.v_dQk = None
 
             self.square_root_computed = False
+            self.Q_inverse_computed = False
+            self.Q_svd_computed = False
             # !!!Print statistics! Which object is created
 
         def f_a(self, k,m,A):
@@ -2337,7 +2345,10 @@ class ContDescrStateSpace(DescreteStateSpace):
                 self.v_Qk = v_Qk
                 self.v_dAk = v_dAk
                 self.v_dQk = v_dQk
+                
                 self.Q_square_root_computed = False
+                self.Q_inverse_computed = False
+                self.Q_svd_computed = False
             else:
                 v_Ak = self.v_Ak
                 v_Qk = self.v_Qk
@@ -2359,8 +2370,11 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.last_k = 0
             self.last_k_computed = False
             self.compute_derivatives = compute_derivatives
+            
             self.Q_square_root_computed = False
-
+            self.Q_inverse_computed = False
+            self.Q_svd_computed = False
+            self.Q_eigen_computed = False
             return self
 
         def Ak(self,k,m,P):
@@ -2381,12 +2395,19 @@ class ContDescrStateSpace(DescreteStateSpace):
 
         def Q_srk(self,k):
             """
+            Check square root, maybe rewriting for Spectral decomposition is needed.
             Square root of the noise matrix Q
             """
 
             if ((self.last_k == k) and (self.last_k_computed == True)):
                 if not self.Q_square_root_computed:
-                    (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                    if not self.Q_svd_computed:
+                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                        self.Q_svd = (U, S, Vh)
+                        self.Q_svd_computed = True
+                    else:
+                        (U, S, Vh) = self.Q_svd
+                        
                     square_root = U * np.sqrt(S)
                     self.square_root_computed = True
                     self.Q_square_root = square_root
@@ -2396,7 +2417,56 @@ class ContDescrStateSpace(DescreteStateSpace):
                 raise ValueError("Square root of Q can not be computed")
 
             return square_root
+        
+        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):        
+            """
+            Function inverts Q matrix and regularizes the inverse.
+            Regularization is useful when original matrix is badly conditioned.
+            Function is currently used only in SparseGP code.
+            
+            Inputs:
+            ------------------------------
+            k: int
+            Iteration number.
+            
+            p_largest_cond_num: float
+            Largest condition value for the inverted matrix. If cond. number is smaller than that
+            no regularization happen.
+            
+            regularization_type: 1 or 2
+            Regularization type.
+            
+            regularization_type: int (1 or 2)
+            
+                type 1: 1/(S[k] + regularizer) regularizer is computed
+                type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
+            """
+            
+            #import pdb; pdb.set_trace()
+                    
+            if ((self.last_k == k) and (self.last_k_computed == True)):
+                if not self.Q_inverse_computed:
+                    if not self.Q_svd_computed:
+                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                        self.Q_svd = (U, S, Vh)
+                        self.Q_svd_computed = True
+                    else:
+                        (U, S, Vh) = self.Q_svd
 
+                    Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
+                    
+                    self.Q_inverse_computed = True
+                    self.Q_inverse_r = Q_inverse_r
+                        
+                else:
+                    Q_inverse_r = self.Q_inverse_r
+            else:
+                raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
+                                     This requires some programming""")
+
+            return Q_inverse_r
+        
+        
         def return_last(self):
             """
             Function returns last computed matrices.
@@ -2463,6 +2533,9 @@ class ContDescrStateSpace(DescreteStateSpace):
                             (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
 
             self.Q_svd_dict = {}
+            self.Q_square_root_dict = {}
+            self.Q_inverse_dict = {}
+            
             self.last_k = None
              # !!!Print statistics! Which object is created
             # !!!Print statistics! Print sizes of matrices
@@ -2503,17 +2576,66 @@ class ContDescrStateSpace(DescreteStateSpace):
             Square root of the noise matrix Q
             """
             matrix_index = self.reconstruct_indices[k]
-            if matrix_index in self.Q_svd_dict:
-                square_root = self.Q_svd_dict[matrix_index]
+            if matrix_index in self.Q_square_root_dict:
+                square_root = self.Q_square_root_dict[matrix_index]
             else:
-                (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
+                if matrix_index in self.Q_svd_dict:
+                    (U, S, Vh) = self.Q_svd_dict[matrix_index]
+                else:
+                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
                                         full_matrices=False, compute_uv=True,
                                         overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                    
                 square_root = U * np.sqrt(S)
-                self.Q_svd_dict[matrix_index] = square_root
+                self.Q_square_root_dict[matrix_index] = square_root
 
             return square_root
+        
+        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
+            """
+            Function inverts Q matrix and regularizes the inverse.
+            Regularization is useful when original matrix is badly conditioned.
+            Function is currently used only in SparseGP code.
+            
+            Inputs:
+            ------------------------------
+            k: int
+            Iteration number.
+            
+            p_largest_cond_num: float
+            Largest condition value for the inverted matrix. If cond. number is smaller than that
+            no regularization happen.
+            
+            regularization_type: 1 or 2
+            Regularization type.
+            
+            regularization_type: int (1 or 2)
+            
+                type 1: 1/(S[k] + regularizer) regularizer is computed
+                type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
+            """
+            #import pdb; pdb.set_trace()
+            
+            matrix_index = self.reconstruct_indices[k]
+            if matrix_index in self.Q_inverse_dict:
+                Q_inverse_r = self.Q_inverse_dict[matrix_index]
+            else:
+                
+                if matrix_index in self.Q_svd_dict:
+                    (U, S, Vh) = self.Q_svd_dict[matrix_index]
+                else:
+                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
+                                        full_matrices=False, compute_uv=True,
+                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                
+                Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
+                self.Q_inverse_dict[matrix_index] = Q_inverse_r
 
+            return Q_inverse_r
+            
+        
         def return_last(self):
             """
             Function returns last available matrices.
@@ -3073,7 +3195,8 @@ class ContDescrStateSpace(DescreteStateSpace):
     @classmethod
     def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
                                  grad_params_no=None,
-                                 P_inf=None, dP_inf=None, dF = None, dQc=None):
+                                 P_inf=None, dP_inf=None, dF = None, dQc=None,
+                                 dt0=None):
         """
         Function return the object which is used in Kalman filter and/or
         smoother to obtain matrices A, Q and their derivatives for discrete model
@@ -3110,7 +3233,14 @@ class ContDescrStateSpace(DescreteStateSpace):
         threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
         dt = np.empty((X.shape[0],))
         dt[1:] = np.diff(X[:,0],axis=0)
-        dt[0]  = 0#dt[1]
+        if dt0 is None:
+            dt[0]  = 0#dt[1]
+        else:
+            if isinstance(dt0,str):
+                dt = dt[1:]
+            else:
+                dt[0] = dt0
+            
         unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
         number_unique_indices = len(unique_indices)
 
@@ -3161,7 +3291,10 @@ class ContDescrStateSpace(DescreteStateSpace):
 
             x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
 
-
+        TODO: this function can be redone to "preprocess dataset", when
+        close time points are handeled properly (with rounding parameter) and
+        values are averaged accordingly.
+        
         Input:
         --------------
         F,L: LTI SDE matrices of corresponding dimensions
@@ -3222,11 +3355,9 @@ class ContDescrStateSpace(DescreteStateSpace):
         n = F.shape[0]
 
         if not isinstance(dt, collections.Iterable): # not iterable, scalar
-
+            #import pdb; pdb.set_trace()
             # The dynamical model
             A  = matrix_exponent(F*dt)
-            if np.any( np.isnan(A)):
-                A  = linalg.expm3(F*dt)
 
             # The covariance matrix Q by matrix fraction decomposition ->
             Phi = np.zeros((2*n,2*n))
@@ -3265,15 +3396,17 @@ class ContDescrStateSpace(DescreteStateSpace):
                     # The discrete-time dynamical model*
                     if p==0:
                         A  = AA[:n,:n,p]
-                        Q_noise_2  = P_inf - A.dot(P_inf).dot(A.T)
-                        Q_noise = Q_noise_2
+                        Q_noise_3  = P_inf - A.dot(P_inf).dot(A.T)
+                        Q_noise = Q_noise_3
                         #PP = A.dot(P).dot(A.T) + Q_noise_2
 
                     # The derivatives of A and Q
                     dA[:,:,p] = AA[n:,:n,p]
-                    dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \
-                       - A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times
-
+                    tmp = dA[:,:,p].dot(P_inf).dot(A.T)
+                    dQ[:,:,p] = dP_inf[:,:,p] - tmp \
+                       - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
+                    
+                    dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
             else:
               dA = None
               dQ = None
@@ -3282,7 +3415,7 @@ class ContDescrStateSpace(DescreteStateSpace):
 	
             #Q_noise = Q_noise_1
 
-            # Return
+            Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
             return A, Q_noise,None, dA, dQ
 
         else: # iterable, array
@@ -3486,4 +3619,4 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
 
     # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
 
-    return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T
+    return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0
diff --git a/GPy/models/state_space_model.py b/GPy/models/state_space_model.py
index 5d22c0fc..d16b5adc 100644
--- a/GPy/models/state_space_model.py
+++ b/GPy/models/state_space_model.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2013, Arno Solin.
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-#
+# 
 # This implementation of converting GPs to state space models is based on the article:
 #
 #  @article{Sarkka+Solin+Hartikainen:2013,
@@ -23,7 +23,16 @@ from . import state_space_main as ssm
 from . import state_space_setup as ss_setup
 
 class StateSpace(Model):
-    def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
+    def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, balance=False, name='StateSpace'):
+        """
+        Inputs:
+        ------------------
+        
+        balance: bool
+        Whether to balance or not the model as a whole
+        
+        """
+        
         super(StateSpace, self).__init__(name=name)
 
         if len(X.shape) == 1:
@@ -51,15 +60,16 @@ class StateSpace(Model):
         ss_setup.use_cython = use_cython
 
         #import pdb; pdb.set_trace()
-
+        self.balance = balance
+        
         global ssm
         #from . import state_space_main as ssm
         if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
             reload(ssm)
         # Make sure the observations are ordered in time
         sort_index = np.argsort(X[:,0])
-        self.X = X[sort_index]
-        self.Y = Y[sort_index]
+        self.X = X[sort_index,:]
+        self.Y = Y[sort_index,:]
 
         # Noise variance
         self.likelihood = likelihoods.Gaussian(variance=noise_var)
@@ -86,11 +96,12 @@ class StateSpace(Model):
 
         #np.set_printoptions(16)
         #print(self.param_array)
-        #import pdb; pdb.set_trace()
+        
 
         # Get the model matrices from the kernel
         (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
-
+        
+        
         # necessary parameters
         measurement_dim = self.output_dim
         grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@@ -112,8 +123,9 @@ class StateSpace(Model):
         dR[:,:,-1] = np.eye(measurement_dim)
 
         # Balancing
-        #(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
-
+        if self.balance:
+            (F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
+            print("SSM parameters_changed balancing!")
         # Use the Kalman filter to evaluate the likelihood
         grad_calc_params = {}
         grad_calc_params['dP_inf'] = dP_inf
@@ -125,7 +137,7 @@ class StateSpace(Model):
         kalman_filter_type = self.kalman_filter_type
 
         # The following code is required because sometimes the shapes of self.Y
-        # becomes 3D even though is must be 2D. The reason is undescovered.
+        # becomes 3D even though is must be 2D. The reason is undiscovered.
         Y = self.Y
         if self.ts_number is None:
             Y.shape = (self.num_data,1)
@@ -146,7 +158,7 @@ class StateSpace(Model):
 
         if np.any( np.isfinite(grad_log_likelihood) == False):
             #import pdb; pdb.set_trace()
-            print("State-Space: NaN valkues in the grad_log_likelihood")
+            print("State-Space: NaN values in the grad_log_likelihood")
         #print(grad_log_likelihood)
 
         grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
@@ -159,7 +171,7 @@ class StateSpace(Model):
     def log_likelihood(self):
         return self._log_marginal_likelihood
 
-    def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
+    def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, p_balance=False, **kw):
         """
         Performs the actual prediction for new X points.
         Inner function. It is called only from inside this class.
@@ -177,7 +189,10 @@ class StateSpace(Model):
         filteronly: bool
             Use only Kalman Filter for prediction. In this case the output does
             not coincide with corresponding Gaussian process.
-
+        
+        balance: bool
+            Whether to balance or not the model as a whole
+        
         Output:
         --------------------
 
@@ -210,7 +225,12 @@ class StateSpace(Model):
         # Get the model matrices from the kernel
         (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
         state_dim = F.shape[0]
-
+        
+        # Balancing
+        if (p_balance==True):
+            (F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
+            print("SSM _raw_predict balancing!")
+            
         #Y = self.Y[:, 0,0]
         # Run the Kalman filter
         #import pdb; pdb.set_trace()
@@ -261,10 +281,23 @@ class StateSpace(Model):
         # Return the posterior of the state
         return (m, V)
 
-    def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
-
+    def predict(self, Xnew=None, filteronly=False, include_likelihood=True, balance=None, **kw):
+        """
+        Inputs:
+        ------------------
+        
+        balance: bool
+        Whether to balance or not the model as a whole
+        
+        """
+        
+        if balance is None:
+            p_balance = self.balance
+        else:
+            p_balance = balance
+            
         # Run the Kalman filter to get the state
-        (m, V) = self._raw_predict(Xnew,filteronly=filteronly)
+        (m, V) = self._raw_predict(Xnew,filteronly=filteronly, p_balance=p_balance)
 
         # Add the noise variance to the state variance
         if include_likelihood:
@@ -277,8 +310,22 @@ class StateSpace(Model):
         # Return mean and variance
         return m, V
 
-    def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
-        mu, var = self._raw_predict(Xnew)
+    def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), balance=None, **kw):
+        """
+        Inputs:
+        ------------------
+        
+        balance: bool
+        Whether to balance or not the model as a whole
+        
+        """
+        if balance is None:
+            p_balance = self.balance
+        else:
+            p_balance = balance
+        
+        
+        mu, var = self._raw_predict(Xnew, p_balance=p_balance)
         #import pdb; pdb.set_trace()
         return  [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
 
diff --git a/GPy/testing/gpy_kernels_state_space_tests.py b/GPy/testing/gpy_kernels_state_space_tests.py
index c06093dd..1e48b168 100644
--- a/GPy/testing/gpy_kernels_state_space_tests.py
+++ b/GPy/testing/gpy_kernels_state_space_tests.py
@@ -91,12 +91,14 @@ class StateSpaceKernelsTests(np.testing.TestCase):
                            mean_compare_decimal=5, var_compare_decimal=5)
 
     def test_RBF_kernel(self,):
+        #import pdb;pdb.set_trace()
+        
         np.random.seed(234) # seed the random number generator
         (X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
                         plot = False, points_num=50, x_interval = (0, 20), random=True)
         X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
 
-        ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,])
+        ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,], balance=True, approx_order=10)
         gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,])
 
         self.run_for_model(X, Y, ss_kernel, check_gradients=True,
@@ -267,7 +269,7 @@ class StateSpaceKernelsTests(np.testing.TestCase):
                                gp_kernel=gp_kernel,
                                mean_compare_decimal=2, var_compare_decimal=2)
         except AssertionError:
-            raise SkipTest("Skipping Regular kalman filter for kernel addition, as it seems to be bugged for some python versions")
+            raise SkipTest("Skipping Regular kalman filter for kernel addition, because it is not stable (normal situation) for this data.")
 
 
     def test_kernel_multiplication(self,):
@@ -436,7 +438,7 @@ if __name__ == "__main__":
     print("Running state-space inference tests...")
     unittest.main()
 
-    #tt = StateSpaceKernelsTests('test_periodic_kernel')
+    #tt = StateSpaceKernelsTests('test_RBF_kernel')
     #import pdb; pdb.set_trace()
     #tt.test_Matern32_kernel()
     #tt.test_Matern52_kernel()