ENH: Added SDE for all basic kernels except Rational Quadratic.

The previously existing code is modified where this was necessary.
2026-05-08 03:22:38 +02:00 · 2015-07-14 16:44:21 +03:00 · 2015-07-14 16:44:21 +03:00 · 82cb626cd6
commit 82cb626cd6
parent 06a7fedd22
10 changed files with 1740 additions and 777 deletions
--- a/GPy/kern/_src/sde_brownian.py
+++ b/GPy/kern/_src/sde_brownian.py
@ -0,0 +1,57 @@
 # -*- coding: utf-8 -*-
 """
 Classes in this module enhance Brownian motion covariance function with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .brownian import Brownian
 import numpy as np
 class sde_Brownian(Brownian):
    r"""
    Brownian motion covariance function extended with the functionality
    needed to transfer it into SDE (state-space) form.

    Brownian kernel:

    .. math::

       k(x,y) = \sigma^2 \min(x,y)
    """
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel.

        Only ``gradients[0]`` is read; it is stored as the gradient of
        ``variance``.
        """
        self.variance.gradient = gradients[0]
    def sde(self):
        """
        Return the state space representation of the covariance.

        Returns the tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
        Each ``d*`` array has a trailing axis of length one that holds the
        derivative of the corresponding matrix with respect to the variance.
        """
        variance = float(self.variance.values)
        F = np.array(((0, 1.0), (0, 0)))
        L = np.array(((1.0,), (0,)))
        Qc = np.array(((variance,),))
        H = np.array(((1.0, 0),))
        Pinf = np.array(((0, -0.5*variance), (-0.5*variance, 0)))
        # The initial state covariance is all zeros; it is not a copy of Pinf.
        P0 = np.zeros((2, 2))
        dF = np.zeros((2, 2, 1))  # F does not contain the variance
        dQc = np.ones((1, 1, 1))  # Qc is exactly the variance
        dPinf = np.zeros((2, 2, 1))
        dPinf[:, :, 0] = np.array(((0, -0.5), (-0.5, 0)))  # Pinf is linear in the variance
        dP0 = np.zeros((2, 2, 1))  # P0 is constant
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_linear.py
+++ b/GPy/kern/_src/sde_linear.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Classes in this module enhance Matern covariance functions with the
+Classes in this module enhance Linear covariance function with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .linear import Linear
@ -20,16 +20,45 @@ class sde_Linear(Linear):
       k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
    """
    def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
        """
        Build the kernel through the parent constructor and remember the
        smallest value found in X.

        Compared with the parent signature there is one extra argument, X
        (points on the X axis); its minimum is kept in ``self.t0``.
        """
        parent_args = (input_dim, variances, ARD, active_dims, name)
        super(sde_Linear, self).__init__(*parent_args)
        self.t0 = np.min(X)
    def sde_update_gradient_full(self, gradients):
        """
        Store the supplied gradient on the kernel parameter.

        Only the first entry of ``gradients`` is read; it becomes the
        gradient of ``variances``.
        """
        variances_grad = gradients[0]
        self.variances.gradient = variances_grad
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
-        # Arno, insert your code here
+        variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
        t0 = float(self.t0)
-        # Params to use:
+        F = np.array( ((0,1.0),(0,0) ))
        L = np.array( ((0,),(1.0,)) )
        Qc = np.zeros((1,1))
        H = np.array( ((1.0,0),) )
-        # self.variances
+        Pinf   = np.zeros((2,2))
        P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance        
        dF = np.zeros((2,2,1))
        dQc    = np.zeros( (1,1,1) )
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        dPinf = np.zeros((2,2,1))
        dP0 = np.zeros((2,2,1))
        dP0[:,:,0]  = P0 / variance
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_matern.py
+++ b/GPy/kern/_src/sde_matern.py
@ -38,25 +38,24 @@ class sde_Matern32(Matern32):
        lengthscale = float(self.lengthscale.values)
        foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array([[0, 1], [-foo**2, -2*foo]]) 
+        F    = np.array(((0, 1), (-foo**2, -2*foo))) 
-        L    = np.array([[0], [1]]) 
+        L    = np.array(( (0,), (1,) ))
-        Qc   = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]]) 
+        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) 
-        H    = np.array([[1, 0]]) 
+        H    = np.array(((1, 0),)) 
-        Pinf = np.array([[variance, 0],  
+        Pinf = np.array(((variance, 0), (0, 3.*variance/(lengthscale**2))))
-        [0,              3.*variance/(lengthscale**2)]]) 
+        P0 = Pinf.copy()
        # Allocate space for the derivatives 
        dF    = np.empty([F.shape[0],F.shape[1],2])
        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
        # The partial derivatives 
-        dFvariance       = np.zeros([2,2]) 
+        dFvariance       = np.zeros((2,2)) 
-        dFlengthscale    = np.array([[0,0], 
+        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) 
-        [6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]]) 
+        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3)) 
-        dQcvariance      = np.array([12.*np.sqrt(3)/lengthscale**3]) 
+        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) 
-        dQclengthscale   = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance]) 
+        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2))) 
-        dPinfvariance    = np.array([[1,0],[0,3./lengthscale**2]]) 
+        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) 
        dPinflengthscale = np.array([[0,0], 
        [0,-6*variance/lengthscale**3]]) 
        # Combine the derivatives 
        dF[:,:,0]    = dFvariance 
        dF[:,:,1]    = dFlengthscale 
@ -64,8 +63,9 @@ class sde_Matern32(Matern32):
        dQc[:,:,1]   = dQclengthscale 
        dPinf[:,:,0] = dPinfvariance 
        dPinf[:,:,1] = dPinflengthscale 
        dP0 = dPinf.copy()
-        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 class sde_Matern52(Matern52):
    """
@ -106,7 +106,7 @@ class sde_Matern52(Matern52):
        H = np.array(((1,0,0),))        
        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
-        
+        P0 = Pinf.copy()
        # Allocate space for the derivatives         
        dF = np.empty((3,3,2))        
        dQc = np.empty((1,1,2))        
@ -130,75 +130,6 @@ class sde_Matern52(Matern52):
        dQc[:,:,1] = dQclengthscale        
        dPinf[:,:,0] = dPinf_variance
        dPinf[:,:,1] = dPinf_lengthscale
        dP0 = dPinf.copy()
-#        % Derivative of F w.r.t. parameter magnSigma2
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)  
 #    dFmagnSigma2    =  [0,  0,  0;
 #                        0,  0,  0;
 #                        0,  0,  0];
 #    
 #    % Derivative of F w.r.t parameter lengthScale
 #    dFlengthScale   =  [0,                          0,                  0;
 #                        0,                          0,                  0;
 #                        15*sqrt(5)/lengthScale^4,    30/lengthScale^3,   3*sqrt(5)/lengthScale^2];
 #    
 #    % Derivative of Qc w.r.t. parameter magnSigma2
 #    dQcmagnSigma2   =   400*sqrt(5)/3/lengthScale^5;
 #    
 #    % Derivative of Qc w.r.t. parameter lengthScale
 #    dQclengthScale  =   -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
 #    
 #    % Derivative of Pinf w.r.t. parameter magnSigma2    
 #    dPinfmagnSigma2 = Pinf/magnSigma2;
 #    
 #    % Derivative of Pinf w.r.t. parameter lengthScale
 #    kappa2 = -2*kappa/lengthScale;
 #    dPinflengthScale = [0,          0,       -kappa2;
 #                        0,          kappa2,  0;
 #                        -kappa2,    0,       -100*magnSigma2/lengthScale^5];
 #  
 #    % Stack all derivatives
 #    dF = zeros(3,3,2);  
 #    dQc = zeros(1,1,2); 
 #    dPinf = zeros(3,3,2);
 #  
 #    dF(:,:,1) = dFmagnSigma2;
 #    dF(:,:,2) = dFlengthScale;
 #    dQc(:,:,1) = dQcmagnSigma2;
 #    dQc(:,:,2) = dQclengthScale;
 #    dPinf(:,:,1) = dPinfmagnSigma2;
 #    dPinf(:,:,2) = dPinflengthScale; 
 #        % Derived constants
 #          lambda = sqrt(5)/lengthScale;
 #        
 #          % Feedback matrix
 #          F = [ 0,          1,          0;
 #                0,          0,          1;
 #               -lambda^3, -3*lambda^2, -3*lambda];
 #        
 #          % Noise effect matrix
 #          L = [0; 0; 1];
 #        
 #          % Spectral density
 #          Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
 #        
 #          % Observation model
 #          H = [1, 0, 0];
 #        %% Stationary covariance
 #          
 #          % Calculate Pinf only if requested
 #          if nargout > 4,
 #              
 #            % Derived constant
 #            kappa = 5/3*magnSigma2/lengthScale^2;
 #            
 #            % Stationary covariance
 #            Pinf = [magnSigma2, 0,      -kappa;
 #                    0,          kappa,  0;
 #                    -kappa,     0,      25*magnSigma2/lengthScale^4];
 #                
 #          end
        return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ b/GPy/kern/_src/sde_standard_periodic.py
@ -75,7 +75,7 @@ class sde_StdPeriodic(StdPeriodic):
        Qc   = np.zeros((2*(N+1), 2*(N+1)))
        P_inf = np.kron(np.diag(q2),np.eye(2))
        H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
-        
+        P0 = P_inf.copy()
        # Derivatives
        dF = np.empty((F.shape[0], F.shape[1], 3))
@ -96,9 +96,9 @@ class sde_StdPeriodic(StdPeriodic):
        dF[:,:,2] = np.zeros(F.shape)
        dQc[:,:,2] = np.zeros(Qc.shape)
        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
        dP0 = dP_inf.copy()
-
+        return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
        return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
--- a/GPy/kern/_src/sde_static.py
+++ b/GPy/kern/_src/sde_static.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Classes in this module enhance Matern covariance functions with the
+Classes in this module enhance Static covariance functions with the
 Stochastic Differential Equation (SDE) functionality.
 """
 from .static import White
@ -14,33 +14,7 @@ class sde_White(White):
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
-    Linear kernel:
+    White kernel:
    .. math::
       k(x,y) = \alpha
    """
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
        # Arno, insert your code here
        # Params to use:
        # self.variance
        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
 class sde_Bias(Bias):
    """
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
    Linear kernel:
    .. math::
@ -48,14 +22,80 @@ class sde_Bias(Bias):
    """
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
        self.variance.gradient = gradients[0]
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
-        # Arno, insert your code here
+        variance = float(self.variance.values) 
-        # Params to use:
+        F = np.array( ((-np.inf,),) )
-        # self.variance
+        L = np.array( ((1.0,),)  )
        Qc = np.array( ((variance,),)  )
        H = np.array( ((1.0,),) )
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        Pinf   = np.array( ((variance,),)  )
        P0 = Pinf.copy()     
        dF = np.zeros((1,1,1))
        dQc = np.zeros((1,1,1))
        dQc[:,:,0]    = np.array( ((1.0,),) )
        dPinf = np.zeros((1,1,1))
        dPinf[:,:,0] = np.array( ((1.0,),) )
        dP0 = dPinf.copy()
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 class sde_Bias(Bias):
    r"""
    Bias covariance function extended with the functionality needed to
    transfer it into SDE (state-space) form.

    Bias kernel:

    .. math::

       k(x,y) = \alpha
    """
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel.

        Only ``gradients[0]`` is read; it is stored as the gradient of
        ``variance``.
        """
        self.variance.gradient = gradients[0]
    def sde(self):
        """
        Return the state space representation of the covariance.

        Returns the tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
        The model built here has a single state with zero feedback (F) and
        zero noise spectral density (Qc); the variance enters only through
        the initial covariance P0.
        """
        variance = float(self.variance.values)
        F = np.array(((0.0,),))
        L = np.array(((1.0,),))
        Qc = np.zeros((1, 1))
        H = np.array(((1.0,),))
        Pinf = np.zeros((1, 1))
        P0 = np.array(((variance,),))
        # F, Qc and Pinf do not contain the variance, so their derivatives
        # are zero.
        dF = np.zeros((1, 1, 1))
        dQc = np.zeros((1, 1, 1))
        dPinf = np.zeros((1, 1, 1))
        dP0 = np.zeros((1, 1, 1))
        dP0[:, :, 0] = np.array(((1.0,),))  # P0 is exactly the variance
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_stationary.py
+++ b/GPy/kern/_src/sde_stationary.py
@ -8,6 +8,7 @@ from .stationary import Exponential
 from .stationary import RatQuad
 import numpy as np
 import scipy as sp
 class sde_RBF(RBF):
    """
@ -22,20 +23,87 @@ class sde_RBF(RBF):
        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
    """
    def sde_update_gradient_full(self, gradients):
        """
        Store the supplied gradients on the kernel parameters.

        ``gradients[0]`` becomes the gradient of ``variance`` and
        ``gradients[1]`` the gradient of ``lengthscale``.
        """
        grad_variance = gradients[0]
        self.variance.gradient = grad_variance
        grad_lengthscale = gradients[1]
        self.lengthscale.gradient = grad_lengthscale
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
-        # Arno, insert your code here
+        N = 10# approximation order ( number of terms in exponent series expansion)
        roots_rounding_decimals = 6
-        # Params to use:
+        fn = np.math.factorial(N)        
-        # self.lengthscale
+        kappa = 1.0/2.0/self.lengthscale**2
        # self.variance
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
+        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
        for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n 
        pp = sp.poly1d(pp)
        roots = sp.roots(pp)        
        neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
        aa = sp.poly1d(neg_real_part_roots, r=True).coeffs        
        F = np.diag(np.ones((N-1,)),1)
        F[-1,:] = -aa[-1:0:-1]
        L= np.zeros((N,1))
        L[N-1,0] = 1
        H = np.zeros((1,N))
        H[0,0] = 1
        # Infinite covariance:
        Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
        # Allocating space for derivatives        
        dF    = np.empty([F.shape[0],F.shape[1],2])
        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
        # Derivatives:
        dFvariance = np.zeros(F.shape)
        dFlengthscale = np.zeros(F.shape)
        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
        dQcvariance = Qc/self.variance
        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))         
        dPinf_variance = Pinf/self.variance
        lp = Pinf.shape[0]
        coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
        coeff[np.mod(coeff,2) != 0] = 0
        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
        dF[:,:,0]    = dFvariance 
        dF[:,:,1]    = dFlengthscale 
        dQc[:,:,0]   = dQcvariance 
        dQc[:,:,1]   = dQclengthscale 
        dPinf[:,:,0] = dPinf_variance 
        dPinf[:,:,1] = dPinf_lengthscale
        # Benefits of this are unjustified
        #import GPy.models.state_space_main as ssm
        #(F, L, Qc, H, Pinf, dF, dQc, dPinf,T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, dF, dQc, dPinf)
        P0 = Pinf.copy()
        dP0 = dPinf.copy()
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 class sde_Exponential(Exponential):
    """
@ -51,27 +119,45 @@ class sde_Exponential(Exponential):
    """
    def sde_update_gradient_full(self, gradients):
        """
        Copy the supplied gradients onto the kernel parameters.

        The first entry of ``gradients`` is assigned to ``variance`` and
        the second entry to ``lengthscale``.
        """
        d_variance = gradients[0]
        self.variance.gradient = d_variance
        d_lengthscale = gradients[1]
        self.lengthscale.gradient = d_lengthscale
    def sde(self): 
        """ 
        Return the state space representation of the covariance. 
        """ 
-        F  = np.array([[-1/self.lengthscale]]) 
+        variance = float(self.variance.values)
-        L  = np.array([[1]]) 
+        lengthscale = float(self.lengthscale)        
        Qc = np.array([[2*self.variance/self.lengthscale]]) 
        H = np.array([[1]]) 
        Pinf = np.array([[self.variance]]) 
        # TODO: return the derivatives as well 
-        return (F, L, Qc, H, Pinf)
+        F  = np.array(((-1.0/lengthscale,),))
        L  = np.array(((1.0,),)) 
        Qc = np.array( ((2.0*variance/lengthscale,),) ) 
        H = np.array(((1,),)) 
        Pinf = np.array(((variance,),)) 
        P0 = Pinf.copy()        
-        # Arno, insert your code here
+        dF = np.zeros((1,1,2));  
        dQc = np.zeros((1,1,2)); 
        dPinf = np.zeros((1,1,2));
-        # Params to use:
+        dF[:,:,0] = 0.0        
        dF[:,:,1] = 1.0/lengthscale**2
-        # self.lengthscale
+        dQc[:,:,0] = 2.0/lengthscale       
-        # self.variance
+        dQc[:,:,1] = -2.0*variance/lengthscale**2
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf) 
+        dPinf[:,:,0] = 1.0
        dPinf[:,:,1] = 0.0
        dP0 = dPinf.copy()        
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
 class sde_RatQuad(RatQuad):
    """
@ -92,7 +178,7 @@ class sde_RatQuad(RatQuad):
        Return the state space representation of the covariance. 
        """ 
-        # Arno, insert your code here
+        assert False, 'Not Implemented'
        # Params to use:
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@ -290,22 +290,25 @@ class Add(CombinationKernel):
        Qc    = None
        H     = None
        Pinf  = None
        P0    = None
        dF    = None
        dQc   = None
        dPinf = None
        dP0   = None
        n = 0
        nq = 0
        nd = 0
         # Assign models
        for p in self.parts:
-            (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
+            (Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
            F = la.block_diag(F,Ft) if (F is not None) else Ft
            L = la.block_diag(L,Lt) if (L is not None) else Lt
            Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
            H = np.hstack((H,Ht)) if (H is not None) else Ht
            Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
            P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
            if dF is not None:
                dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
@ -328,6 +331,13 @@ class Add(CombinationKernel):
            else:
                dPinf = dPinft
            if dP0 is not None:
                dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
                        'constant', constant_values=0)
                dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
            else:
                dP0 = dP0t
            n += Ft.shape[0]
            nq += Qct.shape[0]
            nd += dFt.shape[2]
@ -337,8 +347,10 @@ class Add(CombinationKernel):
        assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
        assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"        
        assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
        assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
        assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
        assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
-        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@ -126,13 +126,15 @@ class Prod(CombinationKernel):
        Qc     = np.array((1,), ndmin=2)
        H      = np.array((1,), ndmin=2)
        Pinf   = np.array((1,), ndmin=2)
        P0   = np.array((1,), ndmin=2)
        dF     = None
        dQc    = None
        dPinf  = None
        dP0  = None
         # Assign models
        for p in self.parts:
-            (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
+            (Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
            # check derivative dimensions ->
            number_of_parameters = len(p.param_array)            
@ -149,14 +151,16 @@ class Prod(CombinationKernel):
            dF    = dkron(F,dF,Ft,dFt,'sum')
            dQc   = dkron(Qc,dQc,Qct,dQct,'prod')
            dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
            dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
            F    = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
            L    = np.kron(L,Lt)
            Qc   = np.kron(Qc,Qct)
            Pinf = np.kron(Pinf,P_inft)
            P0 = np.kron(P0,P_inft)
            H    = np.kron(H,Ht)
-        return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
+        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
 def dkron(A,dA,B,dB, operation='prod'):
    """
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
--- a/GPy/models/state_space_new.py
+++ b/GPy/models/state_space_new.py
@ -16,6 +16,7 @@
 import numpy as np
 from scipy import linalg
 from scipy import stats
 from ..core import Model
 from .. import kern
 from GPy.plotting.matplot_dep.models_plots import gpplot
@ -26,17 +27,18 @@ from GPy.core.parameterization.param import Param
 import GPy
 from .. import likelihoods
-import GPy.models.state_space_main as ssm
+
-#import state_space_main as ssm
+from . import state_space_main as ssm
 reload(ssm)
 print ssm.__file__
 class StateSpace(Model):
    def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
        super(StateSpace, self).__init__(name=name)
        self.num_data, input_dim = X.shape
        assert input_dim==1, "State space methods for time only"
        if len(Y.shape) ==2: # TODO make this nice
            num_data_Y, self.output_dim = Y.shape
        elif len(Y.shape) ==3:
            num_data_Y, self.output_dim, ts_number = Y.shape
        assert num_data_Y == self.num_data, "X and Y data don't match"
        assert self.output_dim == 1, "State space methods for single outputs only"
@ -68,7 +70,7 @@ class StateSpace(Model):
        """
        # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde()
+        (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
        # necessary parameters
        measurement_dim = self.output_dim
@ -78,17 +80,19 @@ class StateSpace(Model):
        dF    = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
        dQc   = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
        dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
        dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
        # Assign the values for the kernel function
        dF[:,:,:-1] = dFt
        dQc[:,:,:-1] = dQct
        dP_inf[:,:,:-1] = dP_inft
        dP0[:,:,:-1] = dP0t
        # The sigma2 derivative
        dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
        dR[:,:,-1] = np.eye(measurement_dim)
-
+        #(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
        # Use the Kalman filter to evaluate the likelihood
        grad_calc_params = {}
@ -96,26 +100,53 @@ class StateSpace(Model):
        grad_calc_params['dF'] = dF
        grad_calc_params['dQc'] = dQc
        grad_calc_params['dR'] = dR
        grad_calc_params['dP_init'] = dP0
        (filter_means, filter_covs, log_likelihood, 
-         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
-                                      P_init=None, calc_log_likelihood=True, 
+                                      float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
                                      P_init=P0, calc_log_likelihood=True, 
                                      calc_grad_log_likelihood=True, 
                                      grad_params_no=grad_params_no, 
                                      grad_calc_params=grad_calc_params)
-        self._log_marginal_likelihood = log_likelihood
+        grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
-        #gradients  = self.compute_gradients()
+        grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
-        self.likelihood.update_gradients(grad_log_likelihood[-1,0])
+        self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
        self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
-        self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
+        self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
    def log_likelihood(self):
        """Return the value currently held in ``self._log_marginal_likelihood``."""
        stored_value = self._log_marginal_likelihood
        return stored_value
-    def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
+    def _raw_predict(self, Xnew, Ynew=None, filteronly=False):
        """
-        Inner function. It is called only from inside this class
+        Performs the actual prediction for new X points.
        Inner function. It is called only from inside this class.
        Input:
        ---------------------
        Xnews: vector or (n_points,1) matrix
            New time points where to evaluate predictions.
        Ynews: (n_train_points, ts_no) matrix
            This matrix can substitude the original training points (in order 
            to use only the parameters of the model).
        filteronly: bool
            Use only Kalman Filter for prediction. In this case the output does
            not coincide with corresponding Gaussian process.
        Output:
        --------------------
        m: vector
            Mean prediction
        V: vector
            Variance in every point
        """
        # Set defaults
@ -128,41 +159,44 @@ class StateSpace(Model):
        # Sort the matrix (save the order)
        _, return_index, return_inverse = np.unique(X,True,True)
-        X = X[return_index]
+        X = X[return_index] # TODO they are not used
        Y = Y[return_index]
        # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
+        (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
        state_dim = F.shape[0]        
        #import pdb; pdb.set_trace()
        #Y = self.Y[:, 0,0]
        # Run the Kalman filter
-        (M, P, tmp_log_likelihood, 
+        #import pdb; pdb.set_trace()
-         tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
+        (M, P, log_likelihood, 
-                                      P_init=None, calc_log_likelihood=False, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
                                      F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
                                      P_init=P0, calc_log_likelihood=False, 
                                      calc_grad_log_likelihood=False)                              
        # Run the Rauch-Tung-Striebel smoother
        if not filteronly:
            (M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P, 
                                AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
        # remove initial values        
-        M = M[:,1:]
+        M = M[1:,:]
-        P = P[:,:,1:]        
+        P = P[1:,:,:]        
        # Put the data back in the original order
-        M = M[:,return_inverse]
+        M = M[return_inverse,:]
-        P = P[:,:,return_inverse]
+        P = P[return_inverse,:,:]
        # Only return the values for Xnew
-        M = M[:,self.num_data:]
+        M = M[self.num_data:,:]
-        P = P[:,:,self.num_data:]
+        P = P[self.num_data:,:,:]
        # Calculate the mean and variance
-        m = H.dot(M).T
+        m = np.dot(M,H.T)
-        V = np.tensordot(H[0],P,(0,0))
+        V = np.einsum('ij,ajk,kl', H, P, H.T)
-        V = np.tensordot(V,H[0],(0,0))
+        
-        V = V[:,None]
+        V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
        # Return the posterior of the state
        return (m, V)
@ -170,10 +204,10 @@ class StateSpace(Model):
    def predict(self, Xnew, filteronly=False):
        # Run the Kalman filter to get the state
-        (m, V) = self._predict_raw(Xnew,filteronly=filteronly)
+        (m, V) = self._raw_predict(Xnew,filteronly=filteronly)
        # Add the noise variance to the state variance
-        V += self.sigma2
+        V += float(self.Gaussian_noise.variance)
        # Lower and upper bounds
        lower = m - 2*np.sqrt(V)
@ -182,142 +216,148 @@ class StateSpace(Model):
        # Return mean and variance
        return (m, V, lower, upper)
-    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
+    def predict_quantiles(self, Xnew, quantiles=(2.5, 97.5)):
-            ax=None, resolution=None, plot_raw=False, plot_filter=False,
+        mu, var = self._raw_predict(Xnew)
-            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+        #import pdb; pdb.set_trace()
        return  [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
        # Deal with optional parameters
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
-        # Define the frame on which to plot
+#    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
-        resolution = resolution or 200
+#            ax=None, resolution=None, plot_raw=False, plot_filter=False,
-        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+#            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
-
+#
-        # Make a prediction on the frame and plot it
+#        # Deal with optional parameters
-        if plot_raw:
+#        if ax is None:
-            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
+#            fig = pb.figure(num=fignum)
-            lower = m - 2*np.sqrt(v)
+#            ax = fig.add_subplot(111)
-            upper = m + 2*np.sqrt(v)
+#
-            Y = self.Y
+#        # Define the frame on which to plot
-        else:
+#        resolution = resolution or 200
-            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
+#        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-            Y = self.Y
+#
-
+#        # Make a prediction on the frame and plot it
-        # Plot the values
+#        if plot_raw:
-        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
+#            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
-        ax.plot(self.X, self.Y, 'kx', mew=1.5)
+#            lower = m - 2*np.sqrt(v)
-
+#            upper = m + 2*np.sqrt(v)
-        # Optionally plot some samples
+#            Y = self.Y
-        if samples:
+#        else:
-            if plot_raw:
+#            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
-                Ysim = self.posterior_samples_f(Xgrid, samples)
+#            Y = self.Y
-            else:
+#
-                Ysim = self.posterior_samples(Xgrid, samples)
+#        # Plot the values
-            for yi in Ysim.T:
+#        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
-                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
+#        ax.plot(self.X, self.Y, 'kx', mew=1.5)
-
+#
-        # Set the limits of the plot to some sensible values
+#        # Optionally plot some samples
-        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
+#        if samples:
-        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+#            if plot_raw:
-        ax.set_xlim(xmin, xmax)
+#                Ysim = self.posterior_samples_f(Xgrid, samples)
-        ax.set_ylim(ymin, ymax)
+#            else:
-
+#                Ysim = self.posterior_samples(Xgrid, samples)
-    def prior_samples_f(self,X,size=10):
+#            for yi in Ysim.T:
-
+#                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
-        # Sort the matrix (save the order)
+#
-        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        # Set the limits of the plot to some sensible values
-        X = X[return_index]
+#        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
-
+#        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-        # Get the model matrices from the kernel
+#        ax.set_xlim(xmin, xmax)
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#        ax.set_ylim(ymin, ymax)
-
+#
-        # Allocate space for results
+#    def prior_samples_f(self,X,size=10):
-        Y = np.empty((size,X.shape[0]))
+#
-
+#        # Sort the matrix (save the order)
-        # Simulate random draws
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
 #        X = X[return_index]
 #
 #        # Get the model matrices from the kernel
 #        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
 #
 #        # Allocate space for results
 #        Y = np.empty((size,X.shape[0]))
 #
 #        # Simulate random draws
 #        #for j in range(0,size):
 #        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
 #        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
 #
 #        # Only observations
 #        Y = np.tensordot(H[0],Y,(0,0))
 #
 #        # Reorder simulated values
 #        Y = Y[:,return_inverse]
 #
 #        # Return trajectory
 #        return Y.T
 #
 #    def posterior_samples_f(self,X,size=10):
 #
 #        # Sort the matrix (save the order)
 #        (_, return_index, return_inverse) = np.unique(X,True,True)
 #        X = X[return_index]
 #
 #        # Get the model matrices from the kernel
 #        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
 #
 #        # Run smoother on original data
 #        (m,V) = self.predict_raw(X)
 #
 #        # Simulate random draws from the GP prior
 #        y = self.prior_samples_f(np.vstack((self.X, X)),size)
 #
 #        # Allocate space for sample trajectories
 #        Y = np.empty((size,X.shape[0]))
 #
 #        # Run the RTS smoother on each of these values
 #        for j in range(0,size):
-        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
+#            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
-        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
+#            (m2,V2) = self.predict_raw(X,Ynew=yobs)
-
+#            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
-        # Only observations
+#
-        Y = np.tensordot(H[0],Y,(0,0))
+#        # Reorder simulated values
-
+#        Y = Y[:,return_inverse]
-        # Reorder simulated values
+#
-        Y = Y[:,return_inverse]
+#        # Return posterior sample trajectories
-
+#        return Y.T
-        # Return trajectory
+#
-        return Y.T
+#    def posterior_samples(self, X, size=10):
-
+#
-    def posterior_samples_f(self,X,size=10):
+#        # Make samples of f
-
+#        Y = self.posterior_samples_f(X,size)
-        # Sort the matrix (save the order)
+#
-        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        # Add noise
-        X = X[return_index]
+#        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
-
+#
-        # Get the model matrices from the kernel
+#        # Return trajectory
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#        return Y
-
+#        
-        # Run smoother on original data
+#        
-        (m,V) = self.predict_raw(X)
+#    def simulate(self,F,L,Qc,Pinf,X,size=1):
-
+#        # Simulate a trajectory using the state space model
-        # Simulate random draws from the GP prior
+#
-        y = self.prior_samples_f(np.vstack((self.X, X)),size)
+#        # Allocate space for results
-
+#        f = np.zeros((F.shape[0],size,X.shape[1]))
-        # Allocate space for sample trajectories
+#
-        Y = np.empty((size,X.shape[0]))
+#        # Initial state
-
+#        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
-        # Run the RTS smoother on each of these values
+#
-        for j in range(0,size):
+#        # Time step lengths
-            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
+#        dt = np.empty(X.shape)
-            (m2,V2) = self.predict_raw(X,Ynew=yobs)
+#        dt[:,0] = X[:,1]-X[:,0]
-            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
+#        dt[:,1:] = np.diff(X)
-
+#
-        # Reorder simulated values
+#        # Solve the LTI SDE for these time steps
-        Y = Y[:,return_inverse]
+#        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
-
+#
-        # Return posterior sample trajectories
+#        # Sweep through remaining time points
-        return Y.T
+#        for k in range(1,X.shape[1]):
-
+#
-    def posterior_samples(self, X, size=10):
+#            # Form discrete-time model
-
+#            A = As[:,:,index[1-k]]
-        # Make samples of f
+#            Q = Qs[:,:,index[1-k]]
-        Y = self.posterior_samples_f(X,size)
+#
-
+#            # Draw the state
-        # Add noise
+#            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
-        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
+#
-
+#        # Return values
-        # Return trajectory
+#        return f
        return Y
    def simulate(self,F,L,Qc,Pinf,X,size=1):
        """
        Draw sample trajectories of the state from the state space model.

        The state at the first time point is drawn from a zero-mean Gaussian
        with covariance ``Pinf``.  Every following state is obtained by
        propagating the previous one through the discretised model returned
        by ``ssm.ContDescrStateSpace.lti_sde_to_descrete`` and adding noise
        with covariance ``Q``.

        :param F: model matrix; ``F.shape[0]`` is used as the state dimension.
        :param L: passed unchanged to ``lti_sde_to_descrete``.
        :param Qc: passed unchanged to ``lti_sde_to_descrete``.
        :param Pinf: covariance used to draw the initial state (it is
            Cholesky-factorised, so it must be positive definite).
        :param X: 2-D array of time points, indexed as ``X[:, k]`` (time runs
            along the second axis).
        :param size: number of trajectories to draw.
        :return: array of shape ``(F.shape[0], size, X.shape[1])``.
        :raises numpy.linalg.LinAlgError: if ``Pinf`` or a discrete-time ``Q``
            is not positive definite.
        """
        state_dim = F.shape[0]
        n_points = X.shape[1]

        # Allocate space for results
        f = np.zeros((state_dim,size,n_points))

        # Nothing to simulate when there are no time points
        if n_points == 0:
            return f

        # Initial state. It must be stored at index 0: the sweep below starts
        # from f[:,:,0] and writes f[:,:,1] first, so a draw stored at index 1
        # would be overwritten and never used.
        f[:,:,0] = np.linalg.cholesky(Pinf).dot(np.random.randn(state_dim,size))

        # A single time point has no transitions to simulate
        if n_points == 1:
            return f

        # Time step lengths (the first entry repeats the first step)
        dt = np.empty(X.shape)
        dt[:,0] = X[:,1]-X[:,0]
        dt[:,1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)

        # Sweep through remaining time points
        for k in range(1,n_points):

            # Form discrete-time model
            # NOTE(review): 1-k is 0, -1, -2, ... so from k=2 on this indexes
            # `index` from its end; this looks like it was meant to follow the
            # time step order (e.g. index[k]) -- confirm against the layout of
            # `index` returned by lti_sde_to_descrete before changing.
            A = As[:,:,index[1-k]]
            Q = Qs[:,:,index[1-k]]

            # Draw the state
            noise = np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
            f[:,:,k] = A.dot(f[:,:,k-1]) + noise

        # Return values
        return f