ENH: Added SDE for all basic kernels except Rational Quadratic.

Some necessary modifications for the previous code are performed.
This commit is contained in:
Alexander Grigorievskiy 2015-07-14 16:44:21 +03:00
parent 06a7fedd22
commit 82cb626cd6
10 changed files with 1740 additions and 777 deletions

View file

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
    """
    Class provides extra functionality to transfer this covariance function
    into SDE (state space) form.

    Brownian motion kernel:

    .. math::

        k(x,y) = \\sigma^2 \\min(x,y)
    """

    def sde_update_gradient_full(self, gradients):
        """
        Update gradients in the order in which parameters are represented in
        the kernel.

        :param gradients: array-like; gradients[0] is the gradient w.r.t.
            ``variance`` (the only parameter of this kernel).
        """
        self.variance.gradient = gradients[0]

    def sde(self):
        """
        Return the state space representation of the covariance.

        :returns: tuple (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) of
            state-space matrices and their derivatives w.r.t. the single
            kernel parameter (variance).
        """
        variance = float(self.variance.values)  # diffusion variance of the Brownian motion
        F = np.array(((0, 1.0), (0, 0)))
        L = np.array(((1.0,), (0,)))
        Qc = np.array(((variance,),))
        H = np.array(((1.0, 0),))
        # NOTE(review): Pinf here is not PSD (zero diagonal, negative
        # off-diagonal) — presumably a formal stationary-solution matrix for a
        # non-stationary process; confirm against the state-space inference code.
        Pinf = np.array(((0, -0.5 * variance), (-0.5 * variance, 0)))
        # Brownian motion starts at zero, hence zero initial state covariance.
        P0 = np.zeros((2, 2))
        dF = np.zeros((2, 2, 1))
        dQc = np.ones((1, 1, 1))  # dQc/dvariance = 1
        dPinf = np.zeros((2, 2, 1))
        dPinf[:, :, 0] = np.array(((0, -0.5), (-0.5, 0)))
        dP0 = np.zeros((2, 2, 1))
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
@ -20,16 +20,45 @@ class sde_Linear(Linear):
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
    """
    Modify the init method, because one extra parameter is required.

    :param X: points on the X axis; only ``np.min(X)`` is stored (as
        ``self.t0``) and later used by ``sde`` as the left end of the
        time interval.
    """
    super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
    # Left boundary of the observed time interval.
    self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    :param gradients: array-like; gradients[0] is the gradient w.r.t.
        ``variances`` (the only parameter of this kernel).
    """
    self.variances.gradient = gradients[0]
def sde(self):
    """
    Return the state space representation of the covariance.

    :returns: tuple (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0);
        derivatives are w.r.t. the single parameter ``variances``.
    """
    variance = float(self.variances.values)  # initial variance in Bayesian linear regression
    t0 = float(self.t0)
    F = np.array(((0, 1.0), (0, 0)))
    L = np.array(((0,), (1.0,)))
    # Deterministic drift model: no process noise.
    Qc = np.zeros((1, 1))
    H = np.array(((1.0, 0),))
    Pinf = np.zeros((2, 2))
    # Initial covariance at the left end t0 of the interval.
    P0 = np.array(((t0 ** 2, t0), (t0, 1))) * variance
    dF = np.zeros((2, 2, 1))
    dQc = np.zeros((1, 1, 1))
    dPinf = np.zeros((2, 2, 1))
    dP0 = np.zeros((2, 2, 1))
    # NOTE(review): assumes variance != 0 — confirm upstream constraint.
    dP0[:, :, 0] = P0 / variance  # dP0/dvariance
    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -38,25 +38,24 @@ class sde_Matern32(Matern32):
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
F = np.array(((0, 1), (-foo**2, -2*foo)))
L = np.array(( (0,), (1,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1, 0),))
Pinf = np.array(((variance, 0), (0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
@ -64,8 +63,9 @@ class sde_Matern32(Matern32):
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
@ -106,7 +106,7 @@ class sde_Matern52(Matern52):
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
@ -130,75 +130,6 @@ class sde_Matern52(Matern52):
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
# % Derivative of F w.r.t. parameter magnSigma2
# dFmagnSigma2 = [0, 0, 0;
# 0, 0, 0;
# 0, 0, 0];
#
# % Derivative of F w.r.t parameter lengthScale
# dFlengthScale = [0, 0, 0;
# 0, 0, 0;
# 15*sqrt(5)/lengthScale^4, 30/lengthScale^3, 3*sqrt(5)/lengthScale^2];
#
# % Derivative of Qc w.r.t. parameter magnSigma2
# dQcmagnSigma2 = 400*sqrt(5)/3/lengthScale^5;
#
# % Derivative of Qc w.r.t. parameter lengthScale
# dQclengthScale = -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
#
# % Derivative of Pinf w.r.t. parameter magnSigma2
# dPinfmagnSigma2 = Pinf/magnSigma2;
#
# % Derivative of Pinf w.r.t. parameter lengthScale
# kappa2 = -2*kappa/lengthScale;
# dPinflengthScale = [0, 0, -kappa2;
# 0, kappa2, 0;
# -kappa2, 0, -100*magnSigma2/lengthScale^5];
#
# % Stack all derivatives
# dF = zeros(3,3,2);
# dQc = zeros(1,1,2);
# dPinf = zeros(3,3,2);
#
# dF(:,:,1) = dFmagnSigma2;
# dF(:,:,2) = dFlengthScale;
# dQc(:,:,1) = dQcmagnSigma2;
# dQc(:,:,2) = dQclengthScale;
# dPinf(:,:,1) = dPinfmagnSigma2;
# dPinf(:,:,2) = dPinflengthScale;
# % Derived constants
# lambda = sqrt(5)/lengthScale;
#
# % Feedback matrix
# F = [ 0, 1, 0;
# 0, 0, 1;
# -lambda^3, -3*lambda^2, -3*lambda];
#
# % Noise effect matrix
# L = [0; 0; 1];
#
# % Spectral density
# Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
#
# % Observation model
# H = [1, 0, 0];
# %% Stationary covariance
#
# % Calculate Pinf only if requested
# if nargout > 4,
#
# % Derived constant
# kappa = 5/3*magnSigma2/lengthScale^2;
#
# % Stationary covariance
# Pinf = [magnSigma2, 0, -kappa;
# 0, kappa, 0;
# -kappa, 0, 25*magnSigma2/lengthScale^4];
#
# end
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -75,7 +75,7 @@ class sde_StdPeriodic(StdPeriodic):
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
@ -96,9 +96,9 @@ class sde_StdPeriodic(StdPeriodic):
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
@ -14,25 +14,47 @@ class sde_White(White):
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Linear kernel:
White kernel:
.. math::
k(x,y) = \alpha
k(x,y) = \alpha*\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    :param gradients: array-like; gradients[0] is the gradient w.r.t.
        ``variance`` (the only parameter of this kernel).
    """
    self.variance.gradient = gradients[0]
def sde(self):
    """
    Return the state space representation of the covariance.

    :returns: tuple (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0);
        derivatives are w.r.t. the single parameter ``variance``.
    """
    variance = float(self.variance.values)
    # NOTE(review): F = -inf formally encodes delta-correlated (memoryless)
    # noise — confirm the downstream state-space code handles non-finite F.
    F = np.array(((-np.inf,),))
    L = np.array(((1.0,),))
    Qc = np.array(((variance,),))
    H = np.array(((1.0,),))
    Pinf = np.array(((variance,),))
    P0 = Pinf.copy()
    dF = np.zeros((1, 1, 1))
    dQc = np.zeros((1, 1, 1))
    dQc[:, :, 0] = np.array(((1.0,),))  # dQc/dvariance
    dPinf = np.zeros((1, 1, 1))
    dPinf[:, :, 0] = np.array(((1.0,),))  # dPinf/dvariance
    dP0 = dPinf.copy()
    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
@ -40,22 +62,40 @@ class sde_Bias(Bias):
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Linear kernel:
Bias kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    :param gradients: array-like; gradients[0] is the gradient w.r.t.
        ``variance`` (the only parameter of this kernel).
    """
    self.variance.gradient = gradients[0]
def sde(self):
    """
    Return the state space representation of the covariance.

    :returns: tuple (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0);
        derivatives are w.r.t. the single parameter ``variance``.
    """
    variance = float(self.variance.values)
    # Constant (bias) process: zero dynamics, zero process noise.
    F = np.array(((0.0,),))
    L = np.array(((1.0,),))
    Qc = np.zeros((1, 1))
    H = np.array(((1.0,),))
    Pinf = np.zeros((1, 1))
    # All uncertainty is in the initial state.
    P0 = np.array(((variance,),))
    dF = np.zeros((1, 1, 1))
    dQc = np.zeros((1, 1, 1))
    dPinf = np.zeros((1, 1, 1))
    dP0 = np.zeros((1, 1, 1))
    dP0[:, :, 0] = np.array(((1.0,),))  # dP0/dvariance
    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -8,6 +8,7 @@ from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
@ -22,20 +23,87 @@ class sde_RBF(RBF):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    :param gradients: array-like; gradients[0] is the gradient w.r.t.
        ``variance``, gradients[1] w.r.t. ``lengthscale``.
    """
    self.variance.gradient = gradients[0]
    self.lengthscale.gradient = gradients[1]
def sde(self):
    """
    Return the state space representation of the covariance.

    The RBF kernel has no exact finite-dimensional state space form, so
    the spectral density is approximated by an order-N Taylor expansion
    of the exponential, yielding a rational approximation.

    :returns: tuple (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0);
        derivatives (last index) are ordered (variance, lengthscale).
    """
    from math import factorial  # np.math is deprecated/removed in new NumPy

    N = 10  # approximation order (number of terms in exponent series expansion)
    roots_rounding_decimals = 6

    variance = float(self.variance.values)
    lengthscale = float(self.lengthscale.values)

    fn = factorial(N)
    kappa = 1.0 / 2.0 / lengthscale ** 2
    # Spectral density scaling of the approximated process.
    Qc = np.array(((variance * np.sqrt(np.pi / kappa) * fn * (4 * kappa) ** N,),))

    # Polynomial coefficients of the denominator, from highest power to lowest;
    # odd powers are zero (spectral density is even in the frequency).
    pp = np.zeros((2 * N + 1,))
    for n in range(0, N + 1):
        pp[2 * (N - n)] = fn * (4.0 * kappa) ** (N - n) / factorial(n) * (-1) ** n

    pp = np.poly1d(pp)
    roots = np.roots(pp)
    # Keep only stable (negative-real-part) roots; rounding guards against
    # numerical noise in the real parts.
    neg_real_part_roots = roots[np.round(np.real(roots), roots_rounding_decimals) < 0]
    aa = np.poly1d(neg_real_part_roots, r=True).coeffs

    # Companion-form dynamics.
    F = np.diag(np.ones((N - 1,)), 1)
    F[-1, :] = -aa[-1:0:-1]
    L = np.zeros((N, 1))
    L[N - 1, 0] = 1
    H = np.zeros((1, N))
    H[0, 0] = 1

    # Stationary covariance from the Lyapunov equation.
    Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L, np.dot(Qc[0, 0], L.T)))

    # Allocate space for the derivatives (last axis: variance, lengthscale).
    dF = np.empty([F.shape[0], F.shape[1], 2])
    dQc = np.empty([Qc.shape[0], Qc.shape[1], 2])
    dPinf = np.empty([Pinf.shape[0], Pinf.shape[1], 2])

    # Derivatives:
    dFvariance = np.zeros(F.shape)
    dFlengthscale = np.zeros(F.shape)
    # Coefficient of power k scales as lengthscale^(k-N); differentiate.
    dFlengthscale[-1, :] = -aa[-1:0:-1] / lengthscale * np.arange(-N, 0, 1)
    dQcvariance = Qc / variance
    # BUGFIX: original multiplied by the tuple (1-2*N,) instead of the
    # scalar (1 - 2*N). Qc = variance*sqrt(2*pi)*fn*2^N*lengthscale^(1-2N),
    # hence dQc/dlengthscale below.
    dQclengthscale = np.array(
        ((variance * np.sqrt(2 * np.pi) * fn * 2 ** N
          * lengthscale ** (-2 * N) * (1 - 2 * N),),))
    dPinf_variance = Pinf / variance
    lp = Pinf.shape[0]
    # Entry (i,j) of Pinf scales as lengthscale^-(i+j-2) (even sums only).
    coeff = np.arange(1, lp + 1).reshape(lp, 1) + np.arange(1, lp + 1).reshape(1, lp) - 2
    coeff[np.mod(coeff, 2) != 0] = 0
    dPinf_lengthscale = -1 / lengthscale * Pinf * coeff

    dF[:, :, 0] = dFvariance
    dF[:, :, 1] = dFlengthscale
    dQc[:, :, 0] = dQcvariance
    dQc[:, :, 1] = dQclengthscale
    dPinf[:, :, 0] = dPinf_variance
    dPinf[:, :, 1] = dPinf_lengthscale

    # Balancing of the state-space model was tried but its benefits are
    # unjustified, so it is not applied here.
    P0 = Pinf.copy()
    dP0 = dPinf.copy()
    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
@ -50,29 +118,47 @@ class sde_Exponential(Exponential):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    :param gradients: array-like; gradients[0] is the gradient w.r.t.
        ``variance``, gradients[1] w.r.t. ``lengthscale``.
    """
    self.variance.gradient = gradients[0]
    self.lengthscale.gradient = gradients[1]
def sde(self):
    """
    Return the state space representation of the covariance.

    The exponential (Ornstein-Uhlenbeck) kernel corresponds to a
    one-dimensional linear SDE.

    :returns: tuple (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0);
        derivatives (last index) are ordered (variance, lengthscale).
    """
    variance = float(self.variance.values)
    lengthscale = float(self.lengthscale)
    F = np.array(((-1.0 / lengthscale,),))
    L = np.array(((1.0,),))
    Qc = np.array(((2.0 * variance / lengthscale,),))
    H = np.array(((1,),))
    Pinf = np.array(((variance,),))
    P0 = Pinf.copy()
    # Derivatives: last axis is (variance, lengthscale).
    dF = np.zeros((1, 1, 2))
    dQc = np.zeros((1, 1, 2))
    dPinf = np.zeros((1, 1, 2))
    dF[:, :, 0] = 0.0
    dF[:, :, 1] = 1.0 / lengthscale ** 2    # d(-1/l)/dl
    dQc[:, :, 0] = 2.0 / lengthscale        # d(2v/l)/dv
    dQc[:, :, 1] = -2.0 * variance / lengthscale ** 2  # d(2v/l)/dl
    dPinf[:, :, 0] = 1.0
    dPinf[:, :, 1] = 0.0
    dP0 = dPinf.copy()
    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
@ -92,7 +178,7 @@ class sde_RatQuad(RatQuad):
Return the state space representation of the covariance.
"""
# Arno, insert your code here
assert False, 'Not Implemented'
# Params to use:
@ -100,4 +186,4 @@ class sde_RatQuad(RatQuad):
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

View file

@ -290,23 +290,26 @@ class Add(CombinationKernel):
Qc = None
H = None
Pinf = None
P0 = None
dF = None
dQc = None
dPinf = None
dPinf = None
dP0 = None
n = 0
nq = 0
nd = 0
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
(Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
F = la.block_diag(F,Ft) if (F is not None) else Ft
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
'constant', constant_values=0)
@ -327,7 +330,14 @@ class Add(CombinationKernel):
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
@ -336,9 +346,11 @@ class Add(CombinationKernel):
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)

View file

@ -126,13 +126,15 @@ class Prod(CombinationKernel):
Qc = np.array((1,), ndmin=2)
H = np.array((1,), ndmin=2)
Pinf = np.array((1,), ndmin=2)
P0 = np.array((1,), ndmin=2)
dF = None
dQc = None
dPinf = None
dP0 = None
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
(Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
# check derivative dimensions ->
number_of_parameters = len(p.param_array)
@ -149,14 +151,16 @@ class Prod(CombinationKernel):
dF = dkron(F,dF,Ft,dFt,'sum')
dQc = dkron(Qc,dQc,Qct,dQct,'prod')
dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
L = np.kron(L,Lt)
Qc = np.kron(Qc,Qct)
Pinf = np.kron(Pinf,P_inft)
P0 = np.kron(P0,P_inft)
H = np.kron(H,Ht)
return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dkron(A,dA,B,dB, operation='prod'):
"""