ENH: Added SDE for all basic kernels except Rational Quadratic.

Some necessary modifications to the previous code were made.
This commit is contained in:
Alexander Grigorievskiy 2015-07-14 16:44:21 +03:00
parent 06a7fedd22
commit 82cb626cd6
10 changed files with 1740 additions and 777 deletions

View file

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
    r"""
    Brownian motion covariance function extended with the functionality
    needed to express it in SDE (state space) form.

    Brownian kernel:

    .. math::

       k(x,y) = \sigma^2 min(x,y)

    """

    def sde_update_gradient_full(self, gradients):
        """
        Store the gradient of the kernel parameter.

        :param gradients: indexable object; element 0 is written to
            ``self.variance.gradient``.
        """
        variance_gradient = gradients[0]
        self.variance.gradient = variance_gradient

    def sde(self):
        """
        Return the state space representation of the covariance.

        :returns: tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
            The derivative arrays carry one slice along their last axis,
            for the single parameter (the variance).
        """
        sigma2 = float(self.variance.values)

        # Model matrices; only Qc and Pinf depend on the variance.
        F = np.array(((0, 1.0), (0, 0)))
        L = np.array(((1.0,), (0,)))
        Qc = np.array(((sigma2,),))
        H = np.array(((1.0, 0),))

        off_diagonal = -0.5 * sigma2
        Pinf = np.array(((0, off_diagonal), (off_diagonal, 0)))
        P0 = np.zeros((2, 2))

        # Derivatives with respect to the variance.
        dF = np.zeros((2, 2, 1))
        dQc = np.ones((1, 1, 1))
        dPinf = np.array(((0, -0.5), (-0.5, 0)))[:, :, np.newaxis]
        dP0 = np.zeros((2, 2, 1))

        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
@ -20,16 +20,45 @@ class sde_Linear(Linear):
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
    """
    Construct the kernel and remember the smallest input value.

    Compared with the parent constructor one extra argument is required:

    :param X: points on the X axis. Their minimum is stored as ``self.t0``,
        which ``sde()`` reads when building the initial covariance.

    All remaining arguments are forwarded to the parent ``__init__``
    unchanged and in the same order.
    """
    super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)

    earliest_input = np.min(X)
    self.t0 = earliest_input
def sde_update_gradient_full(self, gradients):
    """
    Store the gradient of the kernel parameter.

    :param gradients: indexable object; element 0 is written to
        ``self.variances.gradient``.
    """
    variances_gradient = gradients[0]
    self.variances.gradient = variances_gradient
def sde(self):
    """
    Return the state space representation of the covariance.

    Reads ``self.variances`` (converted to a single float) and ``self.t0``.

    :returns: tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
        The derivative arrays carry one slice along their last axis, for
        the single parameter (the variance). Only ``P0`` depends on it.
    """
    variance = float(self.variances.values)
    t0 = float(self.t0)

    F = np.array(((0, 1.0), (0, 0)))
    L = np.array(((0,), (1.0,)))
    Qc = np.zeros((1, 1))
    H = np.array(((1.0, 0),))

    Pinf = np.zeros((2, 2))

    # P0 is this matrix scaled by the variance, so the matrix itself is the
    # derivative of P0 with respect to the variance. Using it directly
    # (rather than P0 / variance) keeps dP0 finite when the variance is 0.
    P0_per_unit_variance = np.array(((t0**2, t0), (t0, 1)))
    P0 = P0_per_unit_variance * variance

    dF = np.zeros((2, 2, 1))
    dQc = np.zeros((1, 1, 1))
    dPinf = np.zeros((2, 2, 1))
    dP0 = np.zeros((2, 2, 1))
    dP0[:, :, 0] = P0_per_unit_variance

    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -38,25 +38,24 @@ class sde_Matern32(Matern32):
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
F = np.array(((0, 1), (-foo**2, -2*foo)))
L = np.array(( (0,), (1,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1, 0),))
Pinf = np.array(((variance, 0), (0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
@ -64,8 +63,9 @@ class sde_Matern32(Matern32):
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
@ -106,7 +106,7 @@ class sde_Matern52(Matern52):
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
@ -130,75 +130,6 @@ class sde_Matern52(Matern52):
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
# % Derivative of F w.r.t. parameter magnSigma2
# dFmagnSigma2 = [0, 0, 0;
# 0, 0, 0;
# 0, 0, 0];
#
# % Derivative of F w.r.t parameter lengthScale
# dFlengthScale = [0, 0, 0;
# 0, 0, 0;
# 15*sqrt(5)/lengthScale^4, 30/lengthScale^3, 3*sqrt(5)/lengthScale^2];
#
# % Derivative of Qc w.r.t. parameter magnSigma2
# dQcmagnSigma2 = 400*sqrt(5)/3/lengthScale^5;
#
# % Derivative of Qc w.r.t. parameter lengthScale
# dQclengthScale = -magnSigma2*2000*sqrt(5)/3/lengthScale^6;
#
# % Derivative of Pinf w.r.t. parameter magnSigma2
# dPinfmagnSigma2 = Pinf/magnSigma2;
#
# % Derivative of Pinf w.r.t. parameter lengthScale
# kappa2 = -2*kappa/lengthScale;
# dPinflengthScale = [0, 0, -kappa2;
# 0, kappa2, 0;
# -kappa2, 0, -100*magnSigma2/lengthScale^5];
#
# % Stack all derivatives
# dF = zeros(3,3,2);
# dQc = zeros(1,1,2);
# dPinf = zeros(3,3,2);
#
# dF(:,:,1) = dFmagnSigma2;
# dF(:,:,2) = dFlengthScale;
# dQc(:,:,1) = dQcmagnSigma2;
# dQc(:,:,2) = dQclengthScale;
# dPinf(:,:,1) = dPinfmagnSigma2;
# dPinf(:,:,2) = dPinflengthScale;
# % Derived constants
# lambda = sqrt(5)/lengthScale;
#
# % Feedback matrix
# F = [ 0, 1, 0;
# 0, 0, 1;
# -lambda^3, -3*lambda^2, -3*lambda];
#
# % Noise effect matrix
# L = [0; 0; 1];
#
# % Spectral density
# Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5;
#
# % Observation model
# H = [1, 0, 0];
# %% Stationary covariance
#
# % Calculate Pinf only if requested
# if nargout > 4,
#
# % Derived constant
# kappa = 5/3*magnSigma2/lengthScale^2;
#
# % Stationary covariance
# Pinf = [magnSigma2, 0, -kappa;
# 0, kappa, 0;
# -kappa, 0, 25*magnSigma2/lengthScale^4];
#
# end
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -75,7 +75,7 @@ class sde_StdPeriodic(StdPeriodic):
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
@ -96,9 +96,9 @@ class sde_StdPeriodic(StdPeriodic):
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
@ -14,33 +14,7 @@ class sde_White(White):
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Linear kernel:
.. math::
k(x,y) = \alpha
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
# Arno, insert your code here
# Params to use:
# self.variance
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
class sde_Bias(Bias):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Linear kernel:
White kernel:
.. math::
@ -48,14 +22,80 @@ class sde_Bias(Bias):
"""
def sde_update_gradient_full(self, gradients):
    """
    Store the gradient of the kernel parameter.

    :param gradients: indexable object; element 0 is written to
        ``self.variance.gradient``.
    """
    variance_gradient = gradients[0]
    self.variance.gradient = variance_gradient
def sde(self):
    """
    Return the state space representation of the covariance.

    :returns: tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
        The first six entries are 1x1 arrays and the derivative arrays
        have shape (1, 1, 1): one slice for the single parameter, the
        variance.
    """
    sigma2 = float(self.variance.values)

    def as_1x1(value):
        # Wrap a scalar into a 1x1 array.
        return np.array(((value,),))

    F = as_1x1(-np.inf)
    L = as_1x1(1.0)
    Qc = as_1x1(sigma2)
    H = as_1x1(1.0)

    Pinf = as_1x1(sigma2)
    P0 = Pinf.copy()

    # Qc and Pinf are equal to the variance, hence unit derivatives;
    # F does not depend on it.
    dF = np.zeros((1, 1, 1))
    dQc = np.ones((1, 1, 1))
    dPinf = np.ones((1, 1, 1))
    dP0 = dPinf.copy()

    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
    r"""
    Bias covariance function extended with the functionality needed to
    express it in SDE (state space) form.

    Bias kernel:

    .. math::

       k(x,y) = \alpha

    """

    def sde_update_gradient_full(self, gradients):
        """
        Store the gradient of the kernel parameter.

        :param gradients: indexable object; element 0 is written to
            ``self.variance.gradient``.
        """
        variance_gradient = gradients[0]
        self.variance.gradient = variance_gradient

    def sde(self):
        """
        Return the state space representation of the covariance.

        :returns: tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
            The first six entries are 1x1 arrays and the derivative arrays
            have shape (1, 1, 1): one slice for the single parameter, the
            variance. Only ``P0`` depends on it.
        """
        sigma2 = float(self.variance.values)

        F = np.array(((0.0,),))
        L = np.array(((1.0,),))
        Qc = np.zeros((1, 1))
        H = np.array(((1.0,),))

        Pinf = np.zeros((1, 1))
        P0 = np.array(((sigma2,),))

        dF = np.zeros((1, 1, 1))
        dQc = np.zeros((1, 1, 1))
        dPinf = np.zeros((1, 1, 1))
        # P0 equals the variance, so its derivative is one.
        dP0 = np.ones((1, 1, 1))

        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -8,6 +8,7 @@ from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
@ -22,20 +23,87 @@ class sde_RBF(RBF):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
    """
    Store the gradients of the kernel parameters.

    :param gradients: indexable object; element 0 is written to
        ``self.variance.gradient`` and element 1 to
        ``self.lengthscale.gradient``.
    """
    ordered_parameters = (self.variance, self.lengthscale)
    for position, parameter in enumerate(ordered_parameters):
        parameter.gradient = gradients[position]
def sde(self):
    """
    Return the state space representation of the covariance.

    The model is an approximation of fixed order ``N = 10`` (the number of
    terms in the exponent series expansion), so all state matrices are
    N x N.

    :returns: tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
        The derivative arrays have a last axis of length 2: index 0 is the
        derivative with respect to the variance and index 1 with respect
        to the lengthscale. ``P0`` and ``dP0`` are copies of ``Pinf`` and
        ``dPinf``.
    """
    # Function-scope imports: the numpy/scipy namespace aliases previously
    # used here (np.math, sp.poly1d, sp.roots) are not available in current
    # releases, and ``import scipy`` alone does not guarantee that
    # ``scipy.linalg`` is loaded.
    from math import factorial
    from scipy import linalg

    N = 10  # approximation order (number of terms in exponent series expansion)
    roots_rounding_decimals = 6

    # Work with plain floats, like the other kernels in this package do.
    variance = float(self.variance.values)
    lengthscale = float(self.lengthscale.values)

    fn = factorial(N)
    kappa = 1.0/2.0/lengthscale**2

    Qc = np.array(((variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),))

    # Polynomial coefficients from higher power to lower. Only even powers
    # are non-zero; there are (2N+1) coefficients in total.
    pp = np.zeros((2*N+1,))
    for n in range(0, N+1):
        pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/factorial(n)*(-1)**n

    roots = np.roots(np.poly1d(pp))

    # Keep the roots whose (rounded) real part is negative and rebuild the
    # polynomial that has exactly those roots.
    neg_real_part_roots = roots[np.round(np.real(roots), roots_rounding_decimals) < 0]
    aa = np.poly1d(neg_real_part_roots, r=True).coeffs

    # Ones on the superdiagonal; the last row holds the negated polynomial
    # coefficients, lowest power first, without the leading one.
    F = np.diag(np.ones((N-1,)), 1)
    F[-1, :] = -aa[-1:0:-1]

    L = np.zeros((N, 1))
    L[N-1, 0] = 1

    H = np.zeros((1, N))
    H[0, 0] = 1

    # Infinite covariance: solves F*Pinf + Pinf*F' = -L*Qc*L'.
    # ``solve_lyapunov`` is the older name of the same solver; use
    # whichever one the installed SciPy provides.
    solve_lyapunov = getattr(linalg, 'solve_continuous_lyapunov', None)
    if solve_lyapunov is None:
        solve_lyapunov = linalg.solve_lyapunov
    Pinf = solve_lyapunov(F, -np.dot(L, np.dot(Qc[0, 0], L.T)))

    # Allocating space for derivatives
    dF = np.empty([F.shape[0], F.shape[1], 2])
    dQc = np.empty([Qc.shape[0], Qc.shape[1], 2])
    dPinf = np.empty([Pinf.shape[0], Pinf.shape[1], 2])

    # Derivatives:
    dFvariance = np.zeros(F.shape)
    dFlengthscale = np.zeros(F.shape)
    dFlengthscale[-1, :] = -aa[-1:0:-1]/lengthscale * np.arange(-N, 0, 1)

    dQcvariance = Qc/variance
    dQclengthscale = np.array(((variance*np.sqrt(2*np.pi)*fn*2**N*lengthscale**(-2*N)*(1-2*N),),))

    dPinf_variance = Pinf/variance

    # Entry (i, j) of Pinf is multiplied by (i + j) (zero-based) when that
    # sum is even and by zero otherwise.
    lp = Pinf.shape[0]
    coeff = np.arange(1, lp+1).reshape(lp, 1) + np.arange(1, lp+1).reshape(1, lp) - 2
    coeff[np.mod(coeff, 2) != 0] = 0
    dPinf_lengthscale = -1/lengthscale*Pinf*coeff

    dF[:, :, 0] = dFvariance
    dF[:, :, 1] = dFlengthscale
    dQc[:, :, 0] = dQcvariance
    dQc[:, :, 1] = dQclengthscale
    dPinf[:, :, 0] = dPinf_variance
    dPinf[:, :, 1] = dPinf_lengthscale

    P0 = Pinf.copy()
    dP0 = dPinf.copy()

    # Benefits of this are unjustified
    #import GPy.models.state_space_main as ssm
    #(F, L, Qc, H, Pinf, dF, dQc, dPinf,T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, dF, dQc, dPinf)

    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
@ -51,27 +119,45 @@ class sde_Exponential(Exponential):
"""
def sde_update_gradient_full(self, gradients):
    """
    Store the gradients of the kernel parameters.

    :param gradients: indexable object; element 0 is written to
        ``self.variance.gradient`` and element 1 to
        ``self.lengthscale.gradient``.
    """
    ordered_parameters = (self.variance, self.lengthscale)
    for position, parameter in enumerate(ordered_parameters):
        parameter.gradient = gradients[position]
def sde(self):
    """
    Return the state space representation of the covariance.

    :returns: tuple ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)``.
        The first six entries are 1x1 arrays. The derivative arrays have
        shape (1, 1, 2): index 0 of the last axis is the derivative with
        respect to the variance and index 1 with respect to the
        lengthscale.
    """
    variance = float(self.variance.values)
    lengthscale = float(self.lengthscale)

    F = np.array(((-1.0/lengthscale,),))
    L = np.array(((1.0,),))
    Qc = np.array(((2.0*variance/lengthscale,),))
    H = np.array(((1,),))
    Pinf = np.array(((variance,),))
    P0 = Pinf.copy()

    dF = np.zeros((1, 1, 2))
    dQc = np.zeros((1, 1, 2))
    dPinf = np.zeros((1, 1, 2))

    # F = -1/lengthscale: independent of the variance.
    dF[:, :, 0] = 0.0
    dF[:, :, 1] = 1.0/lengthscale**2

    # Qc = 2*variance/lengthscale.
    dQc[:, :, 0] = 2.0/lengthscale
    dQc[:, :, 1] = -2.0*variance/lengthscale**2

    # Pinf = variance: independent of the lengthscale.
    dPinf[:, :, 0] = 1.0
    dPinf[:, :, 1] = 0.0

    dP0 = dPinf.copy()

    return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
@ -92,7 +178,7 @@ class sde_RatQuad(RatQuad):
Return the state space representation of the covariance.
"""
# Arno, insert your code here
assert False, 'Not Implemented'
# Params to use:

View file

@ -290,22 +290,25 @@ class Add(CombinationKernel):
Qc = None
H = None
Pinf = None
P0 = None
dF = None
dQc = None
dPinf = None
dP0 = None
n = 0
nq = 0
nd = 0
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
(Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
F = la.block_diag(F,Ft) if (F is not None) else Ft
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
@ -328,6 +331,13 @@ class Add(CombinationKernel):
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
@ -337,8 +347,10 @@ class Add(CombinationKernel):
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)

View file

@ -126,13 +126,15 @@ class Prod(CombinationKernel):
Qc = np.array((1,), ndmin=2)
H = np.array((1,), ndmin=2)
Pinf = np.array((1,), ndmin=2)
P0 = np.array((1,), ndmin=2)
dF = None
dQc = None
dPinf = None
dP0 = None
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
(Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
# check derivative dimensions ->
number_of_parameters = len(p.param_array)
@ -149,14 +151,16 @@ class Prod(CombinationKernel):
dF = dkron(F,dF,Ft,dFt,'sum')
dQc = dkron(Qc,dQc,Qct,dQct,'prod')
dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
L = np.kron(L,Lt)
Qc = np.kron(Qc,Qct)
Pinf = np.kron(Pinf,P_inft)
P0 = np.kron(P0,P_inft)
H = np.kron(H,Ht)
return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dkron(A,dA,B,dB, operation='prod'):
"""

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@
import numpy as np
from scipy import linalg
from scipy import stats
from ..core import Model
from .. import kern
from GPy.plotting.matplot_dep.models_plots import gpplot
@ -26,17 +27,18 @@ from GPy.core.parameterization.param import Param
import GPy
from .. import likelihoods
import GPy.models.state_space_main as ssm
#import state_space_main as ssm
reload(ssm)
print ssm.__file__
from . import state_space_main as ssm
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
self.num_data, input_dim = X.shape
assert input_dim==1, "State space methods for time only"
num_data_Y, self.output_dim = Y.shape
if len(Y.shape) ==2: # TODO make this nice
num_data_Y, self.output_dim = Y.shape
elif len(Y.shape) ==3:
num_data_Y, self.output_dim, ts_number = Y.shape
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods for single outputs only"
@ -68,7 +70,7 @@ class StateSpace(Model):
"""
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde()
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters
measurement_dim = self.output_dim
@ -78,17 +80,19 @@ class StateSpace(Model):
dF = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dP_inf[:,:,:-1] = dP_inft
dP0[:,:,:-1] = dP0t
# The sigma2 derivative
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
dR[:,:,-1] = np.eye(measurement_dim)
#(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
@ -96,26 +100,53 @@ class StateSpace(Model):
grad_calc_params['dF'] = dF
grad_calc_params['dQc'] = dQc
grad_calc_params['dR'] = dR
grad_calc_params['dP_init'] = dP0
(filter_means, filter_covs, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
P_init=None, calc_log_likelihood=True,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
P_init=P0, calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params)
self._log_marginal_likelihood = log_likelihood
#gradients = self.compute_gradients()
self.likelihood.update_gradients(grad_log_likelihood[-1,0])
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
def log_likelihood(self):
    """
    Return the cached value stored in ``self._log_marginal_likelihood``.

    Nothing is recomputed here; the attribute is read as is.
    """
    cached_value = self._log_marginal_likelihood
    return cached_value
def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
def _raw_predict(self, Xnew, Ynew=None, filteronly=False):
"""
Inner function. It is called only from inside this class
Performs the actual prediction for new X points.
Inner function. It is called only from inside this class.
Input:
---------------------
Xnews: vector or (n_points,1) matrix
New time points where to evaluate predictions.
Ynews: (n_train_points, ts_no) matrix
This matrix can substitude the original training points (in order
to use only the parameters of the model).
filteronly: bool
Use only Kalman Filter for prediction. In this case the output does
not coincide with corresponding Gaussian process.
Output:
--------------------
m: vector
Mean prediction
V: vector
Variance in every point
"""
# Set defaults
@ -128,41 +159,44 @@ class StateSpace(Model):
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index]
X = X[return_index] # TODO they are not used
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
#import pdb; pdb.set_trace()
#Y = self.Y[:, 0,0]
# Run the Kalman filter
(M, P, tmp_log_likelihood,
tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
P_init=None, calc_log_likelihood=False,
#import pdb; pdb.set_trace()
(M, P, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
P_init=P0, calc_log_likelihood=False,
calc_grad_log_likelihood=False)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
# remove initial values
M = M[:,1:]
P = P[:,:,1:]
M = M[1:,:]
P = P[1:,:,:]
# Put the data back in the original order
M = M[:,return_inverse]
P = P[:,:,return_inverse]
M = M[return_inverse,:]
P = P[return_inverse,:,:]
# Only return the values for Xnew
M = M[:,self.num_data:]
P = P[:,:,self.num_data:]
M = M[self.num_data:,:]
P = P[self.num_data:,:,:]
# Calculate the mean and variance
m = H.dot(M).T
V = np.tensordot(H[0],P,(0,0))
V = np.tensordot(V,H[0],(0,0))
V = V[:,None]
m = np.dot(M,H.T)
V = np.einsum('ij,ajk,kl', H, P, H.T)
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
# Return the posterior of the state
return (m, V)
@ -170,10 +204,10 @@ class StateSpace(Model):
def predict(self, Xnew, filteronly=False):
# Run the Kalman filter to get the state
(m, V) = self._predict_raw(Xnew,filteronly=filteronly)
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
V += self.sigma2
V += float(self.Gaussian_noise.variance)
# Lower and upper bounds
lower = m - 2*np.sqrt(V)
@ -182,142 +216,148 @@ class StateSpace(Model):
# Return mean and variance
return (m, V, lower, upper)
def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
ax=None, resolution=None, plot_raw=False, plot_filter=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
# Deal with optional parameters
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
# Define the frame on which to plot
resolution = resolution or 200
Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
# Make a prediction on the frame and plot it
if plot_raw:
m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.Y
else:
m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
Y = self.Y
# Plot the values
gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(self.X, self.Y, 'kx', mew=1.5)
# Optionally plot some samples
if samples:
if plot_raw:
Ysim = self.posterior_samples_f(Xgrid, samples)
else:
Ysim = self.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
# Set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
def prior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Allocate space for results
Y = np.empty((size,X.shape[0]))
# Simulate random draws
#for j in range(0,size):
# Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
Y = self.simulate(F,L,Qc,Pinf,X.T,size)
# Only observations
Y = np.tensordot(H[0],Y,(0,0))
# Reorder simulated values
Y = Y[:,return_inverse]
# Return trajectory
return Y.T
def posterior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run smoother on original data
(m,V) = self.predict_raw(X)
# Simulate random draws from the GP prior
y = self.prior_samples_f(np.vstack((self.X, X)),size)
# Allocate space for sample trajectories
Y = np.empty((size,X.shape[0]))
# Run the RTS smoother on each of these values
for j in range(0,size):
yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
(m2,V2) = self.predict_raw(X,Ynew=yobs)
Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
# Reorder simulated values
Y = Y[:,return_inverse]
# Return posterior sample trajectories
return Y.T
def posterior_samples(self, X, size=10):
# Make samples of f
Y = self.posterior_samples_f(X,size)
# Add noise
Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
# Return trajectory
return Y
def predict_quantiles(self, Xnew, quantiles=(2.5, 97.5)):
    """
    Compute quantiles of the Gaussian predictive distribution at ``Xnew``.

    The mean and variance come from ``self._raw_predict(Xnew)``; the
    Gaussian noise variance of the model is added to that variance.

    :param Xnew: points passed on to ``_raw_predict``.
    :param quantiles: iterable of percentages (0-100).
    :returns: list with one entry per requested quantile, in the same
        order as ``quantiles``.
    """
    mu, var = self._raw_predict(Xnew)

    bounds = []
    for percent in quantiles:
        z_score = stats.norm.ppf(percent/100.)
        spread = np.sqrt(var + float(self.Gaussian_noise.variance))
        bounds.append(z_score*spread + mu)
    return bounds
def simulate(self,F,L,Qc,Pinf,X,size=1):
# Simulate a trajectory using the state space model
# Allocate space for results
f = np.zeros((F.shape[0],size,X.shape[1]))
# Initial state
f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
# Sweep through remaining time points
for k in range(1,X.shape[1]):
# Form discrete-time model
A = As[:,:,index[1-k]]
Q = Qs[:,:,index[1-k]]
# Draw the state
f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
# Return values
return f
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
#
# # Deal with optional parameters
# if ax is None:
# fig = pb.figure(num=fignum)
# ax = fig.add_subplot(111)
#
# # Define the frame on which to plot
# resolution = resolution or 200
# Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
#
# # Make a prediction on the frame and plot it
# if plot_raw:
# m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
# lower = m - 2*np.sqrt(v)
# upper = m + 2*np.sqrt(v)
# Y = self.Y
# else:
# m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
# Y = self.Y
#
# # Plot the values
# gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
# ax.plot(self.X, self.Y, 'kx', mew=1.5)
#
# # Optionally plot some samples
# if samples:
# if plot_raw:
# Ysim = self.posterior_samples_f(Xgrid, samples)
# else:
# Ysim = self.posterior_samples(Xgrid, samples)
# for yi in Ysim.T:
# ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
#
# # Set the limits of the plot to some sensible values
# ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
# ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
#
# def prior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Allocate space for results
# Y = np.empty((size,X.shape[0]))
#
# # Simulate random draws
# #for j in range(0,size):
# # Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
# Y = self.simulate(F,L,Qc,Pinf,X.T,size)
#
# # Only observations
# Y = np.tensordot(H[0],Y,(0,0))
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return trajectory
# return Y.T
#
# def posterior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Run smoother on original data
# (m,V) = self.predict_raw(X)
#
# # Simulate random draws from the GP prior
# y = self.prior_samples_f(np.vstack((self.X, X)),size)
#
# # Allocate space for sample trajectories
# Y = np.empty((size,X.shape[0]))
#
# # Run the RTS smoother on each of these values
# for j in range(0,size):
# yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
# (m2,V2) = self.predict_raw(X,Ynew=yobs)
# Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return posterior sample trajectories
# return Y.T
#
# def posterior_samples(self, X, size=10):
#
# # Make samples of f
# Y = self.posterior_samples_f(X,size)
#
# # Add noise
# Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
#
# # Return trajectory
# return Y
#
#
# def simulate(self,F,L,Qc,Pinf,X,size=1):
# # Simulate a trajectory using the state space model
#
# # Allocate space for results
# f = np.zeros((F.shape[0],size,X.shape[1]))
#
# # Initial state
# f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
#
# # Time step lengths
# dt = np.empty(X.shape)
# dt[:,0] = X[:,1]-X[:,0]
# dt[:,1:] = np.diff(X)
#
# # Solve the LTI SDE for these time steps
# As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
#
# # Sweep through remaining time points
# for k in range(1,X.shape[1]):
#
# # Form discrete-time model
# A = As[:,:,index[1-k]]
# Q = Qs[:,:,index[1-k]]
#
# # Draw the state
# f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
#
# # Return values
# return f