Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Max Zwiessele 2016-04-01 11:43:20 +01:00
commit 7ef61ccdd0
32 changed files with 36241 additions and 6 deletions

@@ -0,0 +1,26 @@
import GPy
import numpy as np
import matplotlib.pyplot as plt
import GPy.models.state_space_model as SS_model
X = np.linspace(0, 10, 2000)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
kernel1 = GPy.kern.Matern32(X.shape[1])
m1 = GPy.models.GPRegression(X,Y, kernel1)
print(m1)
m1.optimize(optimizer='bfgs', messages=True)
print(m1)
kernel2 = GPy.kern.sde_Matern32(X.shape[1])
#m2 = SS_model.StateSpace(X,Y, kernel2)
m2 = GPy.models.StateSpace(X,Y, kernel2)
print(m2)
m2.optimize(optimizer='bfgs', messages=True)
print(m2)
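The script imports matplotlib but never uses it; a minimal continuation (my sketch, not part of the commit, assuming the two fitted models above and GPy's matplotlib-based plotting) to compare the fits visually:
# hypothetical continuation: show the GP regression fit and the
# state space fit side by side
m1.plot(ax=plt.subplot(121))
m2.plot(ax=plt.subplot(122))
plt.show()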

@@ -29,3 +29,11 @@ from .src.splitKern import SplitKern,DEtime
from .src.splitKern import DEtime as DiffGenomeKern
from .src.spline import Spline
from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
from .src.sde_matern import sde_Matern32
from .src.sde_matern import sde_Matern52
from .src.sde_linear import sde_Linear
from .src.sde_standard_periodic import sde_StdPeriodic
from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian
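For orientation, a minimal usage sketch (my addition, grounded in the classes added by this commit): every sde_* kernel exposes an sde() method returning the state space matrices in the tuple layout used throughout the new files.
import GPy

# build an SDE-enabled kernel and inspect its state space representation;
# the tuple layout follows the sde() methods defined in this commit
kern = GPy.kern.sde_Matern32(1)
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) = kern.sde()
print(F.shape, Pinf.shape)  # (2, 2) (2, 2) for the Matern 3/2 model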

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Brownian motion kernel:
.. math::
k(x,y) = \sigma^2 \min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # the kernel variance
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{\text{input dim}} \sigma^2_i x_i y_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
The init method is modified because one extra parameter is required:
X, the points on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

GPy/kern/_src/sde_matern.py (new file, 135 lines)

@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(-\sqrt{3} r), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
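As a sanity check (my addition, not part of the commit): for a stationary kernel, Pinf is the steady-state covariance and must solve the continuous Lyapunov equation F*Pinf + Pinf*F^T + L*Qc*L^T = 0, which is easy to verify numerically for the matrices above:
import numpy as np
from scipy.linalg import solve_lyapunov

# verify that Pinf of the Matern 3/2 state space model solves
# F Pinf + Pinf F^T + L Qc L^T = 0
variance, lengthscale = 1.5, 0.7
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(((0.,), (1.0,)))
Qc = np.array(((12.*np.sqrt(3)/lengthscale**3*variance,),))
Pinf = solve_lyapunov(F, -L.dot(Qc).dot(L.T))
assert np.allclose(Pinf, np.diag((variance, 3.*variance/lengthscale**2)))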
class sde_Matern52(Matern52):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3} r^2) \exp(-\sqrt{5} r), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

@@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance the Standard Periodic covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{\text{input\_dim}}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i))}{l_i} \right)^2 \right]
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.wavelengths.gradient = gradients[1]
self.lengthscales.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: one must constrain the lengthscale not to drop below 0.25;
below this, Bessel functions of the first kind grow very large.
! Note: one must also keep the wavelength from becoming very small,
because the gradients w.r.t. the wavelength then become unstable.
This may depend on the data, however; for a test example with
300 data points the lower limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.wavelengths
#self.lengthscales
N = 7 # approximation order
w0 = 2*np.pi/self.wavelengths # frequency
lengthscales = 2*self.lengthscales
[q2,dq2l] = seriescoeff(N,lengthscales,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.wavelengths
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artificial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter in the kernel
magnSigma2:float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
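A quick numerical check (my addition, under the lengthscale-doubling convention noted in sde() above): the truncated series sum_j q_j^2 cos(j*w0*tau) should reproduce the standard periodic covariance sigma^2 * exp(-sin^2(pi*tau/period) / (2*l^2)).
import numpy as np
from scipy import special

sigma2, l, period, m = 1.0, 1.0, 2.0, 7
w0 = 2*np.pi/period
ls = 2.0*l                                   # doubled lengthscale, as in sde()
q2 = 2*sigma2*np.exp(-ls**-2)*special.iv(np.arange(m+1), ls**-2)
q2[0] *= 0.5                                 # halve the j = 0 coefficient
tau = np.linspace(0, 2*period, 50)
approx = (q2[:, None]*np.cos(np.arange(m+1)[:, None]*w0*tau)).sum(axis=0)
exact = sigma2*np.exp(-np.sin(np.pi*tau/period)**2/(2*l**2))
assert np.allclose(approx, exact, atol=1e-6)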

GPy/kern/_src/sde_static.py (new file, 101 lines)

@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
White kernel:
.. math::
k(x,y) = \alpha \delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

@@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp\left(-\frac{1}{2} r^2\right), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10  # approximation order (number of terms in the exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N),)))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# The benefit of this balancing is not well established; it helps only in
# one case: SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
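For clarity (my addition): the construction above is the usual Taylor-series approximation of the RBF spectral density followed by spectral factorization, as in Hartikainen and Särkkä (2010). With kappa = 1/(2*lengthscale^2) as in the code,

S(\omega) = \sigma^2 \sqrt{\pi/\kappa}\, e^{-\omega^2/(4\kappa)}, \qquad e^{\omega^2/(4\kappa)} \approx \sum_{n=0}^{N} \frac{1}{n!} \left( \frac{\omega^2}{4\kappa} \right)^{n},

and keeping only the roots of the resulting polynomial with negative real parts yields the coefficients of a stable transfer function, from which F is assembled in companion form.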
class sde_Exponential(Exponential):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp\left(-\frac{1}{2} r\right), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \left(1 + \frac{r^2}{2}\right)^{-\alpha}, \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
raise NotImplementedError('SDE form of the RatQuad kernel is not implemented')
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

@@ -263,4 +263,94 @@ class Add(CombinationKernel):
i_s[k._all_dims_active] += k.input_sensitivity(summarize)
return i_s
else:
return super(Add, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
part_start_param_index = 0
for p in self.parts:
if not p.is_fixed:
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
Return the state space representation of a sum of kernels: the summands' state space models are stacked block-diagonally.
"""
import scipy.linalg as la
F = None
L = None
Qc = None
H = None
Pinf = None
P0 = None
dF = None
dQc = None
dPinf = None
dP0 = None
n = 0
nq = 0
nd = 0
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
F = la.block_diag(F,Ft) if (F is not None) else Ft
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
'constant', constant_values=0)
dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
else:
dF = dFt
if dQc is not None:
dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
'constant', constant_values=0)
dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
else:
dQc = dQct
if dPinf is not None:
dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
'constant', constant_values=0)
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
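The block stacking above reflects a standard fact: the sum of independent GPs corresponds to one LTI SDE whose state is the concatenation of the summands' states. A minimal sketch (my addition, with two toy one-dimensional models):
import numpy as np
import scipy.linalg as la

# the sum of two independent LTI SDEs has block-diagonal dynamics and a
# concatenated measurement matrix: y = H x sums the two components
F1, F2 = np.array([[-1.0]]), np.array([[-2.0]])
H1, H2 = np.array([[1.0]]), np.array([[1.0]])
F = la.block_diag(F1, F2)
H = np.hstack((H1, H2))
assert F.shape == (2, 2) and H.shape == (1, 2)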

@@ -305,7 +305,6 @@ class Kern(Parameterized):
def _check_active_dims(self, X):
assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
class CombinationKernel(Kern):
"""
Abstract super class for combination kernels.

@@ -105,3 +105,114 @@ class Prod(CombinationKernel):
return i_s
else:
return super(Prod, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
part_start_param_index = 0
for p in self.parts:
if not p.is_fixed:
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
"""
F = np.array((0,), ndmin=2)
L = np.array((1,), ndmin=2)
Qc = np.array((1,), ndmin=2)
H = np.array((1,), ndmin=2)
Pinf = np.array((1,), ndmin=2)
P0 = np.array((1,), ndmin=2)
dF = None
dQc = None
dPinf = None
dP0 = None
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
# check derivative dimensions ->
number_of_parameters = len(p.param_array)
assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
# check derivative dimensions <-
# exception for periodic kernel
if (p.name == 'std_periodic'):
Qct = P_inft
dQct = dP_inft
dF = dkron(F,dF,Ft,dFt,'sum')
dQc = dkron(Qc,dQc,Qct,dQct,'prod')
dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
L = np.kron(L,Lt)
Qc = np.kron(Qc,Qct)
Pinf = np.kron(Pinf,P_inft)
P0 = np.kron(P0,P_inft)
H = np.kron(H,Ht)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dkron(A,dA,B,dB, operation='prod'):
"""
Function computes the derivative of Kronecker product A*B
(or Kronecker sum A+B).
Input:
-----------------------
A: 2D matrix
Some matrix
dA: 3D (or 2D) matrix
Derivatives of A
B: 2D matrix
Some matrix
dB: 3D (or 2D) matrix
Derivatives of B
operation: str 'prod' or 'sum'
Which operation is considered. If the operation is 'sum', it is assumed
that A and B are square matrices.
Output:
dC: 3D matrix
Derivative of Kronecker product A*B (or Kronecker sum A+B)
"""
if dA is None:
dA_param_num = 0
dA = np.zeros((A.shape[0], A.shape[1],1))
else:
dA_param_num = dA.shape[2]
if dB is None:
dB_param_num = 0
dB = np.zeros((B.shape[0], B.shape[1],1))
else:
dB_param_num = dB.shape[2]
# Space allocation for derivative matrix
dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num))
for k in range(dA_param_num):
if operation == 'prod':
dC[:,:,k] = np.kron(dA[:,:,k],B);
else:
dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] ))
for k in range(dB_param_num):
if operation == 'prod':
dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k])
else:
dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k])
return dC
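A quick numerical check (my addition) of the product rule that dkron implements for 'prod':
import numpy as np

# d(A kron B)/dp = dA kron B + A kron dB, verified by finite differences
rng = np.random.RandomState(0)
A0, dA = rng.randn(2, 2), rng.randn(2, 2)
B0, dB = rng.randn(3, 3), rng.randn(3, 3)
eps = 1e-6
num = (np.kron(A0 + eps*dA, B0 + eps*dB) - np.kron(A0, B0))/eps
ana = np.kron(dA, B0) + np.kron(A0, dB)
assert np.allclose(num, ana, atol=1e-4)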

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Brownian motion kernel:
.. math::
k(x,y) = \sigma^2 \min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # the kernel variance
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{\text{input dim}} \sigma^2_i x_i y_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
The init method is modified because one extra parameter is required:
X, the points on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

GPy/kern/src/sde_matern.py (new file, 137 lines)

@@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(-\sqrt{3} r), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3} r^2) \exp(-\sqrt{5} r), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

@@ -0,0 +1,180 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance the Standard Periodic covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{\text{input\_dim}}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i))}{l_i} \right)^2 \right]
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.period.gradient = gradients[1]
self.lengthscale.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: one must constrain the lengthscale not to drop below 0.25;
below this, Bessel functions of the first kind grow very large.
! Note: one must also keep the wavelength from becoming very small,
because the gradients w.r.t. the wavelength then become unstable.
This may depend on the data, however; for a test example with
300 data points the lower limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.period
#self.lengthscale
N = 7 # approximation order
w0 = 2*np.pi/self.period # frequency
lengthscale = 2*self.lengthscale
[q2,dq2l] = seriescoeff(N,lengthscale,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.period
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artificial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter in the kernel
magnSigma2:float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl

GPy/kern/src/sde_static.py (new file, 103 lines)

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
White kernel:
.. math::
k(x,y) = \alpha \delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

@@ -0,0 +1,192 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp\left(-\frac{1}{2} r^2\right), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10  # approximation order (number of terms in the exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N),)))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# The benefit of this balancing is not well established; it helps only in
# one case: SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp\left(-\frac{1}{2} r\right), \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \left(1 + \frac{r^2}{2}\right)^{-\alpha}, \quad \text{where } r = \sqrt{\sum_{i=1}^{\text{input dim}} \frac{(x_i-y_i)^2}{\ell_i^2}}
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
raise NotImplementedError('SDE form of the RatQuad kernel is not implemented')
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

@@ -320,6 +320,18 @@ class Exponential(Stationary):
def dK_dr(self, r):
return -0.5*self.K_of_r(r)
# def sde(self):
# """
# Return the state space representation of the covariance.
# """
# F = np.array([[-1/self.lengthscale]])
# L = np.array([[1]])
# Qc = np.array([[2*self.variance/self.lengthscale]])
# H = np.array([[1]])
# Pinf = np.array([[self.variance]])
# # TODO: return the derivatives as well
#
# return (F, L, Qc, H, Pinf)
@@ -388,6 +400,41 @@ class Matern32(Stationary):
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
class Matern52(Stationary):
"""

@@ -22,3 +22,5 @@ from .gp_var_gauss import GPVariationalGaussianApproximation
from .one_vs_all_classification import OneVsAllClassification
from .one_vs_all_sparse_classification import OneVsAllSparseClassification
from .dpgplvm import DPBayesianGPLVM
from .state_space_model import StateSpace

GPy/models/state_space.py (new file, 745 lines)

@@ -0,0 +1,745 @@
# Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# This implementation of converting GPs to state space models is based on the article:
#
# @article{Sarkka+Solin+Hartikainen:2013,
# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen},
# year = {2013},
# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing},
# journal = {IEEE Signal Processing Magazine},
# volume = {30},
# number = {4},
# pages = {51--61}
# }
#
import numpy as np
from scipy import linalg
from ..core import Model
from .. import kern
from GPy.plotting.matplot_dep.models_plots import gpplot
from GPy.plotting.matplot_dep.base_plots import x_frame1D
from GPy.plotting.matplot_dep import Tango
import pylab as pb
from GPy.core.parameterization.param import Param
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
self.num_data, input_dim = X.shape
assert input_dim==1, "State space methods for time only"
num_data_Y, self.output_dim = Y.shape
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods for single outputs only"
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
# Noise variance
self.sigma2 = Param('Gaussian_noise', sigma2)
self.link_parameter(self.sigma2)
# Default kernel
if kernel is None:
self.kern = kern.Matern32(1)
else:
self.kern = kernel
self.link_parameter(self.kern)
self.sigma2.constrain_positive()
# Assert that the kernel is supported
if not hasattr(self.kern, 'sde'):
raise NotImplementedError('SDE must be implemented for the kernel being used')
#assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
def parameters_changed(self):
"""
Parameters have now changed
"""
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Use the Kalman filter to evaluate the likelihood
self._log_marginal_likelihood = self.kf_likelihood(F,L,Qc,H,self.sigma2,Pinf,self.X.T,self.Y.T)
gradients = self.compute_gradients()
self.sigma2.gradient_full[:] = gradients[-1]
self.kern.gradient_full[:] = gradients[:-1]
def log_likelihood(self):
return self._log_marginal_likelihood
def compute_gradients(self):
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dFt,dQct,dPinft) = self.kern.sde()
# Allocate space for the full partial derivative matrices
dF = np.zeros([dFt.shape[0],dFt.shape[1],dFt.shape[2]+1])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],dQct.shape[2]+1])
dPinf = np.zeros([dPinft.shape[0],dPinft.shape[1],dPinft.shape[2]+1])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dPinf[:,:,:-1] = dPinft
# The sigma2 derivative
dR = np.zeros([1,1,dF.shape[2]])
dR[:,:,-1] = 1
# Calculate the likelihood gradients
gradients = self.kf_likelihood_g(F,L,Qc,H,self.sigma2,Pinf,dF,dQc,dPinf,dR,self.X.T,self.Y.T)
return gradients
def predict_raw(self, Xnew, Ynew=None, filteronly=False):
# Set defaults
if Ynew is None:
Ynew = self.Y
# Make a single matrix containing training and testing points
X = np.vstack((self.X, Xnew))
Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index]
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run the Kalman filter
(M, P) = self.kalman_filter(F,L,Qc,H,self.sigma2,Pinf,X.T,Y.T)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = self.rts_smoother(F,L,Qc,X.T,M,P)
# Put the data back in the original order
M = M[:,return_inverse]
P = P[:,:,return_inverse]
# Only return the values for Xnew
M = M[:,self.num_data:]
P = P[:,:,self.num_data:]
# Calculate the mean and variance
m = H.dot(M).T
V = np.tensordot(H[0],P,(0,0))
V = np.tensordot(V,H[0],(0,0))
V = V[:,None]
# Return the posterior of the state
return (m, V)
def predict(self, Xnew, filteronly=False):
# Run the Kalman filter to get the state
(m, V) = self.predict_raw(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
V += self.sigma2
# Lower and upper bounds
lower = m - 2*np.sqrt(V)
upper = m + 2*np.sqrt(V)
# Return mean and variance
return (m, V, lower, upper)
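A usage sketch of this class (my addition; the import path assumes this file, GPy/models/state_space.py, and a kernel whose sde() returns the eight-element tuple expected here):
import numpy as np
import GPy
from GPy.models.state_space import StateSpace

# fit the state space model on noisy sine data, then predict ahead;
# predict() returns the mean, variance and 2-sigma bounds defined above
X = np.linspace(0, 10, 500)[:, None]
Y = np.sin(X) + 0.1*np.random.randn(*X.shape)
m = StateSpace(X, Y, kernel=GPy.kern.Matern32(1))
m.optimize()
mean, var, lower, upper = m.predict(np.linspace(10, 12, 50)[:, None])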
def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
ax=None, resolution=None, plot_raw=False, plot_filter=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
# Deal with optional parameters
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
# Define the frame on which to plot
resolution = resolution or 200
Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
# Make a prediction on the frame and plot it
if plot_raw:
m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.Y
else:
m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
Y = self.Y
# Plot the values
gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(self.X, self.Y, 'kx', mew=1.5)
# Optionally plot some samples
if samples:
if plot_raw:
Ysim = self.posterior_samples_f(Xgrid, samples)
else:
Ysim = self.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
# Set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
def prior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Allocate space for results
Y = np.empty((size,X.shape[0]))
# Simulate random draws
#for j in range(0,size):
# Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
Y = self.simulate(F,L,Qc,Pinf,X.T,size)
# Only observations
Y = np.tensordot(H[0],Y,(0,0))
# Reorder simulated values
Y = Y[:,return_inverse]
# Return trajectory
return Y.T
def posterior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run smoother on original data
(m,V) = self.predict_raw(X)
# Simulate random draws from the GP prior
y = self.prior_samples_f(np.vstack((self.X, X)),size)
# Allocate space for sample trajectories
Y = np.empty((size,X.shape[0]))
# Run the RTS smoother on each of these values
for j in range(0,size):
yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
(m2,V2) = self.predict_raw(X,Ynew=yobs)
Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
# Reorder simulated values
Y = Y[:,return_inverse]
# Return posterior sample trajectories
return Y.T
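The loop above turns prior draws into posterior draws by correcting with two smoother runs; in summary (my restatement, for clarity):

f_{\text{post}}(X_*) = m(X_*) + f_{\text{prior}}(X_*) - \tilde{m}(X_*),

where m is the smoother mean conditioned on the real observations and \tilde{m} is the smoother mean conditioned on noisy observations of the prior draw at the training inputs.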
def posterior_samples(self, X, size=10):
# Make samples of f
Y = self.posterior_samples_f(X,size)
# Add noise
Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
# Return trajectory
return Y
def kalman_filter(self,F,L,Qc,H,R,Pinf,X,Y):
# KALMAN_FILTER - Run the Kalman filter for a given model and data
# Allocate space for results
MF = np.empty((F.shape[0],Y.shape[1]))
PF = np.empty((F.shape[0],F.shape[0],Y.shape[1]))
# Initialize
MF[:,-1] = np.zeros(F.shape[0])
PF[:,:,-1] = Pinf.copy()
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Kalman filter
for k in range(0,Y.shape[1]):
# Form discrete-time model
#(A, Q) = self.lti_disc(F,L,Qc,dt[:,k])
A = As[:,:,index[k]];
Q = Qs[:,:,index[k]];
# Prediction step
MF[:,k] = A.dot(MF[:,k-1])
PF[:,:,k] = A.dot(PF[:,:,k-1]).dot(A.T) + Q
# Update step (only if there is data)
if not np.isnan(Y[:,k]):
if Y.shape[0]==1:
K = PF[:,:,k].dot(H.T)/(H.dot(PF[:,:,k]).dot(H.T) + R)
else:
LL = linalg.cho_factor(H.dot(PF[:,:,k]).dot(H.T) + R)
K = linalg.cho_solve(LL, H.dot(PF[:,:,k].T)).T
MF[:,k] += K.dot(Y[:,k]-H.dot(MF[:,k]))
PF[:,:,k] -= K.dot(H).dot(PF[:,:,k])
# Return values
return (MF, PF)
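For reference (my addition), each pass of the loop implements the standard Kalman prediction and update:

m_k^- = A m_{k-1}, \quad P_k^- = A P_{k-1} A^\top + Q
v_k = y_k - H m_k^-, \quad S_k = H P_k^- H^\top + R, \quad K_k = P_k^- H^\top S_k^{-1}
m_k = m_k^- + K_k v_k, \quad P_k = P_k^- - K_k H P_k^-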
def rts_smoother(self,F,L,Qc,X,MS,PS):
# RTS_SMOOTHER - Run the RTS smoother for a given model and data
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Sequentially smooth states starting from the end
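# Negative indices walk backwards through the arrays: for k = 2..N,
# column -k is the state being smoothed and column 1-k is the already
# smoothed state one step later, so the pass runs from the
# second-to-last state down to the first.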
for k in range(2,X.shape[1]+1):
# Form discrete-time model
#(A, Q) = self.lti_disc(F,L,Qc,dt[:,1-k])
A = As[:,:,index[1-k]]
Q = Qs[:,:,index[1-k]]
# Smoothing step
LL = linalg.cho_factor(A.dot(PS[:,:,-k]).dot(A.T)+Q)
G = linalg.cho_solve(LL,A.dot(PS[:,:,-k])).T
MS[:,-k] += G.dot(MS[:,1-k]-A.dot(MS[:,-k]))
PS[:,:,-k] += G.dot(PS[:,:,1-k]-A.dot(PS[:,:,-k]).dot(A.T)-Q).dot(G.T)
# Return
return (MS, PS)
def kf_likelihood(self,F,L,Qc,H,R,Pinf,X,Y):
# Evaluate marginal likelihood
# Initialize
lik = 0
m = np.zeros((F.shape[0],1))
P = Pinf.copy()
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Kalman filter for likelihood evaluation
for k in range(0,Y.shape[1]):
# Form discrete-time model
#(A,Q) = self.lti_disc(F,L,Qc,dt[:,k])
A = As[:,:,index[k]]
Q = Qs[:,:,index[k]]
# Prediction step
m = A.dot(m)
P = A.dot(P).dot(A.T) + Q
# Update step only if there is data
if not np.isnan(Y[:,k]):
v = Y[:,k]-H.dot(m)
if Y.shape[0]==1:
S = H.dot(P).dot(H.T) + R
K = P.dot(H.T)/S
lik -= 0.5*np.log(S)
lik -= 0.5*v.shape[0]*np.log(2*np.pi)
lik -= 0.5*v*v/S
else:
LL, isupper = linalg.cho_factor(H.dot(P).dot(H.T) + R)
lik -= np.sum(np.log(np.diag(LL)))
lik -= 0.5*v.shape[0]*np.log(2*np.pi)
lik -= 0.5*linalg.cho_solve((LL, isupper),v).dot(v)
K = linalg.cho_solve((LL, isupper), H.dot(P.T)).T
m += K.dot(v)
P -= K.dot(H).dot(P)
# Return likelihood
return lik[0,0]
def kf_likelihood_g(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y):
# Evaluate marginal likelihood gradient
# State dimension, number of data points and number of parameters
n = F.shape[0]
steps = Y.shape[1]
nparam = dF.shape[2]
# Time steps
t = X.squeeze()
# Allocate space
e = 0
eg = np.zeros(nparam)
# Set up
m = np.zeros([n,1])
P = Pinf.copy()
dm = np.zeros([n,nparam])
dP = dPinf.copy()
mm = m.copy()
PP = P.copy()
# Initial dt
dt = -np.Inf
# Allocate space for expm results
AA = np.zeros([2*n, 2*n, nparam])
FF = np.zeros([2*n, 2*n])
# Loop over all observations
for k in range(0,steps):
# The previous time step
dt_old = dt
# The time discretization step length
if k>0:
dt = t[k]-t[k-1]
else:
dt = 0
# Loop through all parameters (Kalman filter prediction step)
for j in range(0,nparam):
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# The first matrix for the matrix factor decomposition
FF[:n,:n] = F
FF[n:,:n] = dF[:,:,j]
FF[n:,n:] = F
# Solve the matrix exponential
AA[:,:,j] = linalg.expm(FF*dt)
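# Block-triangular identity: expm([[F, 0], [dF, F]]*dt) = [[A, 0], [dA, A]],
# so the lower-left block of AA is the parameter derivative of
# A = expm(F*dt), and multiplying AA by the stacked vector [m; dm]
# propagates the mean and its derivative in one product.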
# Solve the differential equation
foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]]))
mm = foo[:n,:]
dm[:,j:j+1] = foo[n:,:]
# The discrete-time dynamical model
if j==0:
A = AA[:n,:n,j]
Q = Pinf - A.dot(Pinf).dot(A.T)
PP = A.dot(P).dot(A.T) + Q
# The derivatives of A and Q
dA = AA[n:,:n,j]
dQ = dPinf[:,:,j] - dA.dot(Pinf).dot(A.T) \
- A.dot(dPinf[:,:,j]).dot(A.T) - A.dot(Pinf).dot(dA.T)
# The derivatives of P
dP[:,:,j] = dA.dot(P).dot(A.T) + A.dot(dP[:,:,j]).dot(A.T) \
+ A.dot(P).dot(dA.T) + dQ
# Set predicted m and P
m = mm
P = PP
# Start the Kalman filter update step and precalculate variables
S = H.dot(P).dot(H.T) + R
# We should calculate the Cholesky factor if S is a matrix
# [LS,notposdef] = chol(S,'lower');
# The Kalman filter update (S is scalar)
HtiS = H.T/S
iS = 1/S
K = P.dot(HtiS)
v = Y[:,k]-H.dot(m)
vtiS = v.T/S
# Loop through all parameters (Kalman filter update step derivative)
for j in range(0,nparam):
# Innovation covariance derivative
dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j]
# Evaluate the energy derivative for j
eg[j] = eg[j] \
- .5*np.sum(iS*dS) \
+ .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
+ .5*vtiS.dot(dS).dot(vtiS.T) \
+ .5*vtiS.dot(H.dot(dm[:,j:j+1]))
# Kalman filter update step derivatives
dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S
dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1])
dKSKt = dK.dot(S).dot(K.T)
dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T
# Evaluate the energy
# e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v)
# Finish Kalman filter update step
m = m + K.dot(v)
P = P - K.dot(S).dot(K.T)
# Make sure the covariances stay symmetric
P = (P+P.T)/2
dP = (dP + dP.transpose([1,0,2]))/2
# raise NameError('Debug me')
# Return the gradient
return eg
def kf_likelihood_g_notstable(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y):
# Evaluate marginal likelihood gradient
# State dimension, number of data points and number of parameters
steps = Y.shape[1]
nparam = dF.shape[2]
n = F.shape[0]
# Time steps
t = X.squeeze()
# Allocate space
e = 0
eg = np.zeros(nparam)
# Set up
Z = np.zeros(F.shape)
QC = L.dot(Qc).dot(L.T)
m = np.zeros([n,1])
P = Pinf.copy()
dm = np.zeros([n,nparam])
dP = dPinf.copy()
mm = m.copy()
PP = P.copy()
# Initial dt
dt = -np.Inf
# Allocate space for expm results
AA = np.zeros([2*F.shape[0], 2*F.shape[0], nparam])
AAA = np.zeros([4*F.shape[0], 4*F.shape[0], nparam])
FF = np.zeros([2*F.shape[0], 2*F.shape[0]])
FFF = np.zeros([4*F.shape[0], 4*F.shape[0]])
# Loop over all observations
for k in range(0,steps):
# The previous time step
dt_old = dt
# The time discretization step length
if k>0:
dt = t[k]-t[k-1]
else:
dt = t[1]-t[0]
# Loop through all parameters (Kalman filter prediction step)
for j in range(0,nparam):
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# The first matrix for the matrix factor decomposition
FF[:n,:n] = F
FF[n:,:n] = dF[:,:,j]
FF[n:,n:] = F
# Solve the matrix exponential
AA[:,:,j] = linalg.expm(FF*dt)
# Solve using matrix fraction decomposition
foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]]))
# Pick the parts
mm = foo[:n,:]
dm[:,j:j+1] = foo[n:,:]
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# Define W and G
W = L.dot(dQc[:,:,j]).dot(L.T)
G = dF[:,:,j]
# The second matrix for the matrix factor decomposition
FFF[:n,:n] = F
FFF[2*n:-n,:n] = G
FFF[:n, n:2*n] = QC
FFF[n:2*n, n:2*n] = -F.T
FFF[2*n:-n,n:2*n] = W
FFF[-n:, n:2*n] = -G.T
FFF[2*n:-n,2*n:-n] = F
FFF[2*n:-n,-n:] = QC
FFF[-n:,-n:] = -F.T
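# The 4n x 4n matrix FFF encodes two coupled matrix fraction
# decompositions: propagating [P; I] gives [C; D] with the predicted
# covariance P_pred = C D^{-1}, while the extra blocks (G = dF and
# W = d(L Qc L')/dtheta) simultaneously propagate the derivative,
# giving [dC; dD] from which dP_pred is recovered below. This is the
# Van Loan expm trick; it is less numerically stable than the
# recursion in kf_likelihood_g, hence the name of this function.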
# Solve the matrix exponential
AAA[:,:,j] = linalg.expm(FFF*dt)
# Solve using matrix fraction decomposition
foo = AAA[:,:,j].dot(np.vstack([P, np.eye(n), dP[:,:,j], np.zeros([n,n])]))
# Pick the parts
C = foo[:n, :]
D = foo[n:2*n, :]
dC = foo[2*n:-n,:]
dD = foo[-n:, :]
# The prediction step covariance (PP = C/D)
if j==0:
PP = linalg.solve(D.T,C.T).T
PP = (PP + PP.T)/2
# Solve dP for j (C/D == P_{k|k-1})
dP[:,:,j] = linalg.solve(D.T,(dC - PP.dot(dD)).T).T
# Set predicted m and P
m = mm
P = PP
# Start the Kalman filter update step and precalculate variables
S = H.dot(P).dot(H.T) + R
# We should calculate the Cholesky factor if S is a matrix
# [LS,notposdef] = chol(S,'lower');
# The Kalman filter update (S is scalar)
HtiS = H.T/S
iS = 1/S
K = P.dot(HtiS)
v = Y[:,k]-H.dot(m)
vtiS = v.T/S
# Loop through all parameters (Kalman filter update step derivative)
for j in range(0,nparam):
# Innovation covariance derivative
dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j]
# Evaluate the energy derivative for j
eg[j] = eg[j] \
- .5*np.sum(iS*dS) \
+ .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
+ .5*vtiS.dot(dS).dot(vtiS.T) \
+ .5*vtiS.dot(H.dot(dm[:,j:j+1]))
# Kalman filter update step derivatives
dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S
dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1])
dKSKt = dK.dot(S).dot(K.T)
dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T
# Evaluate the energy
# e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v)
# Finish Kalman filter update step
m = m + K.dot(v)
P = P - K.dot(S).dot(K.T)
# Make sure the covariances stay symmetric
P = (P+P.T)/2
dP = (dP + dP.transpose([1,0,2]))/2
# raise NameError('Debug me')
# Report
#print e
#print eg
# Return the gradient
return eg
def simulate(self,F,L,Qc,Pinf,X,size=1):
# Simulate a trajectory using the state space model
# Allocate space for results
f = np.zeros((F.shape[0],size,X.shape[1]))
# Initial state drawn from the stationary distribution (stored at
# index 0 so the loop below can propagate it forward)
f[:,:,0] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Sweep through remaining time points
for k in range(1,X.shape[1]):
# Form discrete-time model (index[k] picks the discretization for
# the step from X[k-1] to X[k])
A = As[:,:,index[k]]
Q = Qs[:,:,index[k]]
# Draw the state
f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
# Return values
return f
def lti_disc(self,F,L,Qc,dt):
# Discrete-time solution to the LTI SDE
# Dimensionality
n = F.shape[0]
index = 0
# Check for numbers of time steps
if dt.flatten().shape[0]==1:
# The covariance matrix by matrix fraction decomposition
Phi = np.zeros((2*n,2*n))
Phi[:n,:n] = F
Phi[:n,n:] = L.dot(Qc).dot(L.T)
Phi[n:,n:] = -F.T
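# Matrix fraction decomposition: with Phi = [[F, L Qc L'], [0, -F']],
# the blocks [C; D] = expm(Phi*dt).dot([0; I]) satisfy Q = C D^{-1},
# the process noise covariance of the discretized model
#   x_k = A x_{k-1} + q_k,  q_k ~ N(0, Q),  A = expm(F*dt).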
AB = linalg.expm(Phi*dt).dot(np.vstack((np.zeros((n,n)),np.eye(n))))
Q = linalg.solve(AB[n:,:].T,AB[:n,:].T)
# The dynamical model
A = linalg.expm(F*dt)
# Return
return A, Q
# Optimize for cases where time steps occur repeatedly
else:
# Time discretizations (round to 14 decimals to avoid problems)
dt, _, index = np.unique(np.round(dt,14),True,True)
# Allocate space for A and Q
A = np.empty((n,n,dt.shape[0]))
Q = np.empty((n,n,dt.shape[0]))
# Call this function for each dt
for j in range(0,dt.shape[0]):
A[:,:,j], Q[:,:,j] = self.lti_disc(F,L,Qc,dt[j])
# Return
return A, Q, index

File diff suppressed because it is too large

View file

@ -0,0 +1,964 @@
# -*- coding: utf-8 -*-
"""
Contains some cython code for state space modelling.
"""
import numpy as np
cimport numpy as np
import scipy as sp
cimport cython
#from libc.math cimport isnan # for nan checking in kalman filter cycle
cdef extern from "numpy/npy_math.h":
bint npy_isnan(double x)
DTYPE = np.float64
DTYPE_int = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t DTYPE_int_t
# Template class for dynamic callables
cdef class Dynamic_Callables_Cython:
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
raise NotImplementedError("(cython) f_a is not implemented!")
cpdef Ak(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] P): # returns state iteration matrix
raise NotImplementedError("(cython) Ak is not implemented!")
cpdef Qk(self, int k):
raise NotImplementedError("(cython) Qk is not implemented!")
cpdef Q_srk(self, int k):
raise NotImplementedError("(cython) Q_srk is not implemented!")
cpdef dAk(self, int k):
raise NotImplementedError("(cython) dAk is not implemented!")
cpdef dQk(self, int k):
raise NotImplementedError("(cython) dQk is not implemented!")
cpdef reset(self, bint compute_derivatives = False):
raise NotImplementedError("(cython) reset is not implemented!")
# Template class for measurement callables
cdef class Measurement_Callables_Cython:
cpdef f_h(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] Hk):
raise NotImplementedError("(cython) f_h is not implemented!")
cpdef Hk(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns the measurement matrix
raise NotImplementedError("(cython) Hk is not implemented!")
cpdef Rk(self, int k):
raise NotImplementedError("(cython) Rk is not implemented!")
cpdef R_isrk(self, int k):
raise NotImplementedError("(cython) R_isrk is not implemented!")
cpdef dHk(self, int k):
raise NotImplementedError("(cython) dHk is not implemented!")
cpdef dRk(self, int k):
raise NotImplementedError("(cython) dRk is not implemented!")
cpdef reset(self,compute_derivatives = False):
raise NotImplementedError("(cython) reset is not implemented!")
cdef class R_handling_Cython(Measurement_Callables_Cython):
"""
This class handles the noise matrix R.
"""
cdef:
np.ndarray R
np.ndarray index
int R_time_var_index
np.ndarray dR
bint svd_each_time
dict R_square_root
def __init__(self, np.ndarray[DTYPE_t, ndim=3] R, np.ndarray[DTYPE_t, ndim=2] index,
int R_time_var_index, int p_unique_R_number, np.ndarray[DTYPE_t, ndim=3] dR = None):
"""
Input:
---------------
R - array with noise on various steps. The result of preprocessing
the noise input.
index - for each step of Kalman filter contains the corresponding index
in the array.
R_time_var_index - another index in the array R. Computed earlier and passed here.
unique_R_number - number of unique noise matrices below which square roots
are cached and above which they are computed each time.
dR: 3D array[:, :, param_num]
derivative of R. Derivatives are supported only when R does not change over time
Output:
--------------
Object which has two necessary functions:
Rk(k)
R_isrk(k)
"""
self.R = R
self.index = index
self.R_time_var_index = R_time_var_index
self.dR = dR
cdef int unique_len = len(np.unique(index))
if (unique_len > p_unique_R_number):
self.svd_each_time = True
else:
self.svd_each_time = False
self.R_square_root = {}
cpdef Rk(self, int k):
return self.R[:,:, <int>self.index[self.R_time_var_index, k]]
cpdef dRk(self,int k):
if self.dR is None:
raise ValueError("dR derivative is None")
return self.dR # the same derivative on each iteration
cpdef R_isrk(self, int k):
"""
Function returns the inverse square root of R matrix on step k.
"""
cdef int ind = <int>self.index[self.R_time_var_index, k]
cdef np.ndarray[DTYPE_t, ndim=2] R = self.R[:,:, ind ]
cdef np.ndarray[DTYPE_t, ndim=2] inv_square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if (R.shape[0] == 1): # handle the 1-D case separately: no caching
# of the result, just compute it each time.
inv_square_root = np.sqrt( 1.0/R )
else:
if self.svd_each_time:
U,S,Vh = sp.linalg.svd( R,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
inv_square_root = U * 1.0/np.sqrt(S)
else:
if ind in self.R_square_root:
inv_square_root = self.R_square_root[ind]
else:
U,S,Vh = sp.linalg.svd( R,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
inv_square_root = U * 1.0/np.sqrt(S)
self.R_square_root[ind] = inv_square_root
return inv_square_root
cdef class Std_Measurement_Callables_Cython(R_handling_Cython):
cdef:
np.ndarray H
int H_time_var_index
np.ndarray dH
def __init__(self, np.ndarray[DTYPE_t, ndim=3] H, int H_time_var_index,
np.ndarray[DTYPE_t, ndim=3] R, np.ndarray[DTYPE_t, ndim=2] index, int R_time_var_index,
int unique_R_number, np.ndarray[DTYPE_t, ndim=3] dH = None,
np.ndarray[DTYPE_t, ndim=3] dR=None):
super(Std_Measurement_Callables_Cython,self).__init__(R, index, R_time_var_index, unique_R_number,dR)
self.H = H
self.H_time_var_index = H_time_var_index
self.dH = dH
cpdef f_h(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] H):
"""
function (k, x_{k}, H_{k}). Measurement function.
k (iteration number), starts at 0
x_{k} state
H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
"""
return np.dot(H, m)
cpdef Hk(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns state iteration matrix
"""
function (k, m, P) return Jacobian of measurement function, it is
passed into p_h.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
"""
return self.H[:,:, <int>self.index[self.H_time_var_index, k]]
cpdef dHk(self,int k):
if self.dH is None:
raise ValueError("dH derivative is None")
return self.dH # the same derivative on each iteration
cdef class Q_handling_Cython(Dynamic_Callables_Cython):
cdef:
np.ndarray Q
np.ndarray index
int Q_time_var_index
np.ndarray dQ
dict Q_square_root
bint svd_each_time
def __init__(self, np.ndarray[DTYPE_t, ndim=3] Q, np.ndarray[DTYPE_t, ndim=2] index,
int Q_time_var_index, int p_unique_Q_number, np.ndarray[DTYPE_t, ndim=3] dQ = None):
"""
Input:
---------------
Q - array with noise on various steps. The result of preprocessing
the noise input.
index - for each step of Kalman filter contains the corresponding index
in the array.
Q_time_var_index - another index in the array Q. Computed earlier and passed here.
unique_Q_number - number of unique noise matrices below which square roots
are cached and above which they are computed each time.
dQ: 3D array[:, :, param_num]
derivative of Q. Derivatives are supported only when Q does not change over time
Output:
--------------
Object which has three necessary functions:
Qk(k)
dQk(k)
Q_srk(k)
"""
self.Q = Q
self.index = index
self.Q_time_var_index = Q_time_var_index
self.dQ = dQ
cdef int unique_len = len(np.unique(index))
if (unique_len > p_unique_Q_number):
self.svd_each_time = True
else:
self.svd_each_time = False
self.Q_square_root = {}
cpdef Qk(self, int k):
"""
function (k). Returns noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
"""
return self.Q[:,:, <int>self.index[self.Q_time_var_index, k]]
cpdef dQk(self, int k):
if self.dQ is None:
raise ValueError("dQ derivative is None")
return self.dQ # the same derivative on each iteration
cpdef Q_srk(self, int k):
"""
function (k). Returns the square root of noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
This function is implemented to use SVD prediction step.
"""
cdef int ind = <int>self.index[self.Q_time_var_index, k]
cdef np.ndarray[DTYPE_t, ndim=2] Q = self.Q[:,:, ind]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if (Q.shape[0] == 1): # handle the 1-D case separately: no caching
# of the result, just compute it each time.
square_root = np.sqrt( Q )
else:
if self.svd_each_time:
U,S,Vh = sp.linalg.svd( Q,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
square_root = U * np.sqrt(S)
else:
if ind in self.Q_square_root:
square_root = self.Q_square_root[ind]
else:
U,S,Vh = sp.linalg.svd( Q,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
square_root = U * np.sqrt(S)
self.Q_square_root[ind] = square_root
return square_root
cdef class Std_Dynamic_Callables_Cython(Q_handling_Cython):
cdef:
np.ndarray A
int A_time_var_index
np.ndarray dA
def __init__(self, np.ndarray[DTYPE_t, ndim=3] A, int A_time_var_index,
np.ndarray[DTYPE_t, ndim=3] Q,
np.ndarray[DTYPE_t, ndim=2] index,
int Q_time_var_index, int unique_Q_number,
np.ndarray[DTYPE_t, ndim=3] dA = None,
np.ndarray[DTYPE_t, ndim=3] dQ=None):
super(Std_Dynamic_Callables_Cython,self).__init__(Q, index, Q_time_var_index, unique_Q_number,dQ)
self.A = A
self.A_time_var_index = A_time_var_index
self.dA = dA
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
"""
f_a: function (k, x_{k-1}, A_{k}). Dynamic function.
k (iteration number), starts at 0
x_{k-1} State from the previous step
A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
"""
return np.dot(A,m)
cpdef Ak(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns state iteration matrix
"""
function (k, m, P) return Jacobian of measurement function, it is
passed into p_h.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
"""
return self.A[:,:, <int>self.index[self.A_time_var_index, k]]
cpdef dAk(self, int k):
if self.dA is None:
raise ValueError("dA derivative is None")
return self.dA # the same derivative on each iteration
cpdef reset(self, bint compute_derivatives=False):
"""
For reusing this object e.g. in the smoother computation. It makes sense
because the necessary matrices have already been computed for all
time steps.
"""
return self
cdef class AQcompute_batch_Cython(Q_handling_Cython):
"""
Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter
from the matrices F, L, Qc, P_inf, dF, dQc, dP_inf of the continuous state
equation. dt - time steps.
It has the same interface as AQcompute_once.
It computes matrices for all time steps. This object is used when
there are not so many (controlled by internal variable)
different time steps and storing all the matrices does not take too much memory.
Since all the matrices are computed all together, this object can be used
in smoother without repeating the computations.
"""
#def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
cdef:
np.ndarray As
np.ndarray Qs
np.ndarray dAs
np.ndarray dQs
np.ndarray reconstruct_indices
long total_size_of_data # must be declared: cdef classes cannot take undeclared attributes
dict Q_svd_dict
int last_k
def __init__(self, np.ndarray[DTYPE_t, ndim=3] As, np.ndarray[DTYPE_t, ndim=3] Qs,
np.ndarray[DTYPE_int_t, ndim=1] reconstruct_indices,
np.ndarray[DTYPE_t, ndim=4] dAs=None,
np.ndarray[DTYPE_t, ndim=4] dQs=None):
"""
Constructor. All necessary parameters are passed here and stored
in the object.
Input:
-------------------
As, Qs: 3D arrays
Discrete-time transition matrices and noise covariances, one per
unique time step.
reconstruct_indices: 1D array
For every Kalman filter step, the index of the corresponding
matrix in As and Qs.
dAs, dQs: 4D arrays
Derivatives of As and Qs with respect to the parameters, if required.
Output:
-------------------
"""
self.As = As
self.Qs = Qs
self.dAs = dAs
self.dQs = dQs
self.reconstruct_indices = reconstruct_indices
self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\
(self.dAs.nbytes if (self.dAs is not None) else 0) +\
(self.dQs.nbytes if (self.dQs is not None) else 0) +\
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.last_k = 0
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
"""
Dynamic model
"""
return np.dot(A, m) # default dynamic model
cpdef reset(self, bint compute_derivatives=False):
"""
For reusing this object e.g. in smoother computation. It makes sence
because necessary matrices have been already computed for all
time steps.
"""
return self
cpdef Ak(self,int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] P):
self.last_k = k
return self.As[:,:, <int>self.reconstruct_indices[k]]
cpdef Qk(self,int k):
self.last_k = k
return self.Qs[:,:, <int>self.reconstruct_indices[k]]
cpdef dAk(self, int k):
self.last_k = k
return self.dAs[:,:, :, <int>self.reconstruct_indices[k]]
cpdef dQk(self, int k):
self.last_k = k
return self.dQs[:,:, :, <int>self.reconstruct_indices[k]]
cpdef Q_srk(self, int k):
"""
Square root of the noise matrix Q
"""
cdef int matrix_index = <int>self.reconstruct_indices[k]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
else:
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
return square_root
# def return_last(self):
# """
# Function returns last available matrices.
# """
#
# if (self.last_k is None):
# raise ValueError("Matrices are not computed.")
# else:
# ind = self.reconstruct_indices[self.last_k]
# A = self.As[:,:, ind]
# Q = self.Qs[:,:, ind]
# dA = self.dAs[:,:, :, ind]
# dQ = self.dQs[:,:, :, ind]
#
# return self.last_k, A, Q, dA, dQ
@cython.boundscheck(False)
def _kalman_prediction_step_SVD_Cython(long k, np.ndarray[DTYPE_t, ndim=2] p_m , tuple p_P,
Dynamic_Callables_Cython p_dynamic_callables,
bint calc_grad_log_likelihood=False,
np.ndarray[DTYPE_t, ndim=3] p_dm = None,
np.ndarray[DTYPE_t, ndim=3] p_dP = None):
"""
Discrete-time prediction function
Input:
k:int
Iteration No. Starts at 0. Total number of iterations equal to the
number of measurements.
p_m: matrix of size (state_dim, time_series_no)
Mean value from the previous step. In "multiple time series mode"
it is a matrix whose second dimension corresponds to the different
time series.
p_P: tuple (Prev_cov, S, V)
Covariance matrix from the previous step and its SVD decomposition.
Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V)
p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
k (iteration number), starts at 0
x_{k-1} State from the previous step
A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
p_f_A: function (k, m, P) return Jacobian of dynamic function, it is
passed into p_a.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
p_f_Qsr: function (k). Returns square root of noise matrix of the
dynamic model on iteration k. k (iteration number). starts at 0
calc_grad_log_likelihood: boolean
Whether to calculate gradient of the marginal likelihood
of the state-space model. If true then the next parameter must
provide the extra parameters for gradient calculation.
p_dm: 3D array (state_dim, time_series_no, parameters_no)
Mean derivatives from the previous step. In "multiple time series mode"
it is a 3D array whose second dimension corresponds to the different
time series.
p_dP: 3D array (state_dim, state_dim, parameters_no)
Covariance derivatives from the previous step
grad_calc_params_1: List or None
List with derivatives. The first component is 'f_dA' - function(k)
which returns the derivative of A. The second element is 'f_dQ'
- function(k). Function which returns the derivative of Q.
Output:
----------------------------
m_pred, P_pred, dm_pred, dP_pred: matrices, 3D objects
Results of the prediction step.
"""
# covariance from the previous step: p_prev_cov = V * S * V.T
cdef np.ndarray[DTYPE_t, ndim=2] Prev_cov = p_P[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_old = p_P[1]
cdef np.ndarray[DTYPE_t, ndim=2] V_old = p_P[2]
#p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step
# index correspond to values from previous iteration.
cdef np.ndarray[DTYPE_t, ndim=2] A = p_dynamic_callables.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian)
cdef np.ndarray[DTYPE_t, ndim=2] Q = p_dynamic_callables.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step)
cdef np.ndarray[DTYPE_t, ndim=2] Q_sr = p_dynamic_callables.Q_srk(k)
# Prediction step ->
cdef np.ndarray[DTYPE_t, ndim=2] m_pred = p_dynamic_callables.f_a(k, p_m, A) # predicted mean
# the covariance prediction is done in square-root (SVD) form:
cdef np.ndarray[DTYPE_t, ndim=2] svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) )
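# Square-root form of the prediction: for the stacked matrix
# B = vstack( (sqrt(S_old) * (A V_old)).T, Q_sr.T ) one has
# B.T B = A P A.T + Q, so an SVD of B factorizes the predicted
# covariance without forming (and possibly de-symmetrizing) it.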
res = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
# (U,S,Vh)
cdef np.ndarray[DTYPE_t, ndim=2] U = res[0]
cdef np.ndarray[DTYPE_t, ndim=1] S = res[1]
cdef np.ndarray[DTYPE_t, ndim=2] Vh = res[2]
# predicted variance computed by the regular method. For testing
#P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q
cdef np.ndarray[DTYPE_t, ndim=2] V_new = Vh.T
cdef np.ndarray[DTYPE_t, ndim=1] S_new = S**2
cdef np.ndarray[DTYPE_t, ndim=2] P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance
#tuple P_pred = (P_pred, S_new, Vh.T)
# Prediction step <-
# derivatives
cdef np.ndarray[DTYPE_t, ndim=3] dA_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dQ_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred
cdef np.ndarray[DTYPE_t, ndim=3] dP_pred
cdef int param_number
cdef int j
cdef tuple ret
cdef np.ndarray[DTYPE_t, ndim=2] dA
cdef np.ndarray[DTYPE_t, ndim=2] dQ
if calc_grad_log_likelihood:
dA_all_params = p_dynamic_callables.dAk(k) # derivatives of A wrt parameters
dQ_all_params = p_dynamic_callables.dQk(k) # derivatives of Q wrt parameters
param_number = p_dP.shape[2]
# p_dm, p_dP - derivatives from the previous step
dm_pred = np.empty((p_dm.shape[0], p_dm.shape[1], p_dm.shape[2]), dtype = DTYPE)
dP_pred = np.empty((p_dP.shape[0], p_dP.shape[1], p_dP.shape[2]), dtype = DTYPE)
for j in range(param_number):
dA = dA_all_params[:,:,j]
dQ = dQ_all_params[:,:,j]
dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j])
# prediction step derivatives for current parameter:
dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T))
dP_pred[:,:,j] += dP_pred[:,:,j].T
dP_pred[:,:,j] += np.dot( A ,np.dot( p_dP[:,:,j] , A.T)) + dQ
dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
else:
dm_pred = None
dP_pred = None
ret = (P_pred, S_new, Vh.T)
return m_pred, ret, dm_pred, dP_pred
@cython.boundscheck(False)
def _kalman_update_step_SVD_Cython(long k, np.ndarray[DTYPE_t, ndim=2] p_m, tuple p_P,
Measurement_Callables_Cython p_measurement_callables,
np.ndarray[DTYPE_t, ndim=2] measurement,
bint calc_log_likelihood= False,
bint calc_grad_log_likelihood=False,
np.ndarray[DTYPE_t, ndim=3] p_dm = None,
np.ndarray[DTYPE_t, ndim=3] p_dP = None):
"""
Input:
k: int
Iteration No. Starts at 0. Total number of iterations equal to the
number of measurements.
p_m: matrix of size (state_dim, time_series_no)
Mean value from the previous step. In "multiple time series mode"
it is a matrix whose second dimension corresponds to the different
time series.
p_P: tuple (P_pred, S, V)
Covariance matrix from the prediction step and its SVD decomposition.
P_pred = V * S * V.T The tuple is (P_pred, S, V)
p_h: function (k, x_{k}, H_{k}). Measurement function.
k (iteration number), starts at 0
x_{k} state
H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
p_f_H: function (k, m, P) return Jacobian of dynamic function, it is
passed into p_h.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
p_f_R: function (k). Returns noise matrix of measurement equation
on iteration k.
k (iteration number). starts at 0
p_f_iRsr: function (k). Returns the square root of the noise matrix of
measurement equation on iteration k.
k (iteration number). starts at 0
measurement: (measurement_dim, time_series_no) matrix
One measurement used on the current update step. In
"multiple time series mode" it is a matrix whose second dimension
corresponds to the different time series.
calc_log_likelihood: boolean
Whether to calculate marginal likelihood of the state-space model.
calc_grad_log_likelihood: boolean
Whether to calculate gradient of the marginal likelihood
of the state-space model. If true then the next parameter must
provide the extra parameters for gradient calculation.
p_dm: 3D array (state_dim, time_series_no, parameters_no)
Mean derivatives from the prediction step. In "multiple time series mode"
it is a 3D array whose second dimension corresponds to the different
time series.
p_dP: array
Covariance derivatives from the prediction step.
grad_calc_params_2: List or None
List with derivatives. The first component is 'f_dH' - function(k)
which returns the derivative of H. The second element is 'f_dR'
- function(k). Function which returns the derivative of R.
Output:
----------------------------
m_upd, P_upd, dm_upd, dP_upd: matrices, 3D objects
Results of the update step.
log_likelihood_update: double or 1D array
Update to the log_likelihood from this step
d_log_likelihood_update: (grad_params_no, time_series_no) matrix
Update to the gradient of log_likelihood, "multiple time series mode"
adds extra columns to the gradient.
"""
cdef np.ndarray[DTYPE_t, ndim=2] m_pred = p_m # from prediction step
#P_pred,S_pred,V_pred = p_P # from prediction step
cdef np.ndarray[DTYPE_t, ndim=2] P_pred = p_P[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_pred = p_P[1]
cdef np.ndarray[DTYPE_t, ndim=2] V_pred = p_P[2]
cdef np.ndarray[DTYPE_t, ndim=2] H = p_measurement_callables.Hk(k, m_pred, P_pred)
cdef np.ndarray[DTYPE_t, ndim=2] R = p_measurement_callables.Rk(k)
cdef np.ndarray[DTYPE_t, ndim=2] R_isr =p_measurement_callables.R_isrk(k) # square root of the inverse of R matrix
cdef int time_series_no = p_m.shape[1] # number of time series
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood_update # log_likelihood_update=None;
# Update step (only if there is data)
#if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do the computations properly for the others.
cdef np.ndarray[DTYPE_t, ndim=2] v = measurement-p_measurement_callables.f_h(k, m_pred, H)
cdef np.ndarray[DTYPE_t, ndim=2] svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) )
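# SVD form of the update: for B = vstack( R_isr.T H V_pred, diag(1/sqrt(S_pred)) ),
# B.T B = V_pred.T (H.T R^{-1} H) V_pred + diag(1/S_pred), which is the
# information-form updated covariance expressed in the predicted basis.
# The SVD of B hence yields P_upd = U_upd diag(S_upd) U_upd.T below.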
res = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
#(U,S,Vh)
cdef np.ndarray[DTYPE_t, ndim=2] U = res[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_svd = res[1]
cdef np.ndarray[DTYPE_t, ndim=2] Vh = res[2]
# P_upd = U_upd S_upd**2 U_upd.T
cdef np.ndarray[DTYPE_t, ndim=2] U_upd = np.dot(V_pred, Vh.T)
cdef np.ndarray[DTYPE_t, ndim=1] S_upd = (1.0/S_svd)**2
cdef np.ndarray[DTYPE_t, ndim=2] P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance
#P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step
# still need to compute S and K for the derivative computation
cdef np.ndarray[DTYPE_t, ndim=2] S = H.dot(P_pred).dot(H.T) + R
cdef np.ndarray[DTYPE_t, ndim=2] K
cdef bint measurement_dim_gt_one = False
if measurement.shape[0]==1: # measurements are one dimensional
if (S < 0):
raise ValueError("Kalman Filter Update SVD: S is negative step %i" % k )
#import pdb; pdb.set_trace()
K = P_pred.dot(H.T) / S
if calc_log_likelihood:
log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
v*v / S)
#log_likelihood_update = log_likelihood_update[0,0] # to make int
if np.any(np.isnan(log_likelihood_update)): # some entries are NaN (e.g. caused by P_pred)
raise ValueError("Nan values in likelihood update!")
else:
log_likelihood_update = None
#LL = None; islower = None
else:
measurement_dim_gt_one = True
raise ValueError("""Measurement dimension larger then 1 is currently not supported""")
# Old method of computing updated covariance (for testing) ->
#P_upd_tst = K.dot(S).dot(K.T)
#P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T)
#P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric
# Old method of computing updated covariance (for testing) <-
cdef np.ndarray[DTYPE_t, ndim=3] dm_upd # dm_upd=None;
cdef np.ndarray[DTYPE_t, ndim=3] dP_upd # dP_upd=None;
cdef np.ndarray[DTYPE_t, ndim=2] d_log_likelihood_update # d_log_likelihood_update=None
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dP_pred_all_params
cdef int param_number
cdef np.ndarray[DTYPE_t, ndim=3] dH_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dR_all_params
cdef int param
cdef np.ndarray[DTYPE_t, ndim=2] dH, dR, dm_pred, dP_pred, dv, dS, tmp1, tmp2, tmp3, dK, tmp5
cdef tuple ret
if calc_grad_log_likelihood:
dm_pred_all_params = p_dm # derivatives of the prediction phase
dP_pred_all_params = p_dP
param_number = p_dP.shape[2]
dH_all_params = p_measurement_callables.dHk(k)
dR_all_params = p_measurement_callables.dRk(k)
dm_upd = np.empty((dm_pred_all_params.shape[0], dm_pred_all_params.shape[1], dm_pred_all_params.shape[2]), dtype = DTYPE)
dP_upd = np.empty((dP_pred_all_params.shape[0], dP_pred_all_params.shape[1], dP_pred_all_params.shape[2]), dtype = DTYPE)
# first dimension - parameter_no, second - time series number
d_log_likelihood_update = np.empty((param_number,time_series_no), dtype = DTYPE)
for param in range(param_number):
dH = dH_all_params[:,:,param]
dR = dR_all_params[:,:,param]
dm_pred = dm_pred_all_params[:,:,param]
dP_pred = dP_pred_all_params[:,:,param]
# Terms in the likelihood derivatives
dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred)
dS = np.dot(dH, np.dot( P_pred, H.T))
dS += dS.T
dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
# TODO: maybe symmetrize dS
tmp1 = H.T / S
tmp2 = dH.T / S
tmp3 = dS.T / S
dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
np.dot( P_pred, np.dot( tmp1, tmp3 ) )
# terms required for the next step, save this for each parameter
dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
dP_upd[:,:,param] += dP_upd[:,:,param].T
dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
# computing the likelihood change for each parameter:
tmp5 = v / S
d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
# Compute the actual updates for mean of the states. Variance update
# is computed earlier.
else:
dm_upd = None
dP_upd = None
d_log_likelihood_update = None
m_upd = m_pred + K.dot( v )
ret = (P_upd,S_upd,U_upd)
return m_upd, ret, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
@cython.boundscheck(False)
def _cont_discr_kalman_filter_raw_Cython(int state_dim, Dynamic_Callables_Cython p_dynamic_callables,
Measurement_Callables_Cython p_measurement_callables, X, Y,
np.ndarray[DTYPE_t, ndim=2] m_init=None, np.ndarray[DTYPE_t, ndim=2] P_init=None,
p_kalman_filter_type='regular',
bint calc_log_likelihood=False,
bint calc_grad_log_likelihood=False,
int grad_params_no=0,
np.ndarray[DTYPE_t, ndim=3] dm_init=None,
np.ndarray[DTYPE_t, ndim=3] dP_init=None):
cdef int steps_no = Y.shape[0] # number of steps in the Kalman Filter
cdef int time_series_no = Y.shape[2] # multiple time series mode
# Allocate space for results
# Mean estimations. Initial values will be included
cdef np.ndarray[DTYPE_t, ndim=3] M = np.empty(((steps_no+1),state_dim,time_series_no), dtype=DTYPE)
M[0,:,:] = m_init # Initialize mean values
# Variance estimations. Initial values will be included
cdef np.ndarray[DTYPE_t, ndim=3] P = np.empty(((steps_no+1),state_dim,state_dim))
P_init = 0.5*( P_init + P_init.T) # symmetrize the initial covariance; in some unstable cases this is useful
P[0,:,:] = P_init # Initialize initial covariance matrix
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
U,S,Vh = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
S[ (S==0) ] = 1e-17 # allows running the algorithm with a singular initial variance
cdef tuple P_upd = (P_init, S,U)
#log_likelihood = 0
#grad_log_likelihood = np.zeros((grad_params_no,1))
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood = np.zeros((1, time_series_no), dtype = DTYPE) #if calc_log_likelihood else None
cdef np.ndarray[DTYPE_t, ndim=2] grad_log_likelihood = np.zeros((grad_params_no, time_series_no), dtype = DTYPE) #if calc_grad_log_likelihood else None
#setting initial values for derivatives update
cdef np.ndarray[DTYPE_t, ndim=3] dm_upd = dm_init
cdef np.ndarray[DTYPE_t, ndim=3] dP_upd = dP_init
# Main loop of the Kalman filter
cdef np.ndarray[DTYPE_t, ndim=2] prev_mean, k_measurment
cdef np.ndarray[DTYPE_t, ndim=2] m_pred, m_upd
cdef tuple P_pred
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred, dP_pred
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood_update, d_log_likelihood_update
cdef int k
#print "Hi I am cython"
for k in range(0,steps_no):
# In this loop the index for new estimates is (k+1) and for old ones (k),
# because the initial values are stored at index 0.
#import pdb; pdb.set_trace()
prev_mean = M[k,:,:] # mean from the previous step
m_pred, P_pred, dm_pred, dP_pred = \
_kalman_prediction_step_SVD_Cython(k, prev_mean ,P_upd, p_dynamic_callables,
calc_grad_log_likelihood, dm_upd, dP_upd)
k_measurment = Y[k,:,:]
if (np.any(np.isnan(k_measurment)) == False):
# if np.any(np.isnan(k_measurment)):
# raise ValueError("Nan measurements are currently not supported")
m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
_kalman_update_step_SVD_Cython(k, m_pred , P_pred, p_measurement_callables,
k_measurment, calc_log_likelihood=calc_log_likelihood,
calc_grad_log_likelihood=calc_grad_log_likelihood,
p_dm = dm_pred, p_dP = dP_pred)
else:
if not np.all(np.isnan(k_measurment)):
raise ValueError("""Nan measurements are currently not supported if
they are intermixed with not NaN measurements""")
else:
m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
if calc_log_likelihood:
log_likelihood_update = np.zeros((1,time_series_no))
if calc_grad_log_likelihood:
d_log_likelihood_update = np.zeros((grad_params_no,time_series_no))
if calc_log_likelihood:
log_likelihood += log_likelihood_update
if calc_grad_log_likelihood:
grad_log_likelihood += d_log_likelihood_update
M[k+1,:,:] = m_upd # separate mean value for each time series
P[k+1,:,:] = P_upd[0]
return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))

File diff suppressed because it is too large

View file

@ -0,0 +1,432 @@
# Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# This implementation of converting GPs to state space models is based on the article:
#
# @article{Sarkka+Solin+Hartikainen:2013,
# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen},
# year = {2013},
# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing},
# journal = {IEEE Signal Processing Magazine},
# volume = {30},
# number = {4},
# pages = {51--61}
# }
#
import numpy as np
from scipy import linalg
from scipy import stats
from ..core import Model
from .. import kern
#from GPy.plotting.matplot_dep.models_plots import gpplot
#from GPy.plotting.matplot_dep.base_plots import x_frame1D
#from GPy.plotting.matplot_dep import Tango
#import pylab as pb
from GPy.core.parameterization.param import Param
import GPy
from .. import likelihoods
from . import state_space_main as ssm
from . import state_space_setup as ss_setup
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1:
X = np.atleast_2d(X).T
self.num_data, input_dim = X.shape
if len(Y.shape) == 1:
Y = np.atleast_2d(Y).T
assert input_dim==1, "State space methods are only for 1D data"
if len(Y.shape)==2:
num_data_Y, self.output_dim = Y.shape
ts_number = None
elif len(Y.shape)==3:
num_data_Y, self.output_dim, ts_number = Y.shape
self.ts_number = ts_number
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods are for single outputs only"
self.kalman_filter_type = kalman_filter_type
#self.kalman_filter_type = 'svd' # temp test
ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace()
global ssm
#from . import state_space_main as ssm
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
reload(ssm)
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
# Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var)
# Default kernel
if kernel is None:
raise ValueError("State-Space Model: the kernel must be provided.")
else:
self.kern = kernel
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
self.posterior = None
# Assert that the kernel is supported
if not hasattr(self.kern, 'sde'):
raise NotImplementedError('SDE must be implemented for the kernel being used')
#assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
def parameters_changed(self):
"""
Parameters have now changed
"""
#np.set_printoptions(16)
#print(self.param_array)
#import pdb; pdb.set_trace()
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters
measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
# add measurement noise as a parameter and get the gradient matrices
dF = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dP_inf[:,:,:-1] = dP_inft
dP0[:,:,:-1] = dP0t
# The sigma2 derivative
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
dR[:,:,-1] = np.eye(measurement_dim)
# Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf
grad_calc_params['dF'] = dF
grad_calc_params['dQc'] = dQc
grad_calc_params['dR'] = dR
grad_calc_params['dP_init'] = dP0
kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shape of self.Y
# becomes 3D even though it must be 2D. The reason has not been discovered.
Y = self.Y
if self.ts_number is None:
Y.shape = (self.num_data,1)
else:
Y.shape = (self.num_data,1,self.ts_number)
(filter_means, filter_covs, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params)
if np.any( np.isfinite(log_likelihood) == False):
#import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the log_likelihood")
if np.any( np.isfinite(grad_log_likelihood) == False):
#import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the grad_log_likelihood")
#print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False):
"""
Performs the actual prediction for new X points.
Inner function. It is called only from inside this class.
Input:
---------------------
Xnew: vector or (n_points,1) matrix
New time points at which to evaluate predictions.
Ynew: (n_train_points, ts_no) matrix
This matrix can substitute for the original training points (in order
to use only the parameters of the model).
filteronly: bool
Use only the Kalman filter for prediction. In this case the output does
not coincide with the corresponding Gaussian process.
Output:
--------------------
m: vector
Mean prediction
V: vector
Variance in every point
"""
# Set defaults
if Ynew is None:
Ynew = self.Y
# Make a single matrix containing training and testing points
if Xnew is not None:
X = np.vstack((self.X, Xnew))
Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
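# Padding the test inputs with NaN observations makes the Kalman filter
# run only the prediction step there (the update is skipped for NaN
# measurements), so after smoothing these entries contain the GP
# posterior at the new points.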
predict_only_training = False
else:
X = self.X
Y = Ynew
predict_only_training = True
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index] # TODO they are not used
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
#Y = self.Y[:, 0,0]
# Run the Kalman filter
#import pdb; pdb.set_trace()
kalman_filter_type = self.kalman_filter_type
(M, P, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type,
calc_log_likelihood=False,
calc_grad_log_likelihood=False)
# (filter_means, filter_covs, log_likelihood,
# grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
# float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
# P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
# calc_grad_log_likelihood=True,
# grad_params_no=grad_params_no,
# grad_calc_params=grad_calc_params)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
# remove initial values
M = M[1:,:,:]
P = P[1:,:,:]
# Put the data back in the original order
M = M[return_inverse,:,:]
P = P[return_inverse,:,:]
# Only return the values for Xnew
if not predict_only_training:
M = M[self.num_data:,:,:]
P = P[self.num_data:,:,:]
# Calculate the mean and variance
# after einsum, m is 3D: (sample_num, dim_no, time_series_no)
m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T)
m.shape = (m.shape[0], m.shape[1]) # remove the third dimension
V = np.einsum('ij,ajk,kl', H, P, H.T)
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
# Return the posterior of the state
return (m, V)
def predict(self, Xnew=None, filteronly=False):
# Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
V += float(self.Gaussian_noise.variance)
# Lower and upper bounds
lower = m - 2*np.sqrt(V)
upper = m + 2*np.sqrt(V)
# Return mean and variance
return (m, V, lower, upper)
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5)):
mu, var = self._raw_predict(Xnew)
#import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
#
# # Deal with optional parameters
# if ax is None:
# fig = pb.figure(num=fignum)
# ax = fig.add_subplot(111)
#
# # Define the frame on which to plot
# resolution = resolution or 200
# Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
#
# # Make a prediction on the frame and plot it
# if plot_raw:
# m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
# lower = m - 2*np.sqrt(v)
# upper = m + 2*np.sqrt(v)
# Y = self.Y
# else:
# m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
# Y = self.Y
#
# # Plot the values
# gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
# ax.plot(self.X, self.Y, 'kx', mew=1.5)
#
# # Optionally plot some samples
# if samples:
# if plot_raw:
# Ysim = self.posterior_samples_f(Xgrid, samples)
# else:
# Ysim = self.posterior_samples(Xgrid, samples)
# for yi in Ysim.T:
# ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
#
# # Set the limits of the plot to some sensible values
# ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
# ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
#
# def prior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Allocate space for results
# Y = np.empty((size,X.shape[0]))
#
# # Simulate random draws
# #for j in range(0,size):
# # Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
# Y = self.simulate(F,L,Qc,Pinf,X.T,size)
#
# # Only observations
# Y = np.tensordot(H[0],Y,(0,0))
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return trajectory
# return Y.T
#
# def posterior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Run smoother on original data
# (m,V) = self.predict_raw(X)
#
# # Simulate random draws from the GP prior
# y = self.prior_samples_f(np.vstack((self.X, X)),size)
#
# # Allocate space for sample trajectories
# Y = np.empty((size,X.shape[0]))
#
# # Run the RTS smoother on each of these values
# for j in range(0,size):
# yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
# (m2,V2) = self.predict_raw(X,Ynew=yobs)
# Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return posterior sample trajectories
# return Y.T
#
# def posterior_samples(self, X, size=10):
#
# # Make samples of f
# Y = self.posterior_samples_f(X,size)
#
# # Add noise
# Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
#
# # Return trajectory
# return Y
#
#
# def simulate(self,F,L,Qc,Pinf,X,size=1):
# # Simulate a trajectory using the state space model
#
# # Allocate space for results
# f = np.zeros((F.shape[0],size,X.shape[1]))
#
# # Initial state
# f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
#
# # Time step lengths
# dt = np.empty(X.shape)
# dt[:,0] = X[:,1]-X[:,0]
# dt[:,1:] = np.diff(X)
#
# # Solve the LTI SDE for these time steps
# As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
#
# # Sweep through remaining time points
# for k in range(1,X.shape[1]):
#
# # Form discrete-time model
# A = As[:,:,index[1-k]]
# Q = Qs[:,:,index[1-k]]
#
# # Draw the state
# f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
#
# # Return values
# return f

View file

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
This module is intended for the setup of state_space_main module.
This module is needed because of the way the state_space_main module
is connected with the Cython code.
"""
use_cython = False

View file

@ -235,8 +235,6 @@ def plot_density(self, plot_limits=None, fixed_inputs=None,
Give the Y_metadata in the predict_kw if you need it.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input dimension i should be set to value v.

View file

@ -1,5 +1,5 @@
#===============================================================================
-# Copyright (c) 2015, Max Zwiessele
+# Copyright (c) 2016, Max Zwiessele, Alan saul
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -117,3 +117,42 @@ def align_subplot_array(axes,xlim=None, ylim=None):
ax.set_xticks([])
else:
removeUpperTicks(ax)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
"""
Convenience function that returns a fixed_inputs list in which all inputs
other than non_fixed_inputs are fixed using fix_routine
:param model: model
:type model: Model
:param non_fixed_inputs: dimensions of non fixed inputs
:type non_fixed_inputs: list
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
:type fix_routine: string
:param as_list: if true, will return a list of tuples with (dimension, fixed_val) otherwise it will create the corresponding X matrix
:type as_list: boolean
"""
from ...inference.latent_function_inference.posterior import VariationalPosterior
f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy()
elif isinstance(model.X, VariationalPosterior):
X = model.X.values.copy()
else:
if X_all:
X = model.X_all.copy()
else:
X = model.X.copy()
for i in range(X.shape[1]):
if i not in non_fixed_inputs:
if fix_routine == 'mean':
f_inputs.append( (i, np.mean(X[:,i])) )
elif fix_routine == 'median':
f_inputs.append( (i, np.median(X[:,i])) )
else: # set to zero
f_inputs.append( (i, 0) )
if not as_list:
X[:,i] = f_inputs[-1][1]
if as_list:
return f_inputs
else:
return X

View file

@ -0,0 +1,353 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Testing state space related functions.
"""
import unittest
import numpy as np
import GPy
import GPy.models.state_space_model as SS_model
from .state_space_main_tests import generate_x_points, generate_sine_data, \
generate_linear_data, generate_brownian_data, generate_linear_plus_sin
#from state_space_main_tests import generate_x_points, generate_sine_data, \
# generate_linear_data, generate_brownian_data, generate_linear_plus_sin
class StateSpaceKernelsTests(np.testing.TestCase):
def setUp(self):
pass
def run_for_model(self, X, Y, ss_kernel, kalman_filter_type = 'regular',
use_cython=False, check_gradients=True,
optimize=True, optimize_max_iters=1000,predict_X=None,
compare_with_GP=True, gp_kernel=None,
mean_compare_decimal=10, var_compare_decimal=7):
m1 = SS_model.StateSpace(X,Y, ss_kernel,
kalman_filter_type=kalman_filter_type,
use_cython=use_cython)
if check_gradients:
self.assertTrue(m1.checkgrad())
if optimize:
m1.optimize(optimizer='lbfgsb',max_iters=optimize_max_iters)
if compare_with_GP and (predict_X is None):
predict_X = X
if (predict_X is not None):
x_pred_reg_1 = m1.predict(predict_X)
x_quant_reg_1 = m1.predict_quantiles(predict_X)
if compare_with_GP:
m2 = GPy.models.GPRegression(X,Y, gp_kernel)
m2.optimize(optimizer='lbfgsb', max_iters=optimize_max_iters)
#print(m2)
x_pred_reg_2 = m2.predict(predict_X)
x_quant_reg_2 = m2.predict_quantiles(predict_X)
# Test values
#print np.max(np.abs(x_pred_reg_1[0]-x_pred_reg_2[0]))
np.testing.assert_almost_equal(np.max(np.abs(x_pred_reg_1[0]- \
x_pred_reg_2[0])), 0, decimal=mean_compare_decimal)
# Test variances
#print np.max(np.abs(x_pred_reg_1[1]-x_pred_reg_2[1]))
np.testing.assert_almost_equal(np.max(np.abs(x_pred_reg_1[1]- \
x_pred_reg_2[1])), 0, decimal=var_compare_decimal)
def test_Matern32_kernel(self,):
np.random.seed(234) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_Matern32(1,active_dims=[0,])
gp_kernel = GPy.kern.Matern32(1,active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
predict_X=X,
compare_with_GP=True,
gp_kernel=gp_kernel,
mean_compare_decimal=10, var_compare_decimal=7)
def test_Matern52_kernel(self,):
np.random.seed(234) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_Matern52(1,active_dims=[0,])
gp_kernel = GPy.kern.Matern52(1,active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
optimize = True, predict_X=X,
compare_with_GP=True, gp_kernel=gp_kernel,
mean_compare_decimal=8, var_compare_decimal=7)
def test_RBF_kernel(self,):
np.random.seed(234) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_RBF(1,active_dims=[0,])
gp_kernel = GPy.kern.RBF(1,active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=1, var_compare_decimal=1)
def test_periodic_kernel(self,):
np.random.seed(322) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
ss_kernel.lengthscale.constrain_bounded(0.27, 1000)
ss_kernel.period.constrain_bounded(0.17, 100)
gp_kernel = GPy.kern.StdPeriodic(1,active_dims=[0,])
gp_kernel.lengthscale.constrain_bounded(0.27, 1000)
gp_kernel.period.constrain_bounded(0.17, 100)
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=3, var_compare_decimal=3)
def test_quasi_periodic_kernel(self,):
np.random.seed(329) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_Matern32(1)*GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
ss_kernel.std_periodic.lengthscale.constrain_bounded(0.25, 1000)
ss_kernel.std_periodic.period.constrain_bounded(0.15, 100)
gp_kernel = GPy.kern.Matern32(1)*GPy.kern.StdPeriodic(1,active_dims=[0,])
gp_kernel.std_periodic.lengthscale.constrain_bounded(0.25, 1000)
gp_kernel.std_periodic.period.constrain_bounded(0.15, 100)
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=1, var_compare_decimal=2)
def test_linear_kernel(self,):
np.random.seed(234) # seed the random number generator
(X,Y) = generate_linear_data(x_points=None, tangent=2.0, add_term=20.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_Linear(1,X,active_dims=[0,]) + GPy.kern.sde_Bias(1, active_dims=[0,])
gp_kernel = GPy.kern.Linear(1, active_dims=[0,]) + GPy.kern.Bias(1, active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients= False,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=5, var_compare_decimal=5)
def test_brownian_kernel(self,):
np.random.seed(234) # seed the random number generator
(X,Y) = generate_brownian_data(x_points=None, kernel_var=2.0, noise_var = 0.1,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_Brownian()
gp_kernel = GPy.kern.Brownian()
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=10, var_compare_decimal=7)
def test_exponential_kernel(self,):
np.random.seed(234) # seed the random number generator
(X,Y) = generate_linear_data(x_points=None, tangent=1.0, add_term=20.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_Exponential(1, active_dims=[0,])
gp_kernel = GPy.kern.Exponential(1, active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=5, var_compare_decimal=6)
def test_kernel_addition(self,):
#np.random.seed(329) # seed the random number generator
np.random.seed(333)
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=5.0, noise_var=2.0,
plot = False, points_num=100, x_interval = (0, 40), random=True)
(X1,Y1) = generate_linear_data(x_points=X, tangent=1.0, add_term=20.0, noise_var=0.0,
plot = False, points_num=100, x_interval = (0, 40), random=True)
# Sine data <-
Y = Y + Y1
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
def get_new_kernels():
ss_kernel = GPy.kern.sde_Linear(1,X) + GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
ss_kernel.std_periodic.lengthscale.constrain_bounded(0.25, 1000)
ss_kernel.std_periodic.period.constrain_bounded(3, 8)
gp_kernel = GPy.kern.Linear(1) + GPy.kern.StdPeriodic(1,active_dims=[0,])
gp_kernel.std_periodic.lengthscale.constrain_bounded(0.25, 1000)
gp_kernel.std_periodic.period.constrain_bounded(3, 8)
return ss_kernel, gp_kernel
# The Cython backend is available only with the SVD filter.
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X, Y, ss_kernel, kalman_filter_type = 'svd',
use_cython=True, optimize_max_iters=10, check_gradients=False,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=5, var_compare_decimal=5)
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X, Y, ss_kernel, kalman_filter_type = 'regular',
use_cython=False, optimize_max_iters=10, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=5, var_compare_decimal=5)
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X, Y, ss_kernel, kalman_filter_type = 'svd',
use_cython=False, optimize_max_iters=10, check_gradients=False,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=5, var_compare_decimal=5)
def test_kernel_multiplication(self,):
np.random.seed(329) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
def get_new_kernels():
ss_kernel = GPy.kern.sde_Matern32(1)*GPy.kern.sde_Matern52(1)
gp_kernel = GPy.kern.Matern32(1)*GPy.kern.Matern52(1)
return ss_kernel, gp_kernel
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X, Y, ss_kernel, kalman_filter_type = 'svd',
use_cython=True, optimize_max_iters=10, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=-1, var_compare_decimal=-1)
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X, Y, ss_kernel, kalman_filter_type = 'regular',
use_cython=False, optimize_max_iters=10, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=-1, var_compare_decimal=-1)
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X, Y, ss_kernel, kalman_filter_type = 'svd',
use_cython=False, optimize_max_iters=10, check_gradients=True,
predict_X=X,
gp_kernel=gp_kernel,
mean_compare_decimal=-1, var_compare_decimal=0)
def test_forecast(self,):
"""
Test time-series forecasting.
"""
# Generate data ->
np.random.seed(339) # seed the random number generator
#import pdb; pdb.set_trace()
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=5.0, noise_var=2.0,
plot = False, points_num=100, x_interval = (0, 40), random=True)
(X1,Y1) = generate_linear_data(x_points=X, tangent=1.0, add_term=20.0, noise_var=0.0,
plot = False, points_num=100, x_interval = (0, 40), random=True)
Y = Y + Y1
X_train = X[X <= 20]
Y_train = Y[X <= 20]
X_test = X[X > 20]
Y_test = Y[X > 20]
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
X_train.shape = (X_train.shape[0],1); Y_train.shape = (Y_train.shape[0],1)
X_test.shape = (X_test.shape[0],1); Y_test.shape = (Y_test.shape[0],1)
# Generate data <-
#import pdb; pdb.set_trace()
def get_new_kernels():
periodic_kernel = GPy.kern.StdPeriodic(1,active_dims=[0,])
gp_kernel = GPy.kern.Linear(1, active_dims=[0,]) + GPy.kern.Bias(1, active_dims=[0,]) + periodic_kernel
gp_kernel.std_periodic.lengthscale.constrain_bounded(0.25, 1000)
gp_kernel.std_periodic.period.constrain_bounded(0.15, 100)
periodic_kernel = GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
ss_kernel = GPy.kern.sde_Linear(1,X,active_dims=[0,]) + \
GPy.kern.sde_Bias(1, active_dims=[0,]) + periodic_kernel
ss_kernel.std_periodic.lengthscale.constrain_bounded(0.25, 1000)
ss_kernel.std_periodic.period.constrain_bounded(0.15, 100)
return ss_kernel, gp_kernel
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X_train, Y_train, ss_kernel, kalman_filter_type = 'regular',
use_cython=False, optimize_max_iters=30, check_gradients=True,
predict_X=X_test,
gp_kernel=gp_kernel,
mean_compare_decimal=0, var_compare_decimal=-1)
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X_train, Y_train, ss_kernel, kalman_filter_type = 'svd',
use_cython=False, optimize_max_iters=30, check_gradients=False,
predict_X=X_test,
gp_kernel=gp_kernel,
mean_compare_decimal=-1, var_compare_decimal=-1)
ss_kernel, gp_kernel = get_new_kernels()
self.run_for_model(X_train, Y_train, ss_kernel, kalman_filter_type = 'svd',
use_cython=True, optimize_max_iters=30, check_gradients=False,
predict_X=X_test,
gp_kernel=gp_kernel,
mean_compare_decimal=-1, var_compare_decimal=-1)
if __name__ == "__main__":
print("Running state-space inference tests...")
unittest.main()
#tt = StateSpaceKernelsTests('test_periodic_kernel')
#import pdb; pdb.set_trace()
#tt.test_Matern32_kernel()
#tt.test_Matern52_kernel()
#tt.test_RBF_kernel()
#tt.test_periodic_kernel()
#tt.test_quasi_periodic_kernel()
#tt.test_linear_kernel()
#tt.test_brownian_kernel()
#tt.test_exponential_kernel()
#tt.test_kernel_addition()
#tt.test_kernel_multiplication()
#tt.test_forecast()

View file

@ -148,6 +148,28 @@ class MiscTests(unittest.TestCase):
assert(gc.checkgrad())
assert(gc2.checkgrad())
def test_predict_uncertain_inputs(self):
""" Projection of Gaussian through a linear function is still gaussian, and moments are analytical to compute, so we can check this case for predictions easily """
X = np.linspace(-5,5, 10)[:, None]
Y = 2*X + np.random.randn(*X.shape)*1e-3
m = GPy.models.BayesianGPLVM(Y, 1, X=X, kernel=GPy.kern.Linear(1), num_inducing=1)
m.Gaussian_noise[:] = 1e-4
m.X.mean[:] = X[:]
m.X.variance[:] = 1e-5
m.X.fix()
m.optimize()
X_pred_mu = np.random.randn(5, 1)
X_pred_var = np.random.rand(5, 1) + 1e-5
from GPy.core.parameterization.variational import NormalPosterior
X_pred = NormalPosterior(X_pred_mu, X_pred_var)
# mu_y = \int f(x) q(x|mu,S) dx = \int 2x q(x|mu,S) dx = 2 mu
# S_y = \int (f(x) - mu_y)^2 q(x|mu,S) dx = \int 4x^2 q(x) dx - (2 mu)^2 = 4(mu^2 + S) - 4 mu^2 = 4 S
Y_mu_true = 2*X_pred_mu
Y_var_true = 4*X_pred_var
Y_mu_pred, Y_var_pred = m._raw_predict(X_pred)
np.testing.assert_allclose(Y_mu_true, Y_mu_pred, rtol=1e-4)
np.testing.assert_allclose(Y_var_true, Y_var_pred, rtol=1e-4)
def test_sparse_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)

View file

@ -0,0 +1,977 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Test module for state_space_main.py
"""
import unittest
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import GPy.models.state_space_setup as ss_setup
import GPy.models.state_space_main as ssm
def generate_x_points(points_num=100, x_interval = (0, 20), random=True):
"""
Function generates (sorted) points on the x axis.
Input:
---------------------------
points_num: int
How many points to generate
x_interval: tuple (a,b)
On which interval to generate points
random: bool
Regular points or random
Output:
---------------------------
x_points: np.array
Generated points
"""
x_interval = np.asarray( x_interval )
if random:
x_points = np.random.rand(points_num) * ( x_interval[1] - x_interval[0] ) + x_interval[0]
x_points = np.sort( x_points )
else:
x_points = np.linspace(x_interval[0], x_interval[1], num=points_num )
return x_points
def generate_sine_data(x_points=None, sin_period=2.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=100, x_interval = (0, 20), random=True):
"""
Function generates sinusoidal data.
Input:
--------------------------------
x_points: np.array
Previously generated X points
sin_period: float
Sine period
sin_ampl: float
Sine amplitude
noise_var: float
Gaussian noise variance added to the sine function
plot: bool
Whether to plot generated data
(if x_points is None, the following parameters are used to generate
them. They are the same as in the 'generate_x_points' function)
points_num: int
x_interval: tuple (a,b)
random: bool
"""
sin_function = lambda xx: sin_ampl * np.sin( 2*np.pi/sin_period * xx )
if x_points is None:
x_points = generate_x_points(points_num, x_interval, random)
y_points = sin_function( x_points ) + np.random.randn( len(x_points) ) * np.sqrt(noise_var)
if plot:
pass
return x_points, y_points
def generate_linear_data(x_points=None, tangent=2.0, add_term=1.0, noise_var=2.0,
plot = False, points_num=100, x_interval = (0, 20), random=True):
"""
Function generates linear data.
Input:
--------------------------------
x_points: np.array
Previously generated X points
tangent: float
Factor with which the independent variable is multiplied in the linear equation.
add_term: float
Additive term in the linear equation.
noise_var: float
Gaussian noise variance added to the linear function
plot: bool
Whether to plot generated data
(if x_points is None, the following parameters are used to generate
them. They are the same as in the 'generate_x_points' function)
points_num: int
x_interval: tuple (a,b)
random: bool
"""
linear_function = lambda xx: tangent*xx + add_term
if x_points is None:
x_points = generate_x_points(points_num, x_interval, random)
y_points = linear_function( x_points ) + np.random.randn( len(x_points) ) * np.sqrt(noise_var)
if plot:
pass
return x_points, y_points
def generate_brownian_data(x_points=None, kernel_var = 2.0, noise_var = 2.0,
plot = False, points_num=100, x_interval = (0, 20), random=True):
"""
Generate data from Brownian motion.
The first point is always 0, and B(0) = 0 - the standard condition for Brownian motion.
Input:
--------------------------------
x_points: np.array
Previously generated X points
kernel_var: float
Variance of the Brownian motion
noise_var: float
Gaussian observation noise variance
plot: bool
Whether to plot generated data
(if x_points is None, the following parameters are used to generate
them. They are the same as in the 'generate_x_points' function)
points_num: int
x_interval: tuple (a,b)
random: bool
"""
if x_points is None:
x_points = generate_x_points(points_num, x_interval, random)
if x_points[0] != 0:
x_points[0] = 0
y_points = np.zeros( (len(x_points),) )
for i in range(1, len(x_points)):
noise = np.random.randn() * np.sqrt(kernel_var * (x_points[i] - x_points[i-1]))
y_points[i] = y_points[i-1] + noise
y_points += np.random.randn( len(x_points) ) * np.sqrt(noise_var)
return x_points, y_points
def generate_linear_plus_sin(x_points=None, tangent=2.0, add_term=1.0, noise_var=2.0,
sin_period=2.0, sin_ampl=10.0, plot = False,
points_num=100, x_interval = (0, 20), random=True):
"""
Generate the sum of linear trend and the sine function.
For parameters see 'generate_linear_data' and 'generate_sine_data'.
Comment: Gaussian noise is added only once (in the linear part).
"""
x_points, y_linear_points = generate_linear_data(x_points, tangent, add_term, noise_var,
False, points_num, x_interval, random)
x_points, y_sine_points = generate_sine_data(x_points, sin_period, sin_ampl, 0.0,
False, points_num, x_interval, random)
y_points = y_linear_points + y_sine_points
if plot:
pass
return x_points, y_points
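# Quick usage sketch for the generators above (parameter values are arbitrary):
# a linear trend plus a sine wave on regularly spaced points.
x, y = generate_linear_plus_sin(points_num=200, x_interval=(0, 10),
                                tangent=0.5, add_term=1.0, noise_var=0.5,
                                sin_period=2.0, sin_ampl=3.0, random=False)
assert x.shape == (200,) and y.shape == (200,)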
def generate_random_y_data(samples, dim, ts_no):
"""
Generate data:
Input:
------------------
samples - how many samples
dim - dimensionality of the data
ts_no - number of time series
Output:
--------------------------
Y: np.array((samples, dim, ts_no))
"""
Y = np.empty((samples, dim, ts_no));
for i in range(0,samples):
for j in range(0,ts_no):
sample = np.random.randn(dim)
Y[i,:,j] = sample
if (Y.shape[2] == 1): # ts_no = 1
Y.shape=(Y.shape[0], Y.shape[1])
return Y
class StateSpaceKernelsTests(np.testing.TestCase):
def setUp(self):
pass
def run_descr_model(self, measurements, A,Q,H,R, true_states=None,
mean_compare_decimal=8,
m_init=None, P_init=None, dA=None,dQ=None,
dH=None,dR=None, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True):
#import pdb; pdb.set_trace()
state_dim = 1 if not isinstance(A,np.ndarray) else A.shape[0]
ts_no = 1 if (len(measurements.shape) < 3) else measurements.shape[2]
grad_params_no = None if dA is None else dA.shape[2]
ss_setup.use_cython = use_cython
global ssm
if (ssm.cython_code_available) and (ssm.use_cython != use_cython):
reload(ssm)
grad_calc_params = None
if calc_grad_log_likelihood:
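# Derivatives of A, Q, H, R with respect to each model parameter; these drive
# the gradient of the marginal log-likelihood inside the Kalman filter.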
grad_calc_params = {}
grad_calc_params['dA'] = dA
grad_calc_params['dQ'] = dQ
grad_calc_params['dH'] = dH
grad_calc_params['dR'] = dR
(f_mean, f_var, loglikelhood, g_loglikelhood, \
dynamic_callables_smoother) = ssm.DescreteStateSpace.kalman_filter(A, Q, H, R, measurements, index=None,
m_init=m_init, P_init=P_init, p_kalman_filter_type = kalman_filter_type,
calc_log_likelihood=calc_log_likelihood,
calc_grad_log_likelihood=calc_grad_log_likelihood,
grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params)
f_mean_squeezed = np.squeeze(f_mean[1:,:]) # exclude initial value
f_var_squeezed = np.squeeze(f_var[1:,:]) # exclude initial value
if true_states is not None:
#print np.max(np.abs(f_mean_squeezed-true_states))
np.testing.assert_almost_equal(np.max(np.abs(f_mean_squeezed- \
true_states)), 0, decimal=mean_compare_decimal)
np.testing.assert_equal(f_mean.shape, (measurements.shape[0]+1,state_dim,ts_no) )
np.testing.assert_equal(f_var.shape, (measurements.shape[0]+1,state_dim,state_dim) )
(M_smooth, P_smooth) = ssm.DescreteStateSpace.rts_smoother(state_dim, dynamic_callables_smoother, f_mean,
f_var)
return f_mean, f_var
def run_continuous_model(self, F, L, Qc, p_H, p_R, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=None, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=0, grad_calc_params=None):
#import pdb; pdb.set_trace()
state_dim = 1 if not isinstance(F,np.ndarray) else F.shape[0]
ts_no = 1 if (len(Y_data.shape) < 3) else Y_data.shape[2]
ss_setup.use_cython = use_cython
global ssm
if (ssm.cython_code_available) and (ssm.use_cython != use_cython):
reload(ssm)
(f_mean, f_var, loglikelhood, g_loglikelhood, \
dynamic_callables_smoother) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F, L, Qc, p_H, p_R,
P_inf, X_data, Y_data, index=index,
m_init=m_init, P_init=P_init,
p_kalman_filter_type=kalman_filter_type,
calc_log_likelihood=calc_log_likelihood,
calc_grad_log_likelihood=calc_grad_log_likelihood,
grad_params_no=grad_params_no, grad_calc_params=grad_calc_params)
f_mean_squeezed = np.squeeze(f_mean[1:,:]) # exclude initial value
f_var_squeezed = np.squeeze(f_var[1:,:]) # exclude initial value
np.testing.assert_equal(f_mean.shape, (Y_data.shape[0]+1,state_dim,ts_no))
np.testing.assert_equal(f_var.shape, (Y_data.shape[0]+1,state_dim,state_dim))
(M_smooth, P_smooth) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, f_mean, \
f_var,dynamic_callables_smoother)
return f_mean, f_var
def test_discrete_ss_first(self,plot=False):
"""
Tests discrete State-Space model - first test.
"""
np.random.seed(235) # seed the random number generator
A = 1.0 # For cython code to run properly need float input
H = 1.0
Q = 1.0
R = 1.0
steps_num = 100
# generate data ->
true_states = np.zeros((steps_num,))
init_state = 0
measurements = np.zeros((steps_num,))
for s in range(0, steps_num):
if s == 0:
true_states[0] = init_state + np.sqrt(Q)*np.random.randn()
else:
true_states[s] = true_states[s-1] + np.sqrt(Q)*np.random.randn() # process noise Q
measurements[s] = true_states[s] + np.sqrt(R)*np.random.randn() # measurement noise R
# generate data <-
# discrete (grid-based) filter ->
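# Ground truth via brute-force discretization: the filtering density lives on
# a dense grid of states, is pushed through the transition density each step
# (a matrix-vector product) and re-weighted by the measurement likelihood;
# the grid argmax is later compared against the Kalman filter mean.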
m_init = 0; P_init = 1
d_num = 1000
state_discr = np.linspace(-10,10,d_num)
state_trans_matrix = np.empty((d_num,d_num))
for i in range(d_num):
state_trans_matrix[:,i] = norm.pdf(state_discr, loc=A*state_discr[i], scale=np.sqrt(Q))
m_prev = norm.pdf(state_discr, loc = m_init, scale = np.sqrt(P_init)); #m_prev / np.sum(m_prev)
m = np.zeros((d_num, steps_num))
i_mean = np.zeros((steps_num,))
for s in range(0, steps_num):
# Prediction step:
if (s==0):
m[:,s] = np.dot(state_trans_matrix, m_prev)
else:
m[:,s] = np.dot(state_trans_matrix, m[:,s-1])
# Update step:
#meas_ind = np.argmin(np.abs(state_discr - measurements[s])
y_vec = np.zeros( (d_num,))
for i in range(d_num):
y_vec[i] = norm.pdf(measurements[s], loc=H*state_discr[i], scale=np.sqrt(R))
norm_const = np.dot( y_vec, m[:,s] )
m[:,s] = y_vec * m[:,s] / norm_const
i_mean[s] = state_discr[ np.argmax(m[:,s]) ]
# discrete (grid-based) filter <-
(f_mean, f_var) = self.run_descr_model(measurements, A,Q,H,R, true_states=i_mean,
mean_compare_decimal=1,
m_init=m_init, P_init=P_init,use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=False)
(f_mean, f_var) = self.run_descr_model(measurements, A,Q,H,R, true_states=i_mean,
mean_compare_decimal=1,
m_init=m_init, P_init=P_init,use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=False)
(f_mean, f_var) = self.run_descr_model(measurements, A,Q,H,R, true_states=i_mean,
mean_compare_decimal=1,
m_init=m_init, P_init=P_init,use_cython=True,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=False)
if plot:
# plotting ->
plt.figure()
plt.plot( true_states, 'g.-',label='true states')
#plt.plot( measurements, 'b.-', label='measurements')
plt.plot( f_mean, 'r.-',label='Kalman filter estimates')
plt.plot( i_mean, 'k.-', label='Discretization')
plt.plot( f_mean + 2*np.sqrt(f_var), 'r.--')
plt.plot( f_mean - 2*np.sqrt(f_var), 'r.--')
plt.legend()
plt.show()
# plotting <-
return None
def test_discrete_ss_1D(self,plot=False):
"""
This function tests Kalman filter and smoothing when the state
dimensionality is one dimensional.
"""
np.random.seed(234) # seed the random number generator
# 1D ss model
state_dim = 1;
param_num = 2 # sigma_Q, sigma_R - parameters
measurement_dim = 1 # dimensionality of measurement
A = 1.0
Q = 2.0
dA= np.zeros((state_dim,state_dim,param_num))
dQ = np.zeros((state_dim,state_dim,param_num)); dQ[0,0,0] = 1.0
# measurement related parameters (subject to change) ->
H = np.ones((measurement_dim,state_dim ))
R = 0.5 * np.eye(measurement_dim)
dH = np.zeros((measurement_dim,state_dim,param_num))
dR = np.zeros((measurement_dim,measurement_dim,param_num)); dR[:,:,1] = np.eye(measurement_dim)
# measurement related parameters (subject to change) <-
# 1D measurement, 1 ts_no ->
data = generate_random_y_data(10, 1, 1) # np.array((samples, dim, ts_no))
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=True,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
if plot:
# plotting ->
plt.figure()
plt.plot( np.squeeze(data), 'g.-', label='measurements')
plt.plot( np.squeeze(f_mean[1:]), 'b.-',label='Kalman filter estimates')
plt.plot( np.squeeze(f_mean[1:]+H*f_var[1:]*H), 'b--')
plt.plot( np.squeeze(f_mean[1:]-H*f_var[1:]*H), 'b--')
# plt.plot( np.squeeze(M_sm[1:]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:]+H*P_sm[1:]*H), 'r--')
# plt.plot( np.squeeze(M_sm[1:]-H*P_sm[1:]*H), 'r--')
plt.legend()
plt.title("1D state-space, 1D measurements, 1 ts_no")
plt.show()
# plotting <-
# 1D measurement, 1 ts_no <-
# 1D measurement, 3 ts_no ->
data = generate_random_y_data(10, 1, 3) # np.array((samples, dim, ts_no))
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=True,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
#import pdb; pdb.set_trace()
if plot:
# plotting ->
plt.figure()
plt.plot( np.squeeze(data[:,:,1]), 'g.-', label='measurements')
plt.plot( np.squeeze(f_mean[1:,0,1]), 'b.-',label='Kalman filter estimates')
plt.plot( np.squeeze(f_mean[1:,0,1])+np.squeeze(H*f_var[1:]*H), 'b--')
plt.plot( np.squeeze(f_mean[1:,0,1])-np.squeeze(H*f_var[1:]*H), 'b--')
# plt.plot( np.squeeze(M_sm[1:,0,1]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,0,1])+H*np.squeeze(P_sm[1:])*H, 'r--')
# plt.plot( np.squeeze(M_sm[1:,0,1])-H*np.squeeze(P_sm[1:])*H, 'r--')
plt.legend()
plt.title("1D state-space, 1D measurements, 3 ts_no. 2-nd ts ploted")
plt.show()
# plotting <-
# 1D measurement, 3 ts_no <-
measurement_dim = 2 # dimensionality of measurement
H = np.ones((measurement_dim,state_dim))
R = 0.5 * np.eye(measurement_dim)
dH = np.zeros((measurement_dim,state_dim,param_num))
dR = np.zeros((measurement_dim,measurement_dim,param_num)); dR[:,:,1] = np.eye(measurement_dim)
# measurement related parameters (subject to change) <-
data = generate_random_y_data(10, 2, 3) # np.array((samples, dim, ts_no))
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
# (f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
# mean_compare_decimal=16,
# m_init=None, P_init=None, dA=dA,dQ=dQ,
# dH=dH,dR=dR, use_cython=True,
# kalman_filter_type='svd',
# calc_log_likelihood=True,
# calc_grad_log_likelihood=True)
if plot:
# plotting ->
plt.figure()
plt.plot( np.squeeze(data[:,0,1]), 'g.-', label='measurements')
plt.plot( np.squeeze(f_mean[1:,0,1]), 'b.-',label='Kalman filter estimates')
plt.plot( np.squeeze(f_mean[1:,0,1])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot( np.squeeze(f_mean[1:,0,1])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,0,1]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,0,1])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,0,1])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("1D state-space, 2D measurements, 3 ts_no. 1-st measurement, 2-nd ts ploted")
plt.show()
# plotting <-
# 2D measurement, 3 ts_no <-
def test_discrete_ss_2D(self,plot=False):
"""
This function tests Kalman filter and smoothing when the state
dimensionality is two dimensional.
"""
np.random.seed(234) # seed the random number generator
# 2D ss model
state_dim = 2;
param_num = 3 # parameters: sigma_Q, sigma_R, and one parameter in A
measurement_dim = 1 # dimensionality of measurement
A = np.eye(state_dim); A[0,0] = 0.5
Q = np.ones((state_dim,state_dim));
dA = np.zeros((state_dim,state_dim,param_num)); dA[1,1,2] = 1
dQ = np.zeros((state_dim,state_dim,param_num)); dQ[:,:,1] = np.eye(measurement_dim)
# measurement related parameters (subject to change) ->
H = np.ones((measurement_dim,state_dim))
R = 0.5 * np.eye(measurement_dim)
dH = np.zeros((measurement_dim,state_dim,param_num))
dR = np.zeros((measurement_dim,measurement_dim,param_num)); dR[:,:,1] = np.eye(measurement_dim)
# measurement related parameters (subject to change) <-
# 1D measurement, 1 ts_no ->
data = generate_random_y_data(10, 1, 1) # np.array((samples, dim, ts_no))
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=True,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
if plot:
# plotting ->
plt.figure()
plt.plot( np.squeeze(data), 'g.-', label='measurements')
plt.plot( np.squeeze(f_mean[1:,0]), 'b.-',label='Kalman filter estimates')
plt.plot( np.squeeze(f_mean[1:,0])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot( np.squeeze(f_mean[1:,0])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,0]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,0])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,0])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("2D state-space, 1D measurements, 1 ts_no")
plt.show()
# plotting <-
# 1D measurement, 1 ts_no <-
# 1D measurement, 3 ts_no ->
data = generate_random_y_data(10, 1, 3) # np.array((samples, dim, ts_no))
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=True,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
if plot:
# plotting ->
plt.figure()
plt.plot( np.squeeze(data[:,:,1]), 'g.-', label='measurements')
plt.plot( np.squeeze(f_mean[1:,0,1]), 'b.-',label='Kalman filter estimates')
plt.plot( np.squeeze(f_mean[1:,0,1])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot( np.squeeze(f_mean[1:,0,1])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,0,1]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,0,1])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,0,1])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("2D state-space, 1D measurements, 3 ts_no. 2-nd ts ploted")
plt.show()
# plotting <-
# 1D measurement, 3 ts_no <-
# 2D measurement, 3 ts_no ->
# measurement related parameters (subject to change) ->
measurement_dim = 2 # dimensionality of measurement
H = np.ones((measurement_dim,state_dim))
R = 0.5 * np.eye(measurement_dim)
dH = np.zeros((measurement_dim,state_dim,param_num))
dR = np.zeros((measurement_dim,measurement_dim,param_num)); dR[:,:,1] = np.eye(measurement_dim)
# measurement related parameters (subject to change) <-
data = generate_random_y_data(10, 2, 3) # np.array((samples, dim, ts_no))
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
(f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
mean_compare_decimal=16,
m_init=None, P_init=None, dA=dA,dQ=dQ,
dH=dH,dR=dR, use_cython=False,
kalman_filter_type='svd',
calc_log_likelihood=True,
calc_grad_log_likelihood=True)
# (f_mean, f_var) = self.run_descr_model(data, A,Q,H,R, true_states=None,
# mean_compare_decimal=16,
# m_init=None, P_init=None, dA=dA,dQ=dQ,
# dH=dH,dR=dR, use_cython=True,
# kalman_filter_type='svd',
# calc_log_likelihood=True,
# calc_grad_log_likelihood=True)
if plot:
# plotting ->
plt.figure()
plt.plot( np.squeeze(data[:,0,1]), 'g.-', label='measurements')
plt.plot( np.squeeze(f_mean[1:,0,1]), 'b.-',label='Kalman filter estimates')
plt.plot( np.squeeze(f_mean[1:,0,1])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot( np.squeeze(f_mean[1:,0,1])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,0,1]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,0,1])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,0,1])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("2D state-space, 2D measurements, 3 ts_no. 1-st measurement, 2-nd ts ploted")
plt.show()
# plotting <-
# 2D measurement, 3 ts_no <-
def test_continuos_ss(self,plot=False):
"""
This function tests the continuous state-space model.
"""
# 1D measurements, 1 ts_no ->
measurement_dim = 1 # dimensionality of measurement
X_data = generate_x_points(points_num=10, x_interval = (0, 20), random=True)
Y_data = generate_random_y_data(10, 1, 1) # np.array((samples, dim, ts_no))
try:
import GPy
except ImportError as e:
return None
periodic_kernel = GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
(F,L,Qc,H,P_inf,P0, dFt,dQct,dP_inft,dP0) = periodic_kernel.sde()
state_dim = dFt.shape[0];
param_num = dFt.shape[2]
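# Pack the kernel-parameter derivatives returned by sde() for the gradient
# of the marginal log-likelihood (dH is None for this kernel).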
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inft
grad_calc_params['dF'] = dFt
grad_calc_params['dQc'] = dQct
grad_calc_params['dR'] = np.zeros((measurement_dim,measurement_dim,param_num))
grad_calc_params['dP_init'] = dP0
# dH matrix is None
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, 1.5, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, 1.5, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=False,
kalman_filter_type='rbc',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, 1.5, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=True,
kalman_filter_type='rbc',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
if plot:
# plotting ->
plt.figure()
plt.plot( X_data, np.squeeze(Y_data[:,0]), 'g.-', label='measurements')
plt.plot( X_data, np.squeeze(f_mean[1:,15]), 'b.-',label='Kalman filter estimates')
plt.plot( X_data, np.squeeze(f_mean[1:,15])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot( X_data, np.squeeze(f_mean[1:,15])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,15]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,15])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,15])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("1D measurements, 1 ts_no")
plt.show()
# plotting <-
# 1D measurements, 1 ts_no <-
# 1D measurements, 3 ts_no ->
measurement_dim = 1 # dimensionality of measurement
X_data = generate_x_points(points_num=10, x_interval = (0, 20), random=True)
Y_data = generate_random_y_data(10, 1, 3) # np.array((samples, dim, ts_no))
periodic_kernel = GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
(F,L,Qc,H,P_inf,P0, dFt,dQct,dP_inft,dP0) = periodic_kernel.sde()
state_dim = dFt.shape[0];
param_num = dFt.shape[2]
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inft
grad_calc_params['dF'] = dFt
grad_calc_params['dQc'] = dQct
grad_calc_params['dR'] = np.zeros((measurement_dim,measurement_dim,param_num))
grad_calc_params['dP_init'] = dP0
# dH matrix is None
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, 1.5, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, 1.5, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=False,
kalman_filter_type='rbc',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, 1.5, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=True,
kalman_filter_type='rbc',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
if plot:
# plotting ->
plt.figure()
plt.plot(X_data, np.squeeze(Y_data[:,0,1]), 'g.-', label='measurements')
plt.plot(X_data, np.squeeze(f_mean[1:,15,1]), 'b.-',label='Kalman filter estimates')
plt.plot(X_data, np.squeeze(f_mean[1:,15,1])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot(X_data, np.squeeze(f_mean[1:,15,1])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,15,1]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,15,1])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,15,1])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("1D measurements, 3 ts_no. 2-nd ts ploted")
plt.show()
# plotting <-
# 1D measurements, 3 ts_no <-
# 2D measurements, 3 ts_no ->
measurement_dim = 2 # dimensionality of measurement
X_data = generate_x_points(points_num=10, x_interval = (0, 20), random=True)
Y_data = generate_random_y_data(10, 2, 3) # np.array((samples, dim, ts_no))
periodic_kernel = GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
(F,L,Qc,H,P_inf,P0, dFt,dQct,dP_inft,dP0) = periodic_kernel.sde()
H = np.vstack((H,H)) # make 2D measurements
R = 1.5 * np.eye(measurement_dim)
state_dim = dFt.shape[0];
param_num = dFt.shape[2]
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inft
grad_calc_params['dF'] = dFt
grad_calc_params['dQc'] = dQct
grad_calc_params['dR'] = np.zeros((measurement_dim,measurement_dim,param_num))
grad_calc_params['dP_init'] = dP0
# dH matrix is None
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, R, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=False,
kalman_filter_type='regular',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
(f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, R, P_inf, X_data, Y_data, index = None,
m_init=None, P_init=P0, use_cython=False,
kalman_filter_type='rbc',
calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=param_num, grad_calc_params=grad_calc_params)
# (f_mean, f_var) = self.run_continuous_model(F, L, Qc, H, R, P_inf, X_data, Y_data, index = None,
# m_init=None, P_init=P0, use_cython=True,
# kalman_filter_type='rbc',
# calc_log_likelihood=True,
# calc_grad_log_likelihood=True,
# grad_params_no=param_num, grad_calc_params=grad_calc_params)
if plot:
# plotting ->
plt.figure()
plt.plot(X_data, np.squeeze(Y_data[:,0,1]), 'g.-', label='measurements')
plt.plot(X_data, np.squeeze(f_mean[1:,15,1]), 'b.-',label='Kalman filter estimates')
plt.plot(X_data, np.squeeze(f_mean[1:,15,1])+np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
plt.plot(X_data, np.squeeze(f_mean[1:,15,1])-np.einsum('ij,ajk,kl', H, f_var[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_sm[1:,15,1]), 'r.-',label='Smoother Estimates')
# plt.plot( np.squeeze(M_sm[1:,15,1])+np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
# plt.plot( np.squeeze(M_sm[1:,15,1])-np.einsum('ij,ajk,kl', H, P_sm[1:], H.T)[:,0,0], 'r--')
plt.legend()
plt.title("1D measurements, 3 ts_no. 2-nd ts ploted")
plt.show()
# plotting <-
# 2D measurements, 3 ts_no <-
#def test_EM_gradient(plot=False):
# """
# Test EM gradient calculation. This method works (the formulas are such)
# that it works only for time invariant matrices A, Q, H, R. For the continuous
# model it means that time intervals are the same.
# """
#
# np.random.seed(234) # seed the random number generator
#
# # 1D measurements, 1 ts_no ->
# measurement_dim = 1 # dimensionality of measurement
#
# x_data = generate_x_points(points_num=10, x_interval = (0, 20), random=False)
# data = generate_random_y_data(10, 1, 1) # np.array((samples, dim, ts_no))
#
# import GPy
# #periodic_kernel = GPy.kern.sde_Matern32(1,active_dims=[0,])
# periodic_kernel = GPy.kern.sde_StdPeriodic(1,active_dims=[0,])
# (F,L,Qc,H,P_inf,P0, dFt,dQct,dP_inft,dP0t) = periodic_kernel.sde()
#
# state_dim = dFt.shape[0];
# param_num = dFt.shape[2]
#
# grad_calc_params = {}
# grad_calc_params['dP_inf'] = dP_inft
# grad_calc_params['dF'] = dFt
# grad_calc_params['dQc'] = dQct
# grad_calc_params['dR'] = np.zeros((measurement_dim,measurement_dim,param_num))
# grad_calc_params['dP_init'] = dP0t
# # dH matrix is None
#
#
# #(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
# # Use the Kalman filter to evaluate the likelihood
#
# #import pdb; pdb.set_trace()
# (M_kf, P_kf, log_likelihood,
# grad_log_likelihood,SmootherMatrObject) = ss.ContDescrStateSpace.cont_discr_kalman_filter(F,
# L, Qc, H, 1.5, P_inf, x_data, data, m_init=None,
# P_init=P0, calc_log_likelihood=True,
# calc_grad_log_likelihood=True,
# grad_params_no=param_num,
# grad_calc_params=grad_calc_params)
#
# if plot:
# # plotting ->
# plt.figure()
# plt.plot( np.squeeze(data[:,0]), 'g.-', label='measurements')
# plt.plot( np.squeeze(M_kf[1:,15]), 'b.-',label='Kalman filter estimates')
# plt.plot( np.squeeze(M_kf[1:,15])+np.einsum('ij,ajk,kl', H, P_kf[1:], H.T)[:,0,0], 'b--')
# plt.plot( np.squeeze(M_kf[1:,15])-np.einsum('ij,ajk,kl', H, P_kf[1:], H.T)[:,0,0], 'b--')
# plt.title("1D measurements, 1 ts_no")
# plt.show()
# # plotting <-
# # 1D measurements, 1 ts_no <-
if __name__ == '__main__':
print("Running state-space inference tests...")
unittest.main()
#tt = StateSpaceKernelsTests('test_discrete_ss_first')
#res = tt.test_discrete_ss_first(plot=True)
#res = tt.test_discrete_ss_1D(plot=True)
#res = tt.test_discrete_ss_2D(plot=False)
#res = tt.test_continuos_ss(plot=True)

View file

@ -1,5 +1,5 @@
#===============================================================================
# Copyright (c) 2016, Max Zwiessele, Alan Saul
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -47,3 +47,52 @@ class TestDebug(unittest.TestCase):
array = np.random.normal(0, 1, (25,25)) array = np.random.normal(0, 1, (25,25))
self.assertTrue(checkFullRank(tdot(array))) self.assertTrue(checkFullRank(tdot(array)))
def test_fixed_inputs_median(self):
""" test fixed_inputs convenience function """
from GPy.plotting.matplot_dep.util import fixed_inputs
import GPy
X = np.random.randn(10, 3)
Y = np.sin(X) + np.random.randn(10, 3)*1e-3
m = GPy.models.GPRegression(X, Y)
fixed = fixed_inputs(m, [1], fix_routine='median', as_list=True, X_all=False)
self.assertTrue((0, np.median(X[:,0])) in fixed)
self.assertTrue((2, np.median(X[:,2])) in fixed)
self.assertTrue(len([t for t in fixed if t[0] == 1]) == 0) # Unfixed input should not be in fixed
def test_fixed_inputs_mean(self):
from GPy.plotting.matplot_dep.util import fixed_inputs
import GPy
X = np.random.randn(10, 3)
Y = np.sin(X) + np.random.randn(10, 3)*1e-3
m = GPy.models.GPRegression(X, Y)
fixed = fixed_inputs(m, [1], fix_routine='mean', as_list=True, X_all=False)
self.assertTrue((0, np.mean(X[:,0])) in fixed)
self.assertTrue((2, np.mean(X[:,2])) in fixed)
self.assertTrue(len([t for t in fixed if t[0] == 1]) == 0) # Unfixed input should not be in fixed
def test_fixed_inputs_zero(self):
from GPy.plotting.matplot_dep.util import fixed_inputs
import GPy
X = np.random.randn(10, 3)
Y = np.sin(X) + np.random.randn(10, 3)*1e-3
m = GPy.models.GPRegression(X, Y)
fixed = fixed_inputs(m, [1], fix_routine='zero', as_list=True, X_all=False)
self.assertTrue((0, 0.0) in fixed)
self.assertTrue((2, 0.0) in fixed)
self.assertTrue(len([t for t in fixed if t[0] == 1]) == 0) # Unfixed input should not be in fixed
def test_fixed_inputs_uncertain(self):
from GPy.plotting.matplot_dep.util import fixed_inputs
import GPy
from GPy.core.parameterization.variational import NormalPosterior
X_mu = np.random.randn(10, 3)
X_var = np.random.randn(10, 3)
X = NormalPosterior(X_mu, X_var)
Y = np.sin(X_mu) + np.random.randn(10, 3)*1e-3
m = GPy.models.BayesianGPLVM(Y, X=X_mu, X_variance=X_var, input_dim=3)
fixed = fixed_inputs(m, [1], fix_routine='median', as_list=True, X_all=False)
self.assertTrue((0, np.median(X.mean.values[:,0])) in fixed)
self.assertTrue((2, np.median(X.mean.values[:,2])) in fixed)
self.assertTrue(len([t for t in fixed if t[0] == 1]) == 0) # Unfixed input should not be in fixed

View file

@ -94,6 +94,10 @@ ext_mods = [Extension(name='GPy.kern.src.stationary_cython',
Extension(name='GPy.kern.src.coregionalize_cython',
sources=['GPy/kern/src/coregionalize_cython.c'],
include_dirs=[np.get_include(),'.'],
extra_compile_args=compile_flags),
Extension(name='GPy.models.state_space_cython',
sources=['GPy/models/state_space_cython.c'],
include_dirs=[np.get_include(),'.'],
extra_compile_args=compile_flags)]
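# A quick smoke test (a sketch, assuming the extensions above were built in
# place with `python setup.py build_ext --inplace`): the state-space modules
# report whether the compiled backend was picked up.
import GPy.models.state_space_setup as ss_setup
ss_setup.use_cython = True
import GPy.models.state_space_main as ssm
print(ssm.cython_code_available)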
setup(name = 'GPy',