# -*- coding: utf-8 -*-
"""
Created on Thu Feb 19 12:32:32 2015
"""

import collections.abc  # for checking whether a variable is iterable
import types            # for checking whether a variable is a function

import numpy as np
import scipy as sp
import scipy.linalg as linalg


class DescreteStateSpace(object):
    """
    This class implements state-space inference for linear and non-linear
    state-space models.

    Linear models are:
        x_{k} = A_{k} * x_{k-1} + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
        y_{k} = H_{k} * x_{k}   + r_{k};    r_{k}   ~ N(0, R_{k})

    Nonlinear:
        x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
        y_{k} = f_h(k, x_{k},   H_{k}) + r_{k};    r_{k}   ~ N(0, R_{k})

    Here f_a and f_h are some functions of k (iteration number), x_{k-1} or
    x_{k} (state value on a certain iteration), and A_{k} and H_{k} - the
    Jacobian matrices of f_a and f_h respectively. In the linear case they
    are exactly A_{k} and H_{k}.

    Currently two nonlinear Gaussian filter algorithms are implemented:
    the Extended Kalman Filter (EKF) and the Statistically Linearized Filter
    (SLF), whose implementations are very similar.
    """

    @staticmethod
    def reshape_input_data(shape):
        """
        Return the column-wise shape for an input shape.

        Input:
        --------------
        shape: tuple
            Shape of an input array.

        Output:
        --------------
        new_shape: tuple
            Shape of the array so that it is always a column.
        old_shape: tuple or None
            If the shape has been modified, the old shape; otherwise None.
        """

        if len(shape) > 2:
            raise ValueError("Input array is not supposed to be 3 or more dimensional")
        elif len(shape) == 1:
            return ((shape[0], 1), shape)
        else:  # len(shape) == 2
            return ((shape[1], 1), shape) if (shape[0] == 1) else (shape, None)
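
    # Example behaviour of reshape_input_data (illustrative only):
    #   reshape_input_data((5,))   -> ((5, 1), (5,))     # 1D -> column
    #   reshape_input_data((1, 7)) -> ((7, 1), (1, 7))   # row -> column
    #   reshape_input_data((5, 2)) -> ((5, 2), None)     # already column-wise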

    # def __init__(self, X, Y, kernel):
    #     """
    #     Inputs:
    #     --------------
    #
    #     X: double array (N_samples, 1)
    #         Time points of the observations
    #     Y: double array (N_samples, N_dim)
    #         Observations
    #     """
    #
    #     # If X is a 1D array make it a column (x.shape[0], 1)
    #     X.shape, old_X_shape = DescreteStateSpace.reshape_input_data(X.shape)
    #     Y.shape, old_Y_shape = DescreteStateSpace.reshape_input_data(Y.shape)
    #
    #     self.num_data, input_dim = X.shape
    #     assert input_dim == 1, "State space methods for time only"
    #     num_data_Y, self.output_dim = Y.shape
    #     assert num_data_Y == self.num_data, "Number of samples in X and Y don't match"
    #     assert self.output_dim == 1, "State space methods for single outputs only"
    #
    #     # Make sure the observations are ordered in time
    #     sort_index = np.argsort(X[:, 0])
    #     self.X = X[sort_index]
    #     self.Y = Y[sort_index]
    #
    #     # Noise variance ??? - TODO: figure out
    #     self.sigma2 = 1
    #
    #     self.kern = kernel
    #
    #     # Make sure all parameters are positive
    #     #self.ensure_default_constraints()
    #
    #     # Assert that the kernel is supported
    #     #assert self.kern.sde() is not False, "This kernel is not supported for state space estimation"

    @classmethod
    def kalman_filter(cls, p_A, p_Q, p_H, p_R, Y, index=None, m_init=None,
                      P_init=None, calc_log_likelihood=False,
                      calc_grad_log_likelihood=False, grad_params_no=None,
                      grad_calc_params=None):
        """
        This function implements the basic (raw) Kalman Filter algorithm.
        These notations are assumed:
            x_{k} = A_{k} * x_{k-1} + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
            y_{k} = H_{k} * x_{k}   + r_{k};    r_{k}   ~ N(0, R_{k})

        Returns the estimated filter distributions x_{k} ~ N(m_{k}, P(k)).

        Currently, H_{k} and R_{k} are not supposed to change over time,
        i.e. they are assumed to be constant. Later they may be made time
        dependent by analogy with A_{k} and Q_{k-1}.

        Input:
        -----------------

        p_A: scalar, square matrix, 3D array
            A_{k} in the model. If a matrix, then A_{k} = A is constant.
            If a 3D array, then A_{k} = p_A[:,:, index[k]].

        p_Q: scalar, square symmetric matrix, 3D array
            Q_{k-1} in the model. If a matrix, then Q_{k-1} = Q is constant.
            If a 3D array, then Q_{k-1} = p_Q[:,:, index[k]].

        p_H: scalar, matrix
            H_{k} = p_H. Assumed to be constant.

        p_R: scalar, square symmetric matrix
            R_{k} = p_R. Assumed to be constant.

        Y: matrix or vector
            Data. If Y is a matrix then samples are along the 0-th dimension
            and features along the 1-st. May have missing values.

        index: vector
            Which indices (on the 3-rd dimension) of the arrays p_A and p_Q
            to use on every time step. If this parameter is None then it is
            assumed that p_A and p_Q are matrices and indices are not needed.

        m_init: vector
            Initial distribution mean. If None it is assumed to be zero.

        P_init: square symmetric matrix or scalar
            Initial covariance of the states. If the parameter is a scalar
            then it is assumed that the initial covariance matrix is the unit
            matrix multiplied by this scalar. If None the unit matrix is used
            instead.

        calc_log_likelihood: boolean
            Whether to calculate the marginal likelihood of the state-space
            model.

        calc_grad_log_likelihood: boolean
            Whether to calculate the gradient of the marginal likelihood of
            the state-space model. If true then the next parameters must
            provide the extra information for the gradient calculation.

        grad_params_no: int
            Number of parameters with respect to which gradients are computed.

        grad_calc_params: dict
            Dictionary with derivative matrices; the keys 'dA', 'dQ', 'dH',
            'dR', 'dm_init' and 'dP_init' are used.

        Output:
        --------------

        M: (state_dim, no_steps) matrix
            Filter estimates of the state means.

        P: (state_dim, state_dim, no_steps) 3D array
            Filter estimates of the state covariances.

        log_likelihood: double
            If the parameter calc_log_likelihood was set to true, the
            logarithm of the marginal likelihood of the state-space model.
            If the parameter was false, None is returned.
        """

        # Parameter checking ->
        # index
        p_A = np.atleast_1d(p_A)
        p_Q = np.atleast_1d(p_Q)
        if ((len(p_A.shape) >= 3) and (p_A.shape[2] != 1)) or \
           ((len(p_Q.shape) >= 3) and (p_Q.shape[2] != 1)):
            if index is None:
                raise ValueError("A and Q can not change over time if indices are not given")
            else:
                A_or_Q_change_over_time = True
        else:
            index = np.zeros((Y.shape[0],), dtype=int)
            A_or_Q_change_over_time = False

        # p_A
        (p_A, old_A_shape) = cls._check_A_matrix(p_A)

        # p_Q
        (p_Q, old_Q_shape) = cls._check_Q_matrix(p_Q)

        # p_H
        p_H = np.atleast_1d(p_H)
        (p_H, old_H_shape) = cls._check_H_matrix(p_H)

        # p_R
        p_R = np.atleast_1d(p_R)
        (p_R, old_R_shape) = cls._check_R_matrix(p_R)

        state_dim = p_A.shape[0]
        # m_init
        if m_init is None:
            m_init = np.zeros((state_dim, 1))
        else:
            m_init = np.atleast_2d(m_init).T

        # P_init
        if P_init is None:
            P_init = np.eye(state_dim)
        elif not isinstance(P_init, collections.abc.Iterable):  # scalar
            P_init = P_init * np.eye(state_dim)

        # Y
        Y.shape, old_Y_shape = cls.reshape_input_data(Y.shape)
        if Y.shape[0] != len(index):
            raise ValueError("Number of measurements must be equal to the number of A_{k} and Q_{k}")

        measurement_dim = Y.shape[1]
        # Functions to pass to the Kalman filter algorithm.
        # Parameters:
        #   k - number of the Kalman filter iteration
        #   m - vector for calculating matrices. Required for EKF. Not used here.
        fa = lambda k, m, A: np.dot(A, m)
        f_A = lambda k, m, P: p_A[:, :, int(index[k])]
        f_Q = lambda k: p_Q[:, :, int(index[k])]
        fh = lambda k, m, H: np.dot(H, m)
        f_H = lambda k, m, P: p_H
        f_R = lambda k: p_R

        grad_calc_params_pass_further = None
        if calc_grad_log_likelihood:
            if A_or_Q_change_over_time:
                raise ValueError("When computing the likelihood gradient A and Q can not change over time.")

            f_dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which='dA')
            f_dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which='dQ')
            f_dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which='dH')
            f_dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which='dR')

            dm_init = grad_calc_params.get('dm_init')
            if dm_init is None:
                dm_init = np.zeros((state_dim, grad_params_no))

            dP_init = grad_calc_params.get('dP_init')
            if dP_init is None:
                dP_init = np.zeros((state_dim, state_dim, grad_params_no))

            grad_calc_params_pass_further = {}
            grad_calc_params_pass_further['f_dA'] = f_dA
            grad_calc_params_pass_further['f_dQ'] = f_dQ
            grad_calc_params_pass_further['f_dH'] = f_dH
            grad_calc_params_pass_further['f_dR'] = f_dR
            grad_calc_params_pass_further['dm_init'] = dm_init
            grad_calc_params_pass_further['dP_init'] = dP_init

        (M, P, log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(
            state_dim, fa, f_A, f_Q, fh, f_H, f_R, Y, m_init,
            P_init, calc_log_likelihood=calc_log_likelihood,
            calc_grad_log_likelihood=calc_grad_log_likelihood,
            grad_calc_params=grad_calc_params_pass_further)

        # Restore the shapes so that the input parameters are unchanged
        if old_A_shape is not None:
            p_A.shape = old_A_shape

        if old_Q_shape is not None:
            p_Q.shape = old_Q_shape

        if old_H_shape is not None:
            p_H.shape = old_H_shape

        if old_R_shape is not None:
            p_R.shape = old_R_shape

        if old_Y_shape is not None:
            Y.shape = old_Y_shape

        # Return values
        return (M, P, log_likelihood, grad_log_likelihood)
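
    # A minimal usage sketch for kalman_filter (commented out; the toy
    # numbers below are illustrative assumptions, not part of the library).
    # A scalar random walk x_k = x_{k-1} + q, observed directly with noise:
    #
    #   Y = np.cumsum(np.random.randn(100)).reshape(-1, 1)
    #   M, P, log_lik, _ = DescreteStateSpace.kalman_filter(
    #       p_A=np.array(((1.0,),)), p_Q=np.array(((0.1,),)),
    #       p_H=np.array(((1.0,),)), p_R=np.array(((0.25,),)),
    #       Y=Y, calc_log_likelihood=True)
    #   # M has shape (1, 101): filtered means with the initial value included.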

    @classmethod
    def extended_kalman_filter(cls, p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None,
                               P_init=None, calc_log_likelihood=False):
        """
        Extended Kalman Filter.

        Input:
        -----------------

        p_state_dim: integer
            Dimensionality of the state.

        p_a: if None - the function from the linear model is assumed, i.e. no
            nonlinearity in the dynamics.

            Otherwise, function (k, x_{k-1}, A_{k}): dynamic function.
                k: iteration number
                x_{k-1}: previous state
                A_{k}: Jacobian matrix of f_a. In the linear case it is exactly A_{k}.

        p_f_A: matrix - in this case a function which returns this matrix is
            assumed. Look at this parameter's description in the kalman_filter
            function.

            Otherwise, function (k, m, P) which returns the Jacobian of the
            dynamic function; it is passed into p_a.
                k: iteration number
                m: point where the Jacobian is evaluated
                P: parameter for the Jacobian, usually the covariance matrix.

        p_f_Q: matrix - in this case a function which returns this matrix is
            assumed. Look at this parameter's description in the kalman_filter
            function.

            Otherwise, function (k) which returns the noise matrix of the
            dynamic model on iteration k.
                k: iteration number.

        p_h: if None - the function from the linear measurement model is
            assumed, i.e. no nonlinearity in the measurement.

            Otherwise, function (k, x_{k}, H_{k}): measurement function.
                k: iteration number
                x_{k}: current state
                H_{k}: Jacobian matrix of f_h. In the linear case it is exactly H_{k}.

        p_f_H: matrix - in this case a function which returns this matrix is
            assumed.

            Otherwise, function (k, m, P) which returns the Jacobian of the
            measurement function; it is passed into p_h.
                k: iteration number
                m: point where the Jacobian is evaluated
                P: parameter for the Jacobian, usually the covariance matrix.

        p_f_R: matrix - in this case a function which returns this matrix is
            assumed.

            Otherwise, function (k) which returns the noise matrix of the
            measurement equation on iteration k.
                k: iteration number.

        Y: matrix or vector
            Data. If Y is a matrix then samples are along the 0-th dimension
            and features along the 1-st. May have missing values.

        m_init: vector
            Initial distribution mean. If None it is assumed to be zero.

        P_init: square symmetric matrix or scalar
            Initial covariance of the states. If the parameter is a scalar
            then it is assumed that the initial covariance matrix is the unit
            matrix multiplied by this scalar. If None the unit matrix is used
            instead.

        calc_log_likelihood: boolean
            Whether to calculate the marginal likelihood of the state-space
            model.
        """

        # Y
        Y.shape, old_Y_shape = cls.reshape_input_data(Y.shape)

        # m_init
        if m_init is None:
            m_init = np.zeros((p_state_dim, 1))
        else:
            m_init = np.atleast_2d(m_init).T

        # P_init
        if P_init is None:
            P_init = np.eye(p_state_dim)
        elif not isinstance(P_init, collections.abc.Iterable):  # scalar
            P_init = P_init * np.eye(p_state_dim)

        if p_a is None:
            p_a = lambda k, m, A: np.dot(A, m)

        old_A_shape = None
        if not isinstance(p_f_A, types.FunctionType):  # not a function but an array
            p_f_A = np.atleast_1d(p_f_A)
            (p_A, old_A_shape) = cls._check_A_matrix(p_f_A)

            p_f_A = lambda k, m, P: p_A[:, :, 0]  # make function
        else:
            if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]:
                raise ValueError("p_f_A function returns a matrix of wrong size")

        old_Q_shape = None
        if not isinstance(p_f_Q, types.FunctionType):  # not a function but an array
            p_f_Q = np.atleast_1d(p_f_Q)
            (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q)

            p_f_Q = lambda k: p_Q[:, :, 0]  # make function
        else:
            if p_f_Q(1).shape[0] != m_init.shape[0]:
                raise ValueError("p_f_Q function returns a matrix of wrong size")

        if p_h is None:
            p_h = lambda k, m, H: np.dot(H, m)

        old_H_shape = None
        if not isinstance(p_f_H, types.FunctionType):  # not a function but an array
            p_f_H = np.atleast_1d(p_f_H)
            (p_H, old_H_shape) = cls._check_H_matrix(p_f_H)

            p_f_H = lambda k, m, P: p_H  # make function
        else:
            if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]:
                raise ValueError("p_f_H function returns a matrix of wrong size")

        old_R_shape = None
        if not isinstance(p_f_R, types.FunctionType):  # not a function but an array
            p_f_R = np.atleast_1d(p_f_R)
            (p_R, old_R_shape) = cls._check_R_matrix(p_f_R)

            p_f_R = lambda k: p_R  # make function
        else:
            if p_f_R(1).shape[0] != Y.shape[1]:
                raise ValueError("p_f_R function returns a matrix of wrong size")

        (M, P, log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(
            p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init,
            P_init, calc_log_likelihood,
            calc_grad_log_likelihood=False, grad_calc_params=None)

        if old_Y_shape is not None:
            Y.shape = old_Y_shape

        if old_A_shape is not None:
            p_A.shape = old_A_shape

        if old_Q_shape is not None:
            p_Q.shape = old_Q_shape

        if old_H_shape is not None:
            p_H.shape = old_H_shape

        if old_R_shape is not None:
            p_R.shape = old_R_shape

        return (M, P)
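
    # A hedged usage sketch (commented out; the toy nonlinear model below is
    # an illustrative assumption). Scalar dynamics x_k = sin(x_{k-1}) with a
    # linear measurement:
    #
    #   Y = np.random.randn(50, 1)
    #   f_a = lambda k, x, A: np.sin(x)                # dynamic function
    #   f_A = lambda k, m, P: np.cos(m).reshape(1, 1)  # its Jacobian
    #   M, P = DescreteStateSpace.extended_kalman_filter(
    #       1, f_a, f_A, np.array(((0.1,),)), None,
    #       np.array(((1.0,),)), np.array(((0.5,),)), Y)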

    @classmethod
    def _kalman_algorithm_raw(cls, state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init,
                              P_init, calc_log_likelihood=False,
                              calc_grad_log_likelihood=False, grad_calc_params=None):
        """
        General nonlinear filtering algorithm for inference in the state-space
        model:

            x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
            y_{k} = f_h(k, x_{k},   H_{k}) + r_{k};    r_{k}   ~ N(0, R_{k})

        Returns the estimated filter distributions x_{k} ~ N(m_{k}, P(k)).

        Input:
        -----------------
        p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
            k: iteration number
            x_{k-1}: previous state
            A_{k}: Jacobian matrix of f_a. In the linear case it is exactly A_{k}.

        p_f_A: function (k, m, P) which returns the Jacobian of the dynamic
            function; it is passed into p_a.
            k: iteration number
            m: point where the Jacobian is evaluated
            P: parameter for the Jacobian, usually the covariance matrix.

        p_f_Q: function (k). Returns the noise matrix of the dynamic model on
            iteration k.
            k: iteration number.

        p_h: function (k, x_{k}, H_{k}). Measurement function.
            k: iteration number
            x_{k}: current state
            H_{k}: Jacobian matrix of f_h. In the linear case it is exactly H_{k}.

        p_f_H: function (k, m, P) which returns the Jacobian of the measurement
            function; it is passed into p_h.
            k: iteration number
            m: point where the Jacobian is evaluated
            P: parameter for the Jacobian, usually the covariance matrix.

        p_f_R: function (k). Returns the noise matrix of the measurement
            equation on iteration k.
            k: iteration number.

        Y: matrix or vector
            Data. If Y is a matrix then samples are along the 0-th dimension
            and features along the 1-st. May have missing values.

        m_init: vector
            Initial distribution mean. Must not be None.

        P_init: matrix or scalar
            Initial covariance of the states. Must not be None.

        calc_log_likelihood: boolean
            Whether to calculate the marginal likelihood of the state-space
            model.

        calc_grad_log_likelihood: boolean
            Whether to calculate the gradient of the marginal likelihood of
            the state-space model. If true then the next parameter must
            provide the extra parameters for the gradient calculation.

        Output:
        --------------

        M: (state_dim, no_steps) matrix
            Filter estimates of the state means.

        P: (state_dim, state_dim, no_steps) 3D array
            Filter estimates of the state covariances.

        log_likelihood: double
            If the parameter calc_log_likelihood was set to true, the
            logarithm of the marginal likelihood of the state-space model.
            If the parameter was false, None is returned.
        """

        steps_no = Y.shape[0]  # number of steps in the Kalman Filter

        if calc_grad_log_likelihood:
            grad_calc_params_1 = (grad_calc_params['f_dA'], grad_calc_params['f_dQ'])
            grad_calc_params_2 = (grad_calc_params['f_dH'], grad_calc_params['f_dR'])

            dm_init = grad_calc_params['dm_init']; dP_init = grad_calc_params['dP_init']
        else:
            grad_calc_params_1 = None
            grad_calc_params_2 = None

            dm_init = None; dP_init = None

        # Allocate space for the results
        # Mean estimates. Initial values will be included
        M = np.empty((state_dim, (steps_no + 1)))
        # Variance estimates. Initial values will be included
        P = np.empty((state_dim, state_dim, (steps_no + 1)))

        # Initialize
        M[:, 0] = m_init
        P[:, :, 0] = P_init

        if calc_log_likelihood:
            log_likelihood = 0
        else:
            log_likelihood = None

        if calc_grad_log_likelihood:
            grad_log_likelihood = 0
        else:
            grad_log_likelihood = None
        # Main loop of the Kalman filter
        for k in range(0, steps_no):
            # In this loop the index of the new estimates is (k+1), of the
            # old - (k). This is because the initial values are stored at the
            # 0-th index.

            if k == 0:  # setting initial values
                dm_upd = dm_init
                dP_upd = dP_init

            m_pred, P_pred, dm_pred, dP_pred = \
                cls._kalman_prediction_step(k, M[:, k], P[:, :, k], p_a, p_f_A, p_f_Q,
                                            calc_grad_log_likelihood=calc_grad_log_likelihood,
                                            p_dm=dm_upd, p_dP=dP_upd, grad_calc_params_1=grad_calc_params_1)

            m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
                cls._kalman_update_step(k, m_pred, P_pred, p_h, p_f_H, p_f_R, Y,
                                        calc_log_likelihood=calc_log_likelihood,
                                        calc_grad_log_likelihood=calc_grad_log_likelihood,
                                        p_dm=dm_pred, p_dP=dP_pred, grad_calc_params_2=grad_calc_params_2)

            if calc_log_likelihood:
                log_likelihood += log_likelihood_update

            if calc_grad_log_likelihood:
                grad_log_likelihood += d_log_likelihood_update

            M[:, k + 1] = m_upd
            P[:, :, k + 1] = P_upd
        # Return values; the initial values are kept (the commented-out
        # variant would strip them)
        #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood)
        return (M, P, log_likelihood, grad_log_likelihood)

    @staticmethod
    def _kalman_prediction_step(k, p_m, p_P, p_a, p_f_A, p_f_Q, calc_grad_log_likelihood=False,
                                p_dm=None, p_dP=None, grad_calc_params_1=None):
        """
        Discrete prediction function.

        Input:
        k: int
            Iteration number. Starts at 0. The total number of iterations is
            equal to the number of measurements.

        p_m: matrix of size (state_dim, time_series_no)
            Mean value from the previous step.

        p_P:
            Covariance matrix from the previous step.

        p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
            k: iteration number, starts at 0
            x_{k-1}: state from the previous step
            A_{k}: Jacobian matrix of f_a. In the linear case it is exactly A_{k}.

        p_f_A: function (k, m, P) which returns the Jacobian of the dynamic
            function; it is passed into p_a.
            k: iteration number, starts at 0
            m: point where the Jacobian is evaluated
            P: parameter for the Jacobian, usually the covariance matrix.

        p_f_Q: function (k). Returns the noise matrix of the dynamic model on
            iteration k.
            k: iteration number, starts at 0.

        calc_grad_log_likelihood: boolean
            Whether to calculate the gradient of the marginal likelihood of
            the state-space model. If true then the next parameters must
            provide the extra information for the gradient calculation.

        p_dm: 3D array (state_dim, time_series_no, parameters_no)
            Mean derivatives from the previous step.

        p_dP: 3D array (state_dim, state_dim, parameters_no)
            Covariance derivatives from the previous step.
        """

        # The index corresponds to values from the previous iteration.
        A = p_f_A(k, p_m, p_P)  # state transition matrix (or Jacobian)
        Q = p_f_Q(k)            # state noise matrix

        # Prediction step ->
        m_pred = p_a(k, p_m, A)           # predicted mean
        P_pred = A.dot(p_P).dot(A.T) + Q  # predicted covariance
        # Prediction step <-

        if calc_grad_log_likelihood:
            p_f_dA = grad_calc_params_1[0]; dA_all_params = p_f_dA(k)  # derivatives of A wrt parameters
            p_f_dQ = grad_calc_params_1[1]; dQ_all_params = p_f_dQ(k)  # derivatives of Q wrt parameters
            dm_all_params = p_dm  # derivatives from the previous step
            dP_all_params = p_dP

            param_number = p_dP.shape[2]

            dm_pred = np.empty(dm_all_params.shape)
            dP_pred = np.empty(dP_all_params.shape)

            for j in range(param_number):
                dA = dA_all_params[:, :, j]
                dQ = dQ_all_params[:, :, j]

                dm = dm_all_params[:, j]
                dP = dP_all_params[:, :, j]

                # prediction step derivatives for the current parameter:
                dm_pred[:, j] = np.dot(dA, p_m) + np.dot(A, dm)
                dP_pred[:, :, j] = np.dot(dA, np.dot(p_P, A.T))
                dP_pred[:, :, j] += dP_pred[:, :, j].T
                dP_pred[:, :, j] += np.dot(A, np.dot(dP, A.T)) + dQ

                dP_pred[:, :, j] = 0.5 * (dP_pred[:, :, j] + dP_pred[:, :, j].T)  # symmetrize
        else:
            dm_pred = None
            dP_pred = None

        return m_pred, P_pred, dm_pred, dP_pred

    @staticmethod
    def _kalman_update_step(k, p_m, p_P, p_h, p_f_H, p_f_R, Y, calc_log_likelihood=False,
                            calc_grad_log_likelihood=False, p_dm=None, p_dP=None, grad_calc_params_2=None):
        """
        Input:

        k: int
            Iteration number. Starts at 0. The total number of iterations is
            equal to the number of measurements.

        p_m: matrix of size (state_dim, time_series_no)
            Mean value from the prediction step.

        p_P:
            Covariance matrix from the prediction step.

        p_h: function (k, x_{k}, H_{k}). Measurement function.
            k: iteration number, starts at 0
            x_{k}: state
            H_{k}: Jacobian matrix of f_h. In the linear case it is exactly H_{k}.

        p_f_H: function (k, m, P) which returns the Jacobian of the measurement
            function; it is passed into p_h.
            k: iteration number, starts at 0
            m: point where the Jacobian is evaluated
            P: parameter for the Jacobian, usually the covariance matrix.

        p_f_R: function (k). Returns the noise matrix of the measurement
            equation on iteration k.
            k: iteration number, starts at 0.

        calc_log_likelihood: boolean
            Whether to calculate the marginal likelihood of the state-space
            model.

        calc_grad_log_likelihood: boolean
            Whether to calculate the gradient of the marginal likelihood.

        p_dm: array
            Mean derivatives from the prediction step.

        p_dP: array
            Covariance derivatives from the prediction step.
        """

        m_pred = p_m
        P_pred = p_P

        H = p_f_H(k, m_pred, P_pred)
        R = p_f_R(k)
        measurement = Y[k, :].T  # measurement as a column

        log_likelihood_update = None; dm_upd = None; dP_upd = None; d_log_likelihood_update = None
        # Update step (only if there is data)
        if not np.any(np.isnan(measurement)):  # TODO: if some dimensions are missing, do the computations properly for the others.
            v = measurement - p_h(k, m_pred, H)
            S = H.dot(P_pred).dot(H.T) + R
            if Y.shape[1] == 1:  # measurements are one dimensional
                K = P_pred.dot(H.T) / S
                if calc_log_likelihood:
                    log_likelihood_update = -0.5 * (np.log(2 * np.pi) + np.log(S) +
                                                    v * v / S)
                    log_likelihood_update = float(log_likelihood_update)  # make it a scalar
                    if np.isnan(log_likelihood_update):
                        raise ValueError("Kalman filter: the log likelihood update is NaN")
                LL = None; islower = None
            else:
                LL, islower = linalg.cho_factor(S)
                K = linalg.cho_solve((LL, islower), H.dot(P_pred.T)).T

                if calc_log_likelihood:
                    log_likelihood_update = -0.5 * (v.shape[0] * np.log(2 * np.pi) +
                                                    2 * np.sum(np.log(np.diag(LL))) +
                                                    np.dot(v.T, linalg.cho_solve((LL, islower), v)))
                    log_likelihood_update = float(log_likelihood_update)  # make it a scalar

            if calc_grad_log_likelihood:
                dm_pred_all_params = p_dm  # derivatives from the prediction phase
                dP_pred_all_params = p_dP

                param_number = p_dP.shape[2]

                p_f_dH = grad_calc_params_2[0]; dH_all_params = p_f_dH(k)
                p_f_dR = grad_calc_params_2[1]; dR_all_params = p_f_dR(k)

                dm_upd = np.empty(dm_pred_all_params.shape)
                dP_upd = np.empty(dP_pred_all_params.shape)

                d_log_likelihood_update = np.empty((param_number, 1))
                for param in range(param_number):

                    dH = dH_all_params[:, :, param]
                    dR = dR_all_params[:, :, param]

                    dm_pred = dm_pred_all_params[:, param]
                    dP_pred = dP_pred_all_params[:, :, param]

                    # Terms in the likelihood derivatives
                    dv = -np.dot(dH, m_pred) - np.dot(H, dm_pred)
                    dS = np.dot(dH, np.dot(P_pred, H.T))
                    dS += dS.T
                    dS += np.dot(H, np.dot(dP_pred, H.T)) + dR

                    # TODO: maybe symmetrize dS

                    # dm and dP for the next step
                    if LL is not None:  # the measurement is not a scalar
                        tmp1 = linalg.cho_solve((LL, islower), H).T
                        tmp2 = linalg.cho_solve((LL, islower), dH).T
                        tmp3 = linalg.cho_solve((LL, islower), dS).T
                    else:  # the measurement is a scalar
                        tmp1 = H.T / S
                        tmp2 = dH.T / S
                        tmp3 = dS.T / S

                    dK = np.dot(dP_pred, tmp1) + np.dot(P_pred, tmp2) - \
                        np.dot(P_pred, np.dot(tmp1, tmp3))

                    # terms required for the next step, saved for each parameter
                    dm_upd[:, param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
                    dP_upd[:, :, param] = -np.dot(dK, np.dot(S, K.T))
                    dP_upd[:, :, param] += dP_upd[:, :, param].T
                    dP_upd[:, :, param] += dP_pred - np.dot(K, np.dot(dS, K.T))

                    dP_upd[:, :, param] = 0.5 * (dP_upd[:, :, param] + dP_upd[:, :, param].T)  # symmetrize
                    # computing the likelihood change for each parameter:
                    if LL is not None:  # the measurement is not a scalar
                        #tmp4 = linalg.cho_solve((LL, islower), dv)
                        tmp5 = linalg.cho_solve((LL, islower), v)
                    else:  # the measurement is a scalar
                        #tmp4 = dv / S
                        tmp5 = v / S

                    d_log_likelihood_update[param, 0] = -(0.5 * np.sum(np.diag(tmp3)) +
                                                          np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T, np.dot(dS, tmp5)))

            # Compute the actual updates for the mean and covariance of the states.
            m_upd = m_pred + K.dot(v)

            # Covariance update; ensure it is symmetric
            P_upd = K.dot(S).dot(K.T)
            P_upd = 0.5 * (P_upd + P_upd.T)
            P_upd = P_pred - P_upd  # this update matrix is symmetric
        else:  # no measurement on this step: propagate the prediction
            m_upd = m_pred
            P_upd = P_pred
            dm_upd = p_dm
            dP_upd = p_dP
            if calc_log_likelihood:
                log_likelihood_update = 0.0
            if calc_grad_log_likelihood:
                d_log_likelihood_update = 0.0

        return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update

    @staticmethod
    def _rts_smoother_update_step(k, p_m, p_P, p_m_pred, p_P_pred, p_m_prev_step,
                                  p_P_prev_step, p_f_A):
        """
        Input:

        k: int
            Iteration number. Starts at 0. The total number of iterations is
            equal to the number of measurements.

        p_m: matrix of size (state_dim, time_series_no)
            Filter mean on step k.

        p_P:
            Filter covariance on step k.

        p_m_pred: matrix of size (state_dim, time_series_no)
            Prediction mean.

        p_P_pred:
            Prediction covariance.

        p_m_prev_step:
            Smoother mean from the previous step.

        p_P_prev_step:
            Smoother covariance from the previous step.

        p_f_A: function (k, m, P) which returns the Jacobian of the dynamic
            function; it is passed into p_a.
            k: iteration number, starts at 0
            m: point where the Jacobian is evaluated
            P: parameter for the Jacobian, usually the covariance matrix.
        """

        A = p_f_A(k, p_m, p_P)  # state transition matrix (or Jacobian)

        tmp = np.dot(A, p_P.T)
        if A.shape[0] == 1:  # 1D states
            G = tmp.T / p_P_pred  # P[:,:,k] is symmetric
        else:
            LL, islower = linalg.cho_factor(p_P_pred)
            G = linalg.cho_solve((LL, islower), tmp).T

        m_upd = p_m + G.dot(p_m_prev_step - p_m_pred)
        P_upd = p_P + G.dot(p_P_prev_step - p_P_pred).dot(G.T)

        P_upd = 0.5 * (P_upd + P_upd.T)

        return m_upd, P_upd
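
    # For reference, the standard RTS recursion implemented above (a
    # restatement of the code in math form, where A is the transition from
    # step k to k+1):
    #   G_k   = P_k A^T (P^-_{k+1})^{-1}
    #   m^s_k = m_k + G_k (m^s_{k+1} - m^-_{k+1})
    #   P^s_k = P_k + G_k (P^s_{k+1} - P^-_{k+1}) G_k^T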

    @classmethod
    def _rts_smoother_raw(cls, state_dim, p_a, p_f_A, p_f_Q, filter_means,
                          filter_covars):
        """
        This function implements the Rauch-Tung-Striebel (RTS) smoother
        algorithm based on the results of _kalman_algorithm_raw.
        The notations are the same:
            x_{k} = A_{k} * x_{k-1} + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
            y_{k} = H_{k} * x_{k}   + r_{k};    r_{k}   ~ N(0, R_{k})

        Returns the estimated smoother distributions x_{k} ~ N(m_{k}, P(k)).

        Input:
        --------------

        p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
            k: iteration number, starts at 0
            x_{k-1}: state from the previous step
            A_{k}: Jacobian matrix of f_a. In the linear case it is exactly A_{k}.

        p_f_A: function (k, m, P) which returns the Jacobian of the dynamic
            function; it is passed into p_a.
            k: iteration number, starts at 0
            m: point where the Jacobian is evaluated
            P: parameter for the Jacobian, usually the covariance matrix.

        p_f_Q: function (k). Returns the noise matrix of the dynamic model on
            iteration k.
            k: iteration number, starts at 0.

        filter_means:
            Means from the filter; include the initial values.

        filter_covars:
            Covariances from the filter; include the initial values.

        Output:
        -------------

        M: (state_dim, no_steps) matrix
            Smoothed estimates of the state means.

        P: (state_dim, state_dim, no_steps) 3D array
            Smoothed estimates of the state covariances.
        """

        no_steps = filter_covars.shape[-1]  # number of steps

        M = np.empty(filter_means.shape)   # smoothed means
        P = np.empty(filter_covars.shape)  # smoothed covariances

        M[:, -1] = filter_means[:, -1]
        P[:, :, -1] = filter_covars[:, :, -1]
        for k in range(no_steps - 2, -1, -1):

            m_pred, P_pred, tmp1, tmp2 = \
                cls._kalman_prediction_step(k, filter_means[:, k],
                                            filter_covars[:, :, k], p_a, p_f_A, p_f_Q,
                                            calc_grad_log_likelihood=False)

            m_upd, P_upd = cls._rts_smoother_update_step(k,
                                                         filter_means[:, k], filter_covars[:, :, k],
                                                         m_pred, P_pred, M[:, k + 1], P[:, :, k + 1], p_f_A)

            M[:, k] = m_upd
            P[:, :, k] = P_upd
        # Return values

        return (M, P)
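
    # A sketch of filtering followed by smoothing (commented out; the wiring
    # below is illustrative and A_matrix / Q_matrix / H_matrix / R_matrix are
    # assumed constants):
    #
    #   f_a = lambda k, m, A: np.dot(A, m)
    #   f_A = lambda k, m, P: A_matrix   # constant transition matrix
    #   f_Q = lambda k: Q_matrix         # constant process noise
    #   f_h = lambda k, m, H: np.dot(H, m)
    #   f_H = lambda k, m, P: H_matrix   # constant measurement matrix
    #   f_R = lambda k: R_matrix         # constant measurement noise
    #   M, P, _, _ = DescreteStateSpace._kalman_algorithm_raw(
    #       state_dim, f_a, f_A, f_Q, f_h, f_H, f_R, Y, m_init, P_init)
    #   M_smooth, P_smooth = DescreteStateSpace._rts_smoother_raw(
    #       state_dim, f_a, f_A, f_Q, M, P)  # filter output, initial values included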

    @staticmethod
    def _check_A_matrix(p_A):
        """
        Check the A matrix so that it has the right shape.
        """

        old_A_shape = None
        if len(p_A.shape) < 3:
            old_A_shape = p_A.shape  # save the shape to restore it on exit
            if len(p_A.shape) == 2:  # matrix
                p_A.shape = (p_A.shape[0], p_A.shape[1], 1)
                if p_A.shape[0] != p_A.shape[1]:
                    raise ValueError("p_A must be a square matrix")

            elif len(p_A.shape) == 1:  # scalar but already in an array
                if p_A.shape[0] != 1:
                    raise ValueError("Parameter p_A is a 1D array, while it must be a matrix or a scalar")
                else:
                    p_A.shape = (1, 1, 1)

        return (p_A, old_A_shape)

    @staticmethod
    def _check_Q_matrix(p_Q):
        """
        Check the Q matrix.
        """

        old_Q_shape = None
        if len(p_Q.shape) < 3:
            old_Q_shape = p_Q.shape  # save the shape to restore it on exit
            if len(p_Q.shape) == 2:  # matrix
                p_Q.shape = (p_Q.shape[0], p_Q.shape[1], 1)
                if p_Q.shape[0] != p_Q.shape[1]:
                    raise ValueError("p_Q must be a square matrix")

            elif len(p_Q.shape) == 1:  # scalar but already in an array
                if p_Q.shape[0] != 1:
                    raise ValueError("Parameter p_Q is a 1D array, while it must be a matrix or a scalar")
                else:
                    p_Q.shape = (1, 1, 1)

        return (p_Q, old_Q_shape)

    @staticmethod
    def _check_H_matrix(p_H):
        """
        Check the H matrix.
        """

        old_H_shape = None
        if len(p_H.shape) == 1:
            if p_H.shape[0] != 1:
                raise ValueError("Ambiguity in the shape of p_H")
            else:
                old_H_shape = p_H.shape
                p_H.shape = (1, 1)

        return (p_H, old_H_shape)

    @staticmethod
    def _check_R_matrix(p_R):
        """
        Check the R matrix.
        """

        old_R_shape = None
        if len(p_R.shape) == 1:
            if p_R.shape[0] != 1:
                raise ValueError("Ambiguity in the shape of p_R")
            else:
                old_R_shape = p_R.shape
                p_R.shape = (1, 1)

        return (p_R, old_R_shape)

    @staticmethod
    def _check_grad_state_matrices(dM, state_dim, grad_params_no, which='dA'):
        """
        Check the dA, dQ matrices.

        Input:

        which: string, 'dA' or 'dQ'
        """

        if dM is None:
            dM = np.zeros((state_dim, state_dim, grad_params_no))
        elif isinstance(dM, np.ndarray):
            if state_dim == 1:
                if len(dM.shape) < 3:
                    dM.shape = (1, 1, 1)
            else:
                if len(dM.shape) < 3:
                    dM.shape = (state_dim, state_dim, 1)
        elif isinstance(dM, (int, np.integer)):  # np.int was an alias of the builtin int
            if state_dim > 1:
                raise ValueError("When computing the likelihood gradient, wrong %s dimension." % which)
            else:
                dM = np.ones((1, 1, 1)) * dM
        if not isinstance(dM, types.FunctionType):
            f_dM = lambda k: dM
        else:
            f_dM = dM

        return f_dM

    @staticmethod
    def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which='dH'):
        """
        Check the dH, dR matrices.

        Input:

        which: string, 'dH' or 'dR'
        """

        if dM is None:
            if which == 'dH':
                dM = np.zeros((measurement_dim, state_dim, grad_params_no))
            elif which == 'dR':
                dM = np.zeros((measurement_dim, measurement_dim, grad_params_no))
        elif isinstance(dM, np.ndarray):
            if state_dim == 1:
                if len(dM.shape) < 3:
                    dM.shape = (1, 1, 1)
            else:
                if len(dM.shape) < 3:
                    if which == 'dH':
                        dM.shape = (measurement_dim, state_dim, 1)
                    elif which == 'dR':
                        dM.shape = (measurement_dim, measurement_dim, 1)
        elif isinstance(dM, (int, np.integer)):  # np.int was an alias of the builtin int
            if state_dim > 1:
                raise ValueError("When computing the likelihood gradient, wrong %s dimension." % which)
            else:
                dM = np.ones((1, 1, 1)) * dM
        if not isinstance(dM, types.FunctionType):
            f_dM = lambda k: dM
        else:
            f_dM = dM

        return f_dM


class Struct(object):
    pass


class ContDescrStateSpace(DescreteStateSpace):
    """
    Class for the continuous-discrete Kalman filter. The state equation is
    continuous while the measurement equation is discrete.

        d x(t)/dt = F x(t) + L q;             q ~ N(0, Qc)
        y_{t_k}   = H_{k} x_{t_k} + r_{k};    r_{k} ~ N(0, R_{k})
    """

    class AQcompute_once(object):
        """
        Class providing the functions for the A, Q, dA and dQ calculation
        and caching the result of the last call.
        """

        def __init__(self, F, L, Qc, dt, compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF=None, dQc=None):
            """
            Input:

            dt: array
                Array of dt steps.
            """
            self.dt = dt
            self.F = F
            self.L = L
            self.Qc = Qc

            self.P_inf = P_inf
            self.dP_inf = dP_inf
            self.dF = dF
            self.dQc = dQc

            self.compute_derivatives = compute_derivatives
            self.grad_params_no = grad_params_no

            self.last_k = 0
            self.last_k_computed = False
            self.Ak = None
            self.Qk = None
            self.dAk = None
            self.dQk = None

        def recompute_for_new_k(self, k):
            """
            Compute A, Q (and optionally their derivatives) for step k, or
            return the cached values if they were already computed for this k.
            """

            if self.last_k == k:
                if not self.last_k_computed:
                    Ak, Qk, tmp, dAk, dQk = ContDescrStateSpace.lti_sde_to_descrete(
                        self.F, self.L, self.Qc, self.dt[k], self.compute_derivatives,
                        grad_params_no=self.grad_params_no, P_inf=self.P_inf,
                        dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc)
                    self.Ak = Ak
                    self.Qk = Qk
                    self.dAk = dAk
                    self.dQk = dQk
                    self.last_k_computed = True
                else:
                    Ak = self.Ak
                    Qk = self.Qk
                    dAk = self.dAk
                    dQk = self.dQk
            else:
                Ak, Qk, tmp, dAk, dQk = ContDescrStateSpace.lti_sde_to_descrete(
                    self.F, self.L, self.Qc, self.dt[k], self.compute_derivatives,
                    grad_params_no=self.grad_params_no, P_inf=self.P_inf,
                    dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc)

                self.last_k = k
                self.last_k_computed = True
                self.Ak = Ak
                self.Qk = Qk
                self.dAk = dAk
                self.dQk = dQk
            return Ak, Qk, dAk, dQk

        def reset(self, compute_derivatives):
            """
            For reusing this object, e.g. in the smoother computation.
            """

            self.last_k = 0
            self.last_k_computed = False
            self.compute_derivatives = compute_derivatives

            return self

        def f_A(self, k, m, P):
            Ak, Qk, dAk, dQk = self.recompute_for_new_k(k)
            return Ak

        def f_Q(self, k):
            Ak, Qk, dAk, dQk = self.recompute_for_new_k(k)
            return Qk

        def f_dA(self, k):
            Ak, Qk, dAk, dQk = self.recompute_for_new_k(k)
            return dAk

        def f_dQ(self, k):
            Ak, Qk, dAk, dQk = self.recompute_for_new_k(k)
            return dQk

    class AQcompute_batch(object):
        """
        Batch variant of AQcompute_once: A, Q (and their derivatives) are
        precomputed for all unique dt values at once.
        """
        def __init__(self, F, L, Qc, dt, compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF=None, dQc=None):
            """
            Input:

            dt: array
                Array of dt steps.
            """
            As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(
                F, L, Qc, dt, compute_derivatives,
                grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)

            self.As = As
            self.Qs = Qs
            self.dAs = dAs
            self.dQs = dQs
            self.reconstruct_indices = reconstruct_indices

        def reset(self, compute_derivatives):
            """
            For reusing this object, e.g. in the smoother computation.
            """

            return self

        def f_A(self, k, m, P):
            """Return A for step k."""
            return self.As[:, :, self.reconstruct_indices[k]]

        def f_Q(self, k):
            """Return Q for step k."""
            return self.Qs[:, :, self.reconstruct_indices[k]]

        def f_dA(self, k):
            """Return dA for step k."""
            return self.dAs[:, :, :, self.reconstruct_indices[k]]

        def f_dQ(self, k):
            """Return dQ for step k."""
            return self.dQs[:, :, :, self.reconstruct_indices[k]]

    @classmethod
    def cont_discr_kalman_filter(cls, F, L, Qc, H, R, P_inf, X, Y, m_init=None,
                                 P_init=None, calc_log_likelihood=False,
                                 calc_grad_log_likelihood=False, grad_params_no=None, grad_calc_params=None):
        """
        Kalman filter for the continuous-discrete model. F, L and Qc define
        the continuous state dynamics, H and R the discrete measurement model,
        P_inf is the stationary state covariance, X holds the time points and
        Y the measurements. The remaining parameters are as in kalman_filter.
        """

        # Time step lengths
        state_dim = F.shape[0]
        measurement_dim = Y.shape[1]

        if m_init is None:
            m_init = np.zeros((state_dim, 1))

        if P_init is None:
            P_init = P_inf.copy()

        if calc_grad_log_likelihood:

            dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which='dA')
            dQc = cls._check_grad_state_matrices(grad_calc_params.get('dQc'), state_dim, grad_params_no, which='dQ')
            dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which='dQ')

            dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which='dH')
            dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which='dR')

            dm_init = grad_calc_params.get('dm_init')  # initial values for the Kalman Filter
            if dm_init is None:
                dm_init = np.zeros((state_dim, grad_params_no))

            dP_init = grad_calc_params.get('dP_init')  # initial values for the Kalman Filter
            if dP_init is None:
                dP_init = dP_inf(0).copy()  # get the dP_init matrix; dP_inf is a function now

        else:
            dP_inf = None
            dF = None
            dQc = None
            dH = None
            dR = None
            dm_init = None
            dP_init = None

        # TODO: defaults for m_init, P_init, dm_init, dP_init. !!!
        # Also for dH, dR and probably for all derivatives
        (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(
            state_dim, F, L, Qc, H, R, P_inf, X, Y, m_init=m_init,
            P_init=P_init, calc_log_likelihood=calc_log_likelihood,
            calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, dP_inf=dP_inf,
            dF=dF, dQc=dQc, dH=dH, dR=dR, dm_init=dm_init, dP_init=dP_init)

        return (M, P, log_likelihood, grad_log_likelihood, AQcomp)
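
    # A minimal sketch for the continuous-discrete filter (commented out; the
    # scalar Ornstein-Uhlenbeck numbers below are illustrative assumptions).
    # Model: d x / dt = -lam * x + white noise, observed at irregular times X:
    #
    #   lam = 1.0; sigma2 = 0.5
    #   F = np.array(((-lam,),));  L = np.array(((1.0,),))
    #   Qc = np.array(((2 * lam * sigma2,),));  P_inf = np.array(((sigma2,),))
    #   H = np.array(((1.0,),));  R = np.array(((0.1,),))
    #   X = np.sort(np.random.rand(40, 1), axis=0)
    #   Y = np.random.randn(40, 1)
    #   M, P, log_lik, _, _ = ContDescrStateSpace.cont_discr_kalman_filter(
    #       F, L, Qc, H, R, P_inf, X, Y, calc_log_likelihood=True)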

    @classmethod
    def _cont_discr_kalman_filter_raw(cls, state_dim, F, L, Qc, H, R, P_inf, X, Y, m_init=None,
                                      P_init=None, calc_log_likelihood=False,
                                      calc_grad_log_likelihood=False, grad_params_no=None, dP_inf=None,
                                      dF=None, dQc=None, dH=None, dR=None, dm_init=None, dP_init=None):
        """
        Kalman filter for the continuous dynamic model.

        Input:
        F:
            Dynamic model matrix.

        X:
            Time steps.
        """

        steps_no = X.shape[0]  # number of steps in the Kalman Filter

        # Object which computes A, Q and possibly derivatives on the way;
        # pass the derivative matrices, not functions
        AQcomp = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood,
                                              grad_params_no=grad_params_no,
                                              P_inf=P_inf,
                                              dP_inf=dP_inf(0) if dP_inf is not None else None,
                                              dF=dF(0) if dF is not None else None,
                                              dQc=dQc(0) if dQc is not None else None)

        # Functions required for the Kalman filter
        f_a = lambda k, m, A: np.dot(A, m)  # state dynamic model
        f_h = lambda k, m, H: np.dot(H, m)  # measurement model
        f_H = lambda k, m, P: H
        f_R = lambda k: R

        # Allocate space for the results
        # Mean estimates. Initial values will be included
        M = np.empty((state_dim, (steps_no + 1)))
        # Variance estimates. Initial values will be included
        P = np.empty((state_dim, state_dim, (steps_no + 1)))

        # Initialize
        M[:, 0] = m_init[:, 0]
        P[:, :, 0] = P_init

        log_likelihood = 0
        grad_log_likelihood = np.zeros((grad_params_no, 1)) if calc_grad_log_likelihood else None
        # Main loop of the Kalman filter
        for k in range(0, steps_no):
            # In this loop the index of the new estimates is (k+1), of the
            # old - (k). This is because the initial values are stored at the
            # 0-th index.

            if k == 0:  # setting initial values
                dm_upd = dm_init
                dP_upd = dP_init

            m_pred, P_pred, dm_pred, dP_pred = \
                cls._kalman_prediction_step(k, M[:, k], P[:, :, k], f_a, AQcomp.f_A, AQcomp.f_Q,
                                            calc_grad_log_likelihood=calc_grad_log_likelihood,
                                            p_dm=dm_upd, p_dP=dP_upd, grad_calc_params_1=(AQcomp.f_dA, AQcomp.f_dQ))

            m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
                cls._kalman_update_step(k, m_pred, P_pred, f_h, f_H, f_R, Y,
                                        calc_log_likelihood=calc_log_likelihood,
                                        calc_grad_log_likelihood=calc_grad_log_likelihood,
                                        p_dm=dm_pred, p_dP=dP_pred, grad_calc_params_2=(dH, dR))

            if calc_log_likelihood:
                log_likelihood += log_likelihood_update

            if calc_grad_log_likelihood:
                grad_log_likelihood += d_log_likelihood_update

            M[:, k + 1] = m_upd
            P[:, :, k + 1] = P_upd
        # Return values; the initial values are kept
        #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood)
        return (M, P, log_likelihood, grad_log_likelihood, AQcomp.reset(False))

    @classmethod
    def cont_discr_rts_smoother(cls, state_dim, filter_means, filter_covars,
                                AQcomp=None, X=None, F=None, L=None, Qc=None):
        """
        This function implements the Rauch-Tung-Striebel (RTS) smoother
        algorithm for the continuous-discrete model, based on the results of
        _cont_discr_kalman_filter_raw. The discrete-time notations are the
        same:
            x_{k} = A_{k} * x_{k-1} + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
            y_{k} = H_{k} * x_{k}   + r_{k};    r_{k}   ~ N(0, R_{k})

        Returns the estimated smoother distributions x_{k} ~ N(m_{k}, P(k)).

        Input:
        --------------

        filter_means:
            Means from the filter; include the initial values.

        filter_covars:
            Covariances from the filter; include the initial values.

        AQcomp:
            Object returned by the filter which computes A and Q. If None,
            it is built from scratch from X, F, L and Qc.

        Output:
        -------------

        M: (state_dim, no_steps) matrix
            Smoothed estimates of the state means.

        P: (state_dim, state_dim, no_steps) 3D array
            Smoothed estimates of the state covariances.
        """

        if AQcomp is None:  # make this object from scratch
            AQcomp = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=False,
                                                  grad_params_no=None, P_inf=None, dP_inf=None, dF=None, dQc=None)

        f_a = lambda k, m, A: np.dot(A, m)  # state dynamic model

        no_steps = filter_covars.shape[-1]  # number of steps

        M = np.empty(filter_means.shape)   # smoothed means
        P = np.empty(filter_covars.shape)  # smoothed covariances

        M[:, -1] = filter_means[:, -1]
        P[:, :, -1] = filter_covars[:, :, -1]
        for k in range(no_steps - 2, -1, -1):

            m_pred, P_pred, tmp1, tmp2 = \
                cls._kalman_prediction_step(k, filter_means[:, k],
                                            filter_covars[:, :, k], f_a, AQcomp.f_A, AQcomp.f_Q,
                                            calc_grad_log_likelihood=False)

            m_upd, P_upd = cls._rts_smoother_update_step(k,
                                                         filter_means[:, k], filter_covars[:, :, k],
                                                         m_pred, P_pred, M[:, k + 1], P[:, :, k + 1], AQcomp.f_A)

            M[:, k] = m_upd
            P[:, :, k] = P_upd
        # Return values

        return (M, P)

    @classmethod
    def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF=None, dQc=None):
        """
        Return an object which can be used in the Kalman filter and/or
        smoother to obtain the matrices A and Q as well as the necessary
        derivatives.
        """

        unique_round_decimals = 8
        dt = np.empty((X.shape[0],))
        dt[1:] = np.diff(X[:, 0], axis=0)
        dt[0] = 0  # dt[1]
        unique_dts = np.unique(np.round(dt, decimals=unique_round_decimals))

        if len(unique_dts) > 20:
            # many distinct time steps: compute A and Q lazily, one step at a
            # time, caching the last result
            AQcomp = cls.AQcompute_once(F, L, Qc, dt, compute_derivatives=compute_derivatives,
                                        grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
        else:
            # few distinct time steps: precompute A and Q for each unique dt
            AQcomp = cls.AQcompute_batch(F, L, Qc, dt, compute_derivatives=compute_derivatives,
                                         grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)

        return AQcomp

    @staticmethod
    def lti_sde_to_descrete(F, L, Qc, dt, compute_derivatives=False,
                            grad_params_no=None, P_inf=None,
                            dP_inf=None, dF=None, dQc=None):
        r"""
        Linear Time-Invariant Stochastic Differential Equation (LTI SDE):

            dx(t) = F x(t) dt + L d\beta,  where

            x(t):  (vector) stochastic process
            \beta: (vector) Brownian motion process
            F, L:  (time invariant) matrices of corresponding dimensions
            Qc:    covariance of the noise.

        This function rewrites it into the corresponding state-space form:

            x_{k} = A_{k} * x_{k-1} + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})

        Input:
        --------------
        F, L: LTI SDE matrices of corresponding dimensions.

        Qc: matrix (n, n)
            Covariance between the different dimensions of the noise \beta.
            n is the dimensionality of the noise.

        dt: double or iterable
            Time difference used on this iteration.
            If dt is iterable, then A and Q_noise are computed for every
            unique dt.

        compute_derivatives: boolean
            Whether to also compute the derivatives dA and dQ.

        grad_params_no: int
            Number of parameters.

        P_inf:
            Stationary covariance of the state.

        dP_inf, dF, dQc:
            Derivatives of P_inf, F and Qc with respect to the parameters.

        Output:
        --------------

        A: matrix
            A_{k}. Because we have an LTI SDE, only dt can affect the matrix
            difference for different k.

        Q_noise: matrix
            Covariance matrix of (the vector) q_{k-1}. Only dt can affect the
            matrix difference for different k.

        reconstruct_index: array
            If dt was iterable, A and Q_noise are returned as three dimensional
            arrays whose third dimension corresponds to the unique dt's.
            reconstruct_index contains the indices of the original dt's in the
            unique dt sequence: A[:,:, reconstruct_index[5]] is the matrix A
            of the 6-th (indices start from zero) dt in the original sequence.
        """

        # Dimensionality
        n = F.shape[0]

        if not isinstance(dt, collections.abc.Iterable):  # not iterable, scalar

            # The dynamical model
            A = linalg.expm(F * dt)
            if np.any(np.isnan(A)):
                A = linalg.expm3(F * dt)  # expm3 existed in older scipy versions

            # The covariance matrix Q by matrix fraction decomposition ->
            Phi = np.zeros((2 * n, 2 * n))
            Phi[:n, :n] = F
            Phi[:n, n:] = L.dot(Qc).dot(L.T)
            Phi[n:, n:] = -F.T
            AB = linalg.expm(Phi * dt)
            if np.any(np.isnan(AB)):
                AB = linalg.expm3(Phi * dt)
            AB = np.dot(AB, np.vstack((np.zeros((n, n)), np.eye(n))))

            Q_noise_1 = linalg.solve(AB[n:, :].T, AB[:n, :].T)
            # The covariance matrix Q by matrix fraction decomposition <-

            if compute_derivatives:
                dA = np.zeros([n, n, grad_params_no])
                dQ = np.zeros([n, n, grad_params_no])

                #AA = np.zeros([2*n, 2*n, nparam])
                FF = np.zeros([2 * n, 2 * n])
                AA = np.zeros([2 * n, 2 * n, grad_params_no])

                for p in range(0, grad_params_no):

                    FF[:n, :n] = F
                    FF[n:, :n] = dF[:, :, p]
                    FF[n:, n:] = F

                    # Solve the matrix exponential
                    AA[:, :, p] = linalg.expm(FF * dt)

                    # Solve the differential equation
                    #foo = AA[:,:,p].dot(np.vstack([m, dm[:,p]]))
                    #mm = foo[:n,:]
                    #dm[:,p] = foo[n:,:]

                    # The discrete-time dynamical model
                    if p == 0:
                        A = AA[:n, :n, p]
                        Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
                        Q_noise = Q_noise_2
                        #PP = A.dot(P).dot(A.T) + Q_noise_2

                    # The derivatives of A and Q
                    dA[:, :, p] = AA[n:, :n, p]
                    dQ[:, :, p] = dP_inf[:, :, p] - dA[:, :, p].dot(P_inf).dot(A.T) \
                        - A.dot(dP_inf[:, :, p]).dot(A.T) - A.dot(P_inf).dot(dA[:, :, p].T)  # TODO: rewrite to avoid multiplying twice

                    # The derivatives of P
                    #dP[:,:,p] = dA.dot(P).dot(A.T) + A.dot(dP_prev[:,:,p]).dot(A.T) \
                    #    + A.dot(P).dot(dA.T) + dQ
                    #Q_noise = Q_noise_2
            else:
                dA = None
                dQ = None
                Q_noise = Q_noise_1

            # Return
            return A, Q_noise, None, dA, dQ

        else:  # iterable, array

            # Time discretizations (rounded to 8 decimals to avoid numerical problems)
            dt_unique, tmp, reconstruct_index = np.unique(np.round(dt, 8),
                                                          return_index=True, return_inverse=True)
            del tmp
            # Allocate space for A and Q
            A = np.empty((n, n, dt_unique.shape[0]))
            Q_noise = np.empty((n, n, dt_unique.shape[0]))

            if compute_derivatives:
                dA = np.empty((n, n, grad_params_no, dt_unique.shape[0]))
                dQ = np.empty((n, n, grad_params_no, dt_unique.shape[0]))
            else:
                dA = None
                dQ = None

            # Call this function for each unique dt
            for j in range(0, dt_unique.shape[0]):
                A[:, :, j], Q_noise[:, :, j], tmp1, dAj, dQj = ContDescrStateSpace.lti_sde_to_descrete(
                    F, L, Qc, dt_unique[j],
                    compute_derivatives=compute_derivatives, grad_params_no=grad_params_no,
                    P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
                if compute_derivatives:
                    dA[:, :, :, j] = dAj
                    dQ[:, :, :, j] = dQj

            # Return
            return A, Q_noise, reconstruct_index, dA, dQ
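
    # A worked sketch of the discretization (commented out; the scalar
    # Ornstein-Uhlenbeck numbers are illustrative assumptions). For
    # dx = -x dt + d\beta with Qc = 2 and dt = 0.1, the exact answers are
    # A = exp(-0.1) ~ 0.9048 and Q = P_inf * (1 - exp(-0.2)) ~ 0.1813,
    # with P_inf = Qc / 2 = 1:
    #
    #   F = np.array(((-1.0,),));  L = np.array(((1.0,),))
    #   A, Q, _, _, _ = ContDescrStateSpace.lti_sde_to_descrete(
    #       F, L, np.array(((2.0,),)), 0.1)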