From f0660dcde00041974cf46a458f3bf0b6a04bda00 Mon Sep 17 00:00:00 2001 From: Alexander Grigorievskiy Date: Tue, 15 Mar 2016 17:01:05 +0200 Subject: [PATCH] TEST: Tests use 'lbfgsb' optimization function. Also some syntactic changes. --- GPy/models/state_space_main.py | 2352 +++++++++--------- GPy/testing/gpy_kernels_state_space_tests.py | 6 +- 2 files changed, 1199 insertions(+), 1159 deletions(-) diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py index 72f3957d..d65364e5 100644 --- a/GPy/models/state_space_main.py +++ b/GPy/models/state_space_main.py @@ -5,16 +5,13 @@ Main functionality for state-space inference. """ -import collections # for cheking whether a variable is iterable -import types # for cheking whether a variable is a function +import collections # for cheking whether a variable is iterable +import types # for cheking whether a variable is a function import numpy as np import scipy as sp import scipy.linalg as linalg - -#import pdb; pdb.set_trace() - try: import state_space_setup setup_available = True @@ -25,7 +22,7 @@ except ImportError as e: print_verbose = False try: - import state_space_cython + import state_space_cython cython_code_available = True if print_verbose: print("state_space: cython is available") @@ -43,29 +40,32 @@ if print_verbose: print("state_space: cython is used") else: print("state_space: cython is NOT used") - + + class Dynamic_Callables_Python(object): - + def f_a(self, k, m, A): """ - p_a: function (k, x_{k-1}, A_{k}). Dynamic function. + p_a: function (k, x_{k-1}, A_{k}). Dynamic function. k (iteration number), starts at 0 x_{k-1} State from the previous step - A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. + A_{k} Jacobian matrices of f_a. In the linear case it is exactly + A_{k}. """ raise NotImplemented("f_a is not implemented!") - def Ak(self, k,m,P): # returns state iteration matrix + def Ak(self, k, m, P): # returns state iteration matrix """ - function (k, m, P) return Jacobian of dynamic function, it is passed into p_a. + function (k, m, P) return Jacobian of dynamic function, it is passed + into p_a. k (iteration number), starts at 0 m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. """ - + raise NotImplemented("Ak is not implemented!") - + def Qk(self, k): """ function (k). Returns noise matrix of dynamic model on iteration k. @@ -75,12 +75,14 @@ class Dynamic_Callables_Python(object): def Q_srk(self, k): """ - function (k). Returns the square root of noise matrix of dynamic model on iteration k. - k (iteration number). starts at 0 - + function (k). Returns the square root of noise matrix of dynamic model + on iteration k. + + k (iteration number). starts at 0 + This function is implemented to use SVD prediction step. """ - + raise NotImplemented("Q_srk is not implemented!") def dAk(self, k): @@ -95,33 +97,35 @@ class Dynamic_Callables_Python(object): function (k). Returns the derivative of Q on iteration k. k (iteration number). starts at 0 """ - raise NotImplemented("dQk is not implemented!") - - def reset(self,compute_derivatives = False): + raise NotImplemented("dQk is not implemented!") + + def reset(self, compute_derivatives=False): """ - Return the state of this object to the beginning of iteration (to k eq. 0) - """ - + Return the state of this object to the beginning of iteration + (to k eq. 0). + """ + raise NotImplemented("reset is not implemented!") - + if use_cython: Dynamic_Callables_Class = state_space_cython.Dynamic_Callables_Cython else: Dynamic_Callables_Class = Dynamic_Callables_Python - + class Measurement_Callables_Python(object): def f_h(self, k, m_pred, Hk): """ function (k, x_{k}, H_{k}). Measurement function. k (iteration number), starts at 0 - x_{k} state - H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. + x_{k} state + H_{k} Jacobian matrices of f_h. In the linear case it is exactly + H_{k}. """ raise NotImplemented("f_a is not implemented!") - def Hk(self, k,m_pred,P_pred): # returns state iteration matrix + def Hk(self, k, m_pred, P_pred): # returns state iteration matrix """ function (k, m, P) return Jacobian of measurement function, it is passed into p_h. @@ -129,27 +133,26 @@ class Measurement_Callables_Python(object): m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. """ - + raise NotImplemented("Hk is not implemented!") - + def Rk(self, k): """ - function (k). Returns noise matrix of measurement equation + function (k). Returns noise matrix of measurement equation on iteration k. k (iteration number). starts at 0 """ raise NotImplemented("Rk is not implemented!") - def R_isrk(self, k): """ - function (k). Returns the square root of the noise matrix of - measurement equation on iteration k. + function (k). Returns the square root of the noise matrix of + measurement equation on iteration k. k (iteration number). starts at 0 - + This function is implemented to use SVD prediction step. """ - + raise NotImplemented("Q_srk is not implemented!") def dHk(self, k): @@ -166,40 +169,45 @@ class Measurement_Callables_Python(object): """ raise NotImplemented("dQk is not implemented!") - def reset(self,compute_derivatives = False): + def reset(self, compute_derivatives=False): """ - Return the state of this object to the beginning of iteration (to k eq. 0) - """ - + Return the state of this object to the beginning of iteration + (to k eq. 0) + """ + raise NotImplemented("reset is not implemented!") if use_cython: - Measurement_Callables_Class = state_space_cython.Measurement_Callables_Cython + Measurement_Callables_Class = state_space_cython.\ + Measurement_Callables_Cython else: Measurement_Callables_Class = Measurement_Callables_Python - + + class R_handling_Python(Measurement_Callables_Class): """ The calss handles noise matrix R. """ def __init__(self, R, index, R_time_var_index, unique_R_number, dR=None): """ - Input: + Input: --------------- R - array with noise on various steps. The result of preprocessing the noise input. - + index - for each step of Kalman filter contains the corresponding index in the array. - - R_time_var_index - another index in the array R. Computed earlier and passed here. - - unique_R_number - number of unique noise matrices below which square roots - are cached and above which they are computed each time. - + + R_time_var_index - another index in the array R. Computed earlier and + is passed here. + + unique_R_number - number of unique noise matrices below which square + roots are cached and above which they are computed each time. + dR: 3D array[:, :, param_num] - derivative of R. Derivative is supported only when R do not change over time - + derivative of R. Derivative is supported only when R do not change + over time + Output: -------------- Object which has two necessary functions: @@ -208,53 +216,56 @@ class R_handling_Python(Measurement_Callables_Class): """ self.R = R self.index = index - self.R_time_var_index = R_time_var_index - self.dR = dR - + self.R_time_var_index = int(R_time_var_index) + self.dR = dR + if (len(np.unique(index)) > unique_R_number): self.svd_each_time = True else: self.svd_each_time = False - + self.R_square_root = {} - - def Rk(self,k): - return self.R[:,:, self.index[self.R_time_var_index, k]] - - def dRk(self,k): + + def Rk(self, k): + return self.R[:, :, self.index[self.R_time_var_index, k]] + + def dRk(self, k): if self.dR is None: raise ValueError("dR derivative is None") - - return self.dR # the same dirivative on each iteration - + + return self.dR # the same dirivative on each iteration + def R_isrk(self, k): """ Function returns the inverse square root of R matrix on step k. """ - ind = self.index[self.R_time_var_index, k] - R = self.R[:,:, ind ] - - if (R.shape[0] == 1): # 1-D case handle simplier. No storage - # of the result, just compute it each time. - inv_square_root = np.sqrt( 1.0/R ) + ind = int(self.index[self.R_time_var_index, k]) + R = self.R[:, :, ind] + + if (R.shape[0] == 1): # 1-D case handle simplier. No storage + # of the result, just compute it each time. + inv_square_root = np.sqrt(1.0/R) else: if self.svd_each_time: - - (U,S,Vh) = sp.linalg.svd( R,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) - + + (U, S, Vh) = sp.linalg.svd(R, full_matrices=False, + compute_uv=True, overwrite_a=False, + check_finite=True) + inv_square_root = U * 1.0/np.sqrt(S) else: if ind in self.R_square_root: inv_square_root = self.R_square_root[ind] else: - (U,S,Vh) = sp.linalg.svd( R,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) - + (U, S, Vh) = sp.linalg.svd(R, full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True) + inv_square_root = U * 1.0/np.sqrt(S) - + self.R_square_root[ind] = inv_square_root - + return inv_square_root if use_cython: @@ -262,25 +273,30 @@ if use_cython: else: R_handling_Class = R_handling_Python + class Std_Measurement_Callables_Python(R_handling_Class): - - def __init__(self, H, H_time_var_index, R, index, R_time_var_index, unique_R_number, dH = None, dR=None): - super(Std_Measurement_Callables_Python,self).__init__(R, index, R_time_var_index, unique_R_number,dR) - + + def __init__(self, H, H_time_var_index, R, index, R_time_var_index, + unique_R_number, dH=None, dR=None): + super(Std_Measurement_Callables_Python, + self).__init__(R, index, R_time_var_index, unique_R_number, dR) + self.H = H - self.H_time_var_index = H_time_var_index + self.H_time_var_index = int(H_time_var_index) self.dH = dH + def f_h(self, k, m, H): """ function (k, x_{k}, H_{k}). Measurement function. k (iteration number), starts at 0 - x_{k} state - H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. + x_{k} state + H_{k} Jacobian matrices of f_h. In the linear case it is exactly + H_{k}. """ return np.dot(H, m) - def Hk(self, k,m_pred,P_pred): # returns state iteration matrix + def Hk(self, k, m_pred, P_pred): # returns state iteration matrix """ function (k, m, P) return Jacobian of measurement function, it is passed into p_h. @@ -289,105 +305,112 @@ class Std_Measurement_Callables_Python(R_handling_Class): P: parameter for Jacobian, usually covariance matrix. """ - return self.H[:,:, self.index[self.H_time_var_index, k]] - - def dHk(self,k): + return self.H[:, :, self.index[self.H_time_var_index, k]] + + def dHk(self, k): if self.dH is None: raise ValueError("dH derivative is None") - - return self.dH # the same dirivative on each iteration + + return self.dH # the same dirivative on each iteration if use_cython: - Std_Measurement_Callables_Class = state_space_cython.Std_Measurement_Callables_Cython + Std_Measurement_Callables_Class = state_space_cython.\ + Std_Measurement_Callables_Cython else: Std_Measurement_Callables_Class = Std_Measurement_Callables_Python - + + class Q_handling_Python(Dynamic_Callables_Class): - - def __init__(self, Q, index, Q_time_var_index, unique_Q_number, dQ = None): + + def __init__(self, Q, index, Q_time_var_index, unique_Q_number, dQ=None): """ - Input: + Input: --------------- R - array with noise on various steps. The result of preprocessing the noise input. - + index - for each step of Kalman filter contains the corresponding index in the array. - - R_time_var_index - another index in the array R. Computed earlier and passed here. - - unique_R_number - number of unique noise matrices below which square roots - are cached and above which they are computed each time. - + + R_time_var_index - another index in the array R. Computed earlier and + passed here. + + unique_R_number - number of unique noise matrices below which square + roots are cached and above which they are computed each time. + dQ: 3D array[:, :, param_num] - derivative of Q. Derivative is supported only when Q do not change over time - + derivative of Q. Derivative is supported only when Q do not + change over time + Output: -------------- Object which has two necessary functions: f_R(k) inv_R_square_root(k) """ - + self.Q = Q self.index = index self.Q_time_var_index = Q_time_var_index - self.dQ = dQ - + self.dQ = dQ + if (len(np.unique(index)) > unique_Q_number): self.svd_each_time = True else: self.svd_each_time = False - + self.Q_square_root = {} - - - def Qk(self,k): + + def Qk(self, k): """ function (k). Returns noise matrix of dynamic model on iteration k. k (iteration number). starts at 0 """ - return self.Q[:,:, self.index[self.Q_time_var_index, k]] - - def dQk(self,k): + return self.Q[:, :, self.index[self.Q_time_var_index, k]] + + def dQk(self, k): if self.dQ is None: raise ValueError("dQ derivative is None") - - return self.dQ # the same dirivative on each iteration - - def Q_srk(self,k): + + return self.dQ # the same dirivative on each iteration + + def Q_srk(self, k): """ - function (k). Returns the square root of noise matrix of dynamic model on iteration k. - k (iteration number). starts at 0 - + function (k). Returns the square root of noise matrix of dynamic model + on iteration k. + k (iteration number). starts at 0 + This function is implemented to use SVD prediction step. """ ind = self.index[self.Q_time_var_index, k] - Q = self.Q[:,:, ind] - - - if (Q.shape[0] == 1): # 1-D case handle simplier. No storage - # of the result, just compute it each time. - square_root = np.sqrt( Q ) + Q = self.Q[:, :, ind] + + if (Q.shape[0] == 1): # 1-D case handle simplier. No storage + # of the result, just compute it each time. + square_root = np.sqrt(Q) else: if self.svd_each_time: - - (U,S,Vh) = sp.linalg.svd( Q,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) - + + (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True) + square_root = U * np.sqrt(S) else: - + if ind in self.Q_square_root: square_root = self.Q_square_root[ind] else: - (U,S,Vh) = sp.linalg.svd( Q,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) - + (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True) + square_root = U * np.sqrt(S) - + self.Q_square_root[ind] = square_root - + return square_root if use_cython: @@ -395,26 +418,29 @@ if use_cython: else: Q_handling_Class = Q_handling_Python + class Std_Dynamic_Callables_Python(Q_handling_Class): - - def __init__(self, A, A_time_var_index, Q, index, Q_time_var_index, unique_Q_number, dA = None, dQ=None): - super(Std_Dynamic_Callables_Python,self).__init__(Q, index, Q_time_var_index, unique_Q_number,dQ) - + + def __init__(self, A, A_time_var_index, Q, index, Q_time_var_index, + unique_Q_number, dA=None, dQ=None): + super(Std_Dynamic_Callables_Python, + self).__init__(Q, index, Q_time_var_index, unique_Q_number, dQ) + self.A = A self.A_time_var_index = A_time_var_index self.dA = dA - + def f_a(self, k, m, A): """ - f_a: function (k, x_{k-1}, A_{k}). Dynamic function. + f_a: function (k, x_{k-1}, A_{k}). Dynamic function. k (iteration number), starts at 0 x_{k-1} State from the previous step - A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. + A_{k} Jacobian matrices of f_a. In the linear case it is exactly + A_{k}. """ + return np.dot(A, m) - return np.dot(A,m) - - def Ak(self, k,m_pred,P_pred): # returns state iteration matrix + def Ak(self, k, m_pred, P_pred): # returns state iteration matrix """ function (k, m, P) return Jacobian of measurement function, it is passed into p_h. @@ -423,29 +449,31 @@ class Std_Dynamic_Callables_Python(Q_handling_Class): P: parameter for Jacobian, usually covariance matrix. """ - return self.A[:,:, self.index[self.A_time_var_index, k]] - - def dAk(self,k): + return self.A[:, :, self.index[self.A_time_var_index, k]] + + def dAk(self, k): if self.dA is None: raise ValueError("dA derivative is None") - - return self.dA # the same dirivative on each iteration - - def reset(self,compute_derivatives = False): + + return self.dA # the same dirivative on each iteration + + def reset(self, compute_derivatives=False): """ - Return the state of this object to the beginning of iteration (to k eq. 0) - """ - + Return the state of this object to the beginning of iteration + (to k eq. 0) + """ + return self - + if use_cython: - Std_Dynamic_Callables_Class = state_space_cython.Std_Dynamic_Callables_Cython + Std_Dynamic_Callables_Class = state_space_cython.\ + Std_Dynamic_Callables_Cython else: Std_Dynamic_Callables_Class = Std_Dynamic_Callables_Python class AddMethodToClass(object): - + def __init__(self, func=None, tp='staticmethod'): """ Input: @@ -453,47 +481,56 @@ class AddMethodToClass(object): func: function to add tp: string Type of the method: normal, staticmethod, classmethod - """ + """ if func is None: raise ValueError("Function can not be None") - + self.func = func self.tp = tp - - def __get__(self, obj, klass=None,*args, **kwargs): - + + def __get__(self, obj, klass=None, *args, **kwargs): + if self.tp == 'staticmethod': return self.func elif self.tp == 'normal': def newfunc(obj, *args, **kwargs): - return self.func - - elif self.tp == 'classmethod': + return self.func + + elif self.tp == 'classmethod': def newfunc(klass, *args, **kwargs): - return self.func + return self.func return newfunc - + + class DescreteStateSpaceMeta(type): """ Substitute necessary methods from cython. """ - + def __new__(typeclass, name, bases, attributes): """ After thos method the class object is created """ - + if use_cython: if '_kalman_prediction_step_SVD' in attributes: - attributes['_kalman_prediction_step_SVD'] = AddMethodToClass(state_space_cython._kalman_prediction_step_SVD_Cython) - - if '_kalman_update_step_SVD' in attributes: - attributes['_kalman_update_step_SVD'] = AddMethodToClass(state_space_cython._kalman_update_step_SVD_Cython) - + attributes['_kalman_prediction_step_SVD'] =\ + AddMethodToClass(state_space_cython. + _kalman_prediction_step_SVD_Cython) + + if '_kalman_update_step_SVD' in attributes: + attributes['_kalman_update_step_SVD'] =\ + AddMethodToClass(state_space_cython. + _kalman_update_step_SVD_Cython) + if '_cont_discr_kalman_filter_raw' in attributes: - attributes['_cont_discr_kalman_filter_raw'] = AddMethodToClass(state_space_cython._cont_discr_kalman_filter_raw_Cython) - - return super(DescreteStateSpaceMeta, typeclass).__new__(typeclass, name, bases, attributes) + attributes['_cont_discr_kalman_filter_raw'] =\ + AddMethodToClass(state_space_cython. + _cont_discr_kalman_filter_raw_Cython) + + return super(DescreteStateSpaceMeta, + typeclass).__new__(typeclass, name, bases, attributes) + class DescreteStateSpace(object): """ @@ -502,7 +539,7 @@ class DescreteStateSpace(object): Linear models are: x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - + Nonlinear: x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) y_{k} = f_h(k, x_{k}, H_{k}) + r_{k}; r_{k-1} ~ N(0, R_{k}) @@ -510,63 +547,68 @@ class DescreteStateSpace(object): x_{k} (state value on certain iteration), A_{k} and H_{k} - Jacobian matrices of f_a and f_h respectively. In the linear case they are exactly A_{k} and H_{k}. - - + + Currently two nonlinear Gaussian filter algorithms are implemented: Extended Kalman Filter (EKF), Statistically linearized Filter (SLF), which implementations are very similar. - + """ __metaclass__ = DescreteStateSpaceMeta - + @staticmethod def _reshape_input_data(shape, desired_dim=3): """ Static function returns the column-wise shape for for an input shape. - + Input: -------------- shape: tuple Shape of an input array, so that it is always a column. - + desired_dim: int - desired shape of output. For Y data it should be 3 + desired shape of output. For Y data it should be 3 (sample_no, dimension, ts_no). For X data - 2 (sample_no, 1) Output: -------------- new_shape: tuple - New shape of the measurements array. Idea is that samples are + New shape of the measurements array. Idea is that samples are along dimension 0, sample dimension - dimension 1, different time series - dimension 2. old_shape: tuple or None - If the shape has been modified, return old shape, otherwise None + If the shape has been modified, return old shape, otherwise + None. """ - - + if (len(shape) > 3): - raise ValueError("Input array is not supposed to be more than 3 dimensional.") - + raise ValueError("""Input array is not supposed to be more + than 3 dimensional.""") + if (len(shape) > desired_dim): - raise ValueError("Input array shape is more than desired shape.") + raise ValueError("Input array shape is more than desired shape.") elif len(shape) == 1: - if (desired_dim==3): - return ((shape[0],1,1), shape) # last dimension is the time serime_series_no - elif (desired_dim==2): - return ((shape[0],1), shape) - + if (desired_dim == 3): + return ((shape[0], 1, 1), shape) # last dimension is the + # time serime_series_no + elif (desired_dim == 2): + return ((shape[0], 1), shape) + elif len(shape) == 2: - if (desired_dim==3): - return ((shape[1],1,1), shape) if (shape[0] == 1) else ( (shape[0],shape[1],1), shape) # convert to column vector - elif (desired_dim==2): - return ((shape[1],1), shape) if (shape[0] == 1) else ( (shape[0],shape[1]), None) # convert to column vector - - else: # len(shape) == 3 - return (shape,None) # do nothing - + if (desired_dim == 3): + return ((shape[1], 1, 1), shape) if (shape[0] == 1) else\ + ((shape[0], shape[1], 1), shape) # convert to column + # vector + elif (desired_dim == 2): + return ((shape[1], 1), shape) if (shape[0] == 1) else\ + ((shape[0], shape[1]), None) # convert to column vector + + else: # len(shape) == 3 + return (shape, None) # do nothing + @classmethod - def kalman_filter(cls,p_A, p_Q, p_H, p_R, Y, index = None, m_init=None, + def kalman_filter(cls, p_A, p_Q, p_H, p_R, Y, index=None, m_init=None, P_init=None, p_kalman_filter_type='regular', - calc_log_likelihood=False, + calc_log_likelihood=False, calc_grad_log_likelihood=False, grad_params_no=None, grad_calc_params=None): """ @@ -574,20 +616,20 @@ class DescreteStateSpace(object): These notations for the State-Space model are assumed: x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - - Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) - + + Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) + Current Features: ---------------------------------------- - 1) The function generaly do not modify the passed parameters. If + 1) The function generaly do not modify the passed parameters. If it happens then it is an error. There are several exeprions: scalars - can be modified into a matrix, in some rare cases shapes of + can be modified into a matrix, in some rare cases shapes of the derivatives matrices may be changed, it is ignored for now. - + 2) Copies of p_A, p_Q, index are created in memory to be used later in smoother. References to copies are kept in "matrs_for_smoother" return parameter. - + 3) Function support "multiple time series mode" which means that exactly the same State-Space model is used to filter several sets of measurements. In this case third dimension of Y should include these state-space measurements @@ -598,117 +640,117 @@ class DescreteStateSpace(object): 5) Measurement may include missing values. In this case update step is not done for this measurement. (later may be changed) - - Input: + + Input: ----------------- - + p_A: scalar, square matrix, 3D array A_{k} in the model. If matrix then A_{k} = A - constant. - If it is 3D array then A_{k} = p_A[:,:, index[0,k]] - + If it is 3D array then A_{k} = p_A[:,:, index[0,k]] + p_Q: scalar, square symmetric matrix, 3D array Q_{k-1} in the model. If matrix then Q_{k-1} = Q - constant. If it is 3D array then Q_{k-1} = p_Q[:,:, index[1,k]] - + p_H: scalar, matrix (measurement_dim, state_dim) , 3D array H_{k} in the model. If matrix then H_{k} = H - constant. If it is 3D array then H_{k} = p_Q[:,:, index[2,k]] - + p_R: scalar, square symmetric matrix, 3D array R_{k} in the model. If matrix then R_{k} = R - constant. If it is 3D array then R_{k} = p_R[:,:, index[3,k]] - + Y: matrix or vector or 3D array Data. If Y is matrix then samples are along 0-th dimension and - features along the 1-st. If 3D array then third dimension + features along the 1-st. If 3D array then third dimension correspond to "multiple time series mode". - + index: vector Which indices (on 3-rd dimension) from arrays p_A, p_Q,p_H, p_R to use - on every time step. If this parameter is None then it is assumed + on every time step. If this parameter is None then it is assumed that p_A, p_Q, p_H, p_R do not change over time and indices are not needed. index[0,:] - correspond to A, index[1,:] - correspond to Q index[2,:] - correspond to H, index[3,:] - correspond to R. If index.shape[0] == 1, it is assumed that indides for all matrices are the same. - + m_init: vector or matrix Initial distribution mean. If None it is assumed to be zero. For "multiple time series mode" it is matrix, second dimension of which correspond to different time series. In regular case ("one time series mode") it is a vector. - + P_init: square symmetric matrix or scalar Initial covariance of the states. If the parameter is scalar - then it is assumed that initial covariance matrix is unit matrix + then it is assumed that initial covariance matrix is unit matrix multiplied by this scalar. If None the unit matrix is used instead. "multiple time series mode" does not affect it, since it does not affect anything related to state variaces. - + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then "grad_calc_params" parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then "grad_calc_params" parameter must provide the extra parameters for gradient calculation. - + grad_params_no: int - If previous parameter is true, then this parameters gives the - total number of parameters in the gradient. - + If previous parameter is true, then this parameters gives the + total number of parameters in the gradient. + grad_calc_params: dictionary - Dictionary with derivatives of model matrices with respect + Dictionary with derivatives of model matrices with respect to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init". They can be None, in this case zero matrices (no dependence on parameters) is assumed. If there is only one parameter then third dimension is automatically added. - + Output: -------------- - + M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array Filter estimates of the state means. In the extra step the initial value is included. In the "multiple time series mode" third dimension correspond to different timeseries. - + P: (no_steps+1, state_dim, state_dim) 3D array Filter estimates of the state covariances. In the extra step the initial value is included. - + log_likelihood: double or (1, time_series_no) 3D array. If the parameter calc_log_likelihood was set to true, return logarithm of marginal likelihood of the state-space model. If the parameter was false, return None. In the "multiple time series mode" it is a vector providing log_likelihood for each time series. - + grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix If calc_grad_log_likelihood is true, return gradient of log likelihood - with respect to parameters. It returns it column wise, so in - "multiple time series mode" gradients for each time series is in the + with respect to parameters. It returns it column wise, so in + "multiple time series mode" gradients for each time series is in the corresponding column. - + matrs_for_smoother: dict Dictionary with model functions for smoother. The intrinsic model functions are computed in this functions and they are returned to use in smoother for convenience. They are: 'p_a', 'p_f_A', 'p_f_Q' The dictionary contains the same fields. """ - + #import pdb; pdb.set_trace() - + # Parameters checking -> - # index + # index p_A = np.atleast_1d(p_A) p_Q = np.atleast_1d(p_Q) p_H = np.atleast_1d(p_H) p_R = np.atleast_1d(p_R) - + # Reshape and check measurements: Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) measurement_dim = Y.shape[1] time_series_no = Y.shape[2] # multiple time series mode - + if ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) or\ ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) or\ ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\ @@ -716,7 +758,7 @@ class DescreteStateSpace(object): model_matrices_chage_with_time = True else: model_matrices_chage_with_time = False - + # Check index old_index_shape = None if index is None: @@ -729,19 +771,19 @@ class DescreteStateSpace(object): if len(index.shape) == 1: index.shape = (1,index.shape[0]) old_index_shape = (index.shape[0],) - - if (index.shape[1] != Y.shape[0]): + + if (index.shape[1] != Y.shape[0]): raise ValueError("Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}") - - if (index.shape[0] == 1): - A_time_var_index = 0; Q_time_var_index = 0 - H_time_var_index = 0; R_time_var_index = 0 + + if (index.shape[0] == 1): + A_time_var_index = 0; Q_time_var_index = 0 + H_time_var_index = 0; R_time_var_index = 0 elif (index.shape[0] == 4): - A_time_var_index = 0; Q_time_var_index = 1 + A_time_var_index = 0; Q_time_var_index = 1 H_time_var_index = 2; R_time_var_index = 3 else: raise ValueError("First Dimension of index must be either 1 or 4.") - + state_dim = p_A.shape[0] # Check and make right shape for model matrices. On exit they all are 3 dimensional. Last dimension # correspond to change in time. @@ -749,35 +791,35 @@ class DescreteStateSpace(object): (p_Q, old_Q_shape) = cls._check_SS_matrix(p_Q, state_dim, measurement_dim, which='Q') (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H') (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R') - + # m_init if m_init is None: m_init = np.zeros((state_dim, time_series_no)) else: m_init = np.atleast_2d(m_init).T - + # P_init if P_init is None: - P_init = np.eye(state_dim) + P_init = np.eye(state_dim) elif not isinstance(P_init, collections.Iterable): #scalar P_init = P_init*np.eye(state_dim) - + if p_kalman_filter_type not in ('regular', 'svd'): raise ValueError("Kalman filer type neither 'regular nor 'svd'.") - + # Functions to pass to the kalman_filter algorithm: # Parameters: # k - number of Kalman filter iteration # m - vector for calculating matrices. Required for EKF. Not used here. - + c_p_A = p_A.copy() # create a copy because this object is passed to the smoother c_p_Q = p_Q.copy() # create a copy because this object is passed to the smoother c_index = index.copy() # create a copy because this object is passed to the smoother - + if calc_grad_log_likelihood: if model_matrices_chage_with_time: - raise ValueError("When computing likelihood gradient A and Q can not change over time.") - + raise ValueError("When computing likelihood gradient A and Q can not change over time.") + dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA') dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ') dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') @@ -787,7 +829,7 @@ class DescreteStateSpace(object): if dm_init is None: # multiple time series mode. Keep grad_params always as a last dimension dm_init = np.zeros((state_dim, time_series_no, grad_params_no)) - + dP_init = grad_calc_params.get('dP_init') if dP_init is None: dP_init = np.zeros((state_dim,state_dim,grad_params_no)) @@ -798,227 +840,227 @@ class DescreteStateSpace(object): dR = None dm_init = None dP_init = None - + dynamic_callables = Std_Dynamic_Callables_Class(c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ) measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR) - + (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) = \ cls._kalman_algorithm_raw(state_dim, dynamic_callables, measurement_callables, Y, m_init, P_init, p_kalman_filter_type = p_kalman_filter_type, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, dm_init=dm_init, dP_init=dP_init) - + # restore shapes so that input parameters are unchenged if old_index_shape is not None: index.shape = old_index_shape - + if old_Y_shape is not None: Y.shape = old_Y_shape - + if old_A_shape is not None: p_A.shape = old_A_shape - + if old_Q_shape is not None: p_Q.shape = old_Q_shape - + if old_H_shape is not None: p_H.shape = old_H_shape - + if old_R_shape is not None: p_R.shape = old_R_shape # Return values - + return (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) - - @classmethod + + @classmethod def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None, P_init=None,calc_log_likelihood=False): - + """ - Extended Kalman Filter - - Input: + Extended Kalman Filter + + Input: ----------------- - + p_state_dim: integer - + p_a: if None - the function from the linear model is assumed. No non- linearity in the dynamic is assumed. - + function (k, x_{k-1}, A_{k}). Dynamic function. - k: (iteration number), + k: (iteration number), x_{k-1}: (previous state) - x_{k}: Jacobian matrices of f_a. In the linear case it is exactly A_{k}. - + x_{k}: Jacobian matrices of f_a. In the linear case it is exactly A_{k}. + p_f_A: matrix - in this case function which returns this matrix is assumed. Look at this parameter description in kalman_filter function. - + function (k, m, P) return Jacobian of dynamic function, it is passed into p_a. - + k: (iteration number), m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - + p_f_Q: matrix. In this case function which returns this matrix is asumed. Look at this parameter description in kalman_filter function. - + function (k). Returns noise matrix of dynamic model on iteration k. - k: (iteration number). - + k: (iteration number). + p_h: if None - the function from the linear measurement model is assumed. No nonlinearity in the measurement is assumed. - + function (k, x_{k}, H_{k}). Measurement function. k: (iteration number), x_{k}: (current state) H_{k}: Jacobian matrices of f_h. In the linear case it is exactly H_{k}. - + p_f_H: matrix - in this case function which returns this matrix is assumed. function (k, m, P) return Jacobian of dynamic function, it is passed into p_h. k: (iteration number), m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - + p_f_R: matrix. In this case function which returns this matrix is asumed. - function (k). Returns noise matrix of measurement equation + function (k). Returns noise matrix of measurement equation on iteration k. - k: (iteration number). - + k: (iteration number). + Y: matrix or vector Data. If Y is matrix then samples are along 0-th dimension and - features along the 1-st. May have missing values. - + features along the 1-st. May have missing values. + p_mean: vector Initial distribution mean. If None it is assumed to be zero - + P_init: square symmetric matrix or scalar Initial covariance of the states. If the parameter is scalar - then it is assumed that initial covariance matrix is unit matrix + then it is assumed that initial covariance matrix is unit matrix multiplied by this scalar. If None the unit matrix is used instead. - + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. """ - + # Y Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) - + # m_init if m_init is None: m_init = np.zeros((p_state_dim,1)) else: m_init = np.atleast_2d(m_init).T - + # P_init if P_init is None: - P_init = np.eye(p_state_dim) + P_init = np.eye(p_state_dim) elif not isinstance(P_init, collections.Iterable): #scalar P_init = P_init*np.eye(p_state_dim) - + if p_a is None: p_a = lambda k,m,A: np.dot(A, m) - + old_A_shape = None if not isinstance(p_f_A, types.FunctionType): # not a function but array p_f_A = np.atleast_1d(p_f_A) (p_A, old_A_shape) = cls._check_A_matrix(p_f_A) - - p_f_A = lambda k, m, P: p_A[:,:, 0] # make function + + p_f_A = lambda k, m, P: p_A[:,:, 0] # make function else: if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]: raise ValueError("p_f_A function returns matrix of wrong size") - - old_Q_shape = None + + old_Q_shape = None if not isinstance(p_f_Q, types.FunctionType): # not a function but array p_f_Q = np.atleast_1d(p_f_Q) (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q) - - p_f_Q = lambda k: p_Q[:,:, 0] # make function + + p_f_Q = lambda k: p_Q[:,:, 0] # make function else: if p_f_Q(1).shape[0] != m_init.shape[0]: - raise ValueError("p_f_Q function returns matrix of wrong size") - + raise ValueError("p_f_Q function returns matrix of wrong size") + if p_h is None: lambda k,m,H: np.dot(H, m) - + old_H_shape = None if not isinstance(p_f_H, types.FunctionType): # not a function but array p_f_H = np.atleast_1d(p_f_H) (p_H, old_H_shape) = cls._check_H_matrix(p_f_H) - - p_f_H = lambda k, m, P: p_H # make function + + p_f_H = lambda k, m, P: p_H # make function else: if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]: - raise ValueError("p_f_H function returns matrix of wrong size") + raise ValueError("p_f_H function returns matrix of wrong size") old_R_shape = None if not isinstance(p_f_R, types.FunctionType): # not a function but array p_f_R = np.atleast_1d(p_f_R) (p_R, old_R_shape) = cls._check_H_matrix(p_f_R) - - p_f_R = lambda k: p_R # make function + + p_f_R = lambda k: p_R # make function else: if p_f_R(1).shape[0] != m_init.shape[0]: raise ValueError("p_f_R function returns matrix of wrong size") - + # class dynamic_callables_class(Dynamic_Model_Callables): -# -# Ak = -# Qk = - - +# +# Ak = +# Qk = + + class measurement_callables_class(R_handling_Class): def __init__(self,R, index, R_time_var_index, unique_R_number): super(measurement_callables_class,self).__init__(R, index, R_time_var_index, unique_R_number) - + Hk = AddMethodToClass(f_H) f_h = AddMethodToClass(f_hl) - - + + (M, P,log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init, - P_init, calc_log_likelihood, + P_init, calc_log_likelihood, calc_grad_log_likelihood=False, grad_calc_params=None) - + if old_Y_shape is not None: Y.shape = old_Y_shape - + if old_A_shape is not None: p_A.shape = old_A_shape - + if old_Q_shape is not None: p_Q.shape = old_Q_shape - + if old_H_shape is not None: p_H.shape = old_H_shape - + if old_R_shape is not None: p_R.shape = old_R_shape - - return (M, P) - + + return (M, P) + @classmethod def _kalman_algorithm_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, Y, m_init, P_init, p_kalman_filter_type='regular', - calc_log_likelihood=False, + calc_log_likelihood=False, calc_grad_log_likelihood=False, grad_params_no=None, dm_init=None, dP_init=None): """ - General nonlinear filtering algorithm for inference in the state-space + General nonlinear filtering algorithm for inference in the state-space model: x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) y_{k} = f_h(k, x_{k}, H_{k}) + r_{k}; r_{k-1} ~ N(0, R_{k}) - - Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) - + + Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) + Current Features: ---------------------------------------- - + 1) Function support "multiple time series mode" which means that exactly the same State-Space model is used to filter several sets of measurements. In this case third dimension of Y should include these state-space measurements @@ -1026,101 +1068,101 @@ class DescreteStateSpace(object): 2) Measurement may include missing values. In this case update step is not done for this measurement. (later may be changed) - - Input: + + Input: ----------------- state_dim: int Demensionality of the states - - p_a: function (k, x_{k-1}, A_{k}). Dynamic function. - k (iteration number), - x_{k-1} + + p_a: function (k, x_{k-1}, A_{k}). Dynamic function. + k (iteration number), + x_{k-1} A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. - + p_f_A: function (k, m, P) return Jacobian of dynamic function, it is passed into p_a. k (iteration number), m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - + p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k. - k (iteration number). - + k (iteration number). + p_h: function (k, x_{k}, H_{k}). Measurement function. k (iteration number), x_{k} H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. - + p_f_H: function (k, m, P) return Jacobian of dynamic function, it is passed into p_h. k (iteration number), m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - - p_f_R: function (k). Returns noise matrix of measurement equation + + p_f_R: function (k). Returns noise matrix of measurement equation on iteration k. - k (iteration number). - + k (iteration number). + Y: matrix or vector or 3D array Data. If Y is matrix then samples are along 0-th dimension and - features along the 1-st. If 3D array then third dimension + features along the 1-st. If 3D array then third dimension correspond to "multiple time series mode". - + m_init: vector or matrix - Initial distribution mean. For "multiple time series mode" + Initial distribution mean. For "multiple time series mode" it is matrix, second dimension of which correspond to different - time series. In regular case ("one time series mode") it is a + time series. In regular case ("one time series mode") it is a vector. - + P_init: matrix or scalar Initial covariance of the states. Must be not None "multiple time series mode" does not affect it, since it does not affect anything related to state variaces. - + p_kalman_filter_type: string - - + + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then the next parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must provide the extra parameters for gradient calculation. - + grad_calc_params: dictionary - Dictionary with derivatives of model matrices with respect + Dictionary with derivatives of model matrices with respect to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init". - + Output: -------------- - + M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array Filter estimates of the state means. In the extra step the initial value is included. In the "multiple time series mode" third dimension correspond to different timeseries. - + P: (no_steps+1, state_dim, state_dim) 3D array Filter estimates of the state covariances. In the extra step the initial value is included. - + log_likelihood: double or (1, time_series_no) 3D array. If the parameter calc_log_likelihood was set to true, return logarithm of marginal likelihood of the state-space model. If the parameter was false, return None. In the "multiple time series mode" it is a vector providing log_likelihood for each time series. - + grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix If calc_grad_log_likelihood is true, return gradient of log likelihood - with respect to parameters. It returns it column wise, so in - "multiple time series mode" gradients for each time series is in the + with respect to parameters. It returns it column wise, so in + "multiple time series mode" gradients for each time series is in the corresponding column. - + """ - + steps_no = Y.shape[0] # number of steps in the Kalman Filter time_series_no = Y.shape[2] # multiple time series mode - + # Allocate space for results # Mean estimations. Initial values will be included M = np.empty(((steps_no+1),state_dim,time_series_no)) @@ -1129,16 +1171,16 @@ class DescreteStateSpace(object): P = np.empty(((steps_no+1),state_dim,state_dim)) P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful P[0,:,:] = P_init # Initialize initial covariance matrix - + if p_kalman_filter_type == 'svd': - (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, + (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, overwrite_a=False,check_finite=True) S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance P_upd = (P_init, S,U) - + log_likelihood = 0 if calc_log_likelihood else None grad_log_likelihood = 0 if calc_grad_log_likelihood else None - + #setting initial values for derivatives update dm_upd = dm_init dP_upd = dP_init @@ -1146,47 +1188,47 @@ class DescreteStateSpace(object): for k in range(0,steps_no): # In this loop index for new estimations is (k+1), old - (k) # This happened because initial values are stored at 0-th index. - + prev_mean = M[k,:,:] # mean from the previous step - if p_kalman_filter_type == 'svd': + if p_kalman_filter_type == 'svd': m_pred, P_pred, dm_pred, dP_pred = \ cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables, - calc_grad_log_likelihood=calc_grad_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, p_dm = dm_upd, p_dP = dP_upd) else: m_pred, P_pred, dm_pred, dP_pred = \ - cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, - calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_upd, p_dP = dP_upd ) - + cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm = dm_upd, p_dP = dP_upd ) + k_measurment = Y[k,:,:] - + if (np.any(np.isnan(k_measurment)) == False): - if p_kalman_filter_type == 'svd': + if p_kalman_filter_type == 'svd': m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, - k_measurment, calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, + cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, + k_measurment, calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, p_dm = dm_pred, p_dP = dP_pred ) - - + + # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, - # calc_log_likelihood=calc_log_likelihood, - # calc_grad_log_likelihood=calc_grad_log_likelihood, + # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, + # calc_log_likelihood=calc_log_likelihood, + # calc_grad_log_likelihood=calc_grad_log_likelihood, # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) - # - # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, + # + # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, # overwrite_a=False,check_finite=True) # P_upd = (P_upd, S,U) else: m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, + cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, p_dm = dm_pred, p_dP = dP_pred ) - + else: # if k_measurment.shape != (1,1): # raise ValueError("Nan measurements are currently not supported for \ @@ -1195,7 +1237,7 @@ class DescreteStateSpace(object): # m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred # log_likelihood_update = 0.0; # d_log_likelihood_update = 0.0; - + if not np.all(np.isnan(k_measurment)): raise ValueError("""Nan measurements are currently not supported if they are intermixed with not NaN measurements""") @@ -1205,268 +1247,268 @@ class DescreteStateSpace(object): log_likelihood_update = np.zeros((time_series_no,)) if calc_grad_log_likelihood: d_log_likelihood_update = np.zeros((grad_params_no,time_series_no)) - - + + if calc_log_likelihood: log_likelihood += log_likelihood_update - + if calc_grad_log_likelihood: grad_log_likelihood += d_log_likelihood_update - + M[k+1,:,:] = m_upd # separate mean value for each time series - - if p_kalman_filter_type == 'svd': + + if p_kalman_filter_type == 'svd': P[k+1,:,:] = P_upd[0] else: P[k+1,:,:] = P_upd - + # !!!Print statistics! Print sizes of matrices # !!!Print statistics! Print iteration time base on another boolean variable return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False)) - - @staticmethod - def _kalman_prediction_step(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, + + @staticmethod + def _kalman_prediction_step(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, p_dm = None, p_dP = None): """ - Desctrete prediction function - + Desctrete prediction function + Input: k:int - Iteration No. Starts at 0. Total number of iterations equal to the + Iteration No. Starts at 0. Total number of iterations equal to the number of measurements. - + p_m: matrix of size (state_dim, time_series_no) - Mean value from the previous step. For "multiple time series mode" + Mean value from the previous step. For "multiple time series mode" it is matrix, second dimension of which correspond to different time series. - + p_P: Covariance matrix from the previous step. - + p_dyn_model_callable: class - - + + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then the next parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must provide the extra parameters for gradient calculation. - + p_dm: 3D array (state_dim, time_series_no, parameters_no) - Mean derivatives from the previous step. For "multiple time series mode" + Mean derivatives from the previous step. For "multiple time series mode" it is 3D array, second dimension of which correspond to different time series. - + p_dP: 3D array (state_dim, state_dim, parameters_no) Mean derivatives from the previous step - + Output: ---------------------------- m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects - Results of the prediction steps. - + Results of the prediction steps. + """ - + # index correspond to values from previous iteration. A = p_dyn_model_callable.Ak(k,p_m,p_P) # state transition matrix (or Jacobian) - Q = p_dyn_model_callable.Qk(k) # state noise matrix - + Q = p_dyn_model_callable.Qk(k) # state noise matrix + # Prediction step -> m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance # Prediction step <- - + if calc_grad_log_likelihood: - dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters + dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters - + param_number = p_dP.shape[2] - + # p_dm, p_dP - derivatives form the previoius step dm_pred = np.empty(p_dm.shape) dP_pred = np.empty(p_dP.shape) - + for j in range(param_number): dA = dA_all_params[:,:,j] dQ = dQ_all_params[:,:,j] - + dP = p_dP[:,:,j] dm = p_dm[:,:,j] dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, dm) # prediction step derivatives for current parameter: - + dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T)) - dP_pred[:,:,j] += dP_pred[:,:,j].T + dP_pred[:,:,j] += dP_pred[:,:,j].T dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ - + dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize else: dm_pred = None dP_pred = None - + return m_pred, P_pred, dm_pred, dP_pred - - @staticmethod - def _kalman_prediction_step_SVD(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, + + @staticmethod + def _kalman_prediction_step_SVD(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, p_dm = None, p_dP = None): """ - Desctrete prediction function - + Desctrete prediction function + Input: k:int - Iteration No. Starts at 0. Total number of iterations equal to the + Iteration No. Starts at 0. Total number of iterations equal to the number of measurements. - + p_m: matrix of size (state_dim, time_series_no) - Mean value from the previous step. For "multiple time series mode" + Mean value from the previous step. For "multiple time series mode" it is matrix, second dimension of which correspond to different time series. - + p_P: tuple (Prev_cov, S, V) Covariance matrix from the previous step and its SVD decomposition. - Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V) - + Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V) + p_dyn_model_callable: object - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then the next parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must provide the extra parameters for gradient calculation. - + p_dm: 3D array (state_dim, time_series_no, parameters_no) - Mean derivatives from the previous step. For "multiple time series mode" + Mean derivatives from the previous step. For "multiple time series mode" it is 3D array, second dimension of which correspond to different time series. - + p_dP: 3D array (state_dim, state_dim, parameters_no) Mean derivatives from the previous step - + Output: ---------------------------- m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects - Results of the prediction steps. - + Results of the prediction steps. + """ - + # covariance from the previous step and its SVD decomposition # p_prev_cov = v * S * V.T - Prev_cov, S_old, V_old = p_P - #p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step - + Prev_cov, S_old, V_old = p_P + #p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step + # index correspond to values from previous iteration. A = p_dyn_model_callable.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian) Q = p_dyn_model_callable.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step) - Q_sr = p_dyn_model_callable.Q_srk(k) + Q_sr = p_dyn_model_callable.Q_srk(k) # Prediction step -> m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean - + # coavariance prediction have changed: svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) ) - (U,S,Vh) = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True, + (U,S,Vh) = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True, overwrite_a=False,check_finite=True) - + # predicted variance computed by the regular method. For testing #P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q V_new = Vh.T S_new = S**2 - + P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance P_pred = (P_pred, S_new, Vh.T) # Prediction step <- - + # derivatives if calc_grad_log_likelihood: - dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters + dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters - + param_number = p_dP.shape[2] - + # p_dm, p_dP - derivatives form the previoius step dm_pred = np.empty(p_dm.shape) dP_pred = np.empty(p_dP.shape) - + for j in range(param_number): dA = dA_all_params[:,:,j] dQ = dQ_all_params[:,:,j] - + #dP = p_dP[:,:,j] #dm = p_dm[:,:,j] dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j]) # prediction step derivatives for current parameter: - - + + dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T)) - dP_pred[:,:,j] += dP_pred[:,:,j].T + dP_pred[:,:,j] += dP_pred[:,:,j].T dP_pred[:,:,j] += np.dot( A ,np.dot(p_dP[:,:,j], A.T)) + dQ - + dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize else: dm_pred = None dP_pred = None - + return m_pred, P_pred, dm_pred, dP_pred - + @staticmethod - def _kalman_update_step(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, + def _kalman_update_step(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, calc_grad_log_likelihood=False, p_dm = None, p_dP = None): """ Input: - + k: int - Iteration No. Starts at 0. Total number of iterations equal to the + Iteration No. Starts at 0. Total number of iterations equal to the number of measurements. - + m_P: matrix of size (state_dim, time_series_no) - Mean value from the previous step. For "multiple time series mode" + Mean value from the previous step. For "multiple time series mode" it is matrix, second dimension of which correspond to different time series. - + p_P: Covariance matrix from the prediction step. - + p_meas_model_callable: object - + measurement: (measurement_dim, time_series_no) matrix - One measurement used on the current update step. For - "multiple time series mode" it is matrix, second dimension of + One measurement used on the current update step. For + "multiple time series mode" it is matrix, second dimension of which correspond to different time series. - + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then the next parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must provide the extra parameters for gradient calculation. - + p_dm: 3D array (state_dim, time_series_no, parameters_no) - Mean derivatives from the prediction step. For "multiple time series mode" + Mean derivatives from the prediction step. For "multiple time series mode" it is 3D array, second dimension of which correspond to different time series. - + p_dP: array Covariance derivatives from the prediction step. - + Output: ---------------------------- m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects Results of the prediction steps. - + log_likelihood_update: double or 1D array - Update to the log_likelihood from this step - + Update to the log_likelihood from this step + d_log_likelihood_update: (grad_params_no, time_series_no) matrix Update to the gradient of log_likelihood, "multiple time series mode" adds extra columns to the gradient. - - """ + + """ #import pdb; pdb.set_trace() - + m_pred = p_m # from prediction step - P_pred = p_P # from prediction step - + P_pred = p_P # from prediction step + H = p_meas_model_callable.Hk(k, m_pred, P_pred) R = p_meas_model_callable.Rk(k) - + time_series_no = p_m.shape[1] # number of time serieses log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None @@ -1478,7 +1520,7 @@ class DescreteStateSpace(object): if (S < 0): raise ValueError("Kalman Filter Update: S is negative step %i" % k ) #import pdb; pdb.set_trace() - + K = P_pred.dot(H.T) / S if calc_log_likelihood: log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + @@ -1490,42 +1532,42 @@ class DescreteStateSpace(object): else: LL,islower = linalg.cho_factor(S) K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T - + if calc_log_likelihood: - log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + + log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + 2*np.sum( np.log(np.diag(LL)) ) +\ np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v - + if calc_grad_log_likelihood: - dm_pred_all_params = p_dm # derivativas of the prediction phase + dm_pred_all_params = p_dm # derivativas of the prediction phase dP_pred_all_params = p_dP - + param_number = p_dP.shape[2] - + dH_all_params = p_meas_model_callable.dHk(k) dR_all_params = p_meas_model_callable.dRk(k) - + dm_upd = np.empty(dm_pred_all_params.shape) dP_upd = np.empty(dP_pred_all_params.shape) - + # firts dimension parameter_no, second - time series number d_log_likelihood_update = np.empty((param_number,time_series_no)) for param in range(param_number): - + dH = dH_all_params[:,:,param] dR = dR_all_params[:,:,param] - + dm_pred = dm_pred_all_params[:,:,param] dP_pred = dP_pred_all_params[:,:,param] - + # Terms in the likelihood derivatives - dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) + dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) dS = np.dot(dH, np.dot( P_pred, H.T)) dS += dS.T dS += np.dot(H, np.dot( dP_pred, H.T)) + dR - + # TODO: maybe symmetrize dS - + #dm and dP for the next stem if LL is not None: # the state vector is not a scalar tmp1 = linalg.cho_solve((LL,islower), H).T @@ -1535,17 +1577,17 @@ class DescreteStateSpace(object): tmp1 = H.T / S tmp2 = dH.T / S tmp3 = dS.T / S - + dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ np.dot( P_pred, np.dot( tmp1, tmp3 ) ) - + # terms required for the next step, save this for each parameter dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) - - dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) + + dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) dP_upd[:,:,param] += dP_upd[:,:,param].T dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) - + dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize # computing the likelihood change for each parameter: if LL is not None: # the state vector is not 1D @@ -1554,139 +1596,139 @@ class DescreteStateSpace(object): else: # the state vector is a scalar #tmp4 = dv / S tmp5 = v / S - - + + d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \ - np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) - # Before + np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) + # Before #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ - #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) - - - + #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) + + + # Compute the actual updates for mean and variance of the states. m_upd = m_pred + K.dot( v ) - + # Covariance update and ensure it is symmetric P_upd = K.dot(S).dot(K.T) P_upd = 0.5*(P_upd + P_upd.T) P_upd = P_pred - P_upd# this update matrix is symmetric - + return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update - + @staticmethod - def _kalman_update_step_SVD(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, + def _kalman_update_step_SVD(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, calc_grad_log_likelihood=False, p_dm = None, p_dP = None): """ Input: - + k: int - Iteration No. Starts at 0. Total number of iterations equal to the + Iteration No. Starts at 0. Total number of iterations equal to the number of measurements. - + m_P: matrix of size (state_dim, time_series_no) - Mean value from the previous step. For "multiple time series mode" + Mean value from the previous step. For "multiple time series mode" it is matrix, second dimension of which correspond to different time series. - + p_P: tuple (P_pred, S, V) Covariance matrix from the prediction step and its SVD decomposition. P_pred = V * S * V.T The tuple is (P_pred, S, V) - + p_h: function (k, x_{k}, H_{k}). Measurement function. k (iteration number), starts at 0 - x_{k} state + x_{k} state H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. - + p_f_H: function (k, m, P) return Jacobian of measurement function, it is passed into p_h. k (iteration number), starts at 0 m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - - p_f_R: function (k). Returns noise matrix of measurement equation + + p_f_R: function (k). Returns noise matrix of measurement equation on iteration k. k (iteration number). starts at 0 - - p_f_iRsr: function (k). Returns the square root of the noise matrix of - measurement equation on iteration k. + + p_f_iRsr: function (k). Returns the square root of the noise matrix of + measurement equation on iteration k. k (iteration number). starts at 0 - + measurement: (measurement_dim, time_series_no) matrix - One measurement used on the current update step. For - "multiple time series mode" it is matrix, second dimension of + One measurement used on the current update step. For + "multiple time series mode" it is matrix, second dimension of which correspond to different time series. - + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then the next parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must provide the extra parameters for gradient calculation. - + p_dm: 3D array (state_dim, time_series_no, parameters_no) - Mean derivatives from the prediction step. For "multiple time series mode" + Mean derivatives from the prediction step. For "multiple time series mode" it is 3D array, second dimension of which correspond to different time series. - + p_dP: array Covariance derivatives from the prediction step. - + grad_calc_params_2: List or None List with derivatives. The first component is 'f_dH' - function(k) which returns the derivative of H. The second element is 'f_dR' - function(k). Function which returns the derivative of R. - + Output: ---------------------------- m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects Results of the prediction steps. - + log_likelihood_update: double or 1D array - Update to the log_likelihood from this step - + Update to the log_likelihood from this step + d_log_likelihood_update: (grad_params_no, time_series_no) matrix Update to the gradient of log_likelihood, "multiple time series mode" adds extra columns to the gradient. - - """ - + + """ + #import pdb; pdb.set_trace() - + m_pred = p_m # from prediction step - P_pred,S_pred,V_pred = p_P # from prediction step - + P_pred,S_pred,V_pred = p_P # from prediction step + H = p_meas_model_callable.Hk(k, m_pred, P_pred) R = p_meas_model_callable.Rk(k) - R_isr = p_meas_model_callable.R_isrk(k) # square root of the inverse of R matrix - + R_isr = p_meas_model_callable.R_isrk(k) # square root of the inverse of R matrix + time_series_no = p_m.shape[1] # number of time serieses - + log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None # Update step (only if there is data) #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. v = measurement-p_meas_model_callable.f_h(k, m_pred, H) - + svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) ) - - (U,S,Vh) = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True, + + (U,S,Vh) = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True, overwrite_a=False,check_finite=True) - + # P_upd = U_upd S_upd**2 U_upd.T - U_upd = np.dot(V_pred, Vh.T) + U_upd = np.dot(V_pred, Vh.T) S_upd = (1.0/S)**2 - + P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step - + # stil need to compute S and K for derivative computation S = H.dot(P_pred).dot(H.T) + R if measurement.shape[0]==1: # measurements are one dimensional if (S < 0): raise ValueError("Kalman Filter Update: S is negative step %i" % k ) #import pdb; pdb.set_trace() - + K = P_pred.dot(H.T) / S if calc_log_likelihood: log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + @@ -1698,49 +1740,49 @@ class DescreteStateSpace(object): else: LL,islower = linalg.cho_factor(S) K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T - + if calc_log_likelihood: - log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + + log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + 2*np.sum( np.log(np.diag(LL)) ) +\ - np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v - - + np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v + + # Old method of computing updated covariance (for testing) -> #P_upd_tst = K.dot(S).dot(K.T) #P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T) #P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric # Old method of computing updated covariance (for testing) <- - + if calc_grad_log_likelihood: - dm_pred_all_params = p_dm # derivativas of the prediction phase + dm_pred_all_params = p_dm # derivativas of the prediction phase dP_pred_all_params = p_dP - + param_number = p_dP.shape[2] - + dH_all_params = p_meas_model_callable.dHk(k) dR_all_params = p_meas_model_callable.dRk(k) - + dm_upd = np.empty(dm_pred_all_params.shape) dP_upd = np.empty(dP_pred_all_params.shape) - + # firts dimension parameter_no, second - time series number d_log_likelihood_update = np.empty((param_number,time_series_no)) for param in range(param_number): - + dH = dH_all_params[:,:,param] dR = dR_all_params[:,:,param] - + dm_pred = dm_pred_all_params[:,:,param] dP_pred = dP_pred_all_params[:,:,param] - + # Terms in the likelihood derivatives - dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) + dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) dS = np.dot(dH, np.dot( P_pred, H.T)) dS += dS.T dS += np.dot(H, np.dot( dP_pred, H.T)) + dR - + # TODO: maybe symmetrize dS - + #dm and dP for the next stem if LL is not None: # the state vector is not a scalar tmp1 = linalg.cho_solve((LL,islower), H).T @@ -1750,77 +1792,77 @@ class DescreteStateSpace(object): tmp1 = H.T / S tmp2 = dH.T / S tmp3 = dS.T / S - + dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ np.dot( P_pred, np.dot( tmp1, tmp3 ) ) - + # terms required for the next step, save this for each parameter dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) - - dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) + + dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) dP_upd[:,:,param] += dP_upd[:,:,param].T dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) - + dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize # computing the likelihood change for each parameter: if LL is not None: # the state vector is not 1D tmp5 = linalg.cho_solve((LL,islower), v) else: # the state vector is a scalar tmp5 = v / S - - + + d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \ - np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) - # Before + np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) + # Before #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ - #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) - + #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) + # Compute the actual updates for mean of the states. Variance update # is computed earlier. m_upd = m_pred + K.dot( v ) - + return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update - + @staticmethod - def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step, + def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step, p_P_prev_step, p_dynamic_callables): """ Rauch–Tung–Striebel(RTS) update step - + Input: ----------------------------- k: int - Iteration No. Starts at 0. Total number of iterations equal to the + Iteration No. Starts at 0. Total number of iterations equal to the number of measurements. - + p_m: matrix of size (state_dim, time_series_no) Filter mean on step k - + p_P: matrix of size (state_dim,state_dim) Filter Covariance on step k - + p_m_pred: matrix of size (state_dim, time_series_no) Means from the smoother prediction step. - + p_P_pred: Covariance from the smoother prediction step. - + p_m_prev_step - Smoother mean from the previous step. - + Smoother mean from the previous step. + p_P_prev_step: - Smoother covariance from the previous step. - + Smoother covariance from the previous step. + p_f_A: function (k, m, P) return Jacobian of dynamic function, it is passed into p_a. k (iteration number), starts at 0 m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - """ - + """ + A = p_dynamic_callables.Ak(k,p_m,p_P) # state transition matrix (or Jacobian) - + tmp = np.dot( A, p_P.T) if A.shape[0] == 1: # 1D states G = tmp.T / p_P_pred # P[:,:,k] is symmetric @@ -1833,16 +1875,16 @@ class DescreteStateSpace(object): # hence the Cholesky method does not work. res = sp.linalg.lstsq(p_P_pred, tmp) G = res[0].T - + m_upd = p_m + G.dot( p_m_prev_step-p_m_pred ) P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T) - + P_upd = 0.5*(P_upd + P_upd.T) - + return m_upd, P_upd, G - - @classmethod - def rts_smoother(cls,state_dim, p_dynamic_callables, filter_means, + + @classmethod + def rts_smoother(cls,state_dim, p_dynamic_callables, filter_means, filter_covars): """ This function implements Rauch–Tung–Striebel(RTS) smoother algorithm @@ -1850,125 +1892,125 @@ class DescreteStateSpace(object): These notations are the same: x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - - Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) - + + Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) + Input: -------------- - - p_a: function (k, x_{k-1}, A_{k}). Dynamic function. + + p_a: function (k, x_{k-1}, A_{k}). Dynamic function. k (iteration number), starts at 0 x_{k-1} State from the previous step A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. - + p_f_A: function (k, m, P) return Jacobian of dynamic function, it is passed into p_a. k (iteration number), starts at 0 m: point where Jacobian is evaluated P: parameter for Jacobian, usually covariance matrix. - + p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k. k (iteration number). starts at 0 - - filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array + + filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array Results of the Kalman Filter means estimation. - + filter_covars: (no_steps+1, state_dim, state_dim) 3D array Results of the Kalman Filter covariance estimation. - + Output: ------------- - - M: (no_steps+1, state_dim) matrix + + M: (no_steps+1, state_dim) matrix Smoothed estimates of the state means - + P: (no_steps+1, state_dim, state_dim) 3D array Smoothed estimates of the state covariances """ no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance) - + M = np.empty(filter_means.shape) # smoothed means P = np.empty(filter_covars.shape) # smoothed covars - #G = np.empty( (no_steps,state_dim,state_dim) ) # G from the update step of the smoother - + #G = np.empty( (no_steps,state_dim,state_dim) ) # G from the update step of the smoother + M[-1,:] = filter_means[-1,:] P[-1,:,:] = filter_covars[-1,:,:] - for k in range(no_steps-1,-1,-1): - + for k in range(no_steps-1,-1,-1): + m_pred, P_pred, tmp1, tmp2 = \ - cls._kalman_prediction_step(k, filter_means[k,:], - filter_covars[k,:,:], p_dynamic_callables, - calc_grad_log_likelihood=False) + cls._kalman_prediction_step(k, filter_means[k,:], + filter_covars[k,:,:], p_dynamic_callables, + calc_grad_log_likelihood=False) p_m = filter_means[k,:] if len(p_m.shape)<2: p_m.shape = (p_m.shape[0],1) - + p_m_prev_step = M[k+1,:] if len(p_m_prev_step.shape)<2: p_m_prev_step.shape = (p_m_prev_step.shape[0],1) - - m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(k, - p_m ,filter_covars[k,:,:], + + m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(k, + p_m ,filter_covars[k,:,:], m_pred, P_pred, p_m_prev_step ,P[k+1,:,:], p_dynamic_callables) - + M[k,:] = m_upd#np.squeeze(m_upd) P[k,:,:] = P_upd #G[k,:,:] = G_upd.T # store transposed G. # Return values - + return (M, P) #, G) - + @staticmethod def _EM_gradient(A,Q,H,R,m_init,P_init,measurements, M, P, G, dA, dQ, dH, dR, dm_init, dP_init): """ Gradient computation with the EM algorithm. - + Input: ----------------- - + M: Means from the smoother P: Variances from the smoother G: Gains? from the smoother """ import pdb; pdb.set_trace(); - + param_number = dA.shape[-1] d_log_likelihood_update = np.empty((param_number,1)) sample_no = measurements.shape[0] P_1 = P[1:,:,:] # remove 0-th step P_2 = P[0:-1,:,:] # remove 0-th step - + M_1 = M[1:,:] # remove 0-th step M_2 = M[0:-1,:] # remove the last step - + Sigma = np.mean(P_1,axis=0) + np.dot(M_1.T, M_1) / sample_no # Phi = np.mean(P_2,axis=0) + np.dot(M_2.T, M_2) / sample_no # - + B = np.dot( measurements.T, M_1 )/ sample_no C = (sp.einsum( 'ijk,ikl', P_1, G) + np.dot(M_1.T, M_2)) / sample_no # - + # C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) ) # for k in range(P_1.shape[0]): # C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] ) # C1 = C1 / sample_no - + D = np.dot( measurements.T, measurements ) / sample_no - + try: P_init_inv = sp.linalg.inv(P_init) - + if np.max( np.abs(P_init_inv)) > 10e13: compute_P_init_terms = False else: compute_P_init_terms = True except np.linalg.LinAlgError: compute_P_init_terms = False - + try: Q_inv = sp.linalg.inv(Q) - + if np.max( np.abs(Q_inv)) > 10e13: compute_Q_terms = False else: @@ -1978,60 +2020,60 @@ class DescreteStateSpace(object): try: R_inv = sp.linalg.inv(R) - + if np.max( np.abs(R_inv)) > 10e13: compute_R_terms = False else: compute_R_terms = True except np.linalg.LinAlgError: compute_R_terms = False - - + + d_log_likelihood_update = np.zeros((param_number,1)) - for j in range(param_number): + for j in range(param_number): if compute_P_init_terms: d_log_likelihood_update[j,:] -= 0.5 * np.sum(P_init_inv* dP_init[:,:,j].T ) #p #m - + M0_smoothed = M[0]; M0_smoothed.shape = (M0_smoothed.shape[0],1) - tmp1 = np.dot( dP_init[:,:,j], np.dot( P_init_inv, (P[0,:,:] + sp.outer( (M0_smoothed - m_init), (M0_smoothed - m_init) )) ) ) #p #m + tmp1 = np.dot( dP_init[:,:,j], np.dot( P_init_inv, (P[0,:,:] + sp.outer( (M0_smoothed - m_init), (M0_smoothed - m_init) )) ) ) #p #m d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp1.T ) - + tmp2 = sp.outer( dm_init[:,j], M0_smoothed ) tmp2 += tmp2.T d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp2.T ) - + if compute_Q_terms: - + d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv* dQ[:,:,j].T ) #m - - tmp1 = np.dot(C,A.T); tmp1 += tmp1.T; tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi,A.T)) #m + + tmp1 = np.dot(C,A.T); tmp1 += tmp1.T; tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi,A.T)) #m tmp1 = np.dot( dQ[:,:,j], np.dot( Q_inv, tmp1) ) d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(Q_inv * tmp1.T) - - tmp2 = np.dot( dA[:,:,j], C.T); tmp2 += tmp2.T; + + tmp2 = np.dot( dA[:,:,j], C.T); tmp2 += tmp2.T; tmp3 = np.dot(dA[:,:,j], np.dot(Phi,A.T)); tmp3 += tmp3.T - d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv.T * (tmp3 - tmp2) ) - + d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv.T * (tmp3 - tmp2) ) + if compute_R_terms: - d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv* dR[:,:,j].T ) - - tmp1 = np.dot(B,H.T); tmp1 += tmp1.T; tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma,H.T)) + d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv* dR[:,:,j].T ) + + tmp1 = np.dot(B,H.T); tmp1 += tmp1.T; tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma,H.T)) tmp1 = np.dot( dR[:,:,j], np.dot( R_inv, tmp1) ) d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(R_inv * tmp1.T) - - tmp2 = np.dot( dH[:,:,j], B.T); tmp2 += tmp2.T; + + tmp2 = np.dot( dH[:,:,j], B.T); tmp2 += tmp2.T; tmp3 = np.dot(dH[:,:,j], np.dot(Sigma,H.T)); tmp3 += tmp3.T d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv.T * (tmp3 - tmp2) ) - + return d_log_likelihood_update - + @staticmethod def _check_SS_matrix(p_M, state_dim, measurement_dim, which='A'): """ Veryfy that on exit the matrix has appropriate shape for the KF algorithm. - + Input: - p_M: matrix + p_M: matrix As it is given for the user state_dim: int State dimensioanlity @@ -2042,22 +2084,22 @@ class DescreteStateSpace(object): Output: --------------- p_M: matrix of the right shape - + old_M_shape: tuple Old Shape """ - + old_M_shape = None - if len(p_M.shape) < 3: # new shape is 3 dimensional + if len(p_M.shape) < 3: # new shape is 3 dimensional old_M_shape = p_M.shape # save shape to restore it on exit if len(p_M.shape) == 2: # matrix p_M.shape = (p_M.shape[0],p_M.shape[1],1) - elif len(p_M.shape) == 1: # scalar but in array already + elif len(p_M.shape) == 1: # scalar but in array already if (p_M.shape[0] != 1): raise ValueError("Matrix %s is an 1D array, while it must be a matrix or scalar", which) else: p_M.shape = (1,1,1) - + if (which == 'A') or (which == 'Q'): if (p_M.shape[0] != state_dim) or (p_M.shape[1] != state_dim): raise ValueError("%s must be a square matrix of size (%i,%i)" % (which, state_dim, state_dim)) @@ -2067,41 +2109,41 @@ class DescreteStateSpace(object): if (which == 'R'): if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != measurement_dim): raise ValueError("R must be of shape (measurement_dim, measurement_dim) (%i,%i)" % (measurement_dim, measurement_dim)) - + return (p_M,old_M_shape) - - @staticmethod + + @staticmethod def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'): """ Function checks (mostly check dimensions) matrices for marginal likelihood gradient parameters calculation. It check dA, dQ matrices. - + Input: ------------- - dM: None, scaler or 3D matrix + dM: None, scaler or 3D matrix It is supposed to be (state_dim,state_dim,grad_params_no) matrix. If None then zero matrix is assumed. If scalar then the function checks consistency with "state_dim" and "grad_params_no". - + state_dim: int State dimensionality - + grad_params_no: int How many parrameters of likelihood gradient in total. - + which: string 'dA' or 'dQ' - - + + Output: -------------- - function of (k) which returns the parameters matrix. - - """ - - + function of (k) which returns the parameters matrix. + + """ + + if dM is None: - dM=np.zeros((state_dim,state_dim,grad_params_no)) + dM=np.zeros((state_dim,state_dim,grad_params_no)) elif isinstance(dM, np.ndarray): if state_dim == 1: if len(dM.shape) < 3: @@ -2114,54 +2156,54 @@ class DescreteStateSpace(object): raise ValueError("When computing likelihood gradient wrong %s dimension." % which) else: dM = np.ones((1,1,1)) * dM - + # if not isinstance(dM, types.FunctionType): # f_dM = lambda k: dM # else: # f_dM = dM - + return dM - - - @staticmethod + + + @staticmethod def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'): """ Function checks (mostly check dimensions) matrices for marginal likelihood gradient parameters calculation. It check dH, dR matrices. - + Input: ------------- - dM: None, scaler or 3D matrix - It is supposed to be + dM: None, scaler or 3D matrix + It is supposed to be (measurement_dim ,state_dim,grad_params_no) for "dH" matrix. (measurement_dim,measurement_dim,grad_params_no) for "dR" - + If None then zero matrix is assumed. If scalar then the function checks consistency with "state_dim" and "grad_params_no". - + state_dim: int State dimensionality - + grad_params_no: int How many parrameters of likelihood gradient in total. - + measurement_dim: int Dimensionality of measurements. - + which: string 'dH' or 'dR' - - + + Output: -------------- - function of (k) which returns the parameters matrix. - """ - + function of (k) which returns the parameters matrix. + """ + if dM is None: if which == 'dH': dM=np.zeros((measurement_dim ,state_dim,grad_params_no)) elif which == 'dR': - dM=np.zeros((measurement_dim,measurement_dim,grad_params_no)) + dM=np.zeros((measurement_dim,measurement_dim,grad_params_no)) elif isinstance(dM, np.ndarray): if state_dim == 1: if len(dM.shape) < 3: @@ -2171,22 +2213,22 @@ class DescreteStateSpace(object): if which == 'dH': dM.shape = (measurement_dim,state_dim,1) elif which == 'dR': - dM.shape = (measurement_dim,measurement_dim,1) + dM.shape = (measurement_dim,measurement_dim,1) elif isinstance(dM, np.int): if state_dim > 1: raise ValueError("When computing likelihood gradient wrong dH dimension.") else: dM = np.ones((1,1,1)) * dM - + # if not isinstance(dM, types.FunctionType): # f_dM = lambda k: dM # else: # f_dM = dM - + return dM - - - + + + class Struct(object): pass @@ -2194,30 +2236,30 @@ class ContDescrStateSpace(DescreteStateSpace): """ Class for continuous-discrete Kalman filter. State equation is continuous while measurement equation is discrete. - + d x(t)/ dt = F x(t) + L q; where q~ N(0, Qc) y_{t_k} = H_{k} x_{t_k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - + """ - + class AQcompute_once(Q_handling_Class): """ Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state equation. dt - time steps. - + It has the same interface as AQcompute_batch. - + It computes matrices for only one time step. This object is used when there are many different time steps and storing matrices for each of them would take too much memory. """ - + def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): """ - Constructor. All necessary parameters are passed here and stored - in the opject. - + Constructor. All necessary parameters are passed here and stored + in the opject. + Input: ------------------- F, L, Qc, P_inf : matrices @@ -2226,10 +2268,10 @@ class ContDescrStateSpace(DescreteStateSpace): All time steps compute_derivatives: bool Whether to calculate derivatives - + dP_inf, dF, dQc: 3D array Derivatives if they are required - + Output: ------------------- Nothing @@ -2237,48 +2279,48 @@ class ContDescrStateSpace(DescreteStateSpace): # Copies are done because this object is used later in smoother # and these parameters must not change. self.F = F.copy() - self.L = L.copy() + self.L = L.copy() self.Qc = Qc.copy() - + self.dt = dt # copy is not taken because dt is internal parameter - - # Parameters are used to calculate derivatives but derivatives + + # Parameters are used to calculate derivatives but derivatives # are not used in the smoother. Therefore copies are not taken. - self.P_inf = P_inf + self.P_inf = P_inf self.dP_inf = dP_inf self.dF = dF - self.dQc = dQc - - self.compute_derivatives = compute_derivatives - self.grad_params_no = grad_params_no - - + self.dQc = dQc + + self.compute_derivatives = compute_derivatives + self.grad_params_no = grad_params_no + + self.last_k = 0 self.last_k_computed = False self.v_Ak = None self.v_Qk = None self.v_dAk = None self.v_dQk = None - + self.square_root_computed = False # !!!Print statistics! Which object is created - + def f_a(self, k,m,A): """ Dynamic model """ - + return np.dot(A, m) # default dynamic model - + def _recompute_for_new_k(self,k): """ Computes the necessary matrices for an index k and store the results. - + Input: ---------------------- k: int - Index in the time differences array dt where to compute matrices - + Index in the time differences array dt where to compute matrices + Output: ---------------------- Ak,Qk, dAk, dQk: matrices and/or 3D arrays @@ -2286,9 +2328,9 @@ class ContDescrStateSpace(DescreteStateSpace): """ if (self.last_k != k) or (self.last_k_computed == False): v_Ak,v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F, - self.L,self.Qc,self.dt[k],self.compute_derivatives, + self.L,self.Qc,self.dt[k],self.compute_derivatives, grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc) - + self.last_k = k self.last_k_computed = True self.v_Ak = v_Ak @@ -2301,47 +2343,47 @@ class ContDescrStateSpace(DescreteStateSpace): v_Qk = self.v_Qk v_dAk = self.v_dAk v_dQk = self.v_dQk - + # !!!Print statistics! Print sizes of matrices - - return v_Ak,v_Qk, v_dAk, v_dQk - + + return v_Ak,v_Qk, v_dAk, v_dQk + def reset(self, compute_derivatives): """ For reusing this object e.g. in smoother computation. Actually, this object can not be reused because it computes the matrices on - every iteration. But this method is written for keeping the same + every iteration. But this method is written for keeping the same interface with the class AQcompute_batch. """ - + self.last_k = 0 self.last_k_computed = False self.compute_derivatives = compute_derivatives self.Q_square_root_computed = False - + return self - + def Ak(self,k,m,P): v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_Ak - + def Qk(self,k): v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_Qk - + def dAk(self, k): v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_dAk - + def dQk(self, k): v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_dQk - + def Q_srk(self,k): """ Square root of the noise matrix Q """ - + if ((self.last_k == k) and (self.last_k_computed == True)): if not self.Q_square_root_computed: (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) @@ -2352,7 +2394,7 @@ class ContDescrStateSpace(DescreteStateSpace): square_root = self.Q_square_root else: raise ValueError("Square root of Q can not be computed") - + return square_root def return_last(self): @@ -2367,29 +2409,29 @@ class ContDescrStateSpace(DescreteStateSpace): Q = self.v_Qk dA = self.v_dAk dQ = self.v_dQk - - return k, A, Q, dA, dQ - + + return k, A, Q, dA, dQ + class AQcompute_batch_Python(Q_handling_Class): """ Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state - equation. dt - time steps. - + equation. dt - time steps. + It has the same interface as AQcompute_once. - + It computes matrices for all time steps. This object is used when - there are not so many (controlled by internal variable) + there are not so many (controlled by internal variable) different time steps and storing all the matrices do not take too much memory. - + Since all the matrices are computed all together, this object can be used - in smoother without repeating the computations. + in smoother without repeating the computations. """ def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): """ - Constructor. All necessary parameters are passed here and stored - in the opject. - + Constructor. All necessary parameters are passed here and stored + in the opject. + Input: ------------------- F, L, Qc, P_inf : matrices @@ -2398,18 +2440,18 @@ class ContDescrStateSpace(DescreteStateSpace): All time steps compute_derivatives: bool Whether to calculate derivatives - + dP_inf, dF, dQc: 3D array Derivatives if they are required - + Output: ------------------- Nothing """ As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, - L,Qc,dt,compute_derivatives, + L,Qc,dt,compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) - + self.As = As self.Qs = Qs self.dAs = dAs @@ -2419,18 +2461,18 @@ class ContDescrStateSpace(DescreteStateSpace): (self.dAs.nbytes if (self.dAs is not None) else 0) +\ (self.dQs.nbytes if (self.dQs is not None) else 0) +\ (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) - + self.Q_svd_dict = {} self.last_k = None # !!!Print statistics! Which object is created # !!!Print statistics! Print sizes of matrices - + def f_a(self, k,m,A): """ Dynamic model - """ + """ return np.dot(A, m) # default dynamic model - + def reset(self, compute_derivatives=False): """ For reusing this object e.g. in smoother computation. It makes sence @@ -2438,24 +2480,24 @@ class ContDescrStateSpace(DescreteStateSpace): time steps. """ return self - + def Ak(self,k,m,P): self.last_k = k return self.As[:,:, self.reconstruct_indices[k]] - + def Qk(self,k): self.last_k = k return self.Qs[:,:, self.reconstruct_indices[k]] - + def dAk(self,k): self.last_k = k return self.dAs[:,:, :, self.reconstruct_indices[k]] - + def dQk(self,k): self.last_k = k return self.dQs[:,:, :, self.reconstruct_indices[k]] - - + + def Q_srk(self,k): """ Square root of the noise matrix Q @@ -2464,19 +2506,19 @@ class ContDescrStateSpace(DescreteStateSpace): if matrix_index in self.Q_svd_dict: square_root = self.Q_svd_dict[matrix_index] else: - (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], - full_matrices=False, compute_uv=True, + (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], + full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) square_root = U * np.sqrt(S) self.Q_svd_dict[matrix_index] = square_root - + return square_root def return_last(self): """ Function returns last available matrices. """ - + if (self.last_k is None): raise ValueError("Matrices are not computed.") else: @@ -2485,173 +2527,173 @@ class ContDescrStateSpace(DescreteStateSpace): Q = self.Qs[:,:, ind] dA = self.dAs[:,:, :, ind] dQ = self.dQs[:,:, :, ind] - + return self.last_k, A, Q, dA, dQ @classmethod - def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None, - m_init=None, P_init=None, + def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None, + m_init=None, P_init=None, p_kalman_filter_type='regular', - calc_log_likelihood=False, - calc_grad_log_likelihood=False, + calc_log_likelihood=False, + calc_grad_log_likelihood=False, grad_params_no=0, grad_calc_params=None): """ This function implements the continuous-discrete Kalman Filter algorithm These notations for the State-Space model are assumed: d/dt x(t) = F * x(t) + L * w(t); w(t) ~ N(0, Qc) y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - - Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) - + + Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) + Current Features: ---------------------------------------- - 1) The function generaly do not modify the passed parameters. If + 1) The function generaly do not modify the passed parameters. If it happens then it is an error. There are several exeprions: scalars - can be modified into a matrix, in some rare cases shapes of + can be modified into a matrix, in some rare cases shapes of the derivatives matrices may be changed, it is ignored for now. - + 2) Copies of F,L,Qc are created in memory because they may be used later in smoother. References to copies are kept in "AQcomp" object return parameter. - + 3) Function support "multiple time series mode" which means that exactly the same State-Space model is used to filter several sets of measurements. In this case third dimension of Y should include these state-space measurements Log_likelihood and Grad_log_likelihood have the corresponding dimensions then. - 4) Calculation of Grad_log_likelihood is not supported if matrices + 4) Calculation of Grad_log_likelihood is not supported if matrices H, or R changes overf time (with index k). (later may be changed) 5) Measurement may include missing values. In this case update step is not done for this measurement. (later may be changed) - - Input: + + Input: ----------------- - + F: (state_dim, state_dim) matrix F in the model. - + L: (state_dim, noise_dim) matrix L in the model. - + Qc: (noise_dim, noise_dim) matrix Q_c in the model. - + p_H: scalar, matrix (measurement_dim, state_dim) , 3D array H_{k} in the model. If matrix then H_{k} = H - constant. If it is 3D array then H_{k} = p_Q[:,:, index[2,k]] - + p_R: scalar, square symmetric matrix, 3D array R_{k} in the model. If matrix then R_{k} = R - constant. If it is 3D array then R_{k} = p_R[:,:, index[3,k]] - + P_inf: (state_dim, state_dim) matrix State varince matrix on infinity. - + X: 1D array Time points of measurements. Needed for converting continuos problem to the discrete one. - + Y: matrix or vector or 3D array Data. If Y is matrix then samples are along 0-th dimension and - features along the 1-st. If 3D array then third dimension + features along the 1-st. If 3D array then third dimension correspond to "multiple time series mode". - + index: vector Which indices (on 3-rd dimension) from arrays p_H, p_R to use - on every time step. If this parameter is None then it is assumed + on every time step. If this parameter is None then it is assumed that p_H, p_R do not change over time and indices are not needed. index[0,:] - correspond to H, index[1,:] - correspond to R If index.shape[0] == 1, it is assumed that indides for all matrices are the same. - + m_init: vector or matrix Initial distribution mean. If None it is assumed to be zero. For "multiple time series mode" it is matrix, second dimension of which correspond to different time series. In regular case ("one time series mode") it is a vector. - + P_init: square symmetric matrix or scalar Initial covariance of the states. If the parameter is scalar - then it is assumed that initial covariance matrix is unit matrix + then it is assumed that initial covariance matrix is unit matrix multiplied by this scalar. If None the unit matrix is used instead. "multiple time series mode" does not affect it, since it does not affect anything related to state variaces. - + p_kalman_filter_type: string, one of ('regular', 'svd') Which Kalman Filter is used. Regular or SVD. SVD is more numerically stable, in particular, Covariace matrices are guarantied to be positive semi-definite. However, 'svd' works slower, especially for small data due to SVD call overhead. - + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then "grad_calc_params" parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then "grad_calc_params" parameter must provide the extra parameters for gradient calculation. - + grad_params_no: int - If previous parameter is true, then this parameters gives the - total number of parameters in the gradient. - + If previous parameter is true, then this parameters gives the + total number of parameters in the gradient. + grad_calc_params: dictionary - Dictionary with derivatives of model matrices with respect + Dictionary with derivatives of model matrices with respect to parameters "dF", "dL", "dQc", "dH", "dR", "dm_init", "dP_init". They can be None, in this case zero matrices (no dependence on parameters) is assumed. If there is only one parameter then third dimension is automatically added. - + Output: -------------- - + M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array Filter estimates of the state means. In the extra step the initial value is included. In the "multiple time series mode" third dimension correspond to different timeseries. - + P: (no_steps+1, state_dim, state_dim) 3D array Filter estimates of the state covariances. In the extra step the initial value is included. - + log_likelihood: double or (1, time_series_no) 3D array. - + If the parameter calc_log_likelihood was set to true, return logarithm of marginal likelihood of the state-space model. If the parameter was false, return None. In the "multiple time series mode" it is a vector providing log_likelihood for each time series. - + grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix If calc_grad_log_likelihood is true, return gradient of log likelihood - with respect to parameters. It returns it column wise, so in - "multiple time series mode" gradients for each time series is in the + with respect to parameters. It returns it column wise, so in + "multiple time series mode" gradients for each time series is in the corresponding column. - - AQcomp: object + + AQcomp: object Contains some pre-computed values for converting continuos model into discrete one. It can be used later in the smoothing pahse. """ - + p_H = np.atleast_1d(p_H) p_R = np.atleast_1d(p_R) - + X.shape, old_X_shape = cls._reshape_input_data(X.shape, 2) # represent as column if (X.shape[1] != 1): raise ValueError("Only one dimensional X data is supported.") - + Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) # represent as column - + state_dim = F.shape[0] measurement_dim = Y.shape[1] time_series_no = Y.shape[2] # multiple time series mode - + if ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\ ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)): model_matrices_chage_with_time = True else: model_matrices_chage_with_time = False - + # Check index old_index_shape = None if index is None: @@ -2663,31 +2705,31 @@ class ContDescrStateSpace(DescreteStateSpace): if len(index.shape) == 1: index.shape = (1,index.shape[0]) old_index_shape = (index.shape[0],) - - if (index.shape[1] != Y.shape[0]): + + if (index.shape[1] != Y.shape[0]): raise ValueError("Number of measurements must be equal the number of H_{k}, R_{k}") - - if (index.shape[0] == 1): - H_time_var_index = 0; R_time_var_index = 0 + + if (index.shape[0] == 1): + H_time_var_index = 0; R_time_var_index = 0 elif (index.shape[0] == 4): H_time_var_index = 0; R_time_var_index = 1 else: raise ValueError("First Dimension of index must be either 1 or 2.") - + (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H') (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R') - + if m_init is None: m_init = np.zeros((state_dim, time_series_no)) else: m_init = np.atleast_2d(m_init).T - + if P_init is None: P_init = P_inf.copy() - + if p_kalman_filter_type not in ('regular', 'svd'): raise ValueError("Kalman filer type neither 'regular nor 'svd'.") - + # Functions to pass to the kalman_filter algorithm: # Parameters: # k - number of Kalman filter iteration @@ -2695,26 +2737,26 @@ class ContDescrStateSpace(DescreteStateSpace): # f_hl = lambda k,m,H: np.dot(H, m) # f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]] #f_R = lambda k: p_R[:,:, index[R_time_var_index, k]] - #o_R = R_handling( p_R, index, R_time_var_index, 20) - + #o_R = R_handling( p_R, index, R_time_var_index, 20) + if calc_grad_log_likelihood: - - dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA') + + dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA') dQc = cls._check_grad_state_matrices(grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ') dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dA') - + dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR') - + dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter if dm_init is None: # multiple time series mode. Keep grad_params always as a last dimension dm_init = np.zeros( (state_dim, time_series_no, grad_params_no) ) - + dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter if dP_init is None: dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function - + else: dP_inf = None dF = None @@ -2723,61 +2765,61 @@ class ContDescrStateSpace(DescreteStateSpace): dR = None dm_init = None dP_init = None - + measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR) #import pdb; pdb.set_trace() - - dynamic_callables = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood, - grad_params_no=grad_params_no, + + dynamic_callables = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood, + grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) - + if print_verbose: print("General: run Continuos-Discrete Kalman Filter") # Also for dH, dR and probably for all derivatives (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim, - dynamic_callables, measurement_callables, - X, Y, m_init=m_init, P_init=P_init, - p_kalman_filter_type=p_kalman_filter_type, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, + dynamic_callables, measurement_callables, + X, Y, m_init=m_init, P_init=P_init, + p_kalman_filter_type=p_kalman_filter_type, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, dm_init=dm_init, dP_init=dP_init) - + if old_index_shape is not None: index.shape = old_index_shape - + if old_X_shape is not None: X.shape = old_X_shape - + if old_Y_shape is not None: Y.shape = old_Y_shape - + if old_H_shape is not None: p_H.shape = old_H_shape - + if old_R_shape is not None: p_R.shape = old_R_shape - + return (M, P, log_likelihood, grad_log_likelihood, AQcomp) - + @classmethod - def _cont_discr_kalman_filter_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, X, Y, - m_init, P_init, + def _cont_discr_kalman_filter_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, X, Y, + m_init, P_init, p_kalman_filter_type='regular', - calc_log_likelihood=False, - calc_grad_log_likelihood=False, grad_params_no=None, + calc_log_likelihood=False, + calc_grad_log_likelihood=False, grad_params_no=None, dm_init=None, dP_init=None): """ - General filtering algorithm for inference in the continuos-discrete + General filtering algorithm for inference in the continuos-discrete state-space model: d/dt x(t) = F * x(t) + L * w(t); w(t) ~ N(0, Qc) y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - - Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) - + + Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) + Current Features: ---------------------------------------- - + 1) Function support "multiple time series mode" which means that exactly the same State-Space model is used to filter several sets of measurements. In this case third dimension of Y should include these state-space measurements @@ -2785,76 +2827,76 @@ class ContDescrStateSpace(DescreteStateSpace): 2) Measurement may include missing values. In this case update step is not done for this measurement. (later may be changed) - - Input: + + Input: ----------------- state_dim: int Demensionality of the states - + F: (state_dim, state_dim) matrix F in the model. - + L: (state_dim, noise_dim) matrix L in the model. - + Qc: (noise_dim, noise_dim) matrix Q_c in the model. - + P_inf: (state_dim, state_dim) matrix State varince matrix on infinity. - + p_h: function (k, x_{k}, H_{k}). Measurement function. k (iteration number), x_{k} H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. - + f_H: function (k, m, P) return Jacobian of dynamic function, it is passed into p_h. k (iteration number), m: point where Jacobian is evaluated, P: parameter for Jacobian, usually covariance matrix. - - p_f_R: function (k). Returns noise matrix of measurement equation + + p_f_R: function (k). Returns noise matrix of measurement equation on iteration k. k (iteration number). - + m_init: vector or matrix - Initial distribution mean. For "multiple time series mode" + Initial distribution mean. For "multiple time series mode" it is matrix, second dimension of which correspond to different - time series. In regular case ("one time series mode") it is a + time series. In regular case ("one time series mode") it is a vector. - + P_init: matrix or scalar Initial covariance of the states. Must be not None "multiple time series mode" does not affect it, since it does not affect anything related to state variaces. - + p_kalman_filter_type: string, one of ('regular', 'svd') Which Kalman Filter is used. Regular or SVD. SVD is more numerically stable, in particular, Covariace matrices are guarantied to be positive semi-definite. However, 'svd' works slower, especially for small data due to SVD call overhead. - + calc_log_likelihood: boolean Whether to calculate marginal likelihood of the state-space model. - + calc_grad_log_likelihood: boolean - Whether to calculate gradient of the marginal likelihood - of the state-space model. If true then the next parameter must + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must provide the extra parameters for gradient calculation. - + grad_params_no: int Number of gradient parameters - + dP_inf, dF, dQc, dH, dR, dm_init, dP_init: matrices or 3D arrays. Necessary parameters for derivatives calculation. - + """ - + #import pdb; pdb.set_trace() steps_no = Y.shape[0] # number of steps in the Kalman Filter time_series_no = Y.shape[2] # multiple time series mode - + # Allocate space for results # Mean estimations. Initial values will be included M = np.empty(((steps_no+1),state_dim,time_series_no)) @@ -2863,10 +2905,10 @@ class ContDescrStateSpace(DescreteStateSpace): P = np.empty(((steps_no+1),state_dim,state_dim)) P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful P[0,:,:] = P_init # Initialize initial covariance matrix - + #import pdb;pdb.set_trace() if p_kalman_filter_type == 'svd': - (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, + (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, overwrite_a=False,check_finite=True) S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance P_upd = (P_init, S,U) @@ -2874,57 +2916,57 @@ class ContDescrStateSpace(DescreteStateSpace): #grad_log_likelihood = np.zeros((grad_params_no,1)) log_likelihood = 0 if calc_log_likelihood else None grad_log_likelihood = 0 if calc_grad_log_likelihood else None - + #setting initial values for derivatives update dm_upd = dm_init dP_upd = dP_init # Main loop of the Kalman filter for k in range(0,steps_no): # In this loop index for new estimations is (k+1), old - (k) - # This happened because initial values are stored at 0-th index. + # This happened because initial values are stored at 0-th index. #import pdb; pdb.set_trace() - + prev_mean = M[k,:,:] # mean from the previous step - - if p_kalman_filter_type == 'svd': + + if p_kalman_filter_type == 'svd': m_pred, P_pred, dm_pred, dP_pred = \ cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables, - calc_grad_log_likelihood=calc_grad_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, p_dm = dm_upd, p_dP = dP_upd) else: m_pred, P_pred, dm_pred, dP_pred = \ - cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, - calc_grad_log_likelihood=calc_grad_log_likelihood, + cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, + calc_grad_log_likelihood=calc_grad_log_likelihood, p_dm = dm_upd, p_dP = dP_upd ) - + #import pdb; pdb.set_trace() k_measurment = Y[k,:,:] - + if (np.any(np.isnan(k_measurment)) == False): - - if p_kalman_filter_type == 'svd': + + if p_kalman_filter_type == 'svd': m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, - k_measurment, calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, + cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, + k_measurment, calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, p_dm = dm_pred, p_dP = dP_pred ) - - + + # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, - # calc_log_likelihood=calc_log_likelihood, - # calc_grad_log_likelihood=calc_grad_log_likelihood, + # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, + # calc_log_likelihood=calc_log_likelihood, + # calc_grad_log_likelihood=calc_grad_log_likelihood, # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) - # - # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, + # + # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, # overwrite_a=False,check_finite=True) # P_upd = (P_upd, S,U) else: m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_pred, p_dP = dP_pred ) + cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm = dm_pred, p_dP = dP_pred ) else: if k_measurment.shape != (1,1): raise ValueError("Nan measurements are currently not supported for \ @@ -2933,17 +2975,17 @@ class ContDescrStateSpace(DescreteStateSpace): m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred log_likelihood_update = 0.0; d_log_likelihood_update = 0.0; - - + + if calc_log_likelihood: log_likelihood += log_likelihood_update - + if calc_grad_log_likelihood: grad_log_likelihood += d_log_likelihood_update - + M[k+1,:,:] = m_upd # separate mean value for each time series - - if p_kalman_filter_type == 'svd': + + if p_kalman_filter_type == 'svd': P[k+1,:,:] = P_upd[0] else: P[k+1,:,:] = P_upd @@ -2951,119 +2993,119 @@ class ContDescrStateSpace(DescreteStateSpace): # !!!Print statistics! Print sizes of matrices # !!!Print statistics! Print iteration time base on another boolean variable return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False)) - - @classmethod - def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars, + + @classmethod + def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars, p_dynamic_callables=None, X=None, F=None,L=None,Qc=None): """ - + Continuos-discrete Rauch–Tung–Striebel(RTS) smoother. - + This function implements Rauch–Tung–Striebel(RTS) smoother algorithm based on the results of _cont_discr_kalman_filter_raw. - + Model: d/dt x(t) = F * x(t) + L * w(t); w(t) ~ N(0, Qc) y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) - - Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) - + + Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) + Input: -------------- - - filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array + + filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array Results of the Kalman Filter means estimation. - + filter_covars: (no_steps+1, state_dim, state_dim) 3D array Results of the Kalman Filter covariance estimation. - + Dynamic_callables: object or None Object form the filter phase which provides functions for computing A, Q, dA, dQ fro discrete model from the continuos model. - + X, F, L, Qc: matrices If AQcomp is None, these matrices are used to create this object from scratch. - + Output: ------------- - - M: (no_steps+1,state_dim) matrix + + M: (no_steps+1,state_dim) matrix Smoothed estimates of the state means - + P: (no_steps+1,state_dim, state_dim) 3D array Smoothed estimates of the state covariances """ - + f_a = lambda k,m,A: np.dot(A, m) # state dynamic model if p_dynamic_callables is None: # make this object from scratch - p_dynamic_callables = cls._cont_to_discrete_object(cls, X, F,L,Qc,f_a,compute_derivatives=False, + p_dynamic_callables = cls._cont_to_discrete_object(cls, X, F,L,Qc,f_a,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None) no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance) - + M = np.empty(filter_means.shape) # smoothed means P = np.empty(filter_covars.shape) # smoothed covars - + if print_verbose: print("General: run Continuos-Discrete Kalman Smoother") - + M[-1,:,:] = filter_means[-1,:,:] P[-1,:,:] = filter_covars[-1,:,:] - for k in range(no_steps-1,-1,-1): - + for k in range(no_steps-1,-1,-1): + prev_mean = filter_means[k,:] # mean from the previous step m_pred, P_pred, tmp1, tmp2 = \ - cls._kalman_prediction_step(k, prev_mean, - filter_covars[k,:,:], p_dynamic_callables, - calc_grad_log_likelihood=False) + cls._kalman_prediction_step(k, prev_mean, + filter_covars[k,:,:], p_dynamic_callables, + calc_grad_log_likelihood=False) p_m = filter_means[k,:] p_m_prev_step = M[(k+1),:] - m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(k, - p_m ,filter_covars[k,:,:], + m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(k, + p_m ,filter_covars[k,:,:], m_pred, P_pred, p_m_prev_step ,P[(k+1),:,:], p_dynamic_callables) - + M[k,:,:] = m_upd P[k,:,:] = P_upd # Return values return (M, P) - + @classmethod def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False, - grad_params_no=None, + grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): """ Function return the object which is used in Kalman filter and/or smoother to obtain matrices A, Q and their derivatives for discrete model from the continuous model. - - There are 2 objects AQcompute_once and AQcompute_batch and the function + + There are 2 objects AQcompute_once and AQcompute_batch and the function returs the appropriate one based on the number of different time steps. - + Input: ---------------------- X, F, L, Qc: matrices Continuous model matrices - + f_a: function - Dynamic Function is attached to the Dynamic_Model_Callables class + Dynamic Function is attached to the Dynamic_Model_Callables class compute_derivatives: boolean Whether to compute derivatives - + grad_params_no: int Number of parameters in the gradient - + P_inf, dP_inf, dF, dQ: matrices and 3D objects Data necessary to compute derivatives. - + Output: -------------------------- AQcomp: object Its methods return matrices (and optionally derivatives) for the discrete state-space model. - - """ - + + """ + unique_round_decimals = 10 threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time dt = np.empty((X.shape[0],)) @@ -3071,121 +3113,121 @@ class ContDescrStateSpace(DescreteStateSpace): dt[0] = 0#dt[1] unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals)) number_unique_indices = len(unique_indices) - + #import pdb; pdb.set_trace() if use_cython: class AQcompute_batch(state_space_cython.AQcompute_batch_Cython): def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, - L,Qc,dt,compute_derivatives, + L,Qc,dt,compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) - + super(AQcompute_batch,self).__init__(As, Qs, reconstruct_indices, dAs,dQs) else: AQcompute_batch = cls.AQcompute_batch_Python - + if number_unique_indices > threshold_number_of_unique_time_steps: AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) if print_verbose: print("CDO: Continue-to-discrete INSTANTANEOUS object is created.") - print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) - + print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) + else: AQcomp = AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) - if print_verbose: + if print_verbose: print("CDO: Continue-to-discrete BATCH object is created.") print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) - print("CDO: Total size if its data: %i" % (AQcomp.total_size_of_data,) ) - + print("CDO: Total size if its data: %i" % (AQcomp.total_size_of_data,) ) + return AQcomp - + @staticmethod - def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False, - grad_params_no=None, P_inf=None, + def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False, + grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): """ Linear Time-Invariant Stochastic Differential Equation (LTI SDE): - + dx(t) = F x(t) dt + L d \beta ,where - + x(t): (vector) stochastic process \beta: (vector) Brownian motion process - F, L: (time invariant) matrices of corresponding dimensions + F, L: (time invariant) matrices of corresponding dimensions Qc: covariance of noise. - + This function rewrites it into the corresponding state-space form: - + x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) - - + + Input: -------------- F,L: LTI SDE matrices of corresponding dimensions - - Qc: matrix (n,n) - Covarince between different dimensions of noise \beta. + + Qc: matrix (n,n) + Covarince between different dimensions of noise \beta. n is the dimensionality of the noise. - + dt: double or iterable Time difference used on this iteration. If dt is iterable, then A and Q_noise are computed for every - unique dt - + unique dt + compute_derivatives: boolean Whether derivatives of A and Q are required. - + grad_params_no: int Number of gradient parameters - + P_inf: (state_dim. state_dim) matrix - + dP_inf - + dF: 3D array Derivatives of F - + dQc: 3D array Derivatives of Qc - + dR: 3D array Derivatives of R - + Output: -------------- - + A: matrix - A_{k}. Because we have LTI SDE only dt can affect on matrix + A_{k}. Because we have LTI SDE only dt can affect on matrix difference for different k. - + Q_noise: matrix - Covariance matrix of (vector) q_{k-1}. Only dt can affect the + Covariance matrix of (vector) q_{k-1}. Only dt can affect the matrix difference for different k. - + reconstruct_index: array If dt was iterable return three dimensinal arrays A and Q_noise. Third dimension of these arrays correspond to unique dt's. - This reconstruct_index contain indices of the original dt's + This reconstruct_index contain indices of the original dt's in the uninue dt sequence. A[:,:, reconstruct_index[5]] - is matrix A of 6-th(indices start from zero) dt in the original + is matrix A of 6-th(indices start from zero) dt in the original sequence. dA: 3D array Derivatives of A - + dQ: 3D array Derivatives of Q - """ + """ # Dimensionality n = F.shape[0] - + if not isinstance(dt, collections.Iterable): # not iterable, scalar - + # The dynamical model - A = matrix_exponent(F*dt) + A = matrix_exponent(F*dt) if np.any( np.isnan(A)): - A = linalg.expm3(F*dt) - + A = linalg.expm3(F*dt) + # The covariance matrix Q by matrix fraction decomposition -> Phi = np.zeros((2*n,2*n)) Phi[:n,:n] = F @@ -3193,20 +3235,20 @@ class ContDescrStateSpace(DescreteStateSpace): Phi[n:,n:] = -F.T AB = matrix_exponent(Phi*dt) AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n)))) - + Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T) # The covariance matrix Q by matrix fraction decomposition <- - + if compute_derivatives: dA = np.zeros([n, n, grad_params_no]) dQ = np.zeros([n, n, grad_params_no]) - + #AA = np.zeros([2*n, 2*n, nparam]) FF = np.zeros([2*n, 2*n]) AA = np.zeros([2*n, 2*n, grad_params_no]) - + for p in range(0, grad_params_no): - + FF[:n,:n] = F FF[n:,:n] = dF[:,:,p] FF[n:,n:] = F @@ -3225,17 +3267,17 @@ class ContDescrStateSpace(DescreteStateSpace): Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) Q_noise = Q_noise_2 #PP = A.dot(P).dot(A.T) + Q_noise_2 - + # The derivatives of A and Q dA[:,:,p] = AA[n:,:n,p] dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \ - A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times - - else: + + else: dA = None dQ = None Q_noise = Q_noise_1 - + #Q_noise = Q_noise_1 # Return @@ -3252,8 +3294,8 @@ class ContDescrStateSpace(DescreteStateSpace): Q_noise = np.empty((n,n,dt_unique.shape[0])) if compute_derivatives: - dA = np.empty((n,n,grad_params_no,dt_unique.shape[0])) - dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0])) + dA = np.empty((n,n,grad_params_no,dt_unique.shape[0])) + dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0])) else: dA = None dQ = None @@ -3262,7 +3304,7 @@ class ContDescrStateSpace(DescreteStateSpace): A[:,:,j], Q_noise[:,:,j], tmp1, dA_t, dQ_t = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j], compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) if compute_derivatives: - dA[:,:,:,j] = dA_t + dA[:,:,:,j] = dA_t dQ[:,:,:,j] = dQ_t # Return @@ -3274,8 +3316,8 @@ def matrix_exponent(M): """ if (M.shape[0] == 1): # 1*1 matrix - Mexp = np.array( ((np.exp(M[0,0]) ,),) ) - + Mexp = np.array( ((np.exp(M[0,0]) ,),) ) + else: # matrix is larger method = None try: @@ -3293,7 +3335,7 @@ def matrix_exponent(M): method = 2 if np.any(np.isnan(Mexp)): raise ValueError("Matrix Exponent is not computed 2") - + return Mexp def balance_matrix(A): @@ -3302,43 +3344,43 @@ def balance_matrix(A): matrix A: A = T * bA * T^{-1}, where norms of columns of bA and of rows of bA are as close as possible. It is usually used as a preprocessing step in eigenvalue calculation routine. It is useful also for State-Space models. - + See also: - [1] Beresford N. Parlett and Christian Reinsch (1969). Balancing - a matrix for calculation of eigenvalues and eigenvectors. + [1] Beresford N. Parlett and Christian Reinsch (1969). Balancing + a matrix for calculation of eigenvalues and eigenvectors. Numerische Mathematik, 13(4): 293-304. - + Input: ---------------------- A: square matrix Matrix to be balanced - + Output: ---------------- bA: matrix Balanced matrix - + T: matrix Left part of the similarity transformation - - T_inv: matrix + + T_inv: matrix Right part of the similarity transformation. """ - + if len(A.shape) != 2 or (A.shape[0] != A.shape[1]): - raise ValueError('balance_matrix: Expecting square matrix') - - N = A.shape[0] # matrix size - + raise ValueError('balance_matrix: Expecting square matrix') + + N = A.shape[0] # matrix size + gebal = sp.linalg.lapack.get_lapack_funcs('gebal',(A,)) bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True,overwrite_a=False) if info < 0: - raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info) + raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info) # calculating the similarity transforamtion: def perm_matr(D, c1,c2): """ Function creates the permutation matrix which swaps columns c1 and c2. - + Input: -------------- D: int @@ -3349,16 +3391,16 @@ def balance_matrix(A): Column 2. Numeration starts from 1...D """ i1 = c1-1; i2 = c2-1 # indices - P = np.eye(D); - P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements + P = np.eye(D); + P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements P[i1,i2] = 1.0; P[i2,i1] = 1.0 return P - - P = np.eye(N) # permutation matrix + + P = np.eye(N) # permutation matrix if (hi != N-1): # there are row permutations - for k in range(N-1,hi,-1): - new_perm = perm_matr(N, k+1, pivscale[k]) + for k in range(N-1,hi,-1): + new_perm = perm_matr(N, k+1, pivscale[k]) P = np.dot(P,new_perm) if (lo != 0): for k in range(0,lo,1): @@ -3367,38 +3409,38 @@ def balance_matrix(A): D = pivscale.copy() D[0:lo] = 1.0; D[hi+1:N] = 1.0 # thesee scaling factors must be set to one. #D = np.diag(D) # make a diagonal matrix - + T = np.dot(P,np.diag(D)) # similarity transformation in question T_inv = np.dot(np.diag(D**(-1)),P.T) - + #print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) ) return bA.copy(), T, T_inv - + def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): """ Balances State-Space model for more numerical stability This is based on the following: - + dx/dt = F x + L w y = H x - + Let T z = x, which gives - + dz/dt = inv(T) F T z + inv(T) L w - y = H T z + y = H T z """ - + bF,T,T_inv = balance_matrix(F) - + bL = np.dot( T_inv, L) bQc = Qc # not affected - + bH = np.dot(H, T) - + bPinf = np.dot(T_inv, np.dot(Pinf, T_inv.T)) - - #import pdb; pdb.set_trace() + + #import pdb; pdb.set_trace() # LL,islower = linalg.cho_factor(Pinf) # inds = np.triu_indices(Pinf.shape[0],k=1) # LL[inds] = 0.0 @@ -3406,40 +3448,40 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): # bPinf = np.dot( bLL, bLL.T) bP0 = np.dot(T_inv, np.dot(P0, T_inv.T)) - + if dF is not None: bdF = np.zeros(dF.shape) for i in range(dF.shape[2]): bdF[:,:,i] = np.dot( T_inv, np.dot( dF[:,:,i], T)) - + else: bdF = None - + if dPinf is not None: bdPinf = np.zeros(dPinf.shape) for i in range(dPinf.shape[2]): bdPinf[:,:,i] = np.dot( T_inv, np.dot( dPinf[:,:,i], T_inv.T)) - + # LL,islower = linalg.cho_factor(dPinf[:,:,i]) # inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1) # LL[inds] = 0.0 # bLL = np.dot(T_inv, LL) # bdPinf[:,:,i] = np.dot( bLL, bLL.T) - - + + else: bdPinf = None - + if dP0 is not None: bdP0 = np.zeros(dP0.shape) for i in range(dP0.shape[2]): bdP0[:,:,i] = np.dot( T_inv, np.dot( dP0[:,:,i], T_inv.T)) else: - bdP0 = None - - + bdP0 = None + + bdQc = dQc # not affected - + # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0) - + return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T \ No newline at end of file diff --git a/GPy/testing/gpy_kernels_state_space_tests.py b/GPy/testing/gpy_kernels_state_space_tests.py index 0d27be86..4e176a81 100644 --- a/GPy/testing/gpy_kernels_state_space_tests.py +++ b/GPy/testing/gpy_kernels_state_space_tests.py @@ -31,10 +31,8 @@ class StateSpaceKernelsTests(np.testing.TestCase): if check_gradients: self.assertTrue(m1.checkgrad()) - #import pdb; pdb.set_trace() - if optimize: - m1.optimize(optimizer='bfgs',max_iters=optimize_max_iters) + m1.optimize(optimizer='lbfgsb',max_iters=optimize_max_iters) if compare_with_GP and (predict_X is None): predict_X = X @@ -45,7 +43,7 @@ class StateSpaceKernelsTests(np.testing.TestCase): if compare_with_GP: m2 = GPy.models.GPRegression(X,Y, gp_kernel) - m2.optimize(optimizer='bfgs', max_iters=optimize_max_iters) + m2.optimize(optimizer='lbfgsb', max_iters=optimize_max_iters) #print(m2) x_pred_reg_2 = m2.predict(predict_X)