From 5ff256079babf6ad131a1e5d731bd2f681c6c1c9 Mon Sep 17 00:00:00 2001
From: Alexander Grigorievskiy
Date: Tue, 7 Apr 2015 18:20:11 +0300
Subject: [PATCH] EXT: State-space modelling functionality is decoupled from
 the GPy models.

Currently the new functionality is added on the side and does not interfere
with the old state-space functionality. The example file has been changed and
a minimal example where discrepancies appear is constructed.
---
 GPy/examples/state_space.py    |   39 +-
 GPy/models/state_space_main.py | 1611 ++++++++++++++++++++++++++++++++
 GPy/models/state_space_new.py  |  322 +++
 3 files changed, 1968 insertions(+), 4 deletions(-)
 create mode 100644 GPy/models/state_space_main.py
 create mode 100644 GPy/models/state_space_new.py

diff --git a/GPy/examples/state_space.py b/GPy/examples/state_space.py
index bb5973e1..ce506b9c 100644
--- a/GPy/examples/state_space.py
+++ b/GPy/examples/state_space.py
@@ -2,19 +2,50 @@
 import GPy
 import numpy as np
 import matplotlib.pyplot as plt
-X = np.linspace(0, 10, 2000)[:, None]
-Y = np.sin(X) + np.random.randn(*X.shape)*0.1
+import GPy.models.state_space_new as SS_new
+
+#X = np.linspace(0, 10, 2000)[:, None]
+#Y = np.sin(X) + np.random.randn(*X.shape)*0.1
+
+## Need to run these lines when X and Y are imported ->
+#X.shape = (X.shape[0],1)
+#Y.shape = (Y.shape[0],1)
+## Need to run these lines when X and Y are imported <-
+
+# Generation of minimal example data ->
+X = np.random.rand(3)
+sort_index = np.argsort(X)
+X = X[sort_index]; X.shape = (X.shape[0],1)
+Y = np.sin(10*X) + np.random.randn(*X.shape)*0.1
+# Generation of minimal example data <-
+
+#plt.figure()
+#plt.plot( X, Y)
+#plt.show()
 
 kernel = GPy.kern.Matern32(X.shape[1])
 m = GPy.models.StateSpace(X,Y, kernel)
+print m
+# m.optimize()
-
+# print m
 
 kernel1 = GPy.kern.Matern32(X.shape[1])
 m1 = GPy.models.GPRegression(X,Y, kernel1)
+print m1
 m1.optimize()
-print m1
\ No newline at end of file
+print m1
+
+kernel2 = GPy.kern.Matern32(X.shape[1])
+m2 = SS_new.StateSpace(X,Y, kernel2)
+
+print m2
+
+m2.optimize()
+
+print m2
+
diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py
new file mode 100644
index 00000000..d6260bc3
--- /dev/null
+++ b/GPy/models/state_space_main.py
@@ -0,0 +1,1611 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 19 12:32:32 2015
+"""
+
+import collections # for checking whether a variable is iterable
+import types # for checking whether a variable is a function
+
+import numpy as np
+import scipy as sp
+import scipy.linalg as linalg
+
+class DescreteStateSpace(object):
+    """
+    This class implements state-space inference for linear and nonlinear
+    state-space models.
+    Linear models are:
+    x_{k} = A_{k} * x_{k-1} + q_{k-1};         q_{k-1} ~ N(0, Q_{k-1})
+    y_{k} = H_{k} * x_{k} + r_{k};             r_{k} ~ N(0, R_{k})
+
+    Nonlinear:
+    x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1};  q_{k-1} ~ N(0, Q_{k-1})
+    y_{k} = f_h(k, x_{k}, H_{k}) + r_{k};      r_{k} ~ N(0, R_{k})
+    Here f_a and f_h are some functions of k (iteration number), x_{k-1} or
+    x_{k} (state value on a certain iteration), and A_{k} and H_{k} are the
+    Jacobian matrices of f_a and f_h respectively. In the linear case they
+    are exactly A_{k} and H_{k}.
+
+    Currently two nonlinear Gaussian filter algorithms are implemented:
+    the Extended Kalman Filter (EKF) and the Statistically Linearized Filter
+    (SLF), whose implementations are very similar.
+    """
+
+    @staticmethod
+    def reshape_input_data(shape):
+        """
+        Function returns the column-wise shape for an input shape.
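+        For example, shape (5,) is mapped to ((5,1), (5,)), shape (1,5) to
+        ((5,1), (1,5)), and shape (5,2) is returned unchanged as ((5,2), None).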
+ + Inputs: + -------------- + shape: tuple + Shape of an input array, so that it is always a column. + old_shape: tuple or None + If the shape has been modified, return old shape, otherwise None + """ + + + if (len(shape) > 2): + raise ValueError("Input array is not supposed to be 3 or more dimensional") + elif len(shape) == 1: + return ((shape[0],1), shape) + else: # len(shape) == 2 + return ((shape[1],1), shape) if (shape[0] == 1) else (shape,None) + + +# def __init__(self, X, Y, kernel): +# """ +# Inputs: +# -------------- +# +# X: double array (N_samples,1) +# Time points of the observations +# Y: double array (N_samples,N_dim) +# Observations +# """ +# +# # If X is 1D array make it column (x.shape[0],1) +# X.shape, old_Y_shape = DescreteStateSpace.reshape_input_data(X.shape) +# Y.shape, old_X_shape = DescreteStateSpace.reshape_input_data(Y.shape) +# +# self.num_data, input_dim = X.shape +# assert input_dim==1, "State space methods for time only" +# num_data_Y, self.output_dim = Y.shape +# assert num_data_Y == self.num_data, "Number of samples in X and Y don't match" +# assert self.output_dim == 1, "State space methods for single outputs only" +# +# # Make sure the observations are ordered in time +# sort_index = np.argsort(X[:,0]) +# self.X = X[sort_index] +# self.Y = Y[sort_index] +# +# # Noise variance ??? - TODO: figure out +# self.sigma2 = 1 +# +# self.kern = kernel +# +# # Make sure all parameters are positive +# #self.ensure_default_constraints() +# +# # Assert that the kernel is supported +# #assert self.kern.sde() not False, "This kernel is not supported for state space estimation" + + @classmethod + def kalman_filter(cls,p_A, p_Q, p_H, p_R, Y, index = None, m_init=None, + P_init=None, calc_log_likelihood=False, + calc_grad_log_likelihood=False, grad_params_no=None, + grad_calc_params=None): + """ + This function implements the basic (raw) Kalman Filter algorithm + These notations are assumed: + x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) + y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) + + Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) + + Currently, H_{k} and R_{k} are not supposed to change over time. + I. e. They are assumed to be constant. Later they may be done time + dependent by analogy with A_{k} and Q_{k-1}. + + + Input: + ----------------- + + p_A: scalar, square matrix, 3D array + A_{k} in the model. If matrix then A_{k} = A - constant. + If it is 3D array then A_{k} = p_A[:,:, index[k]] + + p_Q: scalar, square symmetric matrix, 3D array + Q_{k-1} in the model. If matrix then Q_{k-1} = Q - constant. + If it is 3D array then Q_{k-1} = p_Q[:,:, index[k]] + p_H: scalar, matrix + H_{k} = p_H. Assumed to be constant. + + p_R: scalar, square symmetric matrix + H_{k} = p_H. Assumed to be constant. + + Y: matrix or vector + Data. If Y is matrix then samples are along 0-th dimension and + features along the 1-st. May have missing values. + + index: vector + Which indices (on 3-rd dimension) from arrays p_A and p_Q to use + on every time step. If this parameter is None then it is assumed + that p_A and p_Q are matrices and indices are not needed. + + p_mean: vector + Initial distribution mean. If None it is assumed to be zero + + P_init: square symmetric matrix or scalar + Initial covariance of the states. If the parameter is scalar + then it is assumed that initial covariance matrix is unit matrix + multiplied by this scalar. If None the unit matrix is used instead. 
+ + calc_log_likelihood: boolean + Whether to calculate marginal likelihood of the state-space model. + + calc_grad_log_likelihood: boolean + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must + provide the extra parameters for gradient calculation. + + grad_params_no: int + Number of parameters + + Output: + -------------- + + M: (state_dim, no_steps) matrix + Filter estimates of the state means + + P: (state_dim, state_dim, no_steps) 3D array + Filter estimates of the state covariances + + log_likelihood: double + If the parameter calc_log_likelihood was set to true, return + logarithm of marginal likelihood of the state-space model. If + the parameter was false, return None + + """ + + # Parameters checking -> + # index + p_A = np.atleast_1d(p_A) + p_Q = np.atleast_1d(p_Q) + if ((len(p_A.shape) >= 3) and (p_A.shape[2] != 1)) or \ + ((len(p_Q.shape) >= 3) and (p_Q.shape[2] != 1)): + if index is None: + raise ValueError("A and Q can not change over time if indices are not given") + else: + A_or_Q_change_over_time = True + else: + index = np.zeros((Y.shape[0],) ) + A_or_Q_change_over_time = False + + # p_A + (p_A, old_A_shape) = cls._check_A_matrix(p_A) + + # p_Q + (p_Q,old_Q_shape) = cls._check_Q_matrix(p_Q) + + # p_H + p_H = np.atleast_1d(p_H) + (p_H,old_H_shape) = cls._check_H_matrix(p_H) + + # p_R + p_R = np.atleast_1d(p_R) + (p_R,old_R_shape) = cls._check_R_matrix(p_R) + + state_dim = p_A.shape[0] + # m_init + if m_init is None: + m_init = np.zeros((state_dim,1)) + else: + m_init = np.atleast_2d(m_init).T + + # P_init + if P_init is None: + P_init = np.eye(state_dim) + elif not isinstance(P_init, collections.Iterable): #scalar + P_init = P_init*np.eye(state_dim) + + # Y + Y.shape, old_Y_shape = cls.reshape_input_data(Y.shape) + if (Y.shape[0] != len(index)): + raise ValueError("Number of measurements must be equal the number of A_{k} and Q_{k}") + + measurement_dim = Y.shape[1] + # Functions to pass to the kalman_filter algorithm: + # PArameters: + # k - number of Kalman filter iteration + # m - vector for calculating matrices. Required for EKF. Not used here. 
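# Illustrative sketch (not part of the algorithm): for a constant scalar
+        # model with A = 0.9, Q = 0.1, H = 1.0 and R = 0.5 the wrappers
+        # defined below behave on every step k as
+        #   fa(k, m, A) -> 0.9*m,    f_A(k, m, P) -> 0.9,
+        #   f_Q(k)      -> 0.1,      f_R(k)       -> 0.5
+        # so constant and time-varying models share one functional interface
+        # in the raw algorithm.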
+        fa = lambda k,m,A: np.dot(A, m)
+        f_A = lambda k,m,P: p_A[:,:, index[k]]
+        f_Q = lambda k: p_Q[:,:, index[k]]
+        fh = lambda k,m,H: np.dot(H, m)
+        f_H = lambda k,m,P: p_H
+        f_R = lambda k: p_R
+
+        grad_calc_params_pass_further = None
+        if calc_grad_log_likelihood:
+            if A_or_Q_change_over_time:
+                raise ValueError("When computing likelihood gradient A and Q cannot change over time.")
+
+            f_dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA')
+
+            f_dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ')
+
+            f_dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
+
+            f_dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
+
+            dm_init = grad_calc_params.get('dm_init')
+            if dm_init is None:
+                dm_init = np.zeros( (state_dim,grad_params_no) )
+
+            dP_init = grad_calc_params.get('dP_init')
+            if dP_init is None:
+                dP_init = np.zeros( (state_dim,state_dim,grad_params_no) )
+
+            grad_calc_params_pass_further = {}
+            grad_calc_params_pass_further['f_dA'] = f_dA
+            grad_calc_params_pass_further['f_dQ'] = f_dQ
+            grad_calc_params_pass_further['f_dH'] = f_dH
+            grad_calc_params_pass_further['f_dR'] = f_dR
+            grad_calc_params_pass_further['dm_init'] = dm_init
+            grad_calc_params_pass_further['dP_init'] = dP_init
+
+        (M, P, log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(state_dim, fa, f_A, f_Q, fh, f_H, f_R, Y, m_init,
+                                P_init, calc_log_likelihood=calc_log_likelihood,
+                                calc_grad_log_likelihood=calc_grad_log_likelihood,
+                                grad_calc_params=grad_calc_params_pass_further)
+        # Restore shapes so that the input parameters are unchanged
+        if old_A_shape is not None:
+            p_A.shape = old_A_shape
+
+        if old_Q_shape is not None:
+            p_Q.shape = old_Q_shape
+
+        if old_H_shape is not None:
+            p_H.shape = old_H_shape
+
+        if old_R_shape is not None:
+            p_R.shape = old_R_shape
+
+        if old_Y_shape is not None:
+            Y.shape = old_Y_shape
+        # Return values
+        return (M, P, log_likelihood, grad_log_likelihood)
+
+    @classmethod
+    def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None,
+                          P_init=None,calc_log_likelihood=False):
+        """
+        Extended Kalman Filter.
+
+        Input:
+        -----------------
+
+        p_state_dim: integer
+            Dimensionality of the state vector.
+
+        p_a: if None, the linear dynamic function is assumed (no
+            nonlinearity in the dynamics). Otherwise:
+
+            function (k, x_{k-1}, A_{k}). Dynamic function.
+            k: (iteration number),
+            x_{k-1}: (previous state)
+            A_{k}: Jacobian matrix of f_a. In the linear case it is exactly A_{k}.
+
+        p_f_A: matrix - in this case a function which returns this matrix is assumed.
+            Look at this parameter description in the kalman_filter function.
+
+            function (k, m, P) returns the Jacobian of the dynamic function;
+            it is passed into p_a.
+
+            k: (iteration number),
+            m: point where the Jacobian is evaluated
+            P: parameter for the Jacobian, usually the covariance matrix.
+
+        p_f_Q: matrix. In this case a function which returns this matrix is assumed.
+            Look at this parameter description in the kalman_filter function.
+
+            function (k). Returns the noise matrix of the dynamic model on iteration k.
+            k: (iteration number).
+
+        p_h: if None, the linear measurement function is assumed (no
+            nonlinearity in the measurement). Otherwise:
+
+            function (k, x_{k}, H_{k}). Measurement function.
+            k: (iteration number),
+            x_{k}: (current state)
+            H_{k}: Jacobian matrix of f_h. In the linear case it is exactly H_{k}.
+
+        p_f_H: matrix - in this case a function which returns this matrix is assumed.
+            function (k, m, P) returns the Jacobian of the measurement function;
+            it is passed into p_h.
+            k: (iteration number),
+            m: point where the Jacobian is evaluated
+            P: parameter for the Jacobian, usually the covariance matrix.
+
+        p_f_R: matrix. In this case a function which returns this matrix is assumed.
+            function (k). Returns the noise matrix of the measurement equation
+            on iteration k.
+            k: (iteration number).
+
+        Y: matrix or vector
+            Data. If Y is a matrix then samples are along the 0-th dimension and
+            features along the 1-st. May have missing values.
+
+        m_init: vector
+            Initial distribution mean. If None it is assumed to be zero.
+
+        P_init: square symmetric matrix or scalar
+            Initial covariance of the states. If the parameter is a scalar
+            then it is assumed that the initial covariance matrix is the unit
+            matrix multiplied by this scalar. If None the unit matrix is used
+            instead.
+
+        calc_log_likelihood: boolean
+            Whether to calculate the marginal likelihood of the state-space model.
+        """
+
+        # Y
+        Y.shape, old_Y_shape = cls.reshape_input_data(Y.shape)
+
+        # m_init
+        if m_init is None:
+            m_init = np.zeros((p_state_dim,1))
+        else:
+            m_init = np.atleast_2d(m_init).T
+
+        # P_init
+        if P_init is None:
+            P_init = np.eye(p_state_dim)
+        elif not isinstance(P_init, collections.Iterable): #scalar
+            P_init = P_init*np.eye(p_state_dim)
+
+        if p_a is None:
+            p_a = lambda k,m,A: np.dot(A, m)
+
+        old_A_shape = None
+        if not isinstance(p_f_A, types.FunctionType): # not a function but an array
+            p_f_A = np.atleast_1d(p_f_A)
+            (p_A, old_A_shape) = cls._check_A_matrix(p_f_A)
+
+            p_f_A = lambda k, m, P: p_A[:,:, 0] # make function
+        else:
+            if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]:
+                raise ValueError("p_f_A function returns a matrix of wrong size")
+
+        old_Q_shape = None
+        if not isinstance(p_f_Q, types.FunctionType): # not a function but an array
+            p_f_Q = np.atleast_1d(p_f_Q)
+            (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q)
+
+            p_f_Q = lambda k: p_Q[:,:, 0] # make function
+        else:
+            if p_f_Q(1).shape[0] != m_init.shape[0]:
+                raise ValueError("p_f_Q function returns a matrix of wrong size")
+
+        if p_h is None:
+            p_h = lambda k,m,H: np.dot(H, m)
+
+        old_H_shape = None
+        if not isinstance(p_f_H, types.FunctionType): # not a function but an array
+            p_f_H = np.atleast_1d(p_f_H)
+            (p_H, old_H_shape) = cls._check_H_matrix(p_f_H)
+
+            p_f_H = lambda k, m, P: p_H # make function
+        else:
+            if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]:
+                raise ValueError("p_f_H function returns a matrix of wrong size")
+
+        old_R_shape = None
+        if not isinstance(p_f_R, types.FunctionType): # not a function but an array
+            p_f_R = np.atleast_1d(p_f_R)
+            (p_R, old_R_shape) = cls._check_R_matrix(p_f_R)
+
+            p_f_R = lambda k: p_R # make function
+        else:
+            if p_f_R(1).shape[0] != Y.shape[1]:
+                raise ValueError("p_f_R function returns a matrix of wrong size")
+
+        (M, P, log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init,
+                              P_init, calc_log_likelihood,
+                              calc_grad_log_likelihood=False, grad_calc_params=None)
+
+        if old_Y_shape is not None:
+            Y.shape = old_Y_shape
+
+        if old_A_shape is not None:
+            p_A.shape = old_A_shape
+
+        if old_Q_shape is not None:
+            p_Q.shape = old_Q_shape
+
+        if old_H_shape is not None:
+            p_H.shape = old_H_shape
+
+        if old_R_shape is not None:
+            p_R.shape = old_R_shape
+
+        return (M, P)
+
+    @classmethod
+    def _kalman_algorithm_raw(cls,state_dim, p_a, p_f_A, p_f_Q, p_h,
+                           
p_f_H, p_f_R, Y, m_init, + P_init, calc_log_likelihood=False, + calc_grad_log_likelihood=False, grad_calc_params=None): + """ + General nonlinear filtering algorithm for inference in the state-space + model: + + x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) + y_{k} = f_h(k, x_{k}, H_{k}) + r_{k}; r_{k-1} ~ N(0, R_{k}) + + Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) + + Input: + ----------------- + p_a: function (k, x_{k-1}, A_{k}). Dynamic function. + k (iteration number), + x_{k-1} + x_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. + + p_f_A: function (k, m, P) return Jacobian of dynamic function, it is + passed into p_a. + k (iteration number), + m: point where Jacobian is evaluated + P: parameter for Jacobian, usually covariance matrix. + + p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k. + k (iteration number). + + p_h: function (k, x_{k}, H_{k}). Measurement function. + k (iteration number), + x_{k} + H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. + + p_f_H: function (k, m, P) return Jacobian of dynamic function, it is + passed into p_h. + k (iteration number), + m: point where Jacobian is evaluated + P: parameter for Jacobian, usually covariance matrix. + + p_f_R: function (k). Returns noise matrix of measurement equation + on iteration k. + k (iteration number). + + Y: matrix or vector + Data. If Y is matrix then samples are along 0-th dimension and + features along the 1-st. May have missing values. + + m_init: vector + Initial distribution mean. Must be not None + + P_init: matrix or scalar + Initial covariance of the states. Must be not None + + calc_log_likelihood: boolean + Whether to calculate marginal likelihood of the state-space model. + + calc_grad_log_likelihood: boolean + Whether to calculate gradient of the marginal likelihood + of the state-space model. If true then the next parameter must + provide the extra parameters for gradient calculation. + + + Output: + -------------- + + M: (state_dim, no_steps) matrix + Filter estimates of the state means + + P: (state_dim, state_dim, no_steps) 3D array + Filter estimates of the state covariances + + log_likelihood: double + If the parameter calc_log_likelihood was set to true, return + logarithm of marginal likelihood of the state-space model. If + the parameter was false, return None + + """ + + steps_no = Y.shape[0] # number of steps in the Kalman Filter + + if calc_grad_log_likelihood: + grad_calc_params_1 = (grad_calc_params['f_dA'], grad_calc_params['f_dQ']) + grad_calc_params_2 = (grad_calc_params['f_dH'],grad_calc_params['f_dR']) + + dm_init = grad_calc_params['dm_init']; dP_init = grad_calc_params['dP_init'] + else: + grad_calc_params_1 = None + grad_calc_params_2 = None + + dm_init = None; dP_init = None + + # Allocate space for results + # Mean estimations. Initial values will be included + M = np.empty((state_dim,(steps_no+1))) + # Variance estimations. Initial values will be included + P = np.empty((state_dim,state_dim,(steps_no+1))) + + # Initialize + M[:,0] = m_init + P[:,:,0] = P_init + + if calc_log_likelihood: + log_likelihood = 0 + else: + log_likelihood = None + + if calc_grad_log_likelihood: + grad_log_likelihood = 0 + else: + grad_log_likelihood = None + # Main loop of the Kalman filter + for k in range(0,steps_no): + # In this loop index for new estimations is (k+1), old - (k) + # This happened because initial values are stored at 0-th index. 
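+            # For example, the update for the first measurement (k = 0) is
+            # written to M[:,1] and P[:,:,1], while M[:,0] and P[:,:,0] keep
+            # m_init and P_init.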
+
+            if k == 0: # setting initial values
+                dm_upd = dm_init
+                dP_upd = dP_init
+
+            m_pred, P_pred, dm_pred, dP_pred = \
+            cls._kalman_prediction_step(k, M[:,k], P[:,:,k], p_a, p_f_A, p_f_Q,
+                calc_grad_log_likelihood=calc_grad_log_likelihood,
+                p_dm = dm_upd, p_dP = dP_upd, grad_calc_params_1 = grad_calc_params_1)
+
+            m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
+            cls._kalman_update_step(k, m_pred, P_pred, p_h, p_f_H, p_f_R, Y,
+                calc_log_likelihood=calc_log_likelihood,
+                calc_grad_log_likelihood=calc_grad_log_likelihood,
+                p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = grad_calc_params_2)
+
+            if calc_log_likelihood:
+                log_likelihood += log_likelihood_update
+
+            if calc_grad_log_likelihood:
+                grad_log_likelihood += d_log_likelihood_update
+
+            M[:,k+1] = m_upd
+            P[:,:,k+1] = P_upd
+        # Return values (the initial values are kept)
+        #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood)
+        return (M, P, log_likelihood, grad_log_likelihood)
+
+    @staticmethod
+    def _kalman_prediction_step(k, p_m , p_P, p_a, p_f_A, p_f_Q, calc_grad_log_likelihood=False,
+                                p_dm = None, p_dP = None, grad_calc_params_1 = None):
+        """
+        Discrete prediction function.
+
+        Input:
+            k: int
+                Iteration No. Starts at 0. The total number of iterations
+                equals the number of measurements.
+
+            p_m: matrix of size (state_dim, time_series_no)
+                Mean value from the previous step.
+            p_P:
+                Covariance matrix from the previous step.
+
+            p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
+                k (iteration number), starts at 0
+                x_{k-1} State from the previous step
+                A_{k} Jacobian matrix of f_a. In the linear case it is exactly A_{k}.
+
+            p_f_A: function (k, m, P) returns the Jacobian of the dynamic
+                function; it is passed into p_a.
+                k (iteration number), starts at 0
+                m: point where the Jacobian is evaluated
+                P: parameter for the Jacobian, usually the covariance matrix.
+
+            p_f_Q: function (k). Returns the noise matrix of the dynamic model
+                on iteration k. k (iteration number), starts at 0.
+
+            calc_grad_log_likelihood: boolean
+                Whether to calculate the gradient of the marginal likelihood
+                of the state-space model. If true then the next parameters must
+                provide the extra values for the gradient calculation.
+            p_dm: 3D array (state_dim, time_series_no, parameters_no)
+                Mean derivatives from the previous step.
+
+            p_dP: 3D array (state_dim, state_dim, parameters_no)
+                Covariance derivatives from the previous step.
+        """
+
+        # Indices correspond to values from the previous iteration.
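+        # The step below implements the standard Kalman prediction equations
+        #   m_pred = a(m_prev),   P_pred = A * P_prev * A^T + Q,
+        # where A is the transition matrix (or the Jacobian of a nonlinear a).
+        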
+ A = p_f_A(k,p_m,p_P) # state transition matrix (or Jacobian) + Q = p_f_Q(k) # state noise matrix + + + # Prediction step -> + m_pred = p_a(k, p_m, A) # predicted mean + P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance + # Prediction step <- + + + if calc_grad_log_likelihood: + p_f_dA = grad_calc_params_1[0]; dA_all_params = p_f_dA(k) # derivatives of A wrt parameters + p_f_dQ = grad_calc_params_1[1]; dQ_all_params = p_f_dQ(k) # derivatives of Q wrt parameters + dm_all_params = p_dm # derivarites from the previous step + dP_all_params = p_dP + + param_number = p_dP.shape[2] + + dm_pred = np.empty(dm_all_params.shape) + dP_pred = np.empty(dP_all_params.shape) + + for j in range(param_number): + dA = dA_all_params[:,:,j] + dQ = dQ_all_params[:,:,j] + + + dm = dm_all_params[:,j] + dP = dP_all_params[:,:,j] + + # prediction step derivatives for current parameter: + dm_pred[:,j] = np.dot(dA, p_m) + np.dot(A, dm) + dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T)) + dP_pred[:,:,j] += dP_pred[:,:,j].T + dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ + + else: + dm_pred = None + dP_pred = None + + return m_pred, P_pred, dm_pred, dP_pred + + + @staticmethod + def _kalman_update_step(k, p_m , p_P, p_h, p_f_H, p_f_R, Y, calc_log_likelihood= False, + calc_grad_log_likelihood=False, p_dm = None, p_dP = None, grad_calc_params_2 = None): + """ + Input: + + k: int + Iteration No. Starts at 0. Total number of iterations equal to the + number of measurements. + + m_P: matrix of size (state_dim, time_series_no) + Mean value from the previous step. + + p_P: + Covariance matrix from the previous step. + + p_h: function (k, x_{k}, H_{k}). Measurement function. + k (iteration number), starts at 0 + x_{k} state + H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. + + p_f_H: function (k, m, P) return Jacobian of dynamic function, it is + passed into p_h. + k (iteration number), starts at 0 + m: point where Jacobian is evaluated + P: parameter for Jacobian, usually covariance matrix. + + p_f_R: function (k). Returns noise matrix of measurement equation + on iteration k. + k (iteration number). starts at 0 + calc_grad_log_likelihood: int + + p_dm: array + Mean derivatives from the prediction step + + p_dP: array + Covariance derivatives from the prediction step + """ + + m_pred = p_m + P_pred = p_P + + H = p_f_H(k, m_pred, P_pred) + R = p_f_R(k) + measurement = Y[k,:].T # measurement as column + + log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None + # Update step (only if there is data) + if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. 
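# Standard Kalman update: innovation v = y - h(m_pred), innovation
+            # covariance S = H * P_pred * H^T + R, gain K = P_pred * H^T * S^{-1}.
+            # The scalar branch below avoids a Cholesky factorization when the
+            # measurement is one-dimensional.
+            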
+ v = measurement-p_h(k, m_pred, H) + S = H.dot(P_pred).dot(H.T) + R + if Y.shape[1]==1: # measurements are one dimensional + K = P_pred.dot(H.T) / S + if calc_log_likelihood: + log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + + v*v / S) + log_likelihood_update = log_likelihood_update[0,0] # to make int + if np.isnan(log_likelihood_update): + pass + LL = None; islower = None + else: + LL,islower = linalg.cho_factor(S) + K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T + + if calc_log_likelihood: + log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + + 2*np.sum( np.log(np.diag(LL)) ) + + np.dot(v.T, linalg.cho_solve((LL,islower),v)) ) + log_likelihood_update = log_likelihood_update[0,0] # to make int + + + if calc_grad_log_likelihood: + dm_pred_all_params = p_dm # derivativas of the prediction phase + dP_pred_all_params = p_dP + + param_number = p_dP.shape[2] + + p_f_dH = grad_calc_params_2[0]; dH_all_params = p_f_dH(k) + p_f_dR = grad_calc_params_2[1]; dR_all_params = p_f_dR(k) + + dm_upd = np.empty(dm_pred_all_params.shape) + dP_upd = np.empty(dP_pred_all_params.shape) + + d_log_likelihood_update = np.empty((param_number,1)) + for param in range(param_number): + + dH = dH_all_params[:,:,param] + dR = dR_all_params[:,:,param] + + dm_pred = dm_pred_all_params[:,param] + dP_pred = dP_pred_all_params[:,:,param] + + # Terms in the likelihood derivatives + dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) + dS = np.dot(dH, np.dot( P_pred, H.T)) + dS += dS.T + dS += np.dot(H, np.dot( dP_pred, H.T)) + dR + + # TODO: maybe symmetrize dS + + #dm and dP for the next stem + if LL is not None: # the state vector is not a scalar + tmp1 = linalg.cho_solve((LL,islower), H).T + tmp2 = linalg.cho_solve((LL,islower), dH).T + tmp3 = linalg.cho_solve((LL,islower), dS).T + else: # the state vector is a scalar + tmp1 = H.T / S + tmp2 = dH.T / S + tmp3 = dS.T / S + + dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ + np.dot( P_pred, np.dot( tmp1, tmp3 ) ) + + # terms required for the next step, save this for each parameter + dm_upd[:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) + dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) + dP_upd[:,:,param] += dP_upd[:,:,param].T + dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) + + dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize + # computing the likelihood change for each parameter: + if LL is not None: # the state vector is not 1D + #tmp4 = linalg.cho_solve((LL,islower), dv) + tmp5 = linalg.cho_solve((LL,islower), v) + else: # the state vector is a scalar + #tmp4 = dv / S + tmp5 = v / S + + + d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ + np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) + + + + # Compute the actual updates for mean and variance of the states. + m_upd = m_pred + K.dot( v ) + + # Covariance update and ensure it is symmetric + P_upd = K.dot(S).dot(K.T) + P_upd = 0.5*(P_upd + P_upd.T) + P_upd = P_pred - P_upd# this update matrix is symmetric + + return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update + + @staticmethod + def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step, + p_P_prev_step, p_f_A): + """ + Input: + + k: int + Iteration No. Starts at 0. Total number of iterations equal to the + number of measurements. 
+ + p_m: matrix of size (state_dim, time_series_no) + Filter mean on step k + + p_P: + Filter Covariance on step k + + p_m_pred: matrix of size (state_dim, time_series_no) + Prediction mean + + p_P_pred: + Prediction covariance: + + p_m_prev_step + Smoother mean from the previous step. + + p_P_prev_step: + Smoother covariance from the previous step. + + p_f_A: function (k, m, P) return Jacobian of dynamic function, it is + passed into p_a. + k (iteration number), starts at 0 + m: point where Jacobian is evaluated + P: parameter for Jacobian, usually covariance matrix. + + """ + + A = p_f_A(k,p_m,p_P) # state transition matrix (or Jacobian) + + tmp = np.dot( A, p_P.T) + if A.shape[0] == 1: # 1D states + G = tmp.T / p_P_pred # P[:,:,k] is symmetric + else: + LL,islower = linalg.cho_factor(p_P_pred) + G = linalg.cho_solve((LL,islower),tmp).T + + m_upd = p_m + G.dot( p_m_prev_step-p_m_pred ) + P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T) + + P_upd = 0.5*(P_upd + P_upd.T) + + return m_upd, P_upd + + @classmethod + def _rts_smoother_raw(cls,state_dim, p_a, p_f_A, p_f_Q, filter_means, + filter_covars): + """ + This function implements Rauch–Tung–Striebel(RTS) smoother algorithm + based on the results of kalman_filter_raw. + These notations are the same: + x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) + y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) + + Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) + + Input: + -------------- + + p_a: function (k, x_{k-1}, A_{k}). Dynamic function. + k (iteration number), starts at 0 + x_{k-1} State from the previous step + A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. + + p_f_A: function (k, m, P) return Jacobian of dynamic function, it is + passed into p_a. + k (iteration number), starts at 0 + m: point where Jacobian is evaluated + P: parameter for Jacobian, usually covariance matrix. + + p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k. + k (iteration number). 
starts at 0 + + filter_means: + Include initial values + + filter_covars + Include initial values + + Output: + ------------- + + M: (state_dim, no_steps) matrix + Smoothed estimates of the state means + + P: (state_dim, state_dim, no_steps) 3D array + Smoothed estimates of the state covariances + """ + + no_steps = filter_covars.shape[-1] # number + + M = np.empty(filter_means.shape) # smoothed means + P = np.empty(filter_covars.shape) # smoothed covars + + M[:,-1] = filter_means[:,-1] + P[:,:,-1] = filter_covars[:,:,-1] + for k in range(no_steps-2,-1,-1): + + m_pred, P_pred, tmp1, tmp2 = \ + cls._kalman_prediction_step(k, filter_means[:,k], + filter_covars[:,:,k], p_a, p_f_A, p_f_Q, + calc_grad_log_likelihood=False) + + m_upd, P_upd = cls._rts_smoother_update_step(k, + filter_means[:,k] ,filter_covars[:,:,k], + m_pred, P_pred, M[:,k+1] ,P[:,:,k+1], p_f_A) + + M[:,k] = m_upd + P[:,:,k] = P_upd + # Return values + + return (M, P) + + @staticmethod + def _check_A_matrix(p_A): + """ + Check A matrix so that it has right shape + """ + + old_A_shape = None + if len(p_A.shape) < 3: + old_A_shape = p_A.shape # save shape to restore it on exit + if len(p_A.shape) == 2: # matrix + p_A.shape = (p_A.shape[0],p_A.shape[1],1) + if p_A.shape[0] != p_A.shape[1]: + raise ValueError("p_A must be a square matrix") + + elif len(p_A.shape) == 1: # scalar but in array already + if (p_A.shape[0] != 1): + raise ValueError("Parameter p_A is an 1D array, while it must be matrix or scalar") + else: + p_A.shape = (1,1,1) + + return (p_A,old_A_shape) + + @staticmethod + def _check_Q_matrix(p_Q): + """ + Check Q matrix + """ + + old_Q_shape = None + if len(p_Q.shape) < 3: + old_Q_shape = p_Q.shape # save shape to restore it on exit + if len(p_Q.shape) == 2: # matrix + p_Q.shape = (p_Q.shape[0],p_Q.shape[1],1) + if p_Q.shape[0] != p_Q.shape[1]: + raise ValueError("p_Q must be a square matrix") + + elif len(p_Q.shape) == 1: # scalar but in array already + if (p_Q.shape[0] != 1): + raise ValueError("Parameter p_Q is an 1D array, while it must be matrix or scalar") + else: + p_Q.shape = (1,1,1) + + return (p_Q,old_Q_shape) + + @staticmethod + def _check_H_matrix(p_H): + """ + Check H matrix + """ + + old_H_shape = None + if (len(p_H.shape) == 1): + if p_H.shape[0] != 1: + raise ValueError("Ambiguity in the shape of p_H") + else: + old_H_shape = p_H.shape + p_H.shape = (1,1) + + return (p_H, old_H_shape) + + @staticmethod + def _check_R_matrix(p_R): + """ + Check R matrix + """ + + old_R_shape = None + if (len(p_R.shape) == 1): + if p_R.shape[0] != 1: + raise ValueError("Ambiguity in the shape of p_H") + else: + old_R_shape = p_R.shape + p_R.shape = (1,1) + + return (p_R, old_R_shape) + + @staticmethod + def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'): + """ + Check dA, dQ matrices + + Input: + + which: string 'dA' or 'dQ' + """ + + + if dM is None: + dM=np.zeros((state_dim,state_dim,grad_params_no)) + elif isinstance(dM, np.ndarray): + if state_dim == 1: + if len(dM.shape) < 3: + dM.shape = (1,1,1) + else: + if len(dM.shape) < 3: + dM.shape = (state_dim,state_dim,1) + elif isinstance(dM, np.int): + if state_dim > 1: + raise ValueError("When computing likelihood gradient wrong %s dimension." 
% which) + else: + dM = np.ones((1,1,1)) * dM + if not isinstance(dM, types.FunctionType): + f_dM = lambda k: dM + else: + f_dM = dM + + return f_dM + + + @staticmethod + def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'): + """ + Check dA, dQ matrices + + Input: + + which: string 'dH' or 'dR' + """ + + if dM is None: + if which == 'dH': + dM=np.zeros((measurement_dim ,state_dim,grad_params_no)) + elif which == 'dR': + dM=np.zeros((measurement_dim,measurement_dim,grad_params_no)) + elif isinstance(dM, np.ndarray): + if state_dim == 1: + if len(dM.shape) < 3: + dM.shape = (1,1,1) + else: + if len(dM.shape) < 3: + if which == 'dH': + dM.shape = (measurement_dim,state_dim,1) + elif which == 'dR': + dM.shape = (measurement_dim,measurement_dim,1) + elif isinstance(dM, np.int): + if state_dim > 1: + raise ValueError("When computing likelihood gradient wrong dH dimension.") + else: + dM = np.ones((1,1,1)) * dM + if not isinstance(dM, types.FunctionType): + f_dM = lambda k: dM + else: + f_dM = dM + + return f_dM + + + +class Struct(object): + pass + +class ContDescrStateSpace(DescreteStateSpace): + """ + Class for continuous-discrete Kalman filter. State equation is + continuous while measurement equation is discrete. + + d x(t)/ dt = F x(t) + L q; where q~ N(0, Qc) + y_{t_k} = H_{k} x_{t_k} + r_{k}; r_{k-1} ~ N(0, R_{k}) + + """ + + class AQcompute_once(object): + """ + Class for providing the functions for A and Q dA and dQ calculation + and for caching the result of the call + """ + + def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): + """ + Input: + + dt: array + Array of dt steps + """ + self.dt = dt + self.F = F + self.L = L + self.Qc = Qc + + self.P_inf = P_inf + self.dP_inf = dP_inf + self.dF = dF + self.dQc = dQc + + self.compute_derivatives = compute_derivatives + self.grad_params_no = grad_params_no + + + self.last_k = 0 + self.last_k_computed = False + self.Ak = None + self.Qk = None + + def recompute_for_new_k(self,k): + """ + """ + + if self.last_k == k: + if (self.last_k_computed == False): + Ak,Qk, tmp, dAk, dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F, + self.L,self.Qc,self.dt[k],self.compute_derivatives, + grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc) + self.Ak = Ak + self.Qk = Qk + self.dAk = dAk + self.dQk = dQk + self.last_k_computed = True + else: + Ak = self.Ak + Qk = self.Qk + dAk = self.dAk + dQk = self.dQk + else: + Ak,Qk, tmp, dAk, dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F, + self.L,self.Qc,self.dt[k],self.compute_derivatives, + grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc) + + self.last_k = k + self.last_k_computed = True + self.Ak = Ak + self.Qk = Qk + self.dAk = dAk + self.dQk = dQk + return Ak,Qk, dAk, dQk + + def reset(self, compute_derivatives): + """ + For reusing this object e.g. 
in smoother computation + """ + + self.last_k = 0 + self.last_k_computed = False + self.compute_derivatives = compute_derivatives + + return self + + def f_A(self,k,m,P): + + Ak,Qk, dAk, dQk = self.recompute_for_new_k(k) + return Ak + + def f_Q(self,k): + + Ak,Qk, dAk, dQk = self.recompute_for_new_k(k) + + return Qk + + def f_dA(self, k): + + Ak,Qk, dAk, dQk = self.recompute_for_new_k(k) + + return dAk + + def f_dQ(self, k): + + Ak,Qk, dAk, dQk = self.recompute_for_new_k(k) + + return dQk + + + class AQcompute_batch(object): + """ + """ + def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): + """ + Input: + + dt: array + Array of dt steps + """ + As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, + L,Qc,dt,compute_derivatives, + grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) + + self.As = As + self.Qs = Qs + self.dAs = dAs + self.dQs = dQs + self.reconstruct_indices = reconstruct_indices + + def reset(self, compute_derivatives): + """ + For reusing this object e.g. in smoother computation + """ + + return self + + def f_A(self,k,m,P): + """ + """ + return self.As[:,:, self.reconstruct_indices[k]] + + def f_Q(self,k): + """ + """ + return self.Qs[:,:, self.reconstruct_indices[k]] + + def f_dA(self,k): + """ + """ + return self.dAs[:,:, :, self.reconstruct_indices[k]] + + def f_dQ(self,k): + """ + """ + return self.dQs[:,:, :, self.reconstruct_indices[k]] + + @classmethod + def cont_discr_kalman_filter(cls,F,L,Qc,H,R,P_inf,X,Y,m_init=None, + P_init=None, calc_log_likelihood=False, + calc_grad_log_likelihood=False, grad_params_no=None, grad_calc_params=None): + """ + + """ + + # Time step lengths + state_dim = F.shape[0] + measurement_dim = Y.shape[1] + + if m_init is None: + m_init = np.zeros((state_dim,1)) + + if P_init is None: + P_init = P_inf.copy() + + if calc_grad_log_likelihood: + + dF = cls._check_grad_state_matrices( grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA') + dQc = cls._check_grad_state_matrices( grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ') + dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dQ') + + dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') + dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR') + + dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter + if dm_init is None: + dm_init = np.zeros( (state_dim,grad_params_no) ) + + dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter + if dP_init is None: + dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function + + else: + dP_inf = None + dF = None + dQc = None + dH = None + dR = None + + + # TODO: Defaults for m_init, P_init, dm_init, dP_init. !!! 
+ # Also for dH, dR and probably for all derivatives + (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim,F,L,Qc,H,R,P_inf,X,Y,m_init=m_init, + P_init=P_init, calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, dP_inf=dP_inf, + dF = dF, dQc=dQc, dH=dH, dR=dR, dm_init=dm_init, dP_init=dP_init) + + return (M, P, log_likelihood, grad_log_likelihood,AQcomp) + + @classmethod + def _cont_discr_kalman_filter_raw(cls,state_dim,F,L,Qc,H,R,P_inf,X,Y,m_init=None, + P_init=None, calc_log_likelihood=False, + calc_grad_log_likelihood=False, grad_params_no=None, dP_inf=None, + dF = None, dQc=None, dH=None, dR=None, dm_init=None, dP_init=None): + """ + Kalman filter for continuos dynamic model. + + Input: + F: + Dynamic model matrix + + X: + Time steps + """ + + steps_no = X.shape[0] # number of steps in the Kalman Filter + + # Return object which computes A, Q and possibly derivatives on the way, pass the derivative matrices not functions + AQcomp = cls._cont_to_discrete_object(X, F,L,Qc,compute_derivatives=calc_grad_log_likelihood, + grad_params_no=grad_params_no, + P_inf=P_inf, dP_inf=dP_inf(0), + dF = dF(0), dQc=dQc(0)) + + # Functions required for Kalman filter + f_a = lambda k,m,A: np.dot(A, m) # state dynamic model + f_h = lambda k,m,H: np.dot(H, m) # measurement model + f_H = lambda k,m,P: H + f_R = lambda k: R + + # Allocate space for results + # Mean estimations. Initial values will be included + M = np.empty((state_dim,(steps_no+1))) + # Variance estimations. Initial values will be included + P = np.empty((state_dim,state_dim,(steps_no+1))) + + # Initialize + M[:,0] = m_init[:,0] + P[:,:,0] = P_init + + log_likelihood = 0 + grad_log_likelihood = np.zeros((grad_params_no,1)) + # Main loop of the Kalman filter + for k in range(0,steps_no): + # In this loop index for new estimations is (k+1), old - (k) + # This happened because initial values are stored at 0-th index. + + if k == 0: #setting initial values + dm_upd = dm_init + dP_upd = dP_init + + m_pred, P_pred, dm_pred, dP_pred = \ + cls._kalman_prediction_step(k, M[:,k] ,P[:,:,k], f_a, AQcomp.f_A, AQcomp.f_Q, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm = dm_upd, p_dP = dP_upd, grad_calc_params_1 = (AQcomp.f_dA, AQcomp.f_dQ) ) + + m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ + cls._kalman_update_step(k, m_pred , P_pred, f_h, f_H, f_R, Y, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) + + if calc_log_likelihood: + log_likelihood += log_likelihood_update + + if calc_grad_log_likelihood: + grad_log_likelihood += d_log_likelihood_update + + M[:,k+1] = m_upd + P[:,:,k+1] = P_upd + # Return values, get rid of initial values + #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood) + # Return values, get rid of initial values + #return (M[:,1:], P[:,:,1:], log_likelihood, grad_log_likelihood) + return (M, P, log_likelihood, grad_log_likelihood, AQcomp.reset(False)) + + @classmethod + def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars, + AQcomp=None, X=None, F=None,L=None,Qc=None): + """ + + UPDATE Description! + + This function implements Rauch–Tung–Striebel(RTS) smoother algorithm + based on the results of kalman_filter_raw. 
+        These notations are the same:
+            x_{k} = A_{k} * x_{k-1} + q_{k-1};   q_{k-1} ~ N(0, Q_{k-1})
+            y_{k} = H_{k} * x_{k} + r_{k};       r_{k} ~ N(0, R_{k})
+
+        Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k))
+
+        Input:
+        --------------
+
+        filter_means:
+            Filter means, including the initial values.
+
+        filter_covars:
+            Filter covariances, including the initial values.
+
+        Output:
+        -------------
+
+        M: (state_dim, no_steps) matrix
+            Smoothed estimates of the state means
+
+        P: (state_dim, state_dim, no_steps) 3D array
+            Smoothed estimates of the state covariances
+        """
+
+        if AQcomp is None: # make this object from scratch
+            AQcomp = cls._cont_to_discrete_object(X, F,L,Qc,compute_derivatives=False,
+                grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None)
+
+        f_a = lambda k,m,A: np.dot(A, m) # state dynamic model
+
+        no_steps = filter_covars.shape[-1] # number of steps
+
+        M = np.empty(filter_means.shape) # smoothed means
+        P = np.empty(filter_covars.shape) # smoothed covars
+
+        M[:,-1] = filter_means[:,-1]
+        P[:,:,-1] = filter_covars[:,:,-1]
+        for k in range(no_steps-2,-1,-1):
+
+            m_pred, P_pred, tmp1, tmp2 = \
+            cls._kalman_prediction_step(k, filter_means[:,k],
+                    filter_covars[:,:,k], f_a, AQcomp.f_A, AQcomp.f_Q,
+                    calc_grad_log_likelihood=False)
+
+            m_upd, P_upd = cls._rts_smoother_update_step(k,
+                    filter_means[:,k], filter_covars[:,:,k],
+                    m_pred, P_pred, M[:,k+1], P[:,:,k+1], AQcomp.f_A)
+
+            M[:,k] = m_upd
+            P[:,:,k] = P_upd
+        # Return values
+
+        return (M, P)
+
+    @classmethod
+    def _cont_to_discrete_object(cls, X, F,L,Qc,compute_derivatives=False,grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
+        """
+        Function returns an object which can be used in the Kalman filter
+        and/or smoother to obtain the matrices A and Q, as well as the
+        necessary derivatives.
+        """
+
+        unique_round_decimals = 8
+        dt = np.empty((X.shape[0],))
+        dt[1:] = np.diff(X[:,0],axis=0)
+        dt[0] = dt[1]
+        unique_dts = np.unique(np.round(dt, decimals=unique_round_decimals))
+
+        if len(unique_dts) > 20:
+            AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives,
+                grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+        else:
+            AQcomp = cls.AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives,
+                grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+
+        return AQcomp
+
+    @staticmethod
+    def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False,
+                    grad_params_no=None, P_inf=None,
+                    dP_inf=None, dF = None, dQc=None):
+        """
+        Linear Time-Invariant Stochastic Differential Equation (LTI SDE):
+
+            dx(t) = F x(t) dt + L d \beta,  where
+
+            x(t): (vector) stochastic process
+            \beta: (vector) Brownian motion process
+            F, L: (time invariant) matrices of corresponding dimensions
+            Qc: covariance of the noise.
+
+        This function rewrites it into the corresponding state-space form:
+
+            x_{k} = A_{k} * x_{k-1} + q_{k-1};   q_{k-1} ~ N(0, Q_{k-1})
+
+        Input:
+        --------------
+        F,L: LTI SDE matrices of corresponding dimensions
+
+        Qc: matrix (n,n)
+            Covariance between different dimensions of the noise \beta.
+            n is the dimensionality of the noise.
+
+        dt: double or iterable
+            Time difference used on this iteration.
+            If dt is iterable, then A and Q_noise are computed for every
+            unique dt.
+
+        compute_derivatives: boolean
+            Whether to compute the derivatives of A and Q with respect to
+            the parameters.
+
+        grad_params_no: int
+            Number of parameters.
+
+        P_inf: matrix
+            Stationary state covariance; required when derivatives are computed.
+
+        dP_inf, dF, dQc:
+            Derivatives of the corresponding matrices with respect to the
+            parameters.
+
+        Output:
+        --------------
+
+        A: matrix
+            A_{k}. 
Because we have LTI SDE only dt can affect on matrix + difference for different k. + + Q_noise: matrix + Covariance matrix of (vector) q_{k-1}. Only dt can affect the + matrix difference for different k. + + reconstruct_index: array + If dt was iterable return three dimensinal arrays A and Q_noise. + Third dimension of these arrays correspond to unique dt's. + This reconstruct_index contain indices of the original dt's + in the uninue dt sequence. A[:,:, reconstruct_index[5]] + is matrix A of 6-th(indices start from zero) dt in the original + sequence. + """ + + # Dimensionality + n = F.shape[0] + + if not isinstance(dt, collections.Iterable): # not iterable, scalar + + # The dynamical model + A = linalg.expm(F*dt) + + # The covariance matrix Q by matrix fraction decomposition -> + Phi = np.zeros((2*n,2*n)) + Phi[:n,:n] = F + Phi[:n,n:] = L.dot(Qc).dot(L.T) + Phi[n:,n:] = -F.T + AB = linalg.expm(Phi*dt).dot(np.vstack((np.zeros((n,n)),np.eye(n)))) + Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T) + # The covariance matrix Q by matrix fraction decomposition <- + + if compute_derivatives: + dA = np.zeros([n, n, grad_params_no]) + dQ = np.zeros([n, n, grad_params_no]) + + #AA = np.zeros([2*n, 2*n, nparam]) + FF = np.zeros([2*n, 2*n]) + AA = np.zeros([2*n, 2*n, grad_params_no]) + + for p in range(0, grad_params_no): + + FF[:n,:n] = F + FF[n:,:n] = dF[:,:,p] + FF[n:,n:] = F + + # Solve the matrix exponential + AA[:,:,p] = linalg.expm(FF*dt) + + # Solve the differential equation + #foo = AA[:,:,p].dot(np.vstack([m, dm[:,p]])) + #mm = foo[:n,:] + #dm[:,p] = foo[n:,:] + + # The discrete-time dynamical model* + if p==0: + A = AA[:n,:n,p] + Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) + Q_noise = Q_noise_2 + #PP = A.dot(P).dot(A.T) + Q_noise_2 + + # The derivatives of A and Q + dA[:,:,p] = AA[n:,:n,p] + dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \ + - A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times + + # The derivatives of P + #dP[:,:,p] = dA.dot(P).dot(A.T) + A.dot(dP_prev[:,:,p]).dot(A.T) \ + # + A.dot(P).dot(dA.T) + dQ + #Q_noise = Q_noise_2 + else: + dA = None + dQ = None + Q_noise = Q_noise_1 + + + # Return + return A, Q_noise,None, dA, dQ + + else: # iterable, array + + # Time discretizations (round to 14 decimals to avoid problems) + dt_unique, tmp, reconstruct_index = np.unique(np.round(dt,8), + return_index=True,return_inverse=True) + del tmp + # Allocate space for A and Q + A = np.empty((n,n,dt_unique.shape[0])) + Q_noise = np.empty((n,n,dt_unique.shape[0])) + + if compute_derivatives: + dA = np.empty((n,n,grad_params_no,dt_unique.shape[0])) + dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0])) + + # Call this function for each unique dt + for j in range(0,dt_unique.shape[0]): + A[:,:,j], Q_noise[:,:,j], tmp1, dA[:,:,:,j], dQ[:,:,:,j] = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j], + compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) + + # Return + return A, Q_noise, reconstruct_index, dA, dQ \ No newline at end of file diff --git a/GPy/models/state_space_new.py b/GPy/models/state_space_new.py new file mode 100644 index 00000000..567ee434 --- /dev/null +++ b/GPy/models/state_space_new.py @@ -0,0 +1,322 @@ +# Copyright (c) 2013, Arno Solin. 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) +# +# This implementation of converting GPs to state space models is based on the article: +# +# @article{Sarkka+Solin+Hartikainen:2013, +# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen}, +# year = {2013}, +# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing}, +# journal = {IEEE Signal Processing Magazine}, +# volume = {30}, +# number = {4}, +# pages = {51--61} +# } +# + +import numpy as np +from scipy import linalg +from ..core import Model +from .. import kern +from GPy.plotting.matplot_dep.models_plots import gpplot +from GPy.plotting.matplot_dep.base_plots import x_frame1D +from GPy.plotting.matplot_dep import Tango +import pylab as pb +from GPy.core.parameterization.param import Param + +import GPy +import GPy.models.state_space_main as ssm +#import state_space_main as ssm +reload(ssm) +print ssm.__file__ + +class StateSpace(Model): + def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'): + super(StateSpace, self).__init__(name=name) + self.num_data, input_dim = X.shape + assert input_dim==1, "State space methods for time only" + num_data_Y, self.output_dim = Y.shape + assert num_data_Y == self.num_data, "X and Y data don't match" + assert self.output_dim == 1, "State space methods for single outputs only" + + # Make sure the observations are ordered in time + sort_index = np.argsort(X[:,0]) + self.X = X[sort_index] + self.Y = Y[sort_index] + + # Noise variance + self.sigma2 = Param('Gaussian_noise', sigma2) + self.link_parameter(self.sigma2) + + # Default kernel + if kernel is None: + self.kern = kern.Matern32(1) + else: + self.kern = kernel + self.link_parameter(self.kern) + + self.sigma2.constrain_positive() + + # Assert that the kernel is supported + if not hasattr(self.kern, 'sde'): + raise NotImplementedError('SDE must be implemented for the kernel being used') + #assert self.kern.sde() not False, "This kernel is not supported for state space estimation" + + def parameters_changed(self): + """ + Parameters have now changed + """ + + # Get the model matrices from the kernel + (F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde() + + # necessary parameters + measurement_dim = self.output_dim + grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter + + # add measurement noise as a parameter and get the gradient matrices + dF = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no]) + dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no]) + dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no]) + + # Assign the values for the kernel function + dF[:,:,:-1] = dFt + dQc[:,:,:-1] = dQct + dP_inf[:,:,:-1] = dP_inft + + # The sigma2 derivative + dR = np.zeros([measurement_dim,measurement_dim,grad_params_no]) + dR[:,:,-1] = np.eye(measurement_dim) + + + # Use the Kalman filter to evaluate the likelihood + + grad_calc_params = {} + grad_calc_params['dP_inf'] = dP_inf + grad_calc_params['dF'] = dF + grad_calc_params['dQc'] = dQc + grad_calc_params['dR'] = dR + + (filter_means, filter_covs, log_likelihood, + grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None, + P_init=None, calc_log_likelihood=True, + calc_grad_log_likelihood=True, + grad_params_no=grad_params_no, + grad_calc_params=grad_calc_params) + + self._log_marginal_likelihood = log_likelihood + #gradients = self.compute_gradients() + self.sigma2.gradient_full[:] = 
grad_log_likelihood[-1,0]
+        self.kern.gradient_full[:] = grad_log_likelihood[:-1,0]
+
+    def log_likelihood(self):
+        return self._log_marginal_likelihood
+
+    def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
+        """
+        Inner function. It is called only from inside this class.
+        """
+
+        # Set defaults
+        if Ynew is None:
+            Ynew = self.Y
+
+        # Make a single matrix containing training and testing points
+        X = np.vstack((self.X, Xnew))
+        Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
+
+        # Sort the matrix (save the order)
+        _, return_index, return_inverse = np.unique(X,True,True)
+        X = X[return_index]
+        Y = Y[return_index]
+
+        # Get the model matrices from the kernel
+        (F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
+        state_dim = F.shape[0]
+
+        # Run the Kalman filter on the combined and sorted data
+        (M, P, tmp_log_likelihood,
+         tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,X,Y,m_init=None,
+                              P_init=None, calc_log_likelihood=False,
+                              calc_grad_log_likelihood=False)
+
+        # Run the Rauch-Tung-Striebel smoother
+        if not filteronly:
+            (M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
+                             AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
+
+        # Remove the initial values
+        M = M[:,1:]
+        P = P[:,:,1:]
+
+        # Put the data back in the original order
+        M = M[:,return_inverse]
+        P = P[:,:,return_inverse]
+
+        # Only return the values for Xnew
+        M = M[:,self.num_data:]
+        P = P[:,:,self.num_data:]
+
+        # Calculate the mean and variance
+        m = H.dot(M).T
+        V = np.tensordot(H[0],P,(0,0))
+        V = np.tensordot(V,H[0],(0,0))
+        V = V[:,None]
+
+        # Return the posterior of the state
+        return (m, V)
+
+    def predict(self, Xnew, filteronly=False):
+
+        # Run the Kalman filter to get the state
+        (m, V) = self._predict_raw(Xnew,filteronly=filteronly)
+
+        # Add the noise variance to the state variance
+        V += self.sigma2
+
+        # Lower and upper bounds
+        lower = m - 2*np.sqrt(V)
+        upper = m + 2*np.sqrt(V)
+
+        # Return mean and variance
+        return (m, V, lower, upper)
+
+    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
+            ax=None, resolution=None, plot_raw=False, plot_filter=False,
+            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+
+        # Deal with optional parameters
+        if ax is None:
+            fig = pb.figure(num=fignum)
+            ax = fig.add_subplot(111)
+
+        # Define the frame on which to plot
+        resolution = resolution or 200
+        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+
+        # Make a prediction on the frame and plot it
+        if plot_raw:
+            m, v = self._predict_raw(Xgrid,filteronly=plot_filter)
+            lower = m - 2*np.sqrt(v)
+            upper = m + 2*np.sqrt(v)
+            Y = self.Y
+        else:
+            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
+            Y = self.Y
+
+        # Plot the values
+        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
+        ax.plot(self.X, self.Y, 'kx', mew=1.5)
+
+        # Optionally plot some samples
+        if samples:
+            if plot_raw:
+                Ysim = self.posterior_samples_f(Xgrid, samples)
+            else:
+                Ysim = self.posterior_samples(Xgrid, samples)
+            for yi in Ysim.T:
+                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
+
+        # Set the limits of the plot to some sensible values
+        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
+        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+        ax.set_xlim(xmin, xmax)
+        ax.set_ylim(ymin, ymax)
+
+    def prior_samples_f(self,X,size=10):
+
+        # Sort the matrix (save the order)
+        (_, return_index, return_inverse) = np.unique(X,True,True)
+        X = X[return_index]
+
+        # Get the model matrices from the kernel
+        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+
+        # Allocate space for results
+        Y = np.empty((size,X.shape[0]))
+
+        # Simulate random draws
+        #for j in range(0,size):
+        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
+        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
+
+        # Only observations
+        Y = np.tensordot(H[0],Y,(0,0))
+
+        # Reorder simulated values
+        Y = Y[:,return_inverse]
+
+        # Return trajectory
+        return Y.T
+
+    def posterior_samples_f(self,X,size=10):
+
+        # Sort the matrix (save the order)
+        (_, return_index, return_inverse) = np.unique(X,True,True)
+        X = X[return_index]
+
+        # Get the model matrices from the kernel
+        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+
+        # Run the smoother on the original data
+        (m,V) = self._predict_raw(X)
+
+        # Simulate random draws from the GP prior
+        y = self.prior_samples_f(np.vstack((self.X, X)),size)
+
+        # Allocate space for sample trajectories
+        Y = np.empty((size,X.shape[0]))
+
+        # Run the RTS smoother on each of these values
+        for j in range(0,size):
+            yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
+            (m2,V2) = self._predict_raw(X,Ynew=yobs)
+            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
+
+        # Reorder simulated values
+        Y = Y[:,return_inverse]
+
+        # Return posterior sample trajectories
+        return Y.T
+
+    def posterior_samples(self, X, size=10):
+
+        # Make samples of f
+        Y = self.posterior_samples_f(X,size)
+
+        # Add noise
+        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
+
+        # Return trajectory
+        return Y
+
+    def simulate(self,F,L,Qc,Pinf,X,size=1):
+        # Simulate a trajectory using the state space model
+
+        # Allocate space for results
+        f = np.zeros((F.shape[0],size,X.shape[1]))
+
+        # Initial state (stored at index 0; the loop below starts at k=1)
+        f[:,:,0] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
+
+        # Time step lengths
+        dt = np.empty(X.shape)
+        dt[:,0] = X[:,1]-X[:,0]
+        dt[:,1:] = np.diff(X)
+
+        # Solve the LTI SDE for these time steps (derivatives are not needed)
+        As, Qs, index, tmp_dA, tmp_dQ = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
+
+        # Sweep through remaining time points
+        for k in range(1,X.shape[1]):
+
+            # Form discrete-time model
+            A = As[:,:,index[k]]
+            Q = Qs[:,:,index[k]]
+
+            # Draw the state
+            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
+
+        # Return values
+        return f
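+
+# Minimal usage sketch (illustrative only; it mirrors GPy/examples/state_space.py
+# and assumes this module is importable as GPy.models.state_space_new):
+#
+#   import numpy as np
+#   import GPy
+#   import GPy.models.state_space_new as SS_new
+#
+#   X = np.sort(np.random.rand(100))[:, None]
+#   Y = np.sin(10*X) + 0.1*np.random.randn(*X.shape)
+#   m = SS_new.StateSpace(X, Y, GPy.kern.Matern32(1))
+#   m.optimize()
+#   print m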