STATE-SPACE: Recent modifications to state-space inference, including bug fixes in state-space kernels.

This commit is contained in:
Alex Grigorievskiy 2017-04-06 11:59:13 +03:00
parent 5ce5c5d8ce
commit dfe32266b6
6 changed files with 533 additions and 91 deletions

View file

@ -9,6 +9,7 @@ from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
import warnings
from scipy import special as special
@ -26,6 +27,38 @@ class sde_StdPeriodic(StdPeriodic):
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
"""
# TODO: write comment to the constructor arguments
def __init__(self, *args, **kwargs):
"""
Init constructor.
Two optional extra parameters are added in addition to the ones in
StdPeriodic kernel.
:param approx_order: approximation order for the RBF covariance. (Default 7)
:type approx_order: int
:param balance: Whether to balance this kernel separately. (Default False). Model has a separate parameter for balancing.
:type balance: bool
"""
#import pdb; pdb.set_trace()
if 'approx_order' in kwargs:
self.approx_order = kwargs.get('approx_order')
del kwargs['approx_order']
else:
self.approx_order = 7
if 'balance' in kwargs:
self.balance = bool( kwargs.get('balance') )
del kwargs['balance']
else:
self.balance = False
super(sde_StdPeriodic, self).__init__(*args, **kwargs)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
@ -38,40 +71,47 @@ class sde_StdPeriodic(StdPeriodic):
def sde(self):
"""
Return the state space representation of the covariance.
Return the state space representation of the standard periodic covariance.
! Note: one must constrain lengthscale not to drop below 0.25.
After this bessel functions of the first kind grows to very high.
! Note: one must constrain lengthscale not to drop below 0.2. (independently of approximation order)
After this Bessel functions of the first kind become NaN. Rescaling
time variable might help.
! Note: one must keep wevelength also not very low. Because then
! Note: one must keep period also not very low. Because then
the gradients wrt wavelength become unstable.
However this might depend on the data. For test example with
300 data points the low limit is 0.15.
"""
#import pdb; pdb.set_trace()
# Params to use: (in that order)
#self.variance
#self.period
#self.lengthscale
N = 7 # approximation order
if self.approx_order is not None:
N = int(self.approx_order)
else:
N = 7 # approximation order
p_period = float(self.period)
p_lengthscale = 2*float(self.lengthscale)
p_variance = float(self.variance)
w0 = 2*np.pi/self.period # frequency
lengthscale = 2*self.lengthscale
w0 = 2*np.pi/p_period # frequency
# lengthscale is multiplied by 2 because of different definition of lengthscale
[q2,dq2l] = seriescoeff(N,lengthscale,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
[q2,dq2l] = seriescoeff(N, p_lengthscale, p_variance)
dq2l = 2*dq2l
dq2l = 2*dq2l # This is because the lengthscale is multiplied by 2.
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
eps = 1e-12
if np.any( np.isfinite(q2) == False) or np.any( np.abs(q2) > 1.0/eps) or np.any( np.abs(q2) < eps):
warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in q2 :".format(eps) + q2.__format__("") )
if np.any( np.isfinite(dq2l) == False) or np.any( np.abs(dq2l) > 1.0/eps) or np.any( np.abs(dq2l) < eps):
warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in dq2l :".format(eps) + q2.__format__("") )
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
@ -88,10 +128,10 @@ class sde_StdPeriodic(StdPeriodic):
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
dP_inf[:,:,0] = P_inf / p_variance
# Derivatives self.period
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period );
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / p_period );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
@ -101,6 +141,11 @@ class sde_StdPeriodic(StdPeriodic):
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
if self.balance:
# Benefits of this are not very sound.
import GPy.models.state_space_main as ssm
(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf,dP0) = ssm.balance_ss_model(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0 )
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
@ -164,9 +209,9 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
#print(coeffs)
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
@ -177,4 +222,4 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
return coeffs.squeeze(), coeffs_dl.squeeze()

View file

@ -11,6 +11,7 @@ from .stationary import RatQuad
import numpy as np
import scipy as sp
import warnings
class sde_RBF(RBF):
"""
@ -25,6 +26,37 @@ class sde_RBF(RBF):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, *args, **kwargs):
"""
Init constructor.
Two optional extra parameters are added in addition to the ones in
RBF kernel.
:param approx_order: approximation order for the RBF covariance. (Default 10)
:type approx_order: int
:param balance: Whether to balance this kernel separately. (Default True). Model has a separate parameter for balancing.
:type balance: bool
"""
if 'balance' in kwargs:
self.balance = bool( kwargs.get('balance') )
del kwargs['balance']
else:
self.balance = True
if 'approx_order' in kwargs:
self.approx_order = kwargs.get('approx_order')
del kwargs['approx_order']
else:
self.approx_order = 6
super(sde_RBF, self).__init__(*args, **kwargs)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
@ -37,23 +69,43 @@ class sde_RBF(RBF):
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10# approximation order ( number of terms in exponent series expansion)
Note! For Sparse GP inference too small or too high values of lengthscale
lead to instabilities. This is because Qc are too high or too low
and P_inf are not full rank. This effect depends on approximation order.
For N = 10. lengthscale must be in (0.8,8). For other N tests must be conducted.
N=6: (0.06,31)
Variance should be within reasonable bounds as well, but its dependence is linear.
The above facts do not take into account regularization.
"""
#import pdb; pdb.set_trace()
if self.approx_order is not None:
N = self.approx_order
else:
N = 10# approximation order ( number of terms in exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
p_lengthscale = float( self.lengthscale )
p_variance = float(self.variance)
kappa = 1.0/2.0/p_lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
Qc = np.array( ((p_variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) )
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
eps = 1e-12
if (float(Qc) > 1.0/eps) or (float(Qc) < eps):
warnings.warn("""sde_RBF kernel: the noise variance Qc is either very large or very small.
It influences conditioning of P_inf: {0:e}""".format(float(Qc)) )
pp1 = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp1[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
pp = sp.poly1d(pp1)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
@ -69,6 +121,7 @@ class sde_RBF(RBF):
H[0,0] = 1
# Infinite covariance:
#import pdb; pdb.set_trace()
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
@ -79,17 +132,17 @@ class sde_RBF(RBF):
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dFlengthscale[-1,:] = -aa[-1:0:-1]/p_lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
dQcvariance = Qc/p_variance
dQclengthscale = np.array(( (p_variance*np.sqrt(2*np.pi)*fn*2**N*p_lengthscale**(-2*N)*(1-2*N),),))
dPinf_variance = Pinf/self.variance
dPinf_variance = Pinf/p_variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dPinf_lengthscale = -1/p_lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
@ -101,10 +154,11 @@ class sde_RBF(RBF):
P0 = Pinf.copy()
dP0 = dPinf.copy()
# Benefits of this are not very sound. Helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
if self.balance:
# Benefits of this are not very sound. Helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -432,6 +432,8 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.Q_square_root_dict = {}
self.Q_inverse_dict = {}
self.last_k = 0
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
@ -478,18 +480,53 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
if matrix_index in self.Q_square_root_dict:
square_root = self.Q_square_root_dict[matrix_index]
else:
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
if matrix_index not in self.Q_svd_dict
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
else:
U,S,Vh = self.Q_svd_dict[matrix_index]
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
self.Q_suqare_root_dict[matrix_index] = square_root
return square_root
cpdef Q_inverse(self, int k, float jitter=0.0):
"""
Square root of the noise matrix Q
"""
cdef int matrix_index = <int>self.reconstruct_indices[k]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_inverse_dict:
Q_inverse = self.Q_inverse_dict[matrix_index]
else:
if matrix_index not in self.Q_svd_dict
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
else:
U,S,Vh = self.Q_svd_dict[matrix_index]
Q_inverse = Q_inverse = np.dot( Vh.T * ( 1.0/(S + jitter)) , U.T )
self.Q_inverse_dict[matrix_index] = Q_inverse
return Q_inverse
# def return_last(self):
# """
# Function returns last available matrices.

View file

@ -12,6 +12,8 @@ import numpy as np
import scipy as sp
import scipy.linalg as linalg
import warnings
try:
from . import state_space_setup
setup_available = True
@ -41,6 +43,10 @@ if print_verbose:
else:
print("state_space: cython is NOT used")
# When debugging external module can set some value to this variable (e.g.)
# 'model' and in this module this variable can be seen.
tmp_buffer = None
class Dynamic_Callables_Python(object):
@ -227,7 +233,7 @@ class R_handling_Python(Measurement_Callables_Class):
self.R_square_root = {}
def Rk(self, k):
return self.R[:, :, self.index[self.R_time_var_index, k]]
return self.R[:, :, int(self.index[self.R_time_var_index, k])]
def dRk(self, k):
if self.dR is None:
@ -305,7 +311,7 @@ class Std_Measurement_Callables_Python(R_handling_Class):
P: parameter for Jacobian, usually covariance matrix.
"""
return self.H[:, :, self.index[self.H_time_var_index, k]]
return self.H[:, :, int(self.index[self.H_time_var_index, k])]
def dHk(self, k):
if self.dH is None:
@ -2303,6 +2309,8 @@ class ContDescrStateSpace(DescreteStateSpace):
self.v_dQk = None
self.square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
# !!!Print statistics! Which object is created
def f_a(self, k,m,A):
@ -2337,7 +2345,10 @@ class ContDescrStateSpace(DescreteStateSpace):
self.v_Qk = v_Qk
self.v_dAk = v_dAk
self.v_dQk = v_dQk
self.Q_square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
else:
v_Ak = self.v_Ak
v_Qk = self.v_Qk
@ -2359,8 +2370,11 @@ class ContDescrStateSpace(DescreteStateSpace):
self.last_k = 0
self.last_k_computed = False
self.compute_derivatives = compute_derivatives
self.Q_square_root_computed = False
self.Q_square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
self.Q_eigen_computed = False
return self
def Ak(self,k,m,P):
@ -2381,12 +2395,19 @@ class ContDescrStateSpace(DescreteStateSpace):
def Q_srk(self,k):
"""
Check square root, maybe rewriting for Spectral decomposition is needed.
Square root of the noise matrix Q
"""
if ((self.last_k == k) and (self.last_k_computed == True)):
if not self.Q_square_root_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
if not self.Q_svd_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
self.Q_svd = (U, S, Vh)
self.Q_svd_computed = True
else:
(U, S, Vh) = self.Q_svd
square_root = U * np.sqrt(S)
self.square_root_computed = True
self.Q_square_root = square_root
@ -2397,6 +2418,55 @@ class ContDescrStateSpace(DescreteStateSpace):
return square_root
def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
"""
Function inverts Q matrix and regularizes the inverse.
Regularization is useful when original matrix is badly conditioned.
Function is currently used only in SparseGP code.
Inputs:
------------------------------
k: int
Iteration number.
p_largest_cond_num: float
Largest condition value for the inverted matrix. If cond. number is smaller than that
no regularization happens.
regularization_type: 1 or 2
Regularization type.
regularization_type: int (1 or 2)
type 1: 1/(S[k] + regularizer) regularizer is computed
type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
"""
#import pdb; pdb.set_trace()
if ((self.last_k == k) and (self.last_k_computed == True)):
if not self.Q_inverse_computed:
if not self.Q_svd_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
self.Q_svd = (U, S, Vh)
self.Q_svd_computed = True
else:
(U, S, Vh) = self.Q_svd
Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
self.Q_inverse_computed = True
self.Q_inverse_r = Q_inverse_r
else:
Q_inverse_r = self.Q_inverse_r
else:
raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
This requires some programming""")
return Q_inverse_r
def return_last(self):
"""
Function returns last computed matrices.
@ -2463,6 +2533,9 @@ class ContDescrStateSpace(DescreteStateSpace):
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.Q_square_root_dict = {}
self.Q_inverse_dict = {}
self.last_k = None
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
@ -2503,17 +2576,66 @@ class ContDescrStateSpace(DescreteStateSpace):
Square root of the noise matrix Q
"""
matrix_index = self.reconstruct_indices[k]
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
if matrix_index in self.Q_square_root_dict:
square_root = self.Q_square_root_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
if matrix_index in self.Q_svd_dict:
(U, S, Vh) = self.Q_svd_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
self.Q_square_root_dict[matrix_index] = square_root
return square_root
def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
"""
Function inverts Q matrix and regularizes the inverse.
Regularization is useful when original matrix is badly conditioned.
Function is currently used only in SparseGP code.
Inputs:
------------------------------
k: int
Iteration number.
p_largest_cond_num: float
Largest condition value for the inverted matrix. If cond. number is smaller than that
no regularization happens.
regularization_type: 1 or 2
Regularization type.
regularization_type: int (1 or 2)
type 1: 1/(S[k] + regularizer) regularizer is computed
type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
"""
#import pdb; pdb.set_trace()
matrix_index = self.reconstruct_indices[k]
if matrix_index in self.Q_inverse_dict:
Q_inverse_r = self.Q_inverse_dict[matrix_index]
else:
if matrix_index in self.Q_svd_dict:
(U, S, Vh) = self.Q_svd_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
self.Q_inverse_dict[matrix_index] = Q_inverse_r
return Q_inverse_r
def return_last(self):
"""
Function returns last available matrices.
@ -3073,7 +3195,8 @@ class ContDescrStateSpace(DescreteStateSpace):
@classmethod
def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
grad_params_no=None,
P_inf=None, dP_inf=None, dF = None, dQc=None):
P_inf=None, dP_inf=None, dF = None, dQc=None,
dt0=None):
"""
Function return the object which is used in Kalman filter and/or
smoother to obtain matrices A, Q and their derivatives for discrete model
@ -3110,7 +3233,14 @@ class ContDescrStateSpace(DescreteStateSpace):
threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
dt = np.empty((X.shape[0],))
dt[1:] = np.diff(X[:,0],axis=0)
dt[0] = 0#dt[1]
if dt0 is None:
dt[0] = 0#dt[1]
else:
if isinstance(dt0,str):
dt = dt[1:]
else:
dt[0] = dt0
unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
number_unique_indices = len(unique_indices)
@ -3161,6 +3291,9 @@ class ContDescrStateSpace(DescreteStateSpace):
x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1})
TODO: this function can be redone to "preprocess dataset", when
close time points are handled properly (with rounding parameter) and
values are averaged accordingly.
Input:
--------------
@ -3222,11 +3355,9 @@ class ContDescrStateSpace(DescreteStateSpace):
n = F.shape[0]
if not isinstance(dt, collections.Iterable): # not iterable, scalar
#import pdb; pdb.set_trace()
# The dynamical model
A = matrix_exponent(F*dt)
if np.any( np.isnan(A)):
A = linalg.expm3(F*dt)
# The covariance matrix Q by matrix fraction decomposition ->
Phi = np.zeros((2*n,2*n))
@ -3265,15 +3396,17 @@ class ContDescrStateSpace(DescreteStateSpace):
# The discrete-time dynamical model*
if p==0:
A = AA[:n,:n,p]
Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
Q_noise = Q_noise_2
Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T)
Q_noise = Q_noise_3
#PP = A.dot(P).dot(A.T) + Q_noise_2
# The derivatives of A and Q
dA[:,:,p] = AA[n:,:n,p]
dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \
- A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times
tmp = dA[:,:,p].dot(P_inf).dot(A.T)
dQ[:,:,p] = dP_inf[:,:,p] - tmp \
- A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
else:
dA = None
dQ = None
@ -3283,6 +3416,10 @@ class ContDescrStateSpace(DescreteStateSpace):
#Q_noise = Q_noise_1
# Return
#import pdb; pdb.set_trace()
#if dt != 0:
# Q_noise = Q_noise + np.eye(Q_noise.shape[0])*1e-8
Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
return A, Q_noise,None, dA, dQ
else: # iterable, array
@ -3486,4 +3623,124 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
# (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0
#def psd_matrix_inverse(k,Q, U=None,S=None, p_largest_cond_num=None, regularization_type=2):
# """
# Function inverts positive definite matrix and regularizes the inverse.
# Regularization is useful when original matrix is badly conditioned.
# Function is currently used only in SparseGP code.
#
# Inputs:
# ------------------------------
# k: int
# Iteration number. Used for information only. Value -1 corresponds to P_inf_inv.
#
# Q: matrix
# To be inverted
#
# U,S: matrix. vector
# SVD components of Q
#
# p_largest_cond_num: float
# Largest condition value for the inverted matrix. If cond. number is smaller than that
# no regularization happen.
#
# regularization_type: 1 or 2
# Regularization type.
# """
# #import pdb; pdb.set_trace()
## if (k == 0) or (k == -1): # -1 - P_inf_inv computation
## import pdb; pdb.set_trace()
#
# if p_largest_cond_num is None:
# raise ValueError("psd_matrix_inverse: None p_largest_cond_num")
#
# if U is None or S is None:
# (U, S, Vh) = sp.linalg.svd( Q, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
# if S[0] < (1e-4):
# #import pdb; pdb.set_trace()
# warnings.warn("""state_space_main psd_matrix_inverse: largest singular value is too small {0:e}.
# condition number is {1:e} Maybe something is wrong
# """.format(S[0], S[0]/S[-1]))
# S = S + (1e-4 - S[0]) # make the S[0] at least 1e-4
#
# current_conditional_number = S[0]/S[-1]
# if (current_conditional_number > p_largest_cond_num):
# if (regularization_type == 1):
# regularizer = S[0] / p_largest_cond_num
# # the second computation of SVD is done to compute more precisely singular
# # vectors of small singular values, since small singular values become large.
# # It is not very clear how this step is useful but test is here.
# (U, S, Vh) = sp.linalg.svd( Q + regularizer*np.eye(Q.shape[0]),
# full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
#
# Q_inverse_r = np.dot( U * 1.0/S , U.T ) # Assume Q_inv is positive definite
#
# # In this case, RBF kernel we get complx eigenvalues. Probably
# # for small eigenvalue corresponding eigenvectors are not very orthogonal.
# ##########Q_inverse = np.dot( Vh.T * ( 1.0/(S + regularizer)) , U.T )
# elif (regularization_type == 2):
#
# new_border_value = np.sqrt(current_conditional_number)/2
# if p_largest_cond_num >= new_border_value: # this type of regularization works
# regularizer = ( S[0] / p_largest_cond_num / 2.0 )**2
#
# Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
# else:
#
# better_curr_cond_num = new_border_value
# warnings.warn("""state_space_main psd_matrix_inverse: reg_type = 2 can't be done completely.
# Current conditional number {0:e} is reduced to {1:e} by reg_type = 1""".format(current_conditional_number, better_curr_cond_num))
#
# regularizer = S[0] / better_curr_cond_num
# # the second computation of SVD is done to compute more precisely singular
# # vectors of small singular values, since small singular values become large.
# # It is not very clear how this step is useful but test is here.
# (U, S, Vh) = sp.linalg.svd( Q + regularizer*np.eye(Q.shape[0]),
# full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
#
# regularizer = ( S[0] / p_largest_cond_num / 2.0 )**2
#
# Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
#
# assert regularizer*10 < S[0], "regularizer is not << S[0]"
# assert regularizer > 10*S[-1], "regularizer is not >> S[-1]"
#
## Old version ->
## lamda_star = np.sqrt(current_conditional_number)
## if 2*p_largest_cond_num >= lamda_star:
## lamda = current_conditional_number / 2 / p_largest_cond_num
##
## regularizer = (S[-1] * lamda)**2
##
## Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
## else:
## better_curr_cond_num = (2*p_largest_cond_num)**2 / 2 # division by 2 just in case here
## warnings.warn("""state_space_main psd_matrix_inverse: reg_type = 2 can't be done completely.
## Current conditional number {0:e} is reduced to {1:e} by reg_type = 1""".format(current_conditional_number, better_curr_cond_num))
##
## regularizer = S[0] / better_curr_cond_num
## # the second computation of SVD is done to compute more precisely singular
## # vectors of small singular values, since small singular values become large.
## # It is not very clear how this step is useful but test is here.
## (U, S, Vh) = sp.linalg.svd( Q + regularizer*np.eye(Q.shape[0]),
## full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
##
## lamda = better_curr_cond_num / 2 / p_largest_cond_num
##
## regularizer = (S[-1] * lamda)**2
##
## Q_inverse_r = np.dot( U * ( S/(S**2 + regularizer)) , U.T ) # Assume Q_inv is positive definite
##
## assert lamda > 10, "Some assumptions are incorrect if this is not satisfied."
## Old version <-
# else:
# raise ValueError("AQcompute_batch_Python:Q_inverse: Invalid regularization type")
#
# else:
# Q_inverse_r = np.dot( U * 1.0/S , U.T ) # Assume Q_inv is positive definite
# # When checking conditional number 2 times difference is ok.
# Q_inverse_r = 0.5*(Q_inverse_r + Q_inverse_r.T)
#
# return Q_inverse_r

View file

@ -23,7 +23,16 @@ from . import state_space_main as ssm
from . import state_space_setup as ss_setup
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, balance=False, name='StateSpace'):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1:
@ -51,6 +60,7 @@ class StateSpace(Model):
ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace()
self.balance = balance
global ssm
#from . import state_space_main as ssm
@ -58,8 +68,8 @@ class StateSpace(Model):
reload(ssm)
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
self.X = X[sort_index,:]
self.Y = Y[sort_index,:]
# Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var)
@ -86,11 +96,12 @@ class StateSpace(Model):
#np.set_printoptions(16)
#print(self.param_array)
#import pdb; pdb.set_trace()
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
#Qc = Qc + np.eye(Qc.shape[0]) * 1e-8
#import pdb; pdb.set_trace()
# necessary parameters
measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -112,8 +123,9 @@ class StateSpace(Model):
dR[:,:,-1] = np.eye(measurement_dim)
# Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
if self.balance:
(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
print("SSM parameters_changed balancing!")
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf
@ -125,7 +137,7 @@ class StateSpace(Model):
kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shapes of self.Y
# becomes 3D even though is must be 2D. The reason is undescovered.
# becomes 3D even though is must be 2D. The reason is undiscovered.
Y = self.Y
if self.ts_number is None:
Y.shape = (self.num_data,1)
@ -146,7 +158,7 @@ class StateSpace(Model):
if np.any( np.isfinite(grad_log_likelihood) == False):
#import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the grad_log_likelihood")
print("State-Space: NaN values in the grad_log_likelihood")
#print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
@ -159,7 +171,7 @@ class StateSpace(Model):
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, p_balance=False, **kw):
"""
Performs the actual prediction for new X points.
Inner function. It is called only from inside this class.
@ -178,6 +190,9 @@ class StateSpace(Model):
Use only Kalman Filter for prediction. In this case the output does
not coincide with corresponding Gaussian process.
balance: bool
Whether to balance or not the model as a whole
Output:
--------------------
@ -211,6 +226,11 @@ class StateSpace(Model):
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
# Balancing
if (p_balance==True):
(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
print("SSM _raw_predict balancing!")
#Y = self.Y[:, 0,0]
# Run the Kalman filter
#import pdb; pdb.set_trace()
@ -261,10 +281,23 @@ class StateSpace(Model):
# Return the posterior of the state
return (m, V)
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, balance=None, **kw):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
if balance is None:
p_balance = self.balance
else:
p_balance = balance
# Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
(m, V) = self._raw_predict(Xnew,filteronly=filteronly, p_balance=p_balance)
# Add the noise variance to the state variance
if include_likelihood:
@ -277,8 +310,22 @@ class StateSpace(Model):
# Return mean and variance
return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
mu, var = self._raw_predict(Xnew)
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), balance=None, **kw):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
if balance is None:
p_balance = self.balance
else:
p_balance = balance
mu, var = self._raw_predict(Xnew, p_balance=p_balance)
#import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]

View file

@ -91,12 +91,14 @@ class StateSpaceKernelsTests(np.testing.TestCase):
mean_compare_decimal=5, var_compare_decimal=5)
def test_RBF_kernel(self,):
#import pdb;pdb.set_trace()
np.random.seed(234) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,])
ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,], balance=True, approx_order=10)
gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True,
@ -375,7 +377,7 @@ if __name__ == "__main__":
print("Running state-space inference tests...")
unittest.main()
#tt = StateSpaceKernelsTests('test_periodic_kernel')
#tt = StateSpaceKernelsTests('test_RBF_kernel')
#import pdb; pdb.set_trace()
#tt.test_Matern32_kernel()
#tt.test_Matern52_kernel()