[statespace] make predict comply to gpy standards (no confidence interval)

This commit is contained in:
Max Zwiessele 2016-04-04 15:37:51 +01:00
parent 5f3956478f
commit d6ccccc7e4

View file

@ -15,52 +15,43 @@
# #
import numpy as np import numpy as np
from scipy import linalg
from scipy import stats from scipy import stats
from ..core import Model
from .. import kern
#from GPy.plotting.matplot_dep.models_plots import gpplot
#from GPy.plotting.matplot_dep.base_plots import x_frame1D
#from GPy.plotting.matplot_dep import Tango
#import pylab as pb
from GPy.core.parameterization.param import Param
import GPy
from .. import likelihoods from .. import likelihoods
#from . import state_space_setup as ss_setup
from ..core import Model
from . import state_space_main as ssm from . import state_space_main as ssm
from . import state_space_setup as ss_setup from . import state_space_setup as ss_setup
class StateSpace(Model): class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'): def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
super(StateSpace, self).__init__(name=name) super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1: if len(X.shape) == 1:
X = np.atleast_2d(X).T X = np.atleast_2d(X).T
self.num_data, input_dim = X.shape self.num_data, self.input_dim = X.shape
if len(Y.shape) == 1: if len(Y.shape) == 1:
Y = np.atleast_2d(Y).T Y = np.atleast_2d(Y).T
assert input_dim==1, "State space methods are only for 1D data" assert self.input_dim==1, "State space methods are only for 1D data"
if len(Y.shape)==2: if len(Y.shape)==2:
num_data_Y, self.output_dim = Y.shape num_data_Y, self.output_dim = Y.shape
ts_number = None ts_number = None
elif len(Y.shape)==3: elif len(Y.shape)==3:
num_data_Y, self.output_dim, ts_number = Y.shape num_data_Y, self.output_dim, ts_number = Y.shape
self.ts_number = ts_number self.ts_number = ts_number
assert num_data_Y == self.num_data, "X and Y data don't match" assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods are for single outputs only" assert self.output_dim == 1, "State space methods are for single outputs only"
self.kalman_filter_type = kalman_filter_type self.kalman_filter_type = kalman_filter_type
#self.kalman_filter_type = 'svd' # temp test #self.kalman_filter_type = 'svd' # temp test
ss_setup.use_cython = use_cython ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
global ssm global ssm
#from . import state_space_main as ssm #from . import state_space_main as ssm
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython): if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
@ -72,13 +63,13 @@ class StateSpace(Model):
# Noise variance # Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var) self.likelihood = likelihoods.Gaussian(variance=noise_var)
# Default kernel # Default kernel
if kernel is None: if kernel is None:
raise ValueError("State-Space Model: the kernel must be provided.") raise ValueError("State-Space Model: the kernel must be provided.")
else: else:
self.kern = kernel self.kern = kernel
self.link_parameter(self.kern) self.link_parameter(self.kern)
self.link_parameter(self.likelihood) self.link_parameter(self.likelihood)
self.posterior = None self.posterior = None
@ -92,14 +83,14 @@ class StateSpace(Model):
""" """
Parameters have now changed Parameters have now changed
""" """
#np.set_printoptions(16) #np.set_printoptions(16)
#print(self.param_array) #print(self.param_array)
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
# Get the model matrices from the kernel # Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde() (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters # necessary parameters
measurement_dim = self.output_dim measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -109,30 +100,30 @@ class StateSpace(Model):
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no]) dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no]) dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no]) dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
# Assign the values for the kernel function # Assign the values for the kernel function
dF[:,:,:-1] = dFt dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct dQc[:,:,:-1] = dQct
dP_inf[:,:,:-1] = dP_inft dP_inf[:,:,:-1] = dP_inft
dP0[:,:,:-1] = dP0t dP0[:,:,:-1] = dP0t
# The sigma2 derivative # The sigma2 derivative
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no]) dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
dR[:,:,-1] = np.eye(measurement_dim) dR[:,:,-1] = np.eye(measurement_dim)
# Balancing # Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0) #(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
# Use the Kalman filter to evaluate the likelihood # Use the Kalman filter to evaluate the likelihood
grad_calc_params = {} grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf grad_calc_params['dP_inf'] = dP_inf
grad_calc_params['dF'] = dF grad_calc_params['dF'] = dF
grad_calc_params['dQc'] = dQc grad_calc_params['dQc'] = dQc
grad_calc_params['dR'] = dR grad_calc_params['dR'] = dR
grad_calc_params['dP_init'] = dP0 grad_calc_params['dP_init'] = dP0
kalman_filter_type = self.kalman_filter_type kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shapes of self.Y # The following code is required because sometimes the shapes of self.Y
# becomes 3D even though is must be 2D. The reason is undescovered. # becomes 3D even though is must be 2D. The reason is undescovered.
Y = self.Y Y = self.Y
@ -140,63 +131,63 @@ class StateSpace(Model):
Y.shape = (self.num_data,1) Y.shape = (self.num_data,1)
else: else:
Y.shape = (self.num_data,1,self.ts_number) Y.shape = (self.num_data,1,self.ts_number)
(filter_means, filter_covs, log_likelihood, (filter_means, filter_covs, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H, grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None, float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True, P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
calc_grad_log_likelihood=True, calc_grad_log_likelihood=True,
grad_params_no=grad_params_no, grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params) grad_calc_params=grad_calc_params)
if np.any( np.isfinite(log_likelihood) == False): if np.any( np.isfinite(log_likelihood) == False):
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the log_likelihood") print("State-Space: NaN valkues in the log_likelihood")
if np.any( np.isfinite(grad_log_likelihood) == False): if np.any( np.isfinite(grad_log_likelihood) == False):
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the grad_log_likelihood") print("State-Space: NaN valkues in the grad_log_likelihood")
#print(grad_log_likelihood) #print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1) grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1) grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 ) self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0]) self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0]) self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
def log_likelihood(self):
    """Return the log marginal likelihood of the model.

    The value is computed (and cached in ``self._log_marginal_likelihood``)
    by the Kalman filter run in ``parameters_changed``; this accessor only
    reads the cached result.
    """
    return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False): def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
""" """
Performs the actual prediction for new X points. Performs the actual prediction for new X points.
Inner function. It is called only from inside this class. Inner function. It is called only from inside this class.
Input: Input:
--------------------- ---------------------
Xnews: vector or (n_points,1) matrix Xnews: vector or (n_points,1) matrix
New time points where to evaluate predictions. New time points where to evaluate predictions.
Ynews: (n_train_points, ts_no) matrix Ynews: (n_train_points, ts_no) matrix
This matrix can substitude the original training points (in order This matrix can substitude the original training points (in order
to use only the parameters of the model). to use only the parameters of the model).
filteronly: bool filteronly: bool
Use only Kalman Filter for prediction. In this case the output does Use only Kalman Filter for prediction. In this case the output does
not coincide with corresponding Gaussian process. not coincide with corresponding Gaussian process.
Output: Output:
-------------------- --------------------
m: vector m: vector
Mean prediction Mean prediction
V: vector V: vector
Variance in every point Variance in every point
""" """
# Set defaults # Set defaults
if Ynew is None: if Ynew is None:
Ynew = self.Y Ynew = self.Y
@ -209,8 +200,8 @@ class StateSpace(Model):
else: else:
X = self.X X = self.X
Y = Ynew Y = Ynew
predict_only_training = True predict_only_training = True
# Sort the matrix (save the order) # Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True) _, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index] # TODO they are not used X = X[return_index] # TODO they are not used
@ -218,37 +209,37 @@ class StateSpace(Model):
# Get the model matrices from the kernel # Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde() (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0] state_dim = F.shape[0]
#Y = self.Y[:, 0,0] #Y = self.Y[:, 0,0]
# Run the Kalman filter # Run the Kalman filter
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
kalman_filter_type = self.kalman_filter_type kalman_filter_type = self.kalman_filter_type
(M, P, log_likelihood, (M, P, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter( grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None, F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type, P_init=P0, p_kalman_filter_type = kalman_filter_type,
calc_log_likelihood=False, calc_log_likelihood=False,
calc_grad_log_likelihood=False) calc_grad_log_likelihood=False)
# (filter_means, filter_covs, log_likelihood, # (filter_means, filter_covs, log_likelihood,
# grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H, # grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
# float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None, # float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
# P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True, # P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
# calc_grad_log_likelihood=True, # calc_grad_log_likelihood=True,
# grad_params_no=grad_params_no, # grad_params_no=grad_params_no,
# grad_calc_params=grad_calc_params) # grad_calc_params=grad_calc_params)
# Run the Rauch-Tung-Striebel smoother # Run the Rauch-Tung-Striebel smoother
if not filteronly: if not filteronly:
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P, (M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc) p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
# remove initial values # remove initial values
M = M[1:,:,:] M = M[1:,:,:]
P = P[1:,:,:] P = P[1:,:,:]
# Put the data back in the original order # Put the data back in the original order
M = M[return_inverse,:,:] M = M[return_inverse,:,:]
P = P[return_inverse,:,:] P = P[return_inverse,:,:]
@ -257,40 +248,41 @@ class StateSpace(Model):
if not predict_only_training: if not predict_only_training:
M = M[self.num_data:,:,:] M = M[self.num_data:,:,:]
P = P[self.num_data:,:,:] P = P[self.num_data:,:,:]
# Calculate the mean and variance # Calculate the mean and variance
# after einsum m has dimension in 3D (sample_num, dim_no,time_series_no) # after einsum m has dimension in 3D (sample_num, dim_no,time_series_no)
m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T) m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T)
m.shape = (m.shape[0], m.shape[1]) # remove the third dimension m.shape = (m.shape[0], m.shape[1]) # remove the third dimension
V = np.einsum('ij,ajk,kl', H, P, H.T) V = np.einsum('ij,ajk,kl', H, P, H.T)
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
# Return the posterior of the state # Return the posterior of the state
return (m, V) return (m, V)
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
    """Predict the process at new time points, GPy-style.

    Input:
    ---------------------
    Xnew: vector or (n_points, 1) matrix
        New time points where to evaluate predictions. If None, the
        training inputs are used.
    filteronly: bool
        Use only the Kalman filter (no RTS smoother) for prediction.
    include_likelihood: bool
        If True, the Gaussian observation-noise variance is added to the
        predictive variance (prediction of observations rather than of
        the latent function).

    Output:
    --------------------
    (mean, variance) — per-point predictive mean and variance; no
    confidence-interval bounds are returned (GPy standard).
    """
    # Latent-state prediction via Kalman filter / RTS smoother.
    mean, variance = self._raw_predict(Xnew, filteronly=filteronly)
    if include_likelihood:
        # Fold the observation noise into the latent variance.
        variance = variance + float(self.likelihood.variance)
    return mean, variance
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
    """Return predictive quantiles of the observations at new time points.

    Input:
    ---------------------
    Xnew: vector or (n_points, 1) matrix
        New time points where to evaluate predictions. If None, the
        training inputs are used.
    quantiles: tuple of floats
        Requested quantiles in percent (default: 2.5 and 97.5, i.e. a
        95% interval).

    Output:
    --------------------
    List of arrays, one per requested quantile, each the same shape as
    the predictive mean.
    """
    mu, var = self._raw_predict(Xnew)
    # Use self.likelihood.variance (not self.Gaussian_noise.variance) for
    # consistency with predict(); hoist the noise-added variance out of
    # the comprehension so it is computed once.
    total_var = var + float(self.likelihood.variance)
    return [stats.norm.ppf(q / 100.) * np.sqrt(total_var) + mu
            for q in quantiles]
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None, # def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
# ax=None, resolution=None, plot_raw=False, plot_filter=False, # ax=None, resolution=None, plot_raw=False, plot_filter=False,
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): # linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
@ -399,8 +391,8 @@ class StateSpace(Model):
# #
# # Return trajectory # # Return trajectory
# return Y # return Y
# #
# #
# def simulate(self,F,L,Qc,Pinf,X,size=1): # def simulate(self,F,L,Qc,Pinf,X,size=1):
# # Simulate a trajectory using the state space model # # Simulate a trajectory using the state space model
# #