ENH: Added SDE for all basic kernels except Rationale Quadratic.

Some necessary modifications for the previous code are performed.
This commit is contained in:
Alexander Grigorievskiy 2015-07-14 16:44:21 +03:00
parent 06a7fedd22
commit 82cb626cd6
10 changed files with 1740 additions and 777 deletions

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@
import numpy as np
from scipy import linalg
from scipy import stats
from ..core import Model
from .. import kern
from GPy.plotting.matplot_dep.models_plots import gpplot
@ -26,17 +27,18 @@ from GPy.core.parameterization.param import Param
import GPy
from .. import likelihoods
import GPy.models.state_space_main as ssm
#import state_space_main as ssm
reload(ssm)
print ssm.__file__
from . import state_space_main as ssm
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
self.num_data, input_dim = X.shape
assert input_dim==1, "State space methods for time only"
num_data_Y, self.output_dim = Y.shape
if len(Y.shape) ==2: # TODO make this nice
num_data_Y, self.output_dim = Y.shape
elif len(Y.shape) ==3:
num_data_Y, self.output_dim, ts_number = Y.shape
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods for single outputs only"
@ -68,8 +70,8 @@ class StateSpace(Model):
"""
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde()
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters
measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -78,17 +80,19 @@ class StateSpace(Model):
dF = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dP_inf[:,:,:-1] = dP_inft
dP0[:,:,:-1] = dP0t
# The sigma2 derivative
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
dR[:,:,-1] = np.eye(measurement_dim)
#(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
@ -96,26 +100,53 @@ class StateSpace(Model):
grad_calc_params['dF'] = dF
grad_calc_params['dQc'] = dQc
grad_calc_params['dR'] = dR
grad_calc_params['dP_init'] = dP0
(filter_means, filter_covs, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
P_init=None, calc_log_likelihood=True,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
P_init=P0, calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params)
self._log_marginal_likelihood = log_likelihood
#gradients = self.compute_gradients()
self.likelihood.update_gradients(grad_log_likelihood[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
def log_likelihood(self):
return self._log_marginal_likelihood
def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
def _raw_predict(self, Xnew, Ynew=None, filteronly=False):
"""
Inner function. It is called only from inside this class
Performs the actual prediction for new X points.
Inner function. It is called only from inside this class.
Input:
---------------------
Xnews: vector or (n_points,1) matrix
New time points where to evaluate predictions.
Ynews: (n_train_points, ts_no) matrix
This matrix can substitude the original training points (in order
to use only the parameters of the model).
filteronly: bool
Use only Kalman Filter for prediction. In this case the output does
not coincide with corresponding Gaussian process.
Output:
--------------------
m: vector
Mean prediction
V: vector
Variance in every point
"""
# Set defaults
@ -128,41 +159,44 @@ class StateSpace(Model):
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index]
X = X[return_index] # TODO they are not used
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
#import pdb; pdb.set_trace()
#Y = self.Y[:, 0,0]
# Run the Kalman filter
(M, P, tmp_log_likelihood,
tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
P_init=None, calc_log_likelihood=False,
calc_grad_log_likelihood=False)
#import pdb; pdb.set_trace()
(M, P, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
P_init=P0, calc_log_likelihood=False,
calc_grad_log_likelihood=False)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
# remove initial values
M = M[:,1:]
P = P[:,:,1:]
M = M[1:,:]
P = P[1:,:,:]
# Put the data back in the original order
M = M[:,return_inverse]
P = P[:,:,return_inverse]
M = M[return_inverse,:]
P = P[return_inverse,:,:]
# Only return the values for Xnew
M = M[:,self.num_data:]
P = P[:,:,self.num_data:]
M = M[self.num_data:,:]
P = P[self.num_data:,:,:]
# Calculate the mean and variance
m = H.dot(M).T
V = np.tensordot(H[0],P,(0,0))
V = np.tensordot(V,H[0],(0,0))
V = V[:,None]
m = np.dot(M,H.T)
V = np.einsum('ij,ajk,kl', H, P, H.T)
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
# Return the posterior of the state
return (m, V)
@ -170,10 +204,10 @@ class StateSpace(Model):
def predict(self, Xnew, filteronly=False):
# Run the Kalman filter to get the state
(m, V) = self._predict_raw(Xnew,filteronly=filteronly)
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
V += self.sigma2
V += float(self.Gaussian_noise.variance)
# Lower and upper bounds
lower = m - 2*np.sqrt(V)
@ -181,143 +215,149 @@ class StateSpace(Model):
# Return mean and variance
return (m, V, lower, upper)
def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
ax=None, resolution=None, plot_raw=False, plot_filter=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
# Deal with optional parameters
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
# Define the frame on which to plot
resolution = resolution or 200
Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
# Make a prediction on the frame and plot it
if plot_raw:
m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.Y
else:
m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
Y = self.Y
# Plot the values
gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(self.X, self.Y, 'kx', mew=1.5)
# Optionally plot some samples
if samples:
if plot_raw:
Ysim = self.posterior_samples_f(Xgrid, samples)
else:
Ysim = self.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
# Set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
def prior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Allocate space for results
Y = np.empty((size,X.shape[0]))
# Simulate random draws
#for j in range(0,size):
# Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
Y = self.simulate(F,L,Qc,Pinf,X.T,size)
# Only observations
Y = np.tensordot(H[0],Y,(0,0))
# Reorder simulated values
Y = Y[:,return_inverse]
# Return trajectory
return Y.T
def posterior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run smoother on original data
(m,V) = self.predict_raw(X)
# Simulate random draws from the GP prior
y = self.prior_samples_f(np.vstack((self.X, X)),size)
# Allocate space for sample trajectories
Y = np.empty((size,X.shape[0]))
# Run the RTS smoother on each of these values
for j in range(0,size):
yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
(m2,V2) = self.predict_raw(X,Ynew=yobs)
Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
# Reorder simulated values
Y = Y[:,return_inverse]
# Return posterior sample trajectories
return Y.T
def posterior_samples(self, X, size=10):
# Make samples of f
Y = self.posterior_samples_f(X,size)
# Add noise
Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
# Return trajectory
return Y
def predict_quantiles(self, Xnew, quantiles=(2.5, 97.5)):
mu, var = self._raw_predict(Xnew)
#import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
def simulate(self,F,L,Qc,Pinf,X,size=1):
# Simulate a trajectory using the state space model
# Allocate space for results
f = np.zeros((F.shape[0],size,X.shape[1]))
# Initial state
f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
# Sweep through remaining time points
for k in range(1,X.shape[1]):
# Form discrete-time model
A = As[:,:,index[1-k]]
Q = Qs[:,:,index[1-k]]
# Draw the state
f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
# Return values
return f
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
#
# # Deal with optional parameters
# if ax is None:
# fig = pb.figure(num=fignum)
# ax = fig.add_subplot(111)
#
# # Define the frame on which to plot
# resolution = resolution or 200
# Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
#
# # Make a prediction on the frame and plot it
# if plot_raw:
# m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
# lower = m - 2*np.sqrt(v)
# upper = m + 2*np.sqrt(v)
# Y = self.Y
# else:
# m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
# Y = self.Y
#
# # Plot the values
# gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
# ax.plot(self.X, self.Y, 'kx', mew=1.5)
#
# # Optionally plot some samples
# if samples:
# if plot_raw:
# Ysim = self.posterior_samples_f(Xgrid, samples)
# else:
# Ysim = self.posterior_samples(Xgrid, samples)
# for yi in Ysim.T:
# ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
#
# # Set the limits of the plot to some sensible values
# ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
# ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
#
# def prior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Allocate space for results
# Y = np.empty((size,X.shape[0]))
#
# # Simulate random draws
# #for j in range(0,size):
# # Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
# Y = self.simulate(F,L,Qc,Pinf,X.T,size)
#
# # Only observations
# Y = np.tensordot(H[0],Y,(0,0))
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return trajectory
# return Y.T
#
# def posterior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Run smoother on original data
# (m,V) = self.predict_raw(X)
#
# # Simulate random draws from the GP prior
# y = self.prior_samples_f(np.vstack((self.X, X)),size)
#
# # Allocate space for sample trajectories
# Y = np.empty((size,X.shape[0]))
#
# # Run the RTS smoother on each of these values
# for j in range(0,size):
# yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
# (m2,V2) = self.predict_raw(X,Ynew=yobs)
# Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return posterior sample trajectories
# return Y.T
#
# def posterior_samples(self, X, size=10):
#
# # Make samples of f
# Y = self.posterior_samples_f(X,size)
#
# # Add noise
# Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
#
# # Return trajectory
# return Y
#
#
# def simulate(self,F,L,Qc,Pinf,X,size=1):
# # Simulate a trajectory using the state space model
#
# # Allocate space for results
# f = np.zeros((F.shape[0],size,X.shape[1]))
#
# # Initial state
# f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
#
# # Time step lengths
# dt = np.empty(X.shape)
# dt[:,0] = X[:,1]-X[:,0]
# dt[:,1:] = np.diff(X)
#
# # Solve the LTI SDE for these time steps
# As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
#
# # Sweep through remaining time points
# for k in range(1,X.shape[1]):
#
# # Form discrete-time model
# A = As[:,:,index[1-k]]
# Q = Qs[:,:,index[1-k]]
#
# # Draw the state
# f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
#
# # Return values
# return f