ENH: Added SDE for all basic kernels except Rationale Quadratic.

Some necessary modifications for the previous code are performed.
2026-05-09 12:02:38 +02:00 · 2015-07-14 16:44:21 +03:00 · 2015-07-14 16:44:21 +03:00 · 82cb626cd6
commit 82cb626cd6
parent 06a7fedd22
10 changed files with 1740 additions and 777 deletions
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
--- a/GPy/models/state_space_new.py
+++ b/GPy/models/state_space_new.py
@ -16,6 +16,7 @@

 import numpy as np
 from scipy import linalg
+from scipy import stats
 from ..core import Model
 from .. import kern
 from GPy.plotting.matplot_dep.models_plots import gpplot
@ -26,17 +27,18 @@ from GPy.core.parameterization.param import Param

 import GPy
 from .. import likelihoods
-import GPy.models.state_space_main as ssm
-#import state_space_main as ssm
-reload(ssm)
-print ssm.__file__
+
+from . import state_space_main as ssm

 class StateSpace(Model):
    def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
        super(StateSpace, self).__init__(name=name)
        self.num_data, input_dim = X.shape
        assert input_dim==1, "State space methods for time only"
-        num_data_Y, self.output_dim = Y.shape
+        if len(Y.shape) ==2: # TODO make this nice
+            num_data_Y, self.output_dim = Y.shape
+        elif len(Y.shape) ==3:
+            num_data_Y, self.output_dim, ts_number = Y.shape
        assert num_data_Y == self.num_data, "X and Y data don't match"
        assert self.output_dim == 1, "State space methods for single outputs only"

@ -68,8 +70,8 @@ class StateSpace(Model):
        """
        
        # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dFt,dQct,dP_inft) = self.kern.sde()
-
+        (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
+        
        # necessary parameters
        measurement_dim = self.output_dim
        grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -78,17 +80,19 @@ class StateSpace(Model):
        dF    = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
        dQc   = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
        dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
-
+        dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
+        
        # Assign the values for the kernel function
        dF[:,:,:-1] = dFt
        dQc[:,:,:-1] = dQct
        dP_inf[:,:,:-1] = dP_inft
-
+        dP0[:,:,:-1] = dP0t
+        
        # The sigma2 derivative
        dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
        dR[:,:,-1] = np.eye(measurement_dim)

-
+        #(F,L,Qc,H,P_inf,dF,dQc,dP_inf) = ssm.balance_ss_model(F,L,Qc,H,P_inf,dF,dQc,dP_inf)
        # Use the Kalman filter to evaluate the likelihood
        
        grad_calc_params = {}
@ -96,26 +100,53 @@ class StateSpace(Model):
        grad_calc_params['dF'] = dF
        grad_calc_params['dQc'] = dQc
        grad_calc_params['dR'] = dR
+        grad_calc_params['dP_init'] = dP0
        
        (filter_means, filter_covs, log_likelihood, 
-         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
-                                      P_init=None, calc_log_likelihood=True, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
+                                      float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
+                                      P_init=P0, calc_log_likelihood=True, 
                                      calc_grad_log_likelihood=True, 
                                      grad_params_no=grad_params_no, 
                                      grad_calc_params=grad_calc_params)
-                      
-        self._log_marginal_likelihood = log_likelihood
-        #gradients  = self.compute_gradients()
-        self.likelihood.update_gradients(grad_log_likelihood[-1,0])
        
-        self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
+        grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
+        grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
+        self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
+        self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
+        
+        self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
        
    def log_likelihood(self):
        return self._log_marginal_likelihood

-    def _predict_raw(self, Xnew, Ynew=None, filteronly=False):
+    def _raw_predict(self, Xnew, Ynew=None, filteronly=False):
        """
-        Inner function. It is called only from inside this class
+        Performs the actual prediction for new X points.
+        Inner function. It is called only from inside this class.
+        
+        Input:
+        ---------------------
+        
+        Xnews: vector or (n_points,1) matrix
+            New time points where to evaluate predictions.
+            
+        Ynews: (n_train_points, ts_no) matrix
+            This matrix can substitude the original training points (in order 
+            to use only the parameters of the model).
+            
+        filteronly: bool
+            Use only Kalman Filter for prediction. In this case the output does
+            not coincide with corresponding Gaussian process.
+            
+        Output:
+        --------------------
+        
+        m: vector
+            Mean prediction
+        
+        V: vector
+            Variance in every point
        """
        
        # Set defaults
@ -128,41 +159,44 @@ class StateSpace(Model):

        # Sort the matrix (save the order)
        _, return_index, return_inverse = np.unique(X,True,True)
-        X = X[return_index]
+        X = X[return_index] # TODO they are not used
        Y = Y[return_index]

        # Get the model matrices from the kernel
-        (F,L,Qc,H,P_inf,dF,dQc,dP_inf) = self.kern.sde()
+        (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
        state_dim = F.shape[0]        
        
+        #import pdb; pdb.set_trace()
+        #Y = self.Y[:, 0,0]
        # Run the Kalman filter
-        (M, P, tmp_log_likelihood, 
-         tmp_grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
-                                      P_init=None, calc_log_likelihood=False, 
-                                      calc_grad_log_likelihood=False) 
-                                      
+        #import pdb; pdb.set_trace()
+        (M, P, log_likelihood, 
+         grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
+                                      F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
+                                      P_init=P0, calc_log_likelihood=False, 
+                                      calc_grad_log_likelihood=False)                              
        # Run the Rauch-Tung-Striebel smoother
        if not filteronly:
            (M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P, 
                                AQcomp=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
        
        # remove initial values        
-        M = M[:,1:]
-        P = P[:,:,1:]        
+        M = M[1:,:]
+        P = P[1:,:,:]        
        
        # Put the data back in the original order
-        M = M[:,return_inverse]
-        P = P[:,:,return_inverse]
+        M = M[return_inverse,:]
+        P = P[return_inverse,:,:]

        # Only return the values for Xnew
-        M = M[:,self.num_data:]
-        P = P[:,:,self.num_data:]
+        M = M[self.num_data:,:]
+        P = P[self.num_data:,:,:]

        # Calculate the mean and variance
-        m = H.dot(M).T
-        V = np.tensordot(H[0],P,(0,0))
-        V = np.tensordot(V,H[0],(0,0))
-        V = V[:,None]
+        m = np.dot(M,H.T)
+        V = np.einsum('ij,ajk,kl', H, P, H.T)
+        
+        V.shape = (V.shape[0], V.shape[1]) # remove the third dimension

        # Return the posterior of the state
        return (m, V)
@ -170,10 +204,10 @@ class StateSpace(Model):
    def predict(self, Xnew, filteronly=False):

        # Run the Kalman filter to get the state
-        (m, V) = self._predict_raw(Xnew,filteronly=filteronly)
+        (m, V) = self._raw_predict(Xnew,filteronly=filteronly)

        # Add the noise variance to the state variance
-        V += self.sigma2
+        V += float(self.Gaussian_noise.variance)

        # Lower and upper bounds
        lower = m - 2*np.sqrt(V)
@ -181,143 +215,149 @@ class StateSpace(Model):

        # Return mean and variance
        return (m, V, lower, upper)
-
-    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
-            ax=None, resolution=None, plot_raw=False, plot_filter=False,
-            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
-
-        # Deal with optional parameters
-        if ax is None:
-            fig = pb.figure(num=fignum)
-            ax = fig.add_subplot(111)
-
-        # Define the frame on which to plot
-        resolution = resolution or 200
-        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-
-        # Make a prediction on the frame and plot it
-        if plot_raw:
-            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
-            lower = m - 2*np.sqrt(v)
-            upper = m + 2*np.sqrt(v)
-            Y = self.Y
-        else:
-            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
-            Y = self.Y
-
-        # Plot the values
-        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
-        ax.plot(self.X, self.Y, 'kx', mew=1.5)
-
-        # Optionally plot some samples
-        if samples:
-            if plot_raw:
-                Ysim = self.posterior_samples_f(Xgrid, samples)
-            else:
-                Ysim = self.posterior_samples(Xgrid, samples)
-            for yi in Ysim.T:
-                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
-
-        # Set the limits of the plot to some sensible values
-        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
-        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-        ax.set_xlim(xmin, xmax)
-        ax.set_ylim(ymin, ymax)
-
-    def prior_samples_f(self,X,size=10):
-
-        # Sort the matrix (save the order)
-        (_, return_index, return_inverse) = np.unique(X,True,True)
-        X = X[return_index]
-
-        # Get the model matrices from the kernel
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
-
-        # Allocate space for results
-        Y = np.empty((size,X.shape[0]))
-
-        # Simulate random draws
-        #for j in range(0,size):
-        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
-        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
-
-        # Only observations
-        Y = np.tensordot(H[0],Y,(0,0))
-
-        # Reorder simulated values
-        Y = Y[:,return_inverse]
-
-        # Return trajectory
-        return Y.T
-
-    def posterior_samples_f(self,X,size=10):
-
-        # Sort the matrix (save the order)
-        (_, return_index, return_inverse) = np.unique(X,True,True)
-        X = X[return_index]
-
-        # Get the model matrices from the kernel
-        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
-
-        # Run smoother on original data
-        (m,V) = self.predict_raw(X)
-
-        # Simulate random draws from the GP prior
-        y = self.prior_samples_f(np.vstack((self.X, X)),size)
-
-        # Allocate space for sample trajectories
-        Y = np.empty((size,X.shape[0]))
-
-        # Run the RTS smoother on each of these values
-        for j in range(0,size):
-            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
-            (m2,V2) = self.predict_raw(X,Ynew=yobs)
-            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
-
-        # Reorder simulated values
-        Y = Y[:,return_inverse]
-
-        # Return posterior sample trajectories
-        return Y.T
-
-    def posterior_samples(self, X, size=10):
-
-        # Make samples of f
-        Y = self.posterior_samples_f(X,size)
-
-        # Add noise
-        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
-
-        # Return trajectory
-        return Y
+        
+    def predict_quantiles(self, Xnew, quantiles=(2.5, 97.5)):
+        mu, var = self._raw_predict(Xnew)
+        #import pdb; pdb.set_trace()
+        return  [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
        
        
-    def simulate(self,F,L,Qc,Pinf,X,size=1):
-        # Simulate a trajectory using the state space model
-
-        # Allocate space for results
-        f = np.zeros((F.shape[0],size,X.shape[1]))
-
-        # Initial state
-        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
-
-        # Time step lengths
-        dt = np.empty(X.shape)
-        dt[:,0] = X[:,1]-X[:,0]
-        dt[:,1:] = np.diff(X)
-
-        # Solve the LTI SDE for these time steps
-        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
-
-        # Sweep through remaining time points
-        for k in range(1,X.shape[1]):
-
-            # Form discrete-time model
-            A = As[:,:,index[1-k]]
-            Q = Qs[:,:,index[1-k]]
-
-            # Draw the state
-            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
-
-        # Return values
-        return f
+#    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
+#            ax=None, resolution=None, plot_raw=False, plot_filter=False,
+#            linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+#
+#        # Deal with optional parameters
+#        if ax is None:
+#            fig = pb.figure(num=fignum)
+#            ax = fig.add_subplot(111)
+#
+#        # Define the frame on which to plot
+#        resolution = resolution or 200
+#        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+#
+#        # Make a prediction on the frame and plot it
+#        if plot_raw:
+#            m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
+#            lower = m - 2*np.sqrt(v)
+#            upper = m + 2*np.sqrt(v)
+#            Y = self.Y
+#        else:
+#            m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
+#            Y = self.Y
+#
+#        # Plot the values
+#        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
+#        ax.plot(self.X, self.Y, 'kx', mew=1.5)
+#
+#        # Optionally plot some samples
+#        if samples:
+#            if plot_raw:
+#                Ysim = self.posterior_samples_f(Xgrid, samples)
+#            else:
+#                Ysim = self.posterior_samples(Xgrid, samples)
+#            for yi in Ysim.T:
+#                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
+#
+#        # Set the limits of the plot to some sensible values
+#        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
+#        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+#        ax.set_xlim(xmin, xmax)
+#        ax.set_ylim(ymin, ymax)
+#
+#    def prior_samples_f(self,X,size=10):
+#
+#        # Sort the matrix (save the order)
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        X = X[return_index]
+#
+#        # Get the model matrices from the kernel
+#        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#
+#        # Allocate space for results
+#        Y = np.empty((size,X.shape[0]))
+#
+#        # Simulate random draws
+#        #for j in range(0,size):
+#        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
+#        Y = self.simulate(F,L,Qc,Pinf,X.T,size)
+#
+#        # Only observations
+#        Y = np.tensordot(H[0],Y,(0,0))
+#
+#        # Reorder simulated values
+#        Y = Y[:,return_inverse]
+#
+#        # Return trajectory
+#        return Y.T
+#
+#    def posterior_samples_f(self,X,size=10):
+#
+#        # Sort the matrix (save the order)
+#        (_, return_index, return_inverse) = np.unique(X,True,True)
+#        X = X[return_index]
+#
+#        # Get the model matrices from the kernel
+#        (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
+#
+#        # Run smoother on original data
+#        (m,V) = self.predict_raw(X)
+#
+#        # Simulate random draws from the GP prior
+#        y = self.prior_samples_f(np.vstack((self.X, X)),size)
+#
+#        # Allocate space for sample trajectories
+#        Y = np.empty((size,X.shape[0]))
+#
+#        # Run the RTS smoother on each of these values
+#        for j in range(0,size):
+#            yobs =  y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
+#            (m2,V2) = self.predict_raw(X,Ynew=yobs)
+#            Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
+#
+#        # Reorder simulated values
+#        Y = Y[:,return_inverse]
+#
+#        # Return posterior sample trajectories
+#        return Y.T
+#
+#    def posterior_samples(self, X, size=10):
+#
+#        # Make samples of f
+#        Y = self.posterior_samples_f(X,size)
+#
+#        # Add noise
+#        Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
+#
+#        # Return trajectory
+#        return Y
+#        
+#        
+#    def simulate(self,F,L,Qc,Pinf,X,size=1):
+#        # Simulate a trajectory using the state space model
+#
+#        # Allocate space for results
+#        f = np.zeros((F.shape[0],size,X.shape[1]))
+#
+#        # Initial state
+#        f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
+#
+#        # Time step lengths
+#        dt = np.empty(X.shape)
+#        dt[:,0] = X[:,1]-X[:,0]
+#        dt[:,1:] = np.diff(X)
+#
+#        # Solve the LTI SDE for these time steps
+#        As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
+#
+#        # Sweep through remaining time points
+#        for k in range(1,X.shape[1]):
+#
+#            # Form discrete-time model
+#            A = As[:,:,index[1-k]]
+#            Q = Qs[:,:,index[1-k]]
+#
+#            # Draw the state
+#            f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
+#
+#        # Return values
+#        return f