From 5fc80202ab2db8a928de11c3ba13d2a6c6bc1518 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 28 Nov 2013 14:03:45 +0200 Subject: [PATCH 1/9] predict_raw can take arbitary Ys. --- GPy/models/state_space.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index 27ea7ca3..df3201ae 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -77,11 +77,15 @@ class StateSpace(Model): #return self.kf_likelihood_g(F,L,Qc,self.sigma2,H,Pinf,dF,dQc,dPinf,self.X,self.Y) return False - def predict_raw(self, Xnew, filteronly=False): + def predict_raw(self, Xnew, Y=None, filteronly=False): + + # Set defaults + if Y is None: + Y = self.Y # Make a single matrix containing training and testing points X = np.vstack((self.X, Xnew)) - Y = np.vstack((self.Y, np.nan*np.zeros(Xnew.shape))) + Y = np.vstack((Y, np.nan*np.zeros(Xnew.shape))) # Sort the matrix (save the order) _, return_index, return_inverse = np.unique(X,True,True) From 95948c760ee26034c68b6f97a75307d4dac9c797 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 28 Nov 2013 14:11:44 +0200 Subject: [PATCH 2/9] fixed bug that prevented the smoother from running. --- GPy/models/state_space.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index df3201ae..313f4846 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -77,15 +77,15 @@ class StateSpace(Model): #return self.kf_likelihood_g(F,L,Qc,self.sigma2,H,Pinf,dF,dQc,dPinf,self.X,self.Y) return False - def predict_raw(self, Xnew, Y=None, filteronly=False): + def predict_raw(self, Xnew, Ynew=None, filteronly=False): # Set defaults - if Y is None: - Y = self.Y + if Ynew is None: + Ynew = self.Y # Make a single matrix containing training and testing points X = np.vstack((self.X, Xnew)) - Y = np.vstack((Y, np.nan*np.zeros(Xnew.shape))) + Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape))) # Sort the matrix (save the order) _, return_index, return_inverse = np.unique(X,True,True) @@ -99,7 +99,7 @@ class StateSpace(Model): (M, P) = self.kalman_filter(F,L,Qc,H,self.sigma2,Pinf,X.T,Y.T) # Run the Rauch-Tung-Striebel smoother - if not filter: + if not filteronly: (M, P) = self.rts_smoother(F,L,Qc,X.T,M,P) # Put the data back in the original order From 68af1db76a7b885439aa438b553def6583b78c53 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 28 Nov 2013 15:08:51 +0200 Subject: [PATCH 3/9] Posterior and prior samples are now handled. --- GPy/models/state_space.py | 50 +++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index 313f4846..d9b3e6e9 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -163,7 +163,10 @@ class StateSpace(Model): # Optionally plot some samples if samples: - Ysim = self.posterior_samples(Xgrid, samples) + if plot_raw: + Ysim = self.posterior_samples_f(Xgrid, samples) + else: + Ysim = self.posterior_samples(Xgrid, samples) for yi in Ysim.T: ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25) @@ -173,11 +176,15 @@ class StateSpace(Model): ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - def posterior_samples_f(self,X,size=10): + def prior_samples_f(self,X,size=10): # Reorder X values - sort_index = np.argsort(X[:,0]) - X = X[sort_index] + #sort_index = np.argsort(X[:,0]) + #X = X[sort_index] + + # Sort the matrix (save the order) + _, return_index, return_inverse = np.unique(X,True,True) + X = X[return_index] # Get the model matrices from the kernel (F,L,Qc,H,Pinf) = self.kern.sde() @@ -190,11 +197,44 @@ class StateSpace(Model): Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T)) # Reorder simulated values - Y[:,sort_index] = Y[:,:] + Y = Y[:,return_inverse] # Return trajectory return Y.T + def posterior_samples_f(self,X,size=10): + + # Reorder X values + #sort_index = np.argsort(X[:,0]) + #X = X[sort_index] + # Sort the matrix (save the order) + (_, return_index, return_inverse) = np.unique(X,True,True) + X = X[return_index] + + # Get the model matrices from the kernel + (F,L,Qc,H,Pinf) = self.kern.sde() + + # Run smoother on original data + (m,V) = self.predict_raw(X) + + # Simulate random draws from the GP prior + y = self.prior_samples_f(np.vstack((self.X, X)),size) + + # Allocate space for sample trajectories + Y = np.empty((size,X.shape[0])) + + # Run the RTS smoother on each of these values + for j in range(0,size): + yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1) + (m2,V2) = self.predict_raw(X,Ynew=yobs) + Y[j,:] = m.T + y[self.num_data:,j].T - m2.T + + # Reorder simulated values + Y = Y[:,return_inverse] + + # Return posterior sample trajectories + return Y.T + def posterior_samples(self, X, size=10): # Make samples of f From 59afe14434331ad1406b10d6d8907d78db3fc769 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 28 Nov 2013 15:17:21 +0200 Subject: [PATCH 4/9] Removed unused code. --- GPy/models/state_space.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index d9b3e6e9..8535bafb 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -178,12 +178,8 @@ class StateSpace(Model): def prior_samples_f(self,X,size=10): - # Reorder X values - #sort_index = np.argsort(X[:,0]) - #X = X[sort_index] - # Sort the matrix (save the order) - _, return_index, return_inverse = np.unique(X,True,True) + (_, return_index, return_inverse) = np.unique(X,True,True) X = X[return_index] # Get the model matrices from the kernel @@ -204,9 +200,6 @@ class StateSpace(Model): def posterior_samples_f(self,X,size=10): - # Reorder X values - #sort_index = np.argsort(X[:,0]) - #X = X[sort_index] # Sort the matrix (save the order) (_, return_index, return_inverse) = np.unique(X,True,True) X = X[return_index] From 9729a9a3a548e686298d2e099de7a9d742e17288 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Tue, 3 Dec 2013 20:50:22 +0200 Subject: [PATCH 5/9] Now several trajectories can be simulated in one go. --- GPy/models/state_space.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index 8535bafb..c1acd6af 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -189,8 +189,12 @@ class StateSpace(Model): Y = np.empty((size,X.shape[0])) # Simulate random draws - for j in range(0,size): - Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T)) + #for j in range(0,size): + # Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T)) + Y = self.simulate(F,L,Qc,Pinf,X.T,size) + + # Only observations + Y = np.tensordot(H[0],Y,(0,0)) # Reorder simulated values Y = Y[:,return_inverse] @@ -360,23 +364,32 @@ class StateSpace(Model): # Return likelihood return lik[0,0] - def simulate(self,F,L,Qc,Pinf,X): + def simulate(self,F,L,Qc,Pinf,X,size=1): # Simulate a trajectory using the state space model # Allocate space for results - f = np.zeros((F.shape[0],X.shape[1])) + f = np.zeros((F.shape[0],size,X.shape[1])) # Initial state - f[:,0:1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],1)) + f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size)) + + # Time step lengths + dt = np.empty(X.shape) + dt[:,0] = X[:,1]-X[:,0] + dt[:,1:] = np.diff(X) + + # Solve the LTI SDE for these time steps + As, Qs, index = self.lti_disc(F,L,Qc,dt) # Sweep through remaining time points for k in range(1,X.shape[1]): # Form discrete-time model - (A,Q) = self.lti_disc(F,L,Qc,X[:,k]-X[:,k-1]) + A = As[:,:,index[1-k]] + Q = Qs[:,:,index[1-k]] # Draw the state - f[:,k] = A.dot(f[:,k-1]).T + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],1)).T + f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size)) # Return values return f From c7a09de80c6b43110425f1161ad861e23fc65f66 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 30 Jan 2014 18:50:54 +0200 Subject: [PATCH 6/9] Now also the derivative matrices can be summed. --- GPy/kern/kern.py | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 7b2c01e4..b7a9f832 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -577,38 +577,50 @@ class kern(Parameterized): def sde(self): # TODO: should support adding kernels together - #raise NameError('HiThere') + #raise NameError('Problem') # Find out state dimensions - n = 0; - nq = 0; + n = 0 + nq = 0 + nd = 0 for p in self.parts: - (F,L,Qc,H,Pinf) = p.sde() - n += F.shape[0] + (F,L,Qc,H,Pinf,dF,dQc,dPinf) = p.sde() + n += F.shape[0] nq += Qc.shape[0] + nd += dF.shape[2] # Allocate space for the matrices - F = np.zeros((n,n)) - L = np.zeros((n,nq)) - Qc = np.zeros((nq,nq)) - H = np.zeros((1,n)) - Pinf = np.zeros((n,n)) - n = 0; - nq = 0; + F = np.zeros((n,n)) + L = np.zeros((n,nq)) + Qc = np.zeros((nq,nq)) + H = np.zeros((1,n)) + Pinf = np.zeros((n,n)) + dF = np.zeros((n,n,nd)) + dQc = np.zeros((nq,nq,nd)) + dPinf = np.zeros((n,n,nd)) + n = 0 + nq = 0 + nd = 0 # Assign models for p in self.parts: - (Ft,Lt,Qct,Ht,Pinft) = p.sde() + (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde() F[n:n+Ft.shape[0],n:n+Ft.shape[1]] = Ft L[n:n+Lt.shape[0],nq:nq+Lt.shape[1]] = Lt Qc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1]] = Qct H[0,n:n+Ht.shape[1]] = Ht Pinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1]] = Pinft + dF[n:n+Ft.shape[0],n:n+Ft.shape[1],nd:nd+dFt.shape[2]] = dFt + dQc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1],nd:nd+dQct.shape[2]] = dQct + dPinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1],nd:nd+dPinft.shape[2]] = dPinft n += Ft.shape[0] nq += Qct.shape[0] + nd += dFt.shape[2] - return (F,L,Qc,H,Pinf) - #self.parts[0].sde() + return (F,L,Qc,H,Pinf,dF,dQc,dPinf) + + # To test with only one kernel + # return self.parts[0].sde() from GPy.core.model import Model From 388ba68dcfabab66cef7803d49ad00ec8ebe07e7 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 30 Jan 2014 18:51:42 +0200 Subject: [PATCH 7/9] Added the derivatives of the state space matrices for likelihood gradient evaluation. --- GPy/kern/parts/Matern32.py | 40 ++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/parts/Matern32.py index 2828a455..481bcfd6 100644 --- a/GPy/kern/parts/Matern32.py +++ b/GPy/kern/parts/Matern32.py @@ -142,13 +142,37 @@ class Matern32(Kernpart): """ Return the state space representation of the covariance. """ - foo = np.sqrt(3)/self.lengthscale - F = np.array([[0, 1], [-foo**2, -2*foo]]) - L = np.array([[0], [1]]) - Qc = np.array([12*np.sqrt(3) / self.lengthscale**3 * self.variance]) - H = np.array([[1, 0]]) + foo = np.sqrt(3.)/self.lengthscale + F = np.array([[0, 1], [-foo**2, -2*foo]]) + L = np.array([[0], [1]]) + Qc = np.array([12.*np.sqrt(3) / self.lengthscale**3 * self.variance]) + H = np.array([[1, 0]]) Pinf = np.array([[self.variance, 0], - [0, 3*self.variance/(self.lengthscale**2)]]) - # TODO: return the derivatives as well - return (F, L, Qc, H, Pinf) + [0, 3.*self.variance/(self.lengthscale**2)]]) + + # Allocate space for the derivatives + dF = np.empty([F.shape[0],F.shape[1],2]) + dQc = np.empty([Qc.shape[0],Qc.shape[1],2]) + dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) + + # The partial derivatives + dFvariance = np.zeros([2,2]) + dFlengthscale = np.array([[0,0], + [6./self.lengthscale**3,2*np.sqrt(3)/self.lengthscale**2]]) + dQcvariance = np.array([12.*np.sqrt(3)/self.lengthscale**3]) + dQclengthscale = np.array([-3*12*np.sqrt(3)/self.lengthscale**4*self.variance]) + dPinfvariance = np.array([[1,0],[0,3./self.lengthscale**2]]) + dPinflengthscale = np.array([[0,0], + [0,-6*self.variance/self.lengthscale**3]]) + + # Combine the derivatives + dF[:,:,0] = dFvariance + dF[:,:,1] = dFlengthscale + dQc[:,:,0] = dQcvariance + dQc[:,:,1] = dQclengthscale + dPinf[:,:,0] = dPinfvariance + dPinf[:,:,1] = dPinflengthscale + + # TODO: return the derivatives as well + return (F, L, Qc, H, Pinf, dF, dQc, dPinf) From 51cbaced9461b02d28de01b13f348ce190138428 Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Thu, 30 Jan 2014 18:52:26 +0200 Subject: [PATCH 8/9] Initial version of the likelihood gradient evaluation. --- GPy/models/state_space.py | 193 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 184 insertions(+), 9 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index c1acd6af..af92d5d4 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -63,7 +63,7 @@ class StateSpace(Model): def log_likelihood(self): # Get the model matrices from the kernel - (F,L,Qc,H,Pinf) = self.kern.sde() + (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde() # Use the Kalman filter to evaluate the likelihood return self.kf_likelihood(F,L,Qc,H,self.sigma2,Pinf,self.X.T,self.Y.T) @@ -71,12 +71,25 @@ class StateSpace(Model): def _log_likelihood_gradients(self): # Get the model matrices from the kernel - (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde() + (F,L,Qc,H,Pinf,dFt,dQct,dPinft) = self.kern.sde() - # Calculate the likelihood gradients TODO - #return self.kf_likelihood_g(F,L,Qc,self.sigma2,H,Pinf,dF,dQc,dPinf,self.X,self.Y) - return False + # Allocate space for the full partial derivative matrices + dF = np.zeros([dFt.shape[0],dFt.shape[1],dFt.shape[2]+1]) + dQc = np.zeros([dQct.shape[0],dQct.shape[1],dQct.shape[2]+1]) + dPinf = np.zeros([dPinft.shape[0],dPinft.shape[1],dPinft.shape[2]+1]) + + # Assign the values for the kernel function + dF[:,:,:-1] = dFt + dQc[:,:,:-1] = dQct + dPinf[:,:,:-1] = dPinft + + # The sigma2 derivative + dR = np.zeros([1,1,dF.shape[2]]) + dR[:,:,-1] = 1 + # Calculate the likelihood gradients + return self.kf_likelihood_g(F,L,Qc,H,self.sigma2,Pinf,dF,dQc,dPinf,dR,self.X.T,self.Y.T) + def predict_raw(self, Xnew, Ynew=None, filteronly=False): # Set defaults @@ -93,7 +106,7 @@ class StateSpace(Model): Y = Y[return_index] # Get the model matrices from the kernel - (F,L,Qc,H,Pinf) = self.kern.sde() + (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde() # Run the Kalman filter (M, P) = self.kalman_filter(F,L,Qc,H,self.sigma2,Pinf,X.T,Y.T) @@ -101,7 +114,7 @@ class StateSpace(Model): # Run the Rauch-Tung-Striebel smoother if not filteronly: (M, P) = self.rts_smoother(F,L,Qc,X.T,M,P) - + # Put the data back in the original order M = M[:,return_inverse] P = P[:,:,return_inverse] @@ -183,7 +196,7 @@ class StateSpace(Model): X = X[return_index] # Get the model matrices from the kernel - (F,L,Qc,H,Pinf) = self.kern.sde() + (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde() # Allocate space for results Y = np.empty((size,X.shape[0])) @@ -209,7 +222,7 @@ class StateSpace(Model): X = X[return_index] # Get the model matrices from the kernel - (F,L,Qc,H,Pinf) = self.kern.sde() + (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde() # Run smoother on original data (m,V) = self.predict_raw(X) @@ -364,6 +377,168 @@ class StateSpace(Model): # Return likelihood return lik[0,0] + def kf_likelihood_g(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y): + # Evaluate marginal likelihood gradient + + # Loop lengths + steps = Y.shape[1] + nparam = dF.shape[2] + n = F.shape[0] + + # Time steps + t = X.squeeze() + + # Allocate space + e = 0 + eg = np.zeros(nparam) + + # Set up + Z = np.zeros(F.shape) + QC = L.dot(Qc).dot(L.T) + m = np.zeros([n,1]) + P = Pinf.copy() + dm = np.zeros([n,nparam]) + dP = dPinf.copy() + mm = m.copy() + PP = P.copy() + + # % Initial dt + dt = -np.Inf + + # Allocate space for expm results + AA = np.zeros([2*F.shape[0], 2*F.shape[0], nparam]) + AAA = np.zeros([4*F.shape[0], 4*F.shape[0], nparam]) + FF = np.zeros([2*F.shape[0], 2*F.shape[0]]) + FFF = np.zeros([4*F.shape[0], 4*F.shape[0]]) + + # Loop over all observations + for k in range(0,steps): + + # The previous time step + dt_old = dt; + + # The time discretization step length + if k>0: + dt = t[k]-t[k-1] + else: + dt = t[1]-t[0] + + # Loop through all parameters (Kalman filter prediction step) + for j in range(0,nparam): + + # Should we recalculate the matrix exponential? + if abs(dt-dt_old) > 1e-9: + + # The first matrix for the matrix factor decomposition + FF[:n,:n] = F + FF[n:,:n] = dF[:,:,j] + FF[n:,n:] = F + + # Solve the matrix exponential + AA[:,:,j] = linalg.expm3(FF*dt) + + # Solve using matrix fraction decomposition + foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]])) + + # Pick the parts + mm = foo[:n,:] + dm[:,j:j+1] = foo[n:,:] + + # Should we recalculate the matrix exponential? + if abs(dt-dt_old) > 1e-9: + + # Define W and G + W = L.dot(dQc[:,:,j]).dot(L.T) + G = dF[:,:,j]; + + # The second matrix for the matrix factor decomposition + FFF[:n,:n] = F + FFF[2*n:-n,:n] = G + FFF[:n, n:2*n] = QC + FFF[n:2*n, n:2*n] = -F.T + FFF[2*n:-n,n:2*n] = W + FFF[-n:, n:2*n] = -G.T + FFF[2*n:-n,2*n:-n] = F + FFF[2*n:-n,-n:] = QC + FFF[-n:,-n:] = -F.T + + # Solve the matrix exponential + AAA[:,:,j] = linalg.expm3(FFF*dt) + + # Solve using matrix fraction decomposition + foo = AAA[:,:,j].dot(np.vstack([P, np.eye(n), dP[:,:,j], np.zeros([n,n])])) + + # Pick the parts + C = foo[:n, :] + D = foo[n:2*n, :] + dC = foo[2*n:-n,:] + dD = foo[-n:, :] + + # The prediction step covariance (PP = C/D) + if j==0: + PP = linalg.solve(D.T,C.T).T + PP = (PP + PP.T)/2 + + # Sove dP for j (C/D == P_{k|k-1}) + dP[:,:,j] = linalg.solve(D.T,(dC - PP.dot(dD)).T).T + + # Set predicted m and P + m = mm + P = PP + + # Start the Kalman filter update step and precalculate variables + S = H.dot(P).dot(H.T) + R + + # We should calculate the Cholesky factor if S is a matrix + # [LS,notposdef] = chol(S,'lower'); + + # The Kalman filter update (S is scalar) + HtiS = H.T/S + iS = 1/S + K = P.dot(HtiS) + v = Y[:,k]-H.dot(m) + vtiS = v.T/S + + # Loop through all parameters (Kalman filter update step derivative) + for j in range(0,nparam): + + # Innovation covariance derivative + dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j]; + + # Evaluate the energy derivative for j + eg[j] = eg[j] \ + - .5*np.sum(iS*dS) \ + + .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \ + + .5*vtiS.dot(dS).dot(vtiS.T) \ + + .5*vtiS.dot(H.dot(dm[:,j:j+1])) + + # Kalman filter update step derivatives + dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S + dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1]) + dKSKt = dK.dot(S).dot(K.T) + dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T + + # Evaluate the energy + # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v); + e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v) + + # Finish Kalman filter update step + m = m + K.dot(v) + P = P - K.dot(S).dot(K.T) + + # Make sure the covariances stay symmetric + P = (P+P.T)/2 + dP = (dP + dP.transpose([1,0,2]))/2 + + # raise NameError('Debug me') + + # Report + #print e + #print eg + + # Return the gradient + return eg + def simulate(self,F,L,Qc,Pinf,X,size=1): # Simulate a trajectory using the state space model From 4e8c92407e6fdf1075a7fc5a699f0d6cf9b0484a Mon Sep 17 00:00:00 2001 From: Arno Solin Date: Tue, 4 Feb 2014 13:48:35 +0200 Subject: [PATCH 9/9] Added a new way of calculating the likelihood gradient. The code is still a draft, but looks promising. --- GPy/models/state_space.py | 193 +++++++++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 32 deletions(-) diff --git a/GPy/models/state_space.py b/GPy/models/state_space.py index af92d5d4..85582cde 100644 --- a/GPy/models/state_space.py +++ b/GPy/models/state_space.py @@ -82,14 +82,14 @@ class StateSpace(Model): dF[:,:,:-1] = dFt dQc[:,:,:-1] = dQct dPinf[:,:,:-1] = dPinft - + # The sigma2 derivative dR = np.zeros([1,1,dF.shape[2]]) dR[:,:,-1] = 1 # Calculate the likelihood gradients return self.kf_likelihood_g(F,L,Qc,H,self.sigma2,Pinf,dF,dQc,dPinf,dR,self.X.T,self.Y.T) - + def predict_raw(self, Xnew, Ynew=None, filteronly=False): # Set defaults @@ -114,7 +114,7 @@ class StateSpace(Model): # Run the Rauch-Tung-Striebel smoother if not filteronly: (M, P) = self.rts_smoother(F,L,Qc,X.T,M,P) - + # Put the data back in the original order M = M[:,return_inverse] P = P[:,:,return_inverse] @@ -380,10 +380,10 @@ class StateSpace(Model): def kf_likelihood_g(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y): # Evaluate marginal likelihood gradient - # Loop lengths + # State dimension, number of data points and number of parameters + n = F.shape[0] steps = Y.shape[1] nparam = dF.shape[2] - n = F.shape[0] # Time steps t = X.squeeze() @@ -392,6 +392,135 @@ class StateSpace(Model): e = 0 eg = np.zeros(nparam) + # Set up + m = np.zeros([n,1]) + P = Pinf.copy() + dm = np.zeros([n,nparam]) + dP = dPinf.copy() + mm = m.copy() + PP = P.copy() + + # Initial dt + dt = -np.Inf + + # Allocate space for expm results + AA = np.zeros([2*n, 2*n, nparam]) + FF = np.zeros([2*n, 2*n]) + + # Loop over all observations + for k in range(0,steps): + + # The previous time step + dt_old = dt; + + # The time discretization step length + if k>0: + dt = t[k]-t[k-1] + else: + dt = 0 + + # Loop through all parameters (Kalman filter prediction step) + for j in range(0,nparam): + + # Should we recalculate the matrix exponential? + if abs(dt-dt_old) > 1e-9: + + # The first matrix for the matrix factor decomposition + FF[:n,:n] = F + FF[n:,:n] = dF[:,:,j] + FF[n:,n:] = F + + # Solve the matrix exponential + AA[:,:,j] = linalg.expm3(FF*dt) + + # Solve the differential equation + foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]])) + mm = foo[:n,:] + dm[:,j:j+1] = foo[n:,:] + + # The discrete-time dynamical model + if j==0: + A = AA[:n,:n,j] + Q = Pinf - A.dot(Pinf).dot(A.T) + PP = A.dot(P).dot(A.T) + Q + + # The derivatives of A and Q + dA = AA[n:,:n,j] + dQ = dPinf[:,:,j] - dA.dot(Pinf).dot(A.T) \ + - A.dot(dPinf[:,:,j]).dot(A.T) - A.dot(Pinf).dot(dA.T) + + # The derivatives of P + dP[:,:,j] = dA.dot(P).dot(A.T) + A.dot(dP[:,:,j]).dot(A.T) \ + + A.dot(P).dot(dA.T) + dQ + + # Set predicted m and P + m = mm + P = PP + + # Start the Kalman filter update step and precalculate variables + S = H.dot(P).dot(H.T) + R + + # We should calculate the Cholesky factor if S is a matrix + # [LS,notposdef] = chol(S,'lower'); + + # The Kalman filter update (S is scalar) + HtiS = H.T/S + iS = 1/S + K = P.dot(HtiS) + v = Y[:,k]-H.dot(m) + vtiS = v.T/S + + # Loop through all parameters (Kalman filter update step derivative) + for j in range(0,nparam): + + # Innovation covariance derivative + dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j]; + + # Evaluate the energy derivative for j + eg[j] = eg[j] \ + - .5*np.sum(iS*dS) \ + + .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \ + + .5*vtiS.dot(dS).dot(vtiS.T) \ + + .5*vtiS.dot(H.dot(dm[:,j:j+1])) + + # Kalman filter update step derivatives + dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S + dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1]) + dKSKt = dK.dot(S).dot(K.T) + dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T + + # Evaluate the energy + # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v); + e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v) + + # Finish Kalman filter update step + m = m + K.dot(v) + P = P - K.dot(S).dot(K.T) + + # Make sure the covariances stay symmetric + P = (P+P.T)/2 + dP = (dP + dP.transpose([1,0,2]))/2 + + # raise NameError('Debug me') + + # Return the gradient + return eg + + def kf_likelihood_g_notstable(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y): + # Evaluate marginal likelihood gradient + + # State dimension, number of data points and number of parameters + steps = Y.shape[1] + nparam = dF.shape[2] + n = F.shape[0] + + # Time steps + t = X.squeeze() + + # Allocate space + e = 0 + eg = np.zeros(nparam) + # Set up Z = np.zeros(F.shape) QC = L.dot(Qc).dot(L.T) @@ -401,10 +530,10 @@ class StateSpace(Model): dP = dPinf.copy() mm = m.copy() PP = P.copy() - + # % Initial dt dt = -np.Inf - + # Allocate space for expm results AA = np.zeros([2*F.shape[0], 2*F.shape[0], nparam]) AAA = np.zeros([4*F.shape[0], 4*F.shape[0], nparam]) @@ -413,14 +542,14 @@ class StateSpace(Model): # Loop over all observations for k in range(0,steps): - + # The previous time step dt_old = dt; - + # The time discretization step length - if k>0: + if k>0: dt = t[k]-t[k-1] - else: + else: dt = t[1]-t[0] # Loop through all parameters (Kalman filter prediction step) @@ -428,10 +557,10 @@ class StateSpace(Model): # Should we recalculate the matrix exponential? if abs(dt-dt_old) > 1e-9: - + # The first matrix for the matrix factor decomposition FF[:n,:n] = F - FF[n:,:n] = dF[:,:,j] + FF[n:,:n] = dF[:,:,j] FF[n:,n:] = F # Solve the matrix exponential @@ -443,14 +572,14 @@ class StateSpace(Model): # Pick the parts mm = foo[:n,:] dm[:,j:j+1] = foo[n:,:] - + # Should we recalculate the matrix exponential? if abs(dt-dt_old) > 1e-9: - + # Define W and G - W = L.dot(dQc[:,:,j]).dot(L.T) + W = L.dot(dQc[:,:,j]).dot(L.T) G = dF[:,:,j]; - + # The second matrix for the matrix factor decomposition FFF[:n,:n] = F FFF[2*n:-n,:n] = G @@ -467,10 +596,10 @@ class StateSpace(Model): # Solve using matrix fraction decomposition foo = AAA[:,:,j].dot(np.vstack([P, np.eye(n), dP[:,:,j], np.zeros([n,n])])) - + # Pick the parts - C = foo[:n, :] - D = foo[n:2*n, :] + C = foo[:n, :] + D = foo[n:2*n, :] dC = foo[2*n:-n,:] dD = foo[-n:, :] @@ -478,50 +607,50 @@ class StateSpace(Model): if j==0: PP = linalg.solve(D.T,C.T).T PP = (PP + PP.T)/2 - + # Sove dP for j (C/D == P_{k|k-1}) dP[:,:,j] = linalg.solve(D.T,(dC - PP.dot(dD)).T).T - + # Set predicted m and P m = mm P = PP # Start the Kalman filter update step and precalculate variables S = H.dot(P).dot(H.T) + R - + # We should calculate the Cholesky factor if S is a matrix # [LS,notposdef] = chol(S,'lower'); - + # The Kalman filter update (S is scalar) HtiS = H.T/S iS = 1/S - K = P.dot(HtiS) + K = P.dot(HtiS) v = Y[:,k]-H.dot(m) vtiS = v.T/S - + # Loop through all parameters (Kalman filter update step derivative) - for j in range(0,nparam): - + for j in range(0,nparam): + # Innovation covariance derivative dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j]; - + # Evaluate the energy derivative for j eg[j] = eg[j] \ - .5*np.sum(iS*dS) \ + .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \ + .5*vtiS.dot(dS).dot(vtiS.T) \ + .5*vtiS.dot(H.dot(dm[:,j:j+1])) - + # Kalman filter update step derivatives dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1]) dKSKt = dK.dot(S).dot(K.T) dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T - + # Evaluate the energy # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v); e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v) - + # Finish Kalman filter update step m = m + K.dot(v) P = P - K.dot(S).dot(K.T)