From 06a7fedd22100ad070e95488908c993d8c0c95dd Mon Sep 17 00:00:00 2001 From: Alexander Grigorievskiy Date: Tue, 19 May 2015 14:03:12 +0300 Subject: [PATCH] ENH: Adding SDE representation of addition, sumation and standard periodic kernel. All changes have been tested tests are added in later commits. --- GPy/examples/state_space.py | 16 +-- GPy/kern/_src/sde_matern.py | 134 ++++++++++++++++++++-- GPy/kern/_src/sde_standard_periodic.py | 152 +++++++++++++++++++++++-- GPy/kern/src/add.py | 78 +++++++++++++ GPy/kern/src/kern.py | 45 -------- GPy/kern/src/prod.py | 107 +++++++++++++++++ GPy/models/state_space_main.py | 6 +- GPy/models/state_space_new.py | 21 ++-- 8 files changed, 477 insertions(+), 82 deletions(-) diff --git a/GPy/examples/state_space.py b/GPy/examples/state_space.py index 071fb628..a9d770e6 100644 --- a/GPy/examples/state_space.py +++ b/GPy/examples/state_space.py @@ -4,7 +4,7 @@ import matplotlib.pyplot as plt import GPy.models.state_space_new as SS_new -X = np.linspace(0, 10, 4000)[:, None] +X = np.linspace(0, 10, 2000)[:, None] Y = np.sin(X) + np.random.randn(*X.shape)*0.1 # Need to run these lines when X and Y are imported -> @@ -23,14 +23,14 @@ Y = np.sin(X) + np.random.randn(*X.shape)*0.1 #plt.plot( X, Y) #plt.show() -#kernel = GPy.kern.Matern32(X.shape[1]) -#m = GPy.models.StateSpace(X,Y, kernel) +kernel = GPy.kern.Matern32(X.shape[1]) +m = GPy.models.StateSpace(X,Y, kernel) + +print m # -#print m -## -#m.optimize(optimizer='bfgs',messages=True) -## -#print m +m.optimize(optimizer='bfgs',messages=True) +# +print m kernel1 = GPy.kern.Matern32(X.shape[1]) m1 = GPy.models.GPRegression(X,Y, kernel1) diff --git a/GPy/kern/_src/sde_matern.py b/GPy/kern/_src/sde_matern.py index 102691b4..0016c5b8 100644 --- a/GPy/kern/_src/sde_matern.py +++ b/GPy/kern/_src/sde_matern.py @@ -20,7 +20,15 @@ class sde_Matern32(Matern32): k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } """ - + def sde_update_gradient_full(self, gradients): + """ + Update gradient in the order in which parameters are represented in the + kernel + """ + + self.variance.gradient = gradients[0] + self.lengthscale.gradient = gradients[1] + def sde(self): """ Return the state space representation of the covariance. @@ -28,6 +36,7 @@ class sde_Matern32(Matern32): variance = float(self.variance.values) lengthscale = float(self.lengthscale.values) + foo = np.sqrt(3.)/lengthscale F = np.array([[0, 1], [-foo**2, -2*foo]]) L = np.array([[0], [1]]) @@ -71,16 +80,125 @@ class sde_Matern52(Matern52): k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } """ - + def sde_update_gradient_full(self, gradients): + """ + Update gradient in the order in which parameters are represented in the + kernel + """ + + self.variance.gradient = gradients[0] + self.lengthscale.gradient = gradients[1] + def sde(self): """ Return the state space representation of the covariance. """ - # Arno, insert your code here + variance = float(self.variance.values) + lengthscale = float(self.lengthscale.values) - # Params to use: - # self.lengthscale - # self.variance - - #return (F, L, Qc, H, Pinf, dF, dQc, dPinf) \ No newline at end of file + lamda = np.sqrt(5.0)/lengthscale + kappa = 5.0/3.0*variance/lengthscale**2 + + F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda))) + L = np.array(((0,),(0,),(1,))) + Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),)) + H = np.array(((1,0,0),)) + + Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4))) + + # Allocate space for the derivatives + dF = np.empty((3,3,2)) + dQc = np.empty((1,1,2)) + dPinf = np.empty((3,3,2)) + + # The partial derivatives + dFvariance = np.zeros((3,3)) + dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, + 30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2))) + dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),))) + dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),))) + + dPinf_variance = Pinf/variance + kappa2 = -2.0*kappa/lengthscale + dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, + 0,-100*variance/lengthscale**5))) + # Combine the derivatives + dF[:,:,0] = dFvariance + dF[:,:,1] = dFlengthscale + dQc[:,:,0] = dQcvariance + dQc[:,:,1] = dQclengthscale + dPinf[:,:,0] = dPinf_variance + dPinf[:,:,1] = dPinf_lengthscale + +# % Derivative of F w.r.t. parameter magnSigma2 +# dFmagnSigma2 = [0, 0, 0; +# 0, 0, 0; +# 0, 0, 0]; +# +# % Derivative of F w.r.t parameter lengthScale +# dFlengthScale = [0, 0, 0; +# 0, 0, 0; +# 15*sqrt(5)/lengthScale^4, 30/lengthScale^3, 3*sqrt(5)/lengthScale^2]; +# +# % Derivative of Qc w.r.t. parameter magnSigma2 +# dQcmagnSigma2 = 400*sqrt(5)/3/lengthScale^5; +# +# % Derivative of Qc w.r.t. parameter lengthScale +# dQclengthScale = -magnSigma2*2000*sqrt(5)/3/lengthScale^6; +# +# % Derivative of Pinf w.r.t. parameter magnSigma2 +# dPinfmagnSigma2 = Pinf/magnSigma2; +# +# % Derivative of Pinf w.r.t. parameter lengthScale +# kappa2 = -2*kappa/lengthScale; +# dPinflengthScale = [0, 0, -kappa2; +# 0, kappa2, 0; +# -kappa2, 0, -100*magnSigma2/lengthScale^5]; +# +# % Stack all derivatives +# dF = zeros(3,3,2); +# dQc = zeros(1,1,2); +# dPinf = zeros(3,3,2); +# +# dF(:,:,1) = dFmagnSigma2; +# dF(:,:,2) = dFlengthScale; +# dQc(:,:,1) = dQcmagnSigma2; +# dQc(:,:,2) = dQclengthScale; +# dPinf(:,:,1) = dPinfmagnSigma2; +# dPinf(:,:,2) = dPinflengthScale; + +# % Derived constants +# lambda = sqrt(5)/lengthScale; +# +# % Feedback matrix +# F = [ 0, 1, 0; +# 0, 0, 1; +# -lambda^3, -3*lambda^2, -3*lambda]; +# +# % Noise effect matrix +# L = [0; 0; 1]; +# +# % Spectral density +# Qc = magnSigma2*400*sqrt(5)/3/lengthScale^5; +# +# % Observation model +# H = [1, 0, 0]; + + +# %% Stationary covariance +# +# % Calculate Pinf only if requested +# if nargout > 4, +# +# % Derived constant +# kappa = 5/3*magnSigma2/lengthScale^2; +# +# % Stationary covariance +# Pinf = [magnSigma2, 0, -kappa; +# 0, kappa, 0; +# -kappa, 0, 25*magnSigma2/lengthScale^4]; +# +# end + + return (F, L, Qc, H, Pinf, dF, dQc, dPinf) \ No newline at end of file diff --git a/GPy/kern/_src/sde_standard_periodic.py b/GPy/kern/_src/sde_standard_periodic.py index d72c22af..3829b816 100644 --- a/GPy/kern/_src/sde_standard_periodic.py +++ b/GPy/kern/_src/sde_standard_periodic.py @@ -6,6 +6,9 @@ Stochastic Differential Equation (SDE) functionality. from .standard_periodic import StdPeriodic import numpy as np +import scipy as sp + +from scipy import special as special class sde_StdPeriodic(StdPeriodic): """ @@ -21,20 +24,153 @@ class sde_StdPeriodic(StdPeriodic): \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] } """ - + def sde_update_gradient_full(self, gradients): + """ + Update gradient in the order in which parameters are represented in the + kernel + """ + + self.variance.gradient = gradients[0] + self.wavelengths.gradient = gradients[1] + self.lengthscales.gradient = gradients[2] + def sde(self): """ - Return the state space representation of the covariance. + Return the state space representation of the covariance. + + + ! Note: one must constrain lengthscale not to drop below 0.25. + After this bessel functions of the first kind grows to very high. + + ! Note: one must keep wevelength also not very low. Because then + the gradients wrt wavelength become ustable. + However this might depend on the data. For test example with + 300 data points the low limit is 0.15. """ - # Arno, insert your code here - - # Params to use: + # Params to use: (in that order) #self.variance #self.wavelengths #self.lengthscales + N = 7 # approximation order - # Arno, you could visualize the Latex version of the kernel formula - # and assume inputs are 1D, so no ARD is used. Then use parameters aboove. - #return (F, L, Qc, H, Pinf, dF, dQc, dPinf) \ No newline at end of file + w0 = 2*np.pi/self.wavelengths # frequency + + [q2,dq2l] = seriescoeff(N,2*self.lengthscales,self.variance) + # lengthscale is multiplied by 2 because of slightly different + # formula for periodic covariance function. + # For the same reason: + + dq2l = 2*dq2l + + if np.any( np.isnan(q2)): + raise ValueError("SDE periodic covariance error1") + + if np.any( np.isnan(dq2l)): + raise ValueError("SDE periodic covariance error1") + + F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) ) + L = np.eye(2*(N+1)) + Qc = np.zeros((2*(N+1), 2*(N+1))) + P_inf = np.kron(np.diag(q2),np.eye(2)) + H = np.kron(np.ones((1,N+1)),np.array((1,0)) ) + + + # Derivatives + dF = np.empty((F.shape[0], F.shape[1], 3)) + dQc = np.empty((Qc.shape[0], Qc.shape[1], 3)) + dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3)) + + # Derivatives wrt self.variance + dF[:,:,0] = np.zeros(F.shape) + dQc[:,:,0] = np.zeros(Qc.shape) + dP_inf[:,:,0] = P_inf / self.variance + + # Derivatives self.wavelengths + dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths ); + dQc[:,:,1] = np.zeros(Qc.shape) + dP_inf[:,:,1] = np.zeros(P_inf.shape) + + # Derivatives self.lengthscales + dF[:,:,2] = np.zeros(F.shape) + dQc[:,:,2] = np.zeros(Qc.shape) + dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2)) + + + return (F, L, Qc, H, P_inf, dF, dQc, dP_inf) + + + + +def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False): + """ + Calculate the coefficients q_j^2 for the covariance function + approximation: + + k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau) + + Reference is: + + [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic + covariance functions and state space models. In Proceedings of the + Seventeenth International Conference on Artifcial Intelligence and + Statistics (AISTATS 2014). JMLR: W&CP, volume 33. + + Note! Only the infinite approximation (through Bessel function) + is currently implemented. + + Input: + ---------------- + + m: int + Degree of approximation. Default 6. + lengthScale: float + Length scale parameter in the kerenl + magnSigma2:float + Multiplier in front of the kernel. + + + Output: + ----------------- + + coeffs: array(m+1) + Covariance series coefficients + + coeffs_dl: array(m+1) + Derivatives of the coefficients with respect to lengthscale. + + """ + + if true_covariance: + + bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\ + sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\ + np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64) + + M,J = np.meshgrid(range(0,m+1),range(0,m+1)) + + coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\ + (lengthScale**(-2))**J *magnSigma2 + + coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0) + + coeffs = np.sum(coeffs,0) + + else: + coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2)) + if np.any( np.isnan(coeffs)): + pass + coeffs[0] = 0.5*coeffs[0] + + # Derivatives wrt (lengthScale) + coeffs_dl = np.zeros(m+1) + coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\ + (-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) ) + + # The first element + coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\ + (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) ) + + + return coeffs, coeffs_dl diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py index 8f91d639..46e86e13 100644 --- a/GPy/kern/src/add.py +++ b/GPy/kern/src/add.py @@ -263,4 +263,82 @@ class Add(CombinationKernel): i_s[k._all_dims_active] += k.input_sensitivity(summarize) return i_s else: + return super(Add, self).input_sensitivity(summarize) + + def sde_update_gradient_full(self, gradients): + """ + Update gradient in the order in which parameters are represented in the + kernel + """ + part_start_param_index = 0 + for p in self.parts: + if not p.is_fixed: + part_param_num = len(p.param_array) # number of parameters in the part + p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)]) + part_start_param_index += part_param_num + + def sde(self): + """ + Support adding kernels for sde representation + """ + + import scipy.linalg as la + + F = None + L = None + Qc = None + H = None + Pinf = None + dF = None + dQc = None + dPinf = None + n = 0 + nq = 0 + nd = 0 + + # Assign models + for p in self.parts: + (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde() + F = la.block_diag(F,Ft) if (F is not None) else Ft + L = la.block_diag(L,Lt) if (L is not None) else Lt + Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct + H = np.hstack((H,Ht)) if (H is not None) else Ht + + Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft + + if dF is not None: + dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])), + 'constant', constant_values=0) + dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt + else: + dF = dFt + + if dQc is not None: + dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])), + 'constant', constant_values=0) + dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct + else: + dQc = dQct + + if dPinf is not None: + dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])), + 'constant', constant_values=0) + dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft + else: + dPinf = dPinft + + n += Ft.shape[0] + nq += Qct.shape[0] + nd += dFt.shape[2] + + assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed" + assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed" + assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed" + assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed" + assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed" + assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed" + assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed" + assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed" + + return (F,L,Qc,H,Pinf,dF,dQc,dPinf) diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index e690dfc4..f170ec1b 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -305,51 +305,6 @@ class Kern(Parameterized): def _check_active_dims(self, X): assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape) - def sde(self): - # TODO: should support adding kernels together - - #raise NameError('Problem') - - # Find out state dimensions - n = 0 - nq = 0 - nd = 0 - for p in self.parts: - (F,L,Qc,H,Pinf,dF,dQc,dPinf) = p.sde() - n += F.shape[0] - nq += Qc.shape[0] - nd += dF.shape[2] - - # Allocate space for the matrices - F = np.zeros((n,n)) - L = np.zeros((n,nq)) - Qc = np.zeros((nq,nq)) - H = np.zeros((1,n)) - Pinf = np.zeros((n,n)) - dF = np.zeros((n,n,nd)) - dQc = np.zeros((nq,nq,nd)) - dPinf = np.zeros((n,n,nd)) - n = 0 - nq = 0 - nd = 0 - - # Assign models - for p in self.parts: - (Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde() - F[n:n+Ft.shape[0],n:n+Ft.shape[1]] = Ft - L[n:n+Lt.shape[0],nq:nq+Lt.shape[1]] = Lt - Qc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1]] = Qct - H[0,n:n+Ht.shape[1]] = Ht - Pinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1]] = Pinft - dF[n:n+Ft.shape[0],n:n+Ft.shape[1],nd:nd+dFt.shape[2]] = dFt - dQc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1],nd:nd+dQct.shape[2]] = dQct - dPinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1],nd:nd+dPinft.shape[2]] = dPinft - n += Ft.shape[0] - nq += Qct.shape[0] - nd += dFt.shape[2] - - return (F,L,Qc,H,Pinf,dF,dQc,dPinf) - class CombinationKernel(Kern): """ Abstract super class for combination kernels. diff --git a/GPy/kern/src/prod.py b/GPy/kern/src/prod.py index 1e18b405..2b2588a0 100644 --- a/GPy/kern/src/prod.py +++ b/GPy/kern/src/prod.py @@ -105,3 +105,110 @@ class Prod(CombinationKernel): return i_s else: return super(Prod, self).input_sensitivity(summarize) + + def sde_update_gradient_full(self, gradients): + """ + Update gradient in the order in which parameters are represented in the + kernel + """ + part_start_param_index = 0 + for p in self.parts: + if not p.is_fixed: + part_param_num = len(p.param_array) # number of parameters in the part + p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)]) + part_start_param_index += part_param_num + + def sde(self): + """ + """ + F = np.array((0,), ndmin=2) + L = np.array((1,), ndmin=2) + Qc = np.array((1,), ndmin=2) + H = np.array((1,), ndmin=2) + Pinf = np.array((1,), ndmin=2) + dF = None + dQc = None + dPinf = None + + # Assign models + for p in self.parts: + (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde() + + # check derivative dimensions -> + number_of_parameters = len(p.param_array) + assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong" + assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong" + assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong" + # check derivative dimensions <- + + # exception for periodic kernel + if (p.name == 'std_periodic'): + Qct = P_inft + dQct = dP_inft + + dF = dkron(F,dF,Ft,dFt,'sum') + dQc = dkron(Qc,dQc,Qct,dQct,'prod') + dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod') + + F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft) + L = np.kron(L,Lt) + Qc = np.kron(Qc,Qct) + Pinf = np.kron(Pinf,P_inft) + H = np.kron(H,Ht) + + return (F,L,Qc,H,Pinf,dF,dQc,dPinf) + +def dkron(A,dA,B,dB, operation='prod'): + """ + Function computes the derivative of Kronecker product A*B + (or Kronecker sum A+B). + + Input: + ----------------------- + + A: 2D matrix + Some matrix + dA: 3D (or 2D matrix) + Derivarives of A + B: 2D matrix + Some matrix + dB: 3D (or 2D matrix) + Derivarives of B + + operation: str 'prod' or 'sum' + Which operation is considered. If the operation is 'sum' it is assumed + that A and are square matrices.s + + Output: + dC: 3D matrix + Derivative of Kronecker product A*B (or Kronecker sum A+B) + """ + + if dA is None: + dA_param_num = 0 + dA = np.zeros((A.shape[0], A.shape[1],1)) + else: + dA_param_num = dA.shape[2] + + if dB is None: + dB_param_num = 0 + dB = np.zeros((B.shape[0], B.shape[1],1)) + else: + dB_param_num = dB.shape[2] + + # Space allocation for derivative matrix + dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num)) + + for k in range(dA_param_num): + if operation == 'prod': + dC[:,:,k] = np.kron(dA[:,:,k],B); + else: + dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] )) + + for k in range(dB_param_num): + if operation == 'prod': + dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k]) + else: + dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k]) + + return dC diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py index b903ef34..6273423b 100644 --- a/GPy/models/state_space_main.py +++ b/GPy/models/state_space_main.py @@ -723,7 +723,7 @@ class DescreteStateSpace(object): v*v / S) log_likelihood_update = log_likelihood_update[0,0] # to make int if np.isnan(log_likelihood_update): - pass + raise ValueError("Errrrr 1") LL = None; islower = None else: LL,islower = linalg.cho_factor(S) @@ -806,7 +806,7 @@ class DescreteStateSpace(object): P_upd = K.dot(S).dot(K.T) P_upd = 0.5*(P_upd + P_upd.T) P_upd = P_pred - P_upd# this update matrix is symmetric - + return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update @staticmethod @@ -1278,7 +1278,7 @@ class ContDescrStateSpace(DescreteStateSpace): dQc = None dH = None dR = None - + # TODO: Defaults for m_init, P_init, dm_init, dP_init. !!! # Also for dH, dR and probably for all derivatives diff --git a/GPy/models/state_space_new.py b/GPy/models/state_space_new.py index 567ee434..d3d17e66 100644 --- a/GPy/models/state_space_new.py +++ b/GPy/models/state_space_new.py @@ -25,6 +25,7 @@ import pylab as pb from GPy.core.parameterization.param import Param import GPy +from .. import likelihoods import GPy.models.state_space_main as ssm #import state_space_main as ssm reload(ssm) @@ -45,17 +46,16 @@ class StateSpace(Model): self.Y = Y[sort_index] # Noise variance - self.sigma2 = Param('Gaussian_noise', sigma2) - self.link_parameter(self.sigma2) - + self.likelihood = likelihoods.Gaussian() + # Default kernel if kernel is None: self.kern = kern.Matern32(1) else: self.kern = kernel self.link_parameter(self.kern) - - self.sigma2.constrain_positive() + self.link_parameter(self.likelihood) + self.posterior = None # Assert that the kernel is supported if not hasattr(self.kern, 'sde'): @@ -98,7 +98,7 @@ class StateSpace(Model): grad_calc_params['dR'] = dR (filter_means, filter_covs, log_likelihood, - grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None, + grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None, P_init=None, calc_log_likelihood=True, calc_grad_log_likelihood=True, grad_params_no=grad_params_no, @@ -106,9 +106,10 @@ class StateSpace(Model): self._log_marginal_likelihood = log_likelihood #gradients = self.compute_gradients() - self.sigma2.gradient_full[:] = grad_log_likelihood[-1,0] - self.kern.gradient_full[:] = grad_log_likelihood[:-1,0] - + self.likelihood.update_gradients(grad_log_likelihood[-1,0]) + + self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0]) + def log_likelihood(self): return self._log_marginal_likelihood @@ -319,4 +320,4 @@ class StateSpace(Model): f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size)) # Return values - return f \ No newline at end of file + return f