diff --git a/GPy/kern/_src/sde_brownian.py b/GPy/kern/_src/sde_brownian.py deleted file mode 100644 index 55950143..00000000 --- a/GPy/kern/_src/sde_brownian.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Classes in this module enhance Brownian motion covariance function with the -Stochastic Differential Equation (SDE) functionality. -""" - -from .brownian import Brownian - -import numpy as np - -class sde_Brownian(Brownian): - """ - - Class provide extra functionality to transfer this covariance function into - SDE form. - - Linear kernel: - - .. math:: - - k(x,y) = \sigma^2 min(x,y) - - """ - - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression - - F = np.array( ((0,1.0),(0,0) )) - L = np.array( ((1.0,),(0,)) ) - Qc = np.array( ((variance,),) ) - H = np.array( ((1.0,0),) ) - - Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) ) - #P0 = Pinf.copy() - P0 = np.zeros((2,2)) - #Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance - dF = np.zeros((2,2,1)) - dQc = np.ones( (1,1,1) ) - - dPinf = np.zeros((2,2,1)) - dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) ) - #dP0 = dPinf.copy() - dP0 = np.zeros((2,2,1)) - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) diff --git a/GPy/kern/_src/sde_linear.py b/GPy/kern/_src/sde_linear.py deleted file mode 100644 index 031f0f5f..00000000 --- a/GPy/kern/_src/sde_linear.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Classes in this module enhance Linear covariance function with the -Stochastic Differential Equation (SDE) functionality. -""" -from .linear import Linear - -import numpy as np - -class sde_Linear(Linear): - """ - - Class provide extra functionality to transfer this covariance function into - SDE form. - - Linear kernel: - - .. math:: - - k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i - - """ - def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'): - """ - Modify the init method, because one extra parameter is required. X - points - on the X axis. - """ - - super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name) - - self.t0 = np.min(X) - - - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variances.gradient = gradients[0] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression - t0 = float(self.t0) - - F = np.array( ((0,1.0),(0,0) )) - L = np.array( ((0,),(1.0,)) ) - Qc = np.zeros((1,1)) - H = np.array( ((1.0,0),) ) - - Pinf = np.zeros((2,2)) - P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance - dF = np.zeros((2,2,1)) - dQc = np.zeros( (1,1,1) ) - - dPinf = np.zeros((2,2,1)) - dP0 = np.zeros((2,2,1)) - dP0[:,:,0] = P0 / variance - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) diff --git a/GPy/kern/_src/sde_matern.py b/GPy/kern/_src/sde_matern.py deleted file mode 100644 index 0ce1cf98..00000000 --- a/GPy/kern/_src/sde_matern.py +++ /dev/null @@ -1,135 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Classes in this module enhance Matern covariance functions with the -Stochastic Differential Equation (SDE) functionality. -""" -from .stationary import Matern32 -from .stationary import Matern52 -import numpy as np - -class sde_Matern32(Matern32): - """ - - Class provide extra functionality to transfer this covariance function into - SDE forrm. - - Matern 3/2 kernel: - - .. math:: - - k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - self.lengthscale.gradient = gradients[1] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - variance = float(self.variance.values) - lengthscale = float(self.lengthscale.values) - - foo = np.sqrt(3.)/lengthscale - F = np.array(((0, 1.0), (-foo**2, -2*foo))) - L = np.array(( (0,), (1.0,) )) - Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) - H = np.array(((1.0, 0),)) - Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2)))) - P0 = Pinf.copy() - - # Allocate space for the derivatives - dF = np.empty([F.shape[0],F.shape[1],2]) - dQc = np.empty([Qc.shape[0],Qc.shape[1],2]) - dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) - # The partial derivatives - dFvariance = np.zeros((2,2)) - dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) - dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3)) - dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) - dPinfvariance = np.array(((1,0),(0,3./lengthscale**2))) - dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) - # Combine the derivatives - dF[:,:,0] = dFvariance - dF[:,:,1] = dFlengthscale - dQc[:,:,0] = dQcvariance - dQc[:,:,1] = dQclengthscale - dPinf[:,:,0] = dPinfvariance - dPinf[:,:,1] = dPinflengthscale - dP0 = dPinf.copy() - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) - -class sde_Matern52(Matern52): - """ - - Class provide extra functionality to transfer this covariance function into - SDE forrm. - - Matern 5/2 kernel: - - .. math:: - - k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - self.lengthscale.gradient = gradients[1] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - variance = float(self.variance.values) - lengthscale = float(self.lengthscale.values) - - lamda = np.sqrt(5.0)/lengthscale - kappa = 5.0/3.0*variance/lengthscale**2 - - F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda))) - L = np.array(((0,),(0,),(1,))) - Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),)) - H = np.array(((1,0,0),)) - - Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4))) - P0 = Pinf.copy() - # Allocate space for the derivatives - dF = np.empty((3,3,2)) - dQc = np.empty((1,1,2)) - dPinf = np.empty((3,3,2)) - - # The partial derivatives - dFvariance = np.zeros((3,3)) - dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, - 30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2))) - dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),))) - dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),))) - - dPinf_variance = Pinf/variance - kappa2 = -2.0*kappa/lengthscale - dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, - 0,-100*variance/lengthscale**5))) - # Combine the derivatives - dF[:,:,0] = dFvariance - dF[:,:,1] = dFlengthscale - dQc[:,:,0] = dQcvariance - dQc[:,:,1] = dQclengthscale - dPinf[:,:,0] = dPinf_variance - dPinf[:,:,1] = dPinf_lengthscale - dP0 = dPinf.copy() - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) \ No newline at end of file diff --git a/GPy/kern/_src/sde_standard_periodic.py b/GPy/kern/_src/sde_standard_periodic.py deleted file mode 100644 index c3df7d92..00000000 --- a/GPy/kern/_src/sde_standard_periodic.py +++ /dev/null @@ -1,178 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Classes in this module enhance Matern covariance functions with the -Stochastic Differential Equation (SDE) functionality. -""" -from .standard_periodic import StdPeriodic - -import numpy as np -import scipy as sp - -from scipy import special as special - -class sde_StdPeriodic(StdPeriodic): - """ - - Class provide extra functionality to transfer this covariance function into - SDE form. - - Standard Periodic kernel: - - .. math:: - - k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim} - \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] } - - """ - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - self.wavelengths.gradient = gradients[1] - self.lengthscales.gradient = gradients[2] - - def sde(self): - """ - Return the state space representation of the covariance. - - - ! Note: one must constrain lengthscale not to drop below 0.25. - After this bessel functions of the first kind grows to very high. - - ! Note: one must keep wevelength also not very low. Because then - the gradients wrt wavelength become ustable. - However this might depend on the data. For test example with - 300 data points the low limit is 0.15. - """ - - # Params to use: (in that order) - #self.variance - #self.wavelengths - #self.lengthscales - N = 7 # approximation order - - - w0 = 2*np.pi/self.wavelengths # frequency - lengthscales = 2*self.lengthscales - - [q2,dq2l] = seriescoeff(N,lengthscales,self.variance) - # lengthscale is multiplied by 2 because of slightly different - # formula for periodic covariance function. - # For the same reason: - - dq2l = 2*dq2l - - if np.any( np.isfinite(q2) == False): - raise ValueError("SDE periodic covariance error 1") - - if np.any( np.isfinite(dq2l) == False): - raise ValueError("SDE periodic covariance error 2") - - F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) ) - L = np.eye(2*(N+1)) - Qc = np.zeros((2*(N+1), 2*(N+1))) - P_inf = np.kron(np.diag(q2),np.eye(2)) - H = np.kron(np.ones((1,N+1)),np.array((1,0)) ) - P0 = P_inf.copy() - - # Derivatives - dF = np.empty((F.shape[0], F.shape[1], 3)) - dQc = np.empty((Qc.shape[0], Qc.shape[1], 3)) - dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3)) - - # Derivatives wrt self.variance - dF[:,:,0] = np.zeros(F.shape) - dQc[:,:,0] = np.zeros(Qc.shape) - dP_inf[:,:,0] = P_inf / self.variance - - # Derivatives self.wavelengths - dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths ); - dQc[:,:,1] = np.zeros(Qc.shape) - dP_inf[:,:,1] = np.zeros(P_inf.shape) - - # Derivatives self.lengthscales - dF[:,:,2] = np.zeros(F.shape) - dQc[:,:,2] = np.zeros(Qc.shape) - dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2)) - dP0 = dP_inf.copy() - - return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0) - - - - -def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False): - """ - Calculate the coefficients q_j^2 for the covariance function - approximation: - - k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau) - - Reference is: - - [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic - covariance functions and state space models. In Proceedings of the - Seventeenth International Conference on Artifcial Intelligence and - Statistics (AISTATS 2014). JMLR: W&CP, volume 33. - - Note! Only the infinite approximation (through Bessel function) - is currently implemented. - - Input: - ---------------- - - m: int - Degree of approximation. Default 6. - lengthScale: float - Length scale parameter in the kerenl - magnSigma2:float - Multiplier in front of the kernel. - - - Output: - ----------------- - - coeffs: array(m+1) - Covariance series coefficients - - coeffs_dl: array(m+1) - Derivatives of the coefficients with respect to lengthscale. - - """ - - if true_covariance: - - bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\ - sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\ - np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64) - - M,J = np.meshgrid(range(0,m+1),range(0,m+1)) - - coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\ - (lengthScale**(-2))**J *magnSigma2 - - coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0) - - coeffs = np.sum(coeffs,0) - - else: - coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2)) - if np.any( np.isfinite(coeffs) == False): - raise ValueError("sde_standard_periodic: Coefficients are not finite!") - #import pdb; pdb.set_trace() - coeffs[0] = 0.5*coeffs[0] - - # Derivatives wrt (lengthScale) - coeffs_dl = np.zeros(m+1) - coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\ - (-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) ) - - # The first element - coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\ - (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) ) - - - return coeffs, coeffs_dl diff --git a/GPy/kern/_src/sde_static.py b/GPy/kern/_src/sde_static.py deleted file mode 100644 index ae8ed194..00000000 --- a/GPy/kern/_src/sde_static.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Classes in this module enhance Static covariance functions with the -Stochastic Differential Equation (SDE) functionality. -""" -from .static import White -from .static import Bias - -import numpy as np - -class sde_White(White): - """ - - Class provide extra functionality to transfer this covariance function into - SDE forrm. - - White kernel: - - .. math:: - - k(x,y) = \alpha*\delta(x-y) - - """ - - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - variance = float(self.variance.values) - - F = np.array( ((-np.inf,),) ) - L = np.array( ((1.0,),) ) - Qc = np.array( ((variance,),) ) - H = np.array( ((1.0,),) ) - - Pinf = np.array( ((variance,),) ) - P0 = Pinf.copy() - - dF = np.zeros((1,1,1)) - dQc = np.zeros((1,1,1)) - dQc[:,:,0] = np.array( ((1.0,),) ) - - dPinf = np.zeros((1,1,1)) - dPinf[:,:,0] = np.array( ((1.0,),) ) - dP0 = dPinf.copy() - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) - - -class sde_Bias(Bias): - """ - - Class provide extra functionality to transfer this covariance function into - SDE forrm. - - Bias kernel: - - .. math:: - - k(x,y) = \alpha - - """ - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - variance = float(self.variance.values) - - F = np.array( ((0.0,),)) - L = np.array( ((1.0,),)) - Qc = np.zeros((1,1)) - H = np.array( ((1.0,),)) - - Pinf = np.zeros((1,1)) - P0 = np.array( ((variance,),) ) - - dF = np.zeros((1,1,1)) - dQc = np.zeros((1,1,1)) - - dPinf = np.zeros((1,1,1)) - dP0 = np.zeros((1,1,1)) - dP0[:,:,0] = np.array( ((1.0,),) ) - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) \ No newline at end of file diff --git a/GPy/kern/_src/sde_stationary.py b/GPy/kern/_src/sde_stationary.py deleted file mode 100644 index aeb77010..00000000 --- a/GPy/kern/_src/sde_stationary.py +++ /dev/null @@ -1,194 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Classes in this module enhance several stationary covariance functions with the -Stochastic Differential Equation (SDE) functionality. -""" -from .rbf import RBF -from .stationary import Exponential -from .stationary import RatQuad - -import numpy as np -import scipy as sp -try: - from scipy.linalg import solve_continuous_lyapunov as lyap -except ImportError: - from scipy.linalg import solve_lyapunov as lyap - -class sde_RBF(RBF): - """ - - Class provide extra functionality to transfer this covariance function into - SDE form. - - Radial Basis Function kernel: - - .. math:: - - k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - self.lengthscale.gradient = gradients[1] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - N = 10# approximation order ( number of terms in exponent series expansion) - roots_rounding_decimals = 6 - - fn = np.math.factorial(N) - - kappa = 1.0/2.0/self.lengthscale**2 - - Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) - - pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower - - for n in range(0, N+1): # (2N+1) - number of polynomial coefficients - pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n - - pp = sp.poly1d(pp) - roots = sp.roots(pp) - - neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0] - aa = sp.poly1d(neg_real_part_roots, r=True).coeffs - - F = np.diag(np.ones((N-1,)),1) - F[-1,:] = -aa[-1:0:-1] - - L= np.zeros((N,1)) - L[N-1,0] = 1 - - H = np.zeros((1,N)) - H[0,0] = 1 - - # Infinite covariance: - Pinf = lyap(F, -np.dot(L,np.dot( Qc[0,0],L.T))) - Pinf = 0.5*(Pinf + Pinf.T) - # Allocating space for derivatives - dF = np.empty([F.shape[0],F.shape[1],2]) - dQc = np.empty([Qc.shape[0],Qc.shape[1],2]) - dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) - - # Derivatives: - dFvariance = np.zeros(F.shape) - dFlengthscale = np.zeros(F.shape) - dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1) - - dQcvariance = Qc/self.variance - dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),))) - - dPinf_variance = Pinf/self.variance - - lp = Pinf.shape[0] - coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2 - coeff[np.mod(coeff,2) != 0] = 0 - dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff - - dF[:,:,0] = dFvariance - dF[:,:,1] = dFlengthscale - dQc[:,:,0] = dQcvariance - dQc[:,:,1] = dQclengthscale - dPinf[:,:,0] = dPinf_variance - dPinf[:,:,1] = dPinf_lengthscale - - P0 = Pinf.copy() - dP0 = dPinf.copy() - - # Benefits of this are not very sound. Helps only in one case: - # SVD Kalman + RBF kernel - import GPy.models.state_space_main as ssm - (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 ) - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) - -class sde_Exponential(Exponential): - """ - - Class provide extra functionality to transfer this covariance function into - SDE form. - - Exponential kernel: - - .. math:: - - k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - - def sde_update_gradient_full(self, gradients): - """ - Update gradient in the order in which parameters are represented in the - kernel - """ - - self.variance.gradient = gradients[0] - self.lengthscale.gradient = gradients[1] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - variance = float(self.variance.values) - lengthscale = float(self.lengthscale) - - F = np.array(((-1.0/lengthscale,),)) - L = np.array(((1.0,),)) - Qc = np.array( ((2.0*variance/lengthscale,),) ) - H = np.array(((1.0,),)) - Pinf = np.array(((variance,),)) - P0 = Pinf.copy() - - dF = np.zeros((1,1,2)); - dQc = np.zeros((1,1,2)); - dPinf = np.zeros((1,1,2)); - - dF[:,:,0] = 0.0 - dF[:,:,1] = 1.0/lengthscale**2 - - dQc[:,:,0] = 2.0/lengthscale - dQc[:,:,1] = -2.0*variance/lengthscale**2 - - dPinf[:,:,0] = 1.0 - dPinf[:,:,1] = 0.0 - - dP0 = dPinf.copy() - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) - -class sde_RatQuad(RatQuad): - """ - - Class provide extra functionality to transfer this covariance function into - SDE form. - - Rational Quadratic kernel: - - .. math:: - - k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } - - """ - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - assert False, 'Not Implemented' - - # Params to use: - - # self.lengthscale - # self.variance - #self.power - - #return (F, L, Qc, H, Pinf, dF, dQc, dPinf) diff --git a/GPy/kern/src/sde_standard_periodic.py b/GPy/kern/src/sde_standard_periodic.py index 3729bf57..be32f7b2 100644 --- a/GPy/kern/src/sde_standard_periodic.py +++ b/GPy/kern/src/sde_standard_periodic.py @@ -9,6 +9,7 @@ from .standard_periodic import StdPeriodic import numpy as np import scipy as sp +import warnings from scipy import special as special @@ -26,6 +27,38 @@ class sde_StdPeriodic(StdPeriodic): \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] } """ + # TODO: write comment to the constructor arguments + def __init__(self, *args, **kwargs): + """ + Init constructior. + + Two optinal extra parameters are added in addition to the ones in + StdPeriodic kernel. + + :param approx_order: approximation order for the RBF covariance. (Default 7) + :type approx_order: int + + :param balance: Whether to balance this kernel separately. (Defaulf False). Model has a separate parameter for balancing. + :type balance: bool + """ + + #import pdb; pdb.set_trace() + + if 'approx_order' in kwargs: + self.approx_order = kwargs.get('approx_order') + del kwargs['approx_order'] + else: + self.approx_order = 7 + + + if 'balance' in kwargs: + self.balance = bool( kwargs.get('balance') ) + del kwargs['balance'] + else: + self.balance = False + + super(sde_StdPeriodic, self).__init__(*args, **kwargs) + def sde_update_gradient_full(self, gradients): """ Update gradient in the order in which parameters are represented in the @@ -38,41 +71,48 @@ class sde_StdPeriodic(StdPeriodic): def sde(self): """ - Return the state space representation of the covariance. + Return the state space representation of the standard periodic covariance. - ! Note: one must constrain lengthscale not to drop below 0.25. - After this bessel functions of the first kind grows to very high. + ! Note: one must constrain lengthscale not to drop below 0.2. (independently of approximation order) + After this Bessel functions of the first becomes NaN. Rescaling + time variable might help. - ! Note: one must keep wevelength also not very low. Because then + ! Note: one must keep period also not very low. Because then the gradients wrt wavelength become ustable. However this might depend on the data. For test example with - 300 data points the low limit is 0.15. + 300 data points the low limit is 0.15. """ + #import pdb; pdb.set_trace() # Params to use: (in that order) #self.variance #self.period #self.lengthscale - N = 7 # approximation order + if self.approx_order is not None: + N = int(self.approx_order) + else: + N = 7 # approximation order + p_period = float(self.period) + p_lengthscale = 2*float(self.lengthscale) + p_variance = float(self.variance) - w0 = 2*np.pi/self.period # frequency - lengthscale = 2*self.lengthscale + w0 = 2*np.pi/p_period # frequency + # lengthscale is multiplied by 2 because of different definition of lengthscale - [q2,dq2l] = seriescoeff(N,lengthscale,self.variance) - # lengthscale is multiplied by 2 because of slightly different - # formula for periodic covariance function. - # For the same reason: + [q2,dq2l] = seriescoeff(N, p_lengthscale, p_variance) - dq2l = 2*dq2l - - if np.any( np.isfinite(q2) == False): - raise ValueError("SDE periodic covariance error 1") - - if np.any( np.isfinite(dq2l) == False): - raise ValueError("SDE periodic covariance error 2") + dq2l = 2*dq2l # This is because the lengthscale if multiplied by 2. + eps = 1e-12 + if np.any( np.isfinite(q2) == False) or np.any( np.abs(q2) > 1.0/eps) or np.any( np.abs(q2) < eps): + warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in q2 :".format(eps) + q2.__format__("") ) + + if np.any( np.isfinite(dq2l) == False) or np.any( np.abs(dq2l) > 1.0/eps) or np.any( np.abs(dq2l) < eps): + warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in dq2l :".format(eps) + q2.__format__("") ) + + F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) ) L = np.eye(2*(N+1)) Qc = np.zeros((2*(N+1), 2*(N+1))) @@ -88,10 +128,10 @@ class sde_StdPeriodic(StdPeriodic): # Derivatives wrt self.variance dF[:,:,0] = np.zeros(F.shape) dQc[:,:,0] = np.zeros(Qc.shape) - dP_inf[:,:,0] = P_inf / self.variance + dP_inf[:,:,0] = P_inf / p_variance # Derivatives self.period - dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period ); + dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / p_period ); dQc[:,:,1] = np.zeros(Qc.shape) dP_inf[:,:,1] = np.zeros(P_inf.shape) @@ -100,7 +140,12 @@ class sde_StdPeriodic(StdPeriodic): dQc[:,:,2] = np.zeros(Qc.shape) dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2)) dP0 = dP_inf.copy() - + + if self.balance: + # Benefits of this are not very sound. + import GPy.models.state_space_main as ssm + (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf,dP0) = ssm.balance_ss_model(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0 ) + return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0) @@ -164,9 +209,9 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False): coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2)) if np.any( np.isfinite(coeffs) == False): raise ValueError("sde_standard_periodic: Coefficients are not finite!") - #import pdb; pdb.set_trace() + #import pdb; pdb.set_trace() coeffs[0] = 0.5*coeffs[0] - + #print(coeffs) # Derivatives wrt (lengthScale) coeffs_dl = np.zeros(m+1) coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\ @@ -177,4 +222,4 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False): (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) ) - return coeffs, coeffs_dl + return coeffs.squeeze(), coeffs_dl.squeeze() diff --git a/GPy/kern/src/sde_stationary.py b/GPy/kern/src/sde_stationary.py index ae3dd89c..9802905c 100644 --- a/GPy/kern/src/sde_stationary.py +++ b/GPy/kern/src/sde_stationary.py @@ -15,6 +15,7 @@ try: from scipy.linalg import solve_continuous_lyapunov as lyap except ImportError: from scipy.linalg import solve_lyapunov as lyap +import warnings class sde_RBF(RBF): """ @@ -29,6 +30,37 @@ class sde_RBF(RBF): k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } """ + def __init__(self, *args, **kwargs): + """ + Init constructior. + + Two optinal extra parameters are added in addition to the ones in + RBF kernel. + + :param approx_order: approximation order for the RBF covariance. (Default 10) + :type approx_order: int + + :param balance: Whether to balance this kernel separately. (Defaulf True). Model has a separate parameter for balancing. + :type balance: bool + """ + + if 'balance' in kwargs: + self.balance = bool( kwargs.get('balance') ) + del kwargs['balance'] + else: + self.balance = True + + + if 'approx_order' in kwargs: + self.approx_order = kwargs.get('approx_order') + del kwargs['approx_order'] + else: + self.approx_order = 6 + + + + super(sde_RBF, self).__init__(*args, **kwargs) + def sde_update_gradient_full(self, gradients): """ Update gradient in the order in which parameters are represented in the @@ -41,23 +73,43 @@ class sde_RBF(RBF): def sde(self): """ Return the state space representation of the covariance. + + Note! For Sparse GP inference too small or two high values of lengthscale + lead to instabilities. This is because Qc are too high or too low + and P_inf are not full rank. This effect depends on approximatio order. + For N = 10. lengthscale must be in (0.8,8). For other N tests must be conducted. + N=6: (0.06,31) + Variance should be within reasonable bounds as well, but its dependence is linear. + + The above facts do not take into accout regularization. """ - - N = 10# approximation order ( number of terms in exponent series expansion) + #import pdb; pdb.set_trace() + if self.approx_order is not None: + N = self.approx_order + else: + N = 10# approximation order ( number of terms in exponent series expansion) + roots_rounding_decimals = 6 fn = np.math.factorial(N) - kappa = 1.0/2.0/self.lengthscale**2 + p_lengthscale = float( self.lengthscale ) + p_variance = float(self.variance) + kappa = 1.0/2.0/p_lengthscale**2 - Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) + Qc = np.array( ((p_variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) ) + + eps = 1e-12 + if (float(Qc) > 1.0/eps) or (float(Qc) < eps): + warnings.warn("""sde_RBF kernel: the noise variance Qc is either very large or very small. + It influece conditioning of P_inf: {0:e}""".format(float(Qc)) ) - pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower + pp1 = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower for n in range(0, N+1): # (2N+1) - number of polynomial coefficients - pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n - - pp = sp.poly1d(pp) + pp1[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n + + pp = sp.poly1d(pp1) roots = sp.roots(pp) neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0] @@ -83,17 +135,17 @@ class sde_RBF(RBF): # Derivatives: dFvariance = np.zeros(F.shape) dFlengthscale = np.zeros(F.shape) - dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1) + dFlengthscale[-1,:] = -aa[-1:0:-1]/p_lengthscale * np.arange(-N,0,1) - dQcvariance = Qc/self.variance - dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),))) - - dPinf_variance = Pinf/self.variance + dQcvariance = Qc/p_variance + dQclengthscale = np.array(( (p_variance*np.sqrt(2*np.pi)*fn*2**N*p_lengthscale**(-2*N)*(1-2*N),),)) + + dPinf_variance = Pinf/p_variance lp = Pinf.shape[0] coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2 coeff[np.mod(coeff,2) != 0] = 0 - dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff + dPinf_lengthscale = -1/p_lengthscale*Pinf*coeff dF[:,:,0] = dFvariance dF[:,:,1] = dFlengthscale @@ -105,10 +157,11 @@ class sde_RBF(RBF): P0 = Pinf.copy() dP0 = dPinf.copy() - # Benefits of this are not very sound. Helps only in one case: - # SVD Kalman + RBF kernel - import GPy.models.state_space_main as ssm - (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 ) + if self.balance: + # Benefits of this are not very sound. Helps only in one case: + # SVD Kalman + RBF kernel + import GPy.models.state_space_main as ssm + (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 ) return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) diff --git a/GPy/models/state_space_cython.pyx b/GPy/models/state_space_cython.pyx index ae09d1cd..2626b9e7 100644 --- a/GPy/models/state_space_cython.pyx +++ b/GPy/models/state_space_cython.pyx @@ -432,6 +432,8 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython): (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) self.Q_svd_dict = {} + self.Q_square_root_dict = {} + self.Q_inverse_dict = {} self.last_k = 0 # !!!Print statistics! Which object is created # !!!Print statistics! Print sizes of matrices @@ -477,19 +479,54 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython): cdef np.ndarray[DTYPE_t, ndim=2] U cdef np.ndarray[DTYPE_t, ndim=1] S cdef np.ndarray[DTYPE_t, ndim=2] Vh + - if matrix_index in self.Q_svd_dict: - square_root = self.Q_svd_dict[matrix_index] + if matrix_index in self.Q_square_root_dict: + square_root = self.Q_square_root_dict[matrix_index] else: - U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], + if matrix_index not in self.Q_svd_dict + U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], full_matrices=False, compute_uv=True, - overwrite_a=False, check_finite=False) - + overwrite_a=False, check_finite=False) + self.Q_svd_dict[matrix_index] = (U,S,Vh) + else: + U,S,Vh = self.Q_svd_dict[matrix_index] + square_root = U * np.sqrt(S) - self.Q_svd_dict[matrix_index] = square_root + self.Q_suqare_root_dict[matrix_index] = square_root return square_root - + + + cpdef Q_inverse(self, int k, float jitter=0.0): + """ + Square root of the noise matrix Q + """ + + cdef int matrix_index = self.reconstruct_indices[k] + cdef np.ndarray[DTYPE_t, ndim=2] square_root + + cdef np.ndarray[DTYPE_t, ndim=2] U + cdef np.ndarray[DTYPE_t, ndim=1] S + cdef np.ndarray[DTYPE_t, ndim=2] Vh + + + if matrix_index in self.Q_inverse_dict: + Q_inverse = self.Q_inverse_dict[matrix_index] + else: + if matrix_index not in self.Q_svd_dict + U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], + full_matrices=False, compute_uv=True, + overwrite_a=False, check_finite=False) + self.Q_svd_dict[matrix_index] = (U,S,Vh) + else: + U,S,Vh = self.Q_svd_dict[matrix_index] + + Q_inverse = Q_inverse = np.dot( Vh.T * ( 1.0/(S + jitter)) , U.T ) + self.Q_inverse_dict[matrix_index] = Q_inverse + + return Q_inverse + # def return_last(self): # """ # Function returns last available matrices. diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py index 65763a05..6ed2fbeb 100644 --- a/GPy/models/state_space_main.py +++ b/GPy/models/state_space_main.py @@ -12,6 +12,8 @@ import numpy as np import scipy as sp import scipy.linalg as linalg +import warnings + try: from . import state_space_setup setup_available = True @@ -41,6 +43,10 @@ if print_verbose: else: print("state_space: cython is NOT used") +# When debugging external module can set some value to this variable (e.g.) +# 'model' and in this module this variable can be seen.s +tmp_buffer = None + class Dynamic_Callables_Python(object): @@ -227,7 +233,7 @@ class R_handling_Python(Measurement_Callables_Class): self.R_square_root = {} def Rk(self, k): - return self.R[:, :, self.index[self.R_time_var_index, k]] + return self.R[:, :, int(self.index[self.R_time_var_index, k])] def dRk(self, k): if self.dR is None: @@ -305,7 +311,7 @@ class Std_Measurement_Callables_Python(R_handling_Class): P: parameter for Jacobian, usually covariance matrix. """ - return self.H[:, :, self.index[self.H_time_var_index, k]] + return self.H[:, :, int(self.index[self.H_time_var_index, k])] def dHk(self, k): if self.dH is None: @@ -2303,6 +2309,8 @@ class ContDescrStateSpace(DescreteStateSpace): self.v_dQk = None self.square_root_computed = False + self.Q_inverse_computed = False + self.Q_svd_computed = False # !!!Print statistics! Which object is created def f_a(self, k,m,A): @@ -2337,7 +2345,10 @@ class ContDescrStateSpace(DescreteStateSpace): self.v_Qk = v_Qk self.v_dAk = v_dAk self.v_dQk = v_dQk + self.Q_square_root_computed = False + self.Q_inverse_computed = False + self.Q_svd_computed = False else: v_Ak = self.v_Ak v_Qk = self.v_Qk @@ -2359,8 +2370,11 @@ class ContDescrStateSpace(DescreteStateSpace): self.last_k = 0 self.last_k_computed = False self.compute_derivatives = compute_derivatives + self.Q_square_root_computed = False - + self.Q_inverse_computed = False + self.Q_svd_computed = False + self.Q_eigen_computed = False return self def Ak(self,k,m,P): @@ -2381,12 +2395,19 @@ class ContDescrStateSpace(DescreteStateSpace): def Q_srk(self,k): """ + Check square root, maybe rewriting for Spectral decomposition is needed. Square root of the noise matrix Q """ if ((self.last_k == k) and (self.last_k_computed == True)): if not self.Q_square_root_computed: - (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) + if not self.Q_svd_computed: + (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) + self.Q_svd = (U, S, Vh) + self.Q_svd_computed = True + else: + (U, S, Vh) = self.Q_svd + square_root = U * np.sqrt(S) self.square_root_computed = True self.Q_square_root = square_root @@ -2396,7 +2417,56 @@ class ContDescrStateSpace(DescreteStateSpace): raise ValueError("Square root of Q can not be computed") return square_root + + def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): + """ + Function inverts Q matrix and regularizes the inverse. + Regularization is useful when original matrix is badly conditioned. + Function is currently used only in SparseGP code. + + Inputs: + ------------------------------ + k: int + Iteration number. + + p_largest_cond_num: float + Largest condition value for the inverted matrix. If cond. number is smaller than that + no regularization happen. + + regularization_type: 1 or 2 + Regularization type. + + regularization_type: int (1 or 2) + + type 1: 1/(S[k] + regularizer) regularizer is computed + type 2: S[k]/(S^2[k] + regularizer) regularizer is computed + """ + + #import pdb; pdb.set_trace() + + if ((self.last_k == k) and (self.last_k_computed == True)): + if not self.Q_inverse_computed: + if not self.Q_svd_computed: + (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) + self.Q_svd = (U, S, Vh) + self.Q_svd_computed = True + else: + (U, S, Vh) = self.Q_svd + Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type) + + self.Q_inverse_computed = True + self.Q_inverse_r = Q_inverse_r + + else: + Q_inverse_r = self.Q_inverse_r + else: + raise ValueError("""Inverse of Q can not be computed, because Q has not been computed. + This requires some programming""") + + return Q_inverse_r + + def return_last(self): """ Function returns last computed matrices. @@ -2463,6 +2533,9 @@ class ContDescrStateSpace(DescreteStateSpace): (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) self.Q_svd_dict = {} + self.Q_square_root_dict = {} + self.Q_inverse_dict = {} + self.last_k = None # !!!Print statistics! Which object is created # !!!Print statistics! Print sizes of matrices @@ -2503,17 +2576,66 @@ class ContDescrStateSpace(DescreteStateSpace): Square root of the noise matrix Q """ matrix_index = self.reconstruct_indices[k] - if matrix_index in self.Q_svd_dict: - square_root = self.Q_svd_dict[matrix_index] + if matrix_index in self.Q_square_root_dict: + square_root = self.Q_square_root_dict[matrix_index] else: - (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], + if matrix_index in self.Q_svd_dict: + (U, S, Vh) = self.Q_svd_dict[matrix_index] + else: + (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) + self.Q_svd_dict[matrix_index] = (U,S,Vh) + square_root = U * np.sqrt(S) - self.Q_svd_dict[matrix_index] = square_root + self.Q_square_root_dict[matrix_index] = square_root return square_root + + def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): + """ + Function inverts Q matrix and regularizes the inverse. + Regularization is useful when original matrix is badly conditioned. + Function is currently used only in SparseGP code. + + Inputs: + ------------------------------ + k: int + Iteration number. + + p_largest_cond_num: float + Largest condition value for the inverted matrix. If cond. number is smaller than that + no regularization happen. + + regularization_type: 1 or 2 + Regularization type. + + regularization_type: int (1 or 2) + + type 1: 1/(S[k] + regularizer) regularizer is computed + type 2: S[k]/(S^2[k] + regularizer) regularizer is computed + """ + #import pdb; pdb.set_trace() + + matrix_index = self.reconstruct_indices[k] + if matrix_index in self.Q_inverse_dict: + Q_inverse_r = self.Q_inverse_dict[matrix_index] + else: + + if matrix_index in self.Q_svd_dict: + (U, S, Vh) = self.Q_svd_dict[matrix_index] + else: + (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], + full_matrices=False, compute_uv=True, + overwrite_a=False, check_finite=False) + self.Q_svd_dict[matrix_index] = (U,S,Vh) + + Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type) + self.Q_inverse_dict[matrix_index] = Q_inverse_r + return Q_inverse_r + + def return_last(self): """ Function returns last available matrices. @@ -3073,7 +3195,8 @@ class ContDescrStateSpace(DescreteStateSpace): @classmethod def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False, grad_params_no=None, - P_inf=None, dP_inf=None, dF = None, dQc=None): + P_inf=None, dP_inf=None, dF = None, dQc=None, + dt0=None): """ Function return the object which is used in Kalman filter and/or smoother to obtain matrices A, Q and their derivatives for discrete model @@ -3110,7 +3233,14 @@ class ContDescrStateSpace(DescreteStateSpace): threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time dt = np.empty((X.shape[0],)) dt[1:] = np.diff(X[:,0],axis=0) - dt[0] = 0#dt[1] + if dt0 is None: + dt[0] = 0#dt[1] + else: + if isinstance(dt0,str): + dt = dt[1:] + else: + dt[0] = dt0 + unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals)) number_unique_indices = len(unique_indices) @@ -3161,7 +3291,10 @@ class ContDescrStateSpace(DescreteStateSpace): x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) - + TODO: this function can be redone to "preprocess dataset", when + close time points are handeled properly (with rounding parameter) and + values are averaged accordingly. + Input: -------------- F,L: LTI SDE matrices of corresponding dimensions @@ -3222,11 +3355,9 @@ class ContDescrStateSpace(DescreteStateSpace): n = F.shape[0] if not isinstance(dt, collections.Iterable): # not iterable, scalar - + #import pdb; pdb.set_trace() # The dynamical model A = matrix_exponent(F*dt) - if np.any( np.isnan(A)): - A = linalg.expm3(F*dt) # The covariance matrix Q by matrix fraction decomposition -> Phi = np.zeros((2*n,2*n)) @@ -3265,15 +3396,17 @@ class ContDescrStateSpace(DescreteStateSpace): # The discrete-time dynamical model* if p==0: A = AA[:n,:n,p] - Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) - Q_noise = Q_noise_2 + Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T) + Q_noise = Q_noise_3 #PP = A.dot(P).dot(A.T) + Q_noise_2 # The derivatives of A and Q dA[:,:,p] = AA[n:,:n,p] - dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \ - - A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times - + tmp = dA[:,:,p].dot(P_inf).dot(A.T) + dQ[:,:,p] = dP_inf[:,:,p] - tmp \ + - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T + + dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize else: dA = None dQ = None @@ -3282,7 +3415,7 @@ class ContDescrStateSpace(DescreteStateSpace): #Q_noise = Q_noise_1 - # Return + Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize return A, Q_noise,None, dA, dQ else: # iterable, array @@ -3486,4 +3619,4 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0) - return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T + return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0 diff --git a/GPy/models/state_space_model.py b/GPy/models/state_space_model.py index 5d22c0fc..d16b5adc 100644 --- a/GPy/models/state_space_model.py +++ b/GPy/models/state_space_model.py @@ -1,6 +1,6 @@ # Copyright (c) 2013, Arno Solin. # Licensed under the BSD 3-clause license (see LICENSE.txt) -# +# # This implementation of converting GPs to state space models is based on the article: # # @article{Sarkka+Solin+Hartikainen:2013, @@ -23,7 +23,16 @@ from . import state_space_main as ssm from . import state_space_setup as ss_setup class StateSpace(Model): - def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'): + def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, balance=False, name='StateSpace'): + """ + Inputs: + ------------------ + + balance: bool + Whether to balance or not the model as a whole + + """ + super(StateSpace, self).__init__(name=name) if len(X.shape) == 1: @@ -51,15 +60,16 @@ class StateSpace(Model): ss_setup.use_cython = use_cython #import pdb; pdb.set_trace() - + self.balance = balance + global ssm #from . import state_space_main as ssm if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython): reload(ssm) # Make sure the observations are ordered in time sort_index = np.argsort(X[:,0]) - self.X = X[sort_index] - self.Y = Y[sort_index] + self.X = X[sort_index,:] + self.Y = Y[sort_index,:] # Noise variance self.likelihood = likelihoods.Gaussian(variance=noise_var) @@ -86,11 +96,12 @@ class StateSpace(Model): #np.set_printoptions(16) #print(self.param_array) - #import pdb; pdb.set_trace() + # Get the model matrices from the kernel (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde() - + + # necessary parameters measurement_dim = self.output_dim grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter @@ -112,8 +123,9 @@ class StateSpace(Model): dR[:,:,-1] = np.eye(measurement_dim) # Balancing - #(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0) - + if self.balance: + (F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0) + print("SSM parameters_changed balancing!") # Use the Kalman filter to evaluate the likelihood grad_calc_params = {} grad_calc_params['dP_inf'] = dP_inf @@ -125,7 +137,7 @@ class StateSpace(Model): kalman_filter_type = self.kalman_filter_type # The following code is required because sometimes the shapes of self.Y - # becomes 3D even though is must be 2D. The reason is undescovered. + # becomes 3D even though is must be 2D. The reason is undiscovered. Y = self.Y if self.ts_number is None: Y.shape = (self.num_data,1) @@ -146,7 +158,7 @@ class StateSpace(Model): if np.any( np.isfinite(grad_log_likelihood) == False): #import pdb; pdb.set_trace() - print("State-Space: NaN valkues in the grad_log_likelihood") + print("State-Space: NaN values in the grad_log_likelihood") #print(grad_log_likelihood) grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1) @@ -159,7 +171,7 @@ class StateSpace(Model): def log_likelihood(self): return self._log_marginal_likelihood - def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw): + def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, p_balance=False, **kw): """ Performs the actual prediction for new X points. Inner function. It is called only from inside this class. @@ -177,7 +189,10 @@ class StateSpace(Model): filteronly: bool Use only Kalman Filter for prediction. In this case the output does not coincide with corresponding Gaussian process. - + + balance: bool + Whether to balance or not the model as a whole + Output: -------------------- @@ -210,7 +225,12 @@ class StateSpace(Model): # Get the model matrices from the kernel (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde() state_dim = F.shape[0] - + + # Balancing + if (p_balance==True): + (F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0) + print("SSM _raw_predict balancing!") + #Y = self.Y[:, 0,0] # Run the Kalman filter #import pdb; pdb.set_trace() @@ -261,10 +281,23 @@ class StateSpace(Model): # Return the posterior of the state return (m, V) - def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw): - + def predict(self, Xnew=None, filteronly=False, include_likelihood=True, balance=None, **kw): + """ + Inputs: + ------------------ + + balance: bool + Whether to balance or not the model as a whole + + """ + + if balance is None: + p_balance = self.balance + else: + p_balance = balance + # Run the Kalman filter to get the state - (m, V) = self._raw_predict(Xnew,filteronly=filteronly) + (m, V) = self._raw_predict(Xnew,filteronly=filteronly, p_balance=p_balance) # Add the noise variance to the state variance if include_likelihood: @@ -277,8 +310,22 @@ class StateSpace(Model): # Return mean and variance return m, V - def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw): - mu, var = self._raw_predict(Xnew) + def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), balance=None, **kw): + """ + Inputs: + ------------------ + + balance: bool + Whether to balance or not the model as a whole + + """ + if balance is None: + p_balance = self.balance + else: + p_balance = balance + + + mu, var = self._raw_predict(Xnew, p_balance=p_balance) #import pdb; pdb.set_trace() return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles] diff --git a/GPy/testing/gpy_kernels_state_space_tests.py b/GPy/testing/gpy_kernels_state_space_tests.py index c06093dd..1e48b168 100644 --- a/GPy/testing/gpy_kernels_state_space_tests.py +++ b/GPy/testing/gpy_kernels_state_space_tests.py @@ -91,12 +91,14 @@ class StateSpaceKernelsTests(np.testing.TestCase): mean_compare_decimal=5, var_compare_decimal=5) def test_RBF_kernel(self,): + #import pdb;pdb.set_trace() + np.random.seed(234) # seed the random number generator (X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0, plot = False, points_num=50, x_interval = (0, 20), random=True) X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1) - ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,]) + ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,], balance=True, approx_order=10) gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,]) self.run_for_model(X, Y, ss_kernel, check_gradients=True, @@ -267,7 +269,7 @@ class StateSpaceKernelsTests(np.testing.TestCase): gp_kernel=gp_kernel, mean_compare_decimal=2, var_compare_decimal=2) except AssertionError: - raise SkipTest("Skipping Regular kalman filter for kernel addition, as it seems to be bugged for some python versions") + raise SkipTest("Skipping Regular kalman filter for kernel addition, because it is not stable (normal situation) for this data.") def test_kernel_multiplication(self,): @@ -436,7 +438,7 @@ if __name__ == "__main__": print("Running state-space inference tests...") unittest.main() - #tt = StateSpaceKernelsTests('test_periodic_kernel') + #tt = StateSpaceKernelsTests('test_RBF_kernel') #import pdb; pdb.set_trace() #tt.test_Matern32_kernel() #tt.test_Matern52_kernel()