This commit is contained in:
mzwiessele 2018-09-02 21:32:47 +01:00
commit dbaf9b868c
12 changed files with 410 additions and 822 deletions

View file

@ -1,57 +0,0 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sigma^2 min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -1,64 +0,0 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
Modify the init method, because one extra parameter is required. X - points
on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -1,135 +0,0 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -1,178 +0,0 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.wavelengths.gradient = gradients[1]
self.lengthscales.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: one must constrain lengthscale not to drop below 0.25.
After this bessel functions of the first kind grows to very high.
! Note: one must keep wevelength also not very low. Because then
the gradients wrt wavelength become ustable.
However this might depend on the data. For test example with
300 data points the low limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.wavelengths
#self.lengthscales
N = 7 # approximation order
w0 = 2*np.pi/self.wavelengths # frequency
lengthscales = 2*self.lengthscales
[q2,dq2l] = seriescoeff(N,lengthscales,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.wavelengths
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artifcial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter in the kerenl
magnSigma2:float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl

View file

@ -1,101 +0,0 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
White kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -1,194 +0,0 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
try:
from scipy.linalg import solve_continuous_lyapunov as lyap
except ImportError:
from scipy.linalg import solve_lyapunov as lyap
class sde_RBF(RBF):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10# approximation order ( number of terms in exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = lyap(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# Benefits of this are not very sound. Helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
assert False, 'Not Implemented'
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

View file

@ -9,6 +9,7 @@ from .standard_periodic import StdPeriodic
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import warnings
from scipy import special as special from scipy import special as special
@ -26,6 +27,38 @@ class sde_StdPeriodic(StdPeriodic):
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] } \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
""" """
# TODO: write comment to the constructor arguments
def __init__(self, *args, **kwargs):
"""
Init constructior.
Two optinal extra parameters are added in addition to the ones in
StdPeriodic kernel.
:param approx_order: approximation order for the RBF covariance. (Default 7)
:type approx_order: int
:param balance: Whether to balance this kernel separately. (Defaulf False). Model has a separate parameter for balancing.
:type balance: bool
"""
#import pdb; pdb.set_trace()
if 'approx_order' in kwargs:
self.approx_order = kwargs.get('approx_order')
del kwargs['approx_order']
else:
self.approx_order = 7
if 'balance' in kwargs:
self.balance = bool( kwargs.get('balance') )
del kwargs['balance']
else:
self.balance = False
super(sde_StdPeriodic, self).__init__(*args, **kwargs)
def sde_update_gradient_full(self, gradients): def sde_update_gradient_full(self, gradients):
""" """
Update gradient in the order in which parameters are represented in the Update gradient in the order in which parameters are represented in the
@ -38,41 +71,48 @@ class sde_StdPeriodic(StdPeriodic):
def sde(self): def sde(self):
""" """
Return the state space representation of the covariance. Return the state space representation of the standard periodic covariance.
! Note: one must constrain lengthscale not to drop below 0.25. ! Note: one must constrain lengthscale not to drop below 0.2. (independently of approximation order)
After this bessel functions of the first kind grows to very high. After this Bessel functions of the first becomes NaN. Rescaling
time variable might help.
! Note: one must keep wevelength also not very low. Because then ! Note: one must keep period also not very low. Because then
the gradients wrt wavelength become ustable. the gradients wrt wavelength become ustable.
However this might depend on the data. For test example with However this might depend on the data. For test example with
300 data points the low limit is 0.15. 300 data points the low limit is 0.15.
""" """
#import pdb; pdb.set_trace()
# Params to use: (in that order) # Params to use: (in that order)
#self.variance #self.variance
#self.period #self.period
#self.lengthscale #self.lengthscale
N = 7 # approximation order if self.approx_order is not None:
N = int(self.approx_order)
else:
N = 7 # approximation order
p_period = float(self.period)
p_lengthscale = 2*float(self.lengthscale)
p_variance = float(self.variance)
w0 = 2*np.pi/self.period # frequency w0 = 2*np.pi/p_period # frequency
lengthscale = 2*self.lengthscale # lengthscale is multiplied by 2 because of different definition of lengthscale
[q2,dq2l] = seriescoeff(N,lengthscale,self.variance) [q2,dq2l] = seriescoeff(N, p_lengthscale, p_variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l dq2l = 2*dq2l # This is because the lengthscale if multiplied by 2.
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
eps = 1e-12
if np.any( np.isfinite(q2) == False) or np.any( np.abs(q2) > 1.0/eps) or np.any( np.abs(q2) < eps):
warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in q2 :".format(eps) + q2.__format__("") )
if np.any( np.isfinite(dq2l) == False) or np.any( np.abs(dq2l) > 1.0/eps) or np.any( np.abs(dq2l) < eps):
warnings.warn("sde_Periodic: Infinite, too small, or too large (eps={0:e}) values in dq2l :".format(eps) + q2.__format__("") )
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) ) F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1)) L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1))) Qc = np.zeros((2*(N+1), 2*(N+1)))
@ -88,10 +128,10 @@ class sde_StdPeriodic(StdPeriodic):
# Derivatives wrt self.variance # Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape) dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape) dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance dP_inf[:,:,0] = P_inf / p_variance
# Derivatives self.period # Derivatives self.period
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period ); dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / p_period );
dQc[:,:,1] = np.zeros(Qc.shape) dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape) dP_inf[:,:,1] = np.zeros(P_inf.shape)
@ -100,7 +140,12 @@ class sde_StdPeriodic(StdPeriodic):
dQc[:,:,2] = np.zeros(Qc.shape) dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2)) dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy() dP0 = dP_inf.copy()
if self.balance:
# Benefits of this are not very sound.
import GPy.models.state_space_main as ssm
(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf,dP0) = ssm.balance_ss_model(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0 )
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0) return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
@ -164,9 +209,9 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2)) coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False): if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!") raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0] coeffs[0] = 0.5*coeffs[0]
#print(coeffs)
# Derivatives wrt (lengthScale) # Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1) coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\ coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
@ -177,4 +222,4 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) ) (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl return coeffs.squeeze(), coeffs_dl.squeeze()

View file

@ -15,6 +15,7 @@ try:
from scipy.linalg import solve_continuous_lyapunov as lyap from scipy.linalg import solve_continuous_lyapunov as lyap
except ImportError: except ImportError:
from scipy.linalg import solve_lyapunov as lyap from scipy.linalg import solve_lyapunov as lyap
import warnings
class sde_RBF(RBF): class sde_RBF(RBF):
""" """
@ -29,6 +30,37 @@ class sde_RBF(RBF):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
""" """
def __init__(self, *args, **kwargs):
"""
Init constructior.
Two optinal extra parameters are added in addition to the ones in
RBF kernel.
:param approx_order: approximation order for the RBF covariance. (Default 10)
:type approx_order: int
:param balance: Whether to balance this kernel separately. (Defaulf True). Model has a separate parameter for balancing.
:type balance: bool
"""
if 'balance' in kwargs:
self.balance = bool( kwargs.get('balance') )
del kwargs['balance']
else:
self.balance = True
if 'approx_order' in kwargs:
self.approx_order = kwargs.get('approx_order')
del kwargs['approx_order']
else:
self.approx_order = 6
super(sde_RBF, self).__init__(*args, **kwargs)
def sde_update_gradient_full(self, gradients): def sde_update_gradient_full(self, gradients):
""" """
Update gradient in the order in which parameters are represented in the Update gradient in the order in which parameters are represented in the
@ -41,23 +73,43 @@ class sde_RBF(RBF):
def sde(self): def sde(self):
""" """
Return the state space representation of the covariance. Return the state space representation of the covariance.
Note! For Sparse GP inference too small or two high values of lengthscale
lead to instabilities. This is because Qc are too high or too low
and P_inf are not full rank. This effect depends on approximatio order.
For N = 10. lengthscale must be in (0.8,8). For other N tests must be conducted.
N=6: (0.06,31)
Variance should be within reasonable bounds as well, but its dependence is linear.
The above facts do not take into accout regularization.
""" """
#import pdb; pdb.set_trace()
N = 10# approximation order ( number of terms in exponent series expansion) if self.approx_order is not None:
N = self.approx_order
else:
N = 10# approximation order ( number of terms in exponent series expansion)
roots_rounding_decimals = 6 roots_rounding_decimals = 6
fn = np.math.factorial(N) fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2 p_lengthscale = float( self.lengthscale )
p_variance = float(self.variance)
kappa = 1.0/2.0/p_lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) Qc = np.array( ((p_variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) )
eps = 1e-12
if (float(Qc) > 1.0/eps) or (float(Qc) < eps):
warnings.warn("""sde_RBF kernel: the noise variance Qc is either very large or very small.
It influece conditioning of P_inf: {0:e}""".format(float(Qc)) )
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower pp1 = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n pp1[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp) pp = sp.poly1d(pp1)
roots = sp.roots(pp) roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0] neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
@ -83,17 +135,17 @@ class sde_RBF(RBF):
# Derivatives: # Derivatives:
dFvariance = np.zeros(F.shape) dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape) dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1) dFlengthscale[-1,:] = -aa[-1:0:-1]/p_lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance dQcvariance = Qc/p_variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),))) dQclengthscale = np.array(( (p_variance*np.sqrt(2*np.pi)*fn*2**N*p_lengthscale**(-2*N)*(1-2*N),),))
dPinf_variance = Pinf/self.variance dPinf_variance = Pinf/p_variance
lp = Pinf.shape[0] lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2 coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0 coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff dPinf_lengthscale = -1/p_lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale dF[:,:,1] = dFlengthscale
@ -105,10 +157,11 @@ class sde_RBF(RBF):
P0 = Pinf.copy() P0 = Pinf.copy()
dP0 = dPinf.copy() dP0 = dPinf.copy()
# Benefits of this are not very sound. Helps only in one case: if self.balance:
# SVD Kalman + RBF kernel # Benefits of this are not very sound. Helps only in one case:
import GPy.models.state_space_main as ssm # SVD Kalman + RBF kernel
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 ) import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -432,6 +432,8 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {} self.Q_svd_dict = {}
self.Q_square_root_dict = {}
self.Q_inverse_dict = {}
self.last_k = 0 self.last_k = 0
# !!!Print statistics! Which object is created # !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices # !!!Print statistics! Print sizes of matrices
@ -477,19 +479,54 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
cdef np.ndarray[DTYPE_t, ndim=2] U cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_svd_dict: if matrix_index in self.Q_square_root_dict:
square_root = self.Q_svd_dict[matrix_index] square_root = self.Q_square_root_dict[matrix_index]
else: else:
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], if matrix_index not in self.Q_svd_dict
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True, full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False) overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
else:
U,S,Vh = self.Q_svd_dict[matrix_index]
square_root = U * np.sqrt(S) square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root self.Q_suqare_root_dict[matrix_index] = square_root
return square_root return square_root
cpdef Q_inverse(self, int k, float jitter=0.0):
"""
Square root of the noise matrix Q
"""
cdef int matrix_index = <int>self.reconstruct_indices[k]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_inverse_dict:
Q_inverse = self.Q_inverse_dict[matrix_index]
else:
if matrix_index not in self.Q_svd_dict
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
else:
U,S,Vh = self.Q_svd_dict[matrix_index]
Q_inverse = Q_inverse = np.dot( Vh.T * ( 1.0/(S + jitter)) , U.T )
self.Q_inverse_dict[matrix_index] = Q_inverse
return Q_inverse
# def return_last(self): # def return_last(self):
# """ # """
# Function returns last available matrices. # Function returns last available matrices.

View file

@ -12,6 +12,8 @@ import numpy as np
import scipy as sp import scipy as sp
import scipy.linalg as linalg import scipy.linalg as linalg
import warnings
try: try:
from . import state_space_setup from . import state_space_setup
setup_available = True setup_available = True
@ -41,6 +43,10 @@ if print_verbose:
else: else:
print("state_space: cython is NOT used") print("state_space: cython is NOT used")
# When debugging external module can set some value to this variable (e.g.)
# 'model' and in this module this variable can be seen.s
tmp_buffer = None
class Dynamic_Callables_Python(object): class Dynamic_Callables_Python(object):
@ -227,7 +233,7 @@ class R_handling_Python(Measurement_Callables_Class):
self.R_square_root = {} self.R_square_root = {}
def Rk(self, k): def Rk(self, k):
return self.R[:, :, self.index[self.R_time_var_index, k]] return self.R[:, :, int(self.index[self.R_time_var_index, k])]
def dRk(self, k): def dRk(self, k):
if self.dR is None: if self.dR is None:
@ -305,7 +311,7 @@ class Std_Measurement_Callables_Python(R_handling_Class):
P: parameter for Jacobian, usually covariance matrix. P: parameter for Jacobian, usually covariance matrix.
""" """
return self.H[:, :, self.index[self.H_time_var_index, k]] return self.H[:, :, int(self.index[self.H_time_var_index, k])]
def dHk(self, k): def dHk(self, k):
if self.dH is None: if self.dH is None:
@ -2303,6 +2309,8 @@ class ContDescrStateSpace(DescreteStateSpace):
self.v_dQk = None self.v_dQk = None
self.square_root_computed = False self.square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
# !!!Print statistics! Which object is created # !!!Print statistics! Which object is created
def f_a(self, k,m,A): def f_a(self, k,m,A):
@ -2337,7 +2345,10 @@ class ContDescrStateSpace(DescreteStateSpace):
self.v_Qk = v_Qk self.v_Qk = v_Qk
self.v_dAk = v_dAk self.v_dAk = v_dAk
self.v_dQk = v_dQk self.v_dQk = v_dQk
self.Q_square_root_computed = False self.Q_square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
else: else:
v_Ak = self.v_Ak v_Ak = self.v_Ak
v_Qk = self.v_Qk v_Qk = self.v_Qk
@ -2359,8 +2370,11 @@ class ContDescrStateSpace(DescreteStateSpace):
self.last_k = 0 self.last_k = 0
self.last_k_computed = False self.last_k_computed = False
self.compute_derivatives = compute_derivatives self.compute_derivatives = compute_derivatives
self.Q_square_root_computed = False self.Q_square_root_computed = False
self.Q_inverse_computed = False
self.Q_svd_computed = False
self.Q_eigen_computed = False
return self return self
def Ak(self,k,m,P): def Ak(self,k,m,P):
@ -2381,12 +2395,19 @@ class ContDescrStateSpace(DescreteStateSpace):
def Q_srk(self,k): def Q_srk(self,k):
""" """
Check square root, maybe rewriting for Spectral decomposition is needed.
Square root of the noise matrix Q Square root of the noise matrix Q
""" """
if ((self.last_k == k) and (self.last_k_computed == True)): if ((self.last_k == k) and (self.last_k_computed == True)):
if not self.Q_square_root_computed: if not self.Q_square_root_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) if not self.Q_svd_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
self.Q_svd = (U, S, Vh)
self.Q_svd_computed = True
else:
(U, S, Vh) = self.Q_svd
square_root = U * np.sqrt(S) square_root = U * np.sqrt(S)
self.square_root_computed = True self.square_root_computed = True
self.Q_square_root = square_root self.Q_square_root = square_root
@ -2396,7 +2417,56 @@ class ContDescrStateSpace(DescreteStateSpace):
raise ValueError("Square root of Q can not be computed") raise ValueError("Square root of Q can not be computed")
return square_root return square_root
def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
"""
Function inverts Q matrix and regularizes the inverse.
Regularization is useful when original matrix is badly conditioned.
Function is currently used only in SparseGP code.
Inputs:
------------------------------
k: int
Iteration number.
p_largest_cond_num: float
Largest condition value for the inverted matrix. If cond. number is smaller than that
no regularization happen.
regularization_type: 1 or 2
Regularization type.
regularization_type: int (1 or 2)
type 1: 1/(S[k] + regularizer) regularizer is computed
type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
"""
#import pdb; pdb.set_trace()
if ((self.last_k == k) and (self.last_k_computed == True)):
if not self.Q_inverse_computed:
if not self.Q_svd_computed:
(U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
self.Q_svd = (U, S, Vh)
self.Q_svd_computed = True
else:
(U, S, Vh) = self.Q_svd
Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
self.Q_inverse_computed = True
self.Q_inverse_r = Q_inverse_r
else:
Q_inverse_r = self.Q_inverse_r
else:
raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
This requires some programming""")
return Q_inverse_r
def return_last(self): def return_last(self):
""" """
Function returns last computed matrices. Function returns last computed matrices.
@ -2463,6 +2533,9 @@ class ContDescrStateSpace(DescreteStateSpace):
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {} self.Q_svd_dict = {}
self.Q_square_root_dict = {}
self.Q_inverse_dict = {}
self.last_k = None self.last_k = None
# !!!Print statistics! Which object is created # !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices # !!!Print statistics! Print sizes of matrices
@ -2503,17 +2576,66 @@ class ContDescrStateSpace(DescreteStateSpace):
Square root of the noise matrix Q Square root of the noise matrix Q
""" """
matrix_index = self.reconstruct_indices[k] matrix_index = self.reconstruct_indices[k]
if matrix_index in self.Q_svd_dict: if matrix_index in self.Q_square_root_dict:
square_root = self.Q_svd_dict[matrix_index] square_root = self.Q_square_root_dict[matrix_index]
else: else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], if matrix_index in self.Q_svd_dict:
(U, S, Vh) = self.Q_svd_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True, full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False) overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
square_root = U * np.sqrt(S) square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root self.Q_square_root_dict[matrix_index] = square_root
return square_root return square_root
def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
"""
Function inverts Q matrix and regularizes the inverse.
Regularization is useful when original matrix is badly conditioned.
Function is currently used only in SparseGP code.
Inputs:
------------------------------
k: int
Iteration number.
p_largest_cond_num: float
Largest condition value for the inverted matrix. If cond. number is smaller than that
no regularization happen.
regularization_type: 1 or 2
Regularization type.
regularization_type: int (1 or 2)
type 1: 1/(S[k] + regularizer) regularizer is computed
type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
"""
#import pdb; pdb.set_trace()
matrix_index = self.reconstruct_indices[k]
if matrix_index in self.Q_inverse_dict:
Q_inverse_r = self.Q_inverse_dict[matrix_index]
else:
if matrix_index in self.Q_svd_dict:
(U, S, Vh) = self.Q_svd_dict[matrix_index]
else:
(U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
self.Q_svd_dict[matrix_index] = (U,S,Vh)
Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
self.Q_inverse_dict[matrix_index] = Q_inverse_r
return Q_inverse_r
def return_last(self): def return_last(self):
""" """
Function returns last available matrices. Function returns last available matrices.
@ -3073,7 +3195,8 @@ class ContDescrStateSpace(DescreteStateSpace):
@classmethod @classmethod
def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False, def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
grad_params_no=None, grad_params_no=None,
P_inf=None, dP_inf=None, dF = None, dQc=None): P_inf=None, dP_inf=None, dF = None, dQc=None,
dt0=None):
""" """
Function return the object which is used in Kalman filter and/or Function return the object which is used in Kalman filter and/or
smoother to obtain matrices A, Q and their derivatives for discrete model smoother to obtain matrices A, Q and their derivatives for discrete model
@ -3110,7 +3233,14 @@ class ContDescrStateSpace(DescreteStateSpace):
threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
dt = np.empty((X.shape[0],)) dt = np.empty((X.shape[0],))
dt[1:] = np.diff(X[:,0],axis=0) dt[1:] = np.diff(X[:,0],axis=0)
dt[0] = 0#dt[1] if dt0 is None:
dt[0] = 0#dt[1]
else:
if isinstance(dt0,str):
dt = dt[1:]
else:
dt[0] = dt0
unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals)) unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
number_unique_indices = len(unique_indices) number_unique_indices = len(unique_indices)
@ -3161,7 +3291,10 @@ class ContDescrStateSpace(DescreteStateSpace):
x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1})
TODO: this function can be redone to "preprocess dataset", when
close time points are handeled properly (with rounding parameter) and
values are averaged accordingly.
Input: Input:
-------------- --------------
F,L: LTI SDE matrices of corresponding dimensions F,L: LTI SDE matrices of corresponding dimensions
@ -3222,11 +3355,9 @@ class ContDescrStateSpace(DescreteStateSpace):
n = F.shape[0] n = F.shape[0]
if not isinstance(dt, collections.Iterable): # not iterable, scalar if not isinstance(dt, collections.Iterable): # not iterable, scalar
#import pdb; pdb.set_trace()
# The dynamical model # The dynamical model
A = matrix_exponent(F*dt) A = matrix_exponent(F*dt)
if np.any( np.isnan(A)):
A = linalg.expm3(F*dt)
# The covariance matrix Q by matrix fraction decomposition -> # The covariance matrix Q by matrix fraction decomposition ->
Phi = np.zeros((2*n,2*n)) Phi = np.zeros((2*n,2*n))
@ -3265,15 +3396,17 @@ class ContDescrStateSpace(DescreteStateSpace):
# The discrete-time dynamical model* # The discrete-time dynamical model*
if p==0: if p==0:
A = AA[:n,:n,p] A = AA[:n,:n,p]
Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T)
Q_noise = Q_noise_2 Q_noise = Q_noise_3
#PP = A.dot(P).dot(A.T) + Q_noise_2 #PP = A.dot(P).dot(A.T) + Q_noise_2
# The derivatives of A and Q # The derivatives of A and Q
dA[:,:,p] = AA[n:,:n,p] dA[:,:,p] = AA[n:,:n,p]
dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \ tmp = dA[:,:,p].dot(P_inf).dot(A.T)
- A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times dQ[:,:,p] = dP_inf[:,:,p] - tmp \
- A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
else: else:
dA = None dA = None
dQ = None dQ = None
@ -3282,7 +3415,7 @@ class ContDescrStateSpace(DescreteStateSpace):
#Q_noise = Q_noise_1 #Q_noise = Q_noise_1
# Return Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
return A, Q_noise,None, dA, dQ return A, Q_noise,None, dA, dQ
else: # iterable, array else: # iterable, array
@ -3486,4 +3619,4 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
# (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0) # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0

View file

@ -1,6 +1,6 @@
# Copyright (c) 2013, Arno Solin. # Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
# #
# This implementation of converting GPs to state space models is based on the article: # This implementation of converting GPs to state space models is based on the article:
# #
# @article{Sarkka+Solin+Hartikainen:2013, # @article{Sarkka+Solin+Hartikainen:2013,
@ -23,7 +23,16 @@ from . import state_space_main as ssm
from . import state_space_setup as ss_setup from . import state_space_setup as ss_setup
class StateSpace(Model): class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'): def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, balance=False, name='StateSpace'):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
super(StateSpace, self).__init__(name=name) super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1: if len(X.shape) == 1:
@ -51,15 +60,16 @@ class StateSpace(Model):
ss_setup.use_cython = use_cython ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
self.balance = balance
global ssm global ssm
#from . import state_space_main as ssm #from . import state_space_main as ssm
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython): if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
reload(ssm) reload(ssm)
# Make sure the observations are ordered in time # Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0]) sort_index = np.argsort(X[:,0])
self.X = X[sort_index] self.X = X[sort_index,:]
self.Y = Y[sort_index] self.Y = Y[sort_index,:]
# Noise variance # Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var) self.likelihood = likelihoods.Gaussian(variance=noise_var)
@ -86,11 +96,12 @@ class StateSpace(Model):
#np.set_printoptions(16) #np.set_printoptions(16)
#print(self.param_array) #print(self.param_array)
#import pdb; pdb.set_trace()
# Get the model matrices from the kernel # Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde() (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters # necessary parameters
measurement_dim = self.output_dim measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -112,8 +123,9 @@ class StateSpace(Model):
dR[:,:,-1] = np.eye(measurement_dim) dR[:,:,-1] = np.eye(measurement_dim)
# Balancing # Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0) if self.balance:
(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
print("SSM parameters_changed balancing!")
# Use the Kalman filter to evaluate the likelihood # Use the Kalman filter to evaluate the likelihood
grad_calc_params = {} grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf grad_calc_params['dP_inf'] = dP_inf
@ -125,7 +137,7 @@ class StateSpace(Model):
kalman_filter_type = self.kalman_filter_type kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shapes of self.Y # The following code is required because sometimes the shapes of self.Y
# becomes 3D even though is must be 2D. The reason is undescovered. # becomes 3D even though is must be 2D. The reason is undiscovered.
Y = self.Y Y = self.Y
if self.ts_number is None: if self.ts_number is None:
Y.shape = (self.num_data,1) Y.shape = (self.num_data,1)
@ -146,7 +158,7 @@ class StateSpace(Model):
if np.any( np.isfinite(grad_log_likelihood) == False): if np.any( np.isfinite(grad_log_likelihood) == False):
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
print("State-Space: NaN valkues in the grad_log_likelihood") print("State-Space: NaN values in the grad_log_likelihood")
#print(grad_log_likelihood) #print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1) grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
@ -159,7 +171,7 @@ class StateSpace(Model):
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw): def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, p_balance=False, **kw):
""" """
Performs the actual prediction for new X points. Performs the actual prediction for new X points.
Inner function. It is called only from inside this class. Inner function. It is called only from inside this class.
@ -177,7 +189,10 @@ class StateSpace(Model):
filteronly: bool filteronly: bool
Use only Kalman Filter for prediction. In this case the output does Use only Kalman Filter for prediction. In this case the output does
not coincide with corresponding Gaussian process. not coincide with corresponding Gaussian process.
balance: bool
Whether to balance or not the model as a whole
Output: Output:
-------------------- --------------------
@ -210,7 +225,12 @@ class StateSpace(Model):
# Get the model matrices from the kernel # Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde() (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0] state_dim = F.shape[0]
# Balancing
if (p_balance==True):
(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
print("SSM _raw_predict balancing!")
#Y = self.Y[:, 0,0] #Y = self.Y[:, 0,0]
# Run the Kalman filter # Run the Kalman filter
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
@ -261,10 +281,23 @@ class StateSpace(Model):
# Return the posterior of the state # Return the posterior of the state
return (m, V) return (m, V)
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw): def predict(self, Xnew=None, filteronly=False, include_likelihood=True, balance=None, **kw):
"""
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
if balance is None:
p_balance = self.balance
else:
p_balance = balance
# Run the Kalman filter to get the state # Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly) (m, V) = self._raw_predict(Xnew,filteronly=filteronly, p_balance=p_balance)
# Add the noise variance to the state variance # Add the noise variance to the state variance
if include_likelihood: if include_likelihood:
@ -277,8 +310,22 @@ class StateSpace(Model):
# Return mean and variance # Return mean and variance
return m, V return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw): def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), balance=None, **kw):
mu, var = self._raw_predict(Xnew) """
Inputs:
------------------
balance: bool
Whether to balance or not the model as a whole
"""
if balance is None:
p_balance = self.balance
else:
p_balance = balance
mu, var = self._raw_predict(Xnew, p_balance=p_balance)
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles] return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]

View file

@ -91,12 +91,14 @@ class StateSpaceKernelsTests(np.testing.TestCase):
mean_compare_decimal=5, var_compare_decimal=5) mean_compare_decimal=5, var_compare_decimal=5)
def test_RBF_kernel(self,): def test_RBF_kernel(self,):
#import pdb;pdb.set_trace()
np.random.seed(234) # seed the random number generator np.random.seed(234) # seed the random number generator
(X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0, (X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
plot = False, points_num=50, x_interval = (0, 20), random=True) plot = False, points_num=50, x_interval = (0, 20), random=True)
X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1) X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)
ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,]) ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,], balance=True, approx_order=10)
gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,]) gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,])
self.run_for_model(X, Y, ss_kernel, check_gradients=True, self.run_for_model(X, Y, ss_kernel, check_gradients=True,
@ -267,7 +269,7 @@ class StateSpaceKernelsTests(np.testing.TestCase):
gp_kernel=gp_kernel, gp_kernel=gp_kernel,
mean_compare_decimal=2, var_compare_decimal=2) mean_compare_decimal=2, var_compare_decimal=2)
except AssertionError: except AssertionError:
raise SkipTest("Skipping Regular kalman filter for kernel addition, as it seems to be bugged for some python versions") raise SkipTest("Skipping Regular kalman filter for kernel addition, because it is not stable (normal situation) for this data.")
def test_kernel_multiplication(self,): def test_kernel_multiplication(self,):
@ -436,7 +438,7 @@ if __name__ == "__main__":
print("Running state-space inference tests...") print("Running state-space inference tests...")
unittest.main() unittest.main()
#tt = StateSpaceKernelsTests('test_periodic_kernel') #tt = StateSpaceKernelsTests('test_RBF_kernel')
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
#tt.test_Matern32_kernel() #tt.test_Matern32_kernel()
#tt.test_Matern52_kernel() #tt.test_Matern52_kernel()