mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-08 11:32:39 +02:00
ENH: Adding SDE representation of addition, summation and standard periodic kernel.
All changes have been tested; tests are added in later commits.
This commit is contained in:
parent
00e95f957d
commit
06a7fedd22
8 changed files with 477 additions and 82 deletions
|
|
@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
|
||||||
|
|
||||||
import GPy.models.state_space_new as SS_new
|
import GPy.models.state_space_new as SS_new
|
||||||
|
|
||||||
X = np.linspace(0, 10, 4000)[:, None]
|
X = np.linspace(0, 10, 2000)[:, None]
|
||||||
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
|
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
|
||||||
|
|
||||||
# Need to run these lines when X and Y are imported ->
|
# Need to run these lines when X and Y are imported ->
|
||||||
|
|
@ -23,14 +23,14 @@ Y = np.sin(X) + np.random.randn(*X.shape)*0.1
|
||||||
#plt.plot( X, Y)
|
#plt.plot( X, Y)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
|
|
||||||
#kernel = GPy.kern.Matern32(X.shape[1])
|
kernel = GPy.kern.Matern32(X.shape[1])
|
||||||
#m = GPy.models.StateSpace(X,Y, kernel)
|
m = GPy.models.StateSpace(X,Y, kernel)
|
||||||
|
|
||||||
|
print m
|
||||||
#
|
#
|
||||||
#print m
|
m.optimize(optimizer='bfgs',messages=True)
|
||||||
##
|
#
|
||||||
#m.optimize(optimizer='bfgs',messages=True)
|
print m
|
||||||
##
|
|
||||||
#print m
|
|
||||||
|
|
||||||
kernel1 = GPy.kern.Matern32(X.shape[1])
|
kernel1 = GPy.kern.Matern32(X.shape[1])
|
||||||
m1 = GPy.models.GPRegression(X,Y, kernel1)
|
m1 = GPy.models.GPRegression(X,Y, kernel1)
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,14 @@ class sde_Matern32(Matern32):
|
||||||
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
def sde_update_gradient_full(self, gradients):
    """
    Store state-space gradients on the kernel parameters.

    `gradients` is read positionally, in the order in which the parameters
    are represented in the kernel: entry 0 is assigned to the variance and
    entry 1 to the lengthscale.
    """
    for position, parameter in enumerate((self.variance, self.lengthscale)):
        parameter.gradient = gradients[position]
|
||||||
|
|
||||||
def sde(self):
|
def sde(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -28,6 +36,7 @@ class sde_Matern32(Matern32):
|
||||||
|
|
||||||
variance = float(self.variance.values)
|
variance = float(self.variance.values)
|
||||||
lengthscale = float(self.lengthscale.values)
|
lengthscale = float(self.lengthscale.values)
|
||||||
|
|
||||||
foo = np.sqrt(3.)/lengthscale
|
foo = np.sqrt(3.)/lengthscale
|
||||||
F = np.array([[0, 1], [-foo**2, -2*foo]])
|
F = np.array([[0, 1], [-foo**2, -2*foo]])
|
||||||
L = np.array([[0], [1]])
|
L = np.array([[0], [1]])
|
||||||
|
|
@ -71,16 +80,125 @@ class sde_Matern52(Matern52):
|
||||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
def sde_update_gradient_full(self, gradients):
    """
    Store state-space gradients on the kernel parameters.

    `gradients` is read positionally, in the order in which the parameters
    are represented in the kernel: entry 0 is assigned to the variance and
    entry 1 to the lengthscale.
    """
    for position, parameter in enumerate((self.variance, self.lengthscale)):
        parameter.gradient = gradients[position]
|
||||||
|
|
||||||
def sde(self):
    """
    Return the state space representation of the covariance.

    Returns the tuple (F, L, Qc, H, Pinf, dF, dQc, dPinf). The last axis of
    every derivative array indexes the parameter the derivative is taken
    with respect to: 0 - variance, 1 - lengthscale.
    """
    sigma2 = float(self.variance.values)
    ell = float(self.lengthscale.values)

    # Derived constants
    lamda = np.sqrt(5.0)/ell
    kappa = 5.0/3.0*sigma2/ell**2

    # Feedback matrix, noise effect matrix, spectral density, observation model
    F = np.array(((0, 1, 0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
    L = np.array(((0,), (0,), (1,)))
    Qc = np.array(((sigma2*400.0*np.sqrt(5.0)/3.0/ell**5,),))
    H = np.array(((1, 0, 0),))

    # Stationary covariance
    Pinf = np.array(((sigma2, 0, -kappa),
                     (0, kappa, 0),
                     (-kappa, 0, 25.0*sigma2/ell**4)))

    # Partial derivatives w.r.t. the variance
    dF_variance = np.zeros((3, 3))
    dQc_variance = np.array(((400*np.sqrt(5)/3/ell**5,),))
    dPinf_variance = Pinf/sigma2

    # Partial derivatives w.r.t. the lengthscale
    dF_lengthscale = np.array(((0, 0, 0),
                               (0, 0, 0),
                               (15.0*np.sqrt(5.0)/ell**4, 30.0/ell**3, 3*np.sqrt(5.0)/ell**2)))
    dQc_lengthscale = np.array(((-sigma2*2000*np.sqrt(5)/3/ell**6,),))
    kappa2 = -2.0*kappa/ell
    dPinf_lengthscale = np.array(((0, 0, -kappa2),
                                  (0, kappa2, 0),
                                  (-kappa2, 0, -100*sigma2/ell**5)))

    # Stack the per-parameter derivatives along a trailing axis
    dF = np.dstack((dF_variance, dF_lengthscale))
    dQc = np.dstack((dQc_variance, dQc_lengthscale))
    dPinf = np.dstack((dPinf_variance, dPinf_lengthscale))

    return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
|
||||||
|
|
@ -6,6 +6,9 @@ Stochastic Differential Equation (SDE) functionality.
|
||||||
from .standard_periodic import StdPeriodic
|
from .standard_periodic import StdPeriodic
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import scipy as sp
|
||||||
|
|
||||||
|
from scipy import special as special
|
||||||
|
|
||||||
class sde_StdPeriodic(StdPeriodic):
|
class sde_StdPeriodic(StdPeriodic):
|
||||||
"""
|
"""
|
||||||
|
|
@ -21,20 +24,153 @@ class sde_StdPeriodic(StdPeriodic):
|
||||||
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
|
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
def sde_update_gradient_full(self, gradients):
    """
    Store state-space gradients on the kernel parameters.

    `gradients` is read positionally, in the order in which the parameters
    are represented in the kernel: entry 0 is assigned to the variance,
    entry 1 to the wavelengths and entry 2 to the lengthscales.
    """
    ordered_parameters = (self.variance, self.wavelengths, self.lengthscales)
    for position, parameter in enumerate(ordered_parameters):
        parameter.gradient = gradients[position]
|
||||||
|
|
||||||
def sde(self):
    """
    Return the state space representation of the covariance.

    ! Note: one must constrain lengthscale not to drop below 0.25.
    Below that the Bessel functions used for the series coefficients grow
    very large.

    ! Note: one must also keep the wavelength from getting very low, because
    then the gradients wrt wavelength become unstable.
    However this might depend on the data. For a test example with
    300 data points the lower limit is 0.15.

    Returns the tuple (F, L, Qc, H, P_inf, dF, dQc, dP_inf). The last axis
    of every derivative array indexes the parameter, in this order:
    0 - self.variance, 1 - self.wavelengths, 2 - self.lengthscales.

    Raises ValueError if the series coefficients ("error1") or their
    lengthscale derivatives ("error2") contain NaN.
    """
    N = 7 # approximation order

    # NOTE(review): if self.wavelengths is array-valued, the nested 2x2
    # literals below rely on numpy accepting size-1 arrays as scalars --
    # confirm on the target numpy version.
    w0 = 2*np.pi/self.wavelengths # frequency

    # lengthscale is multiplied by 2 because of slightly different
    # formula for periodic covariance function.
    [q2,dq2l] = seriescoeff(N,2*self.lengthscales,self.variance)
    # For the same reason (chain rule for the factor 2):
    dq2l = 2*dq2l

    if np.any( np.isnan(q2)):
        raise ValueError("SDE periodic covariance error1")

    # Distinct message so the two failure modes can be told apart.
    if np.any( np.isnan(dq2l)):
        raise ValueError("SDE periodic covariance error2")

    F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
    L = np.eye(2*(N+1))
    Qc = np.zeros((2*(N+1), 2*(N+1)))
    P_inf = np.kron(np.diag(q2),np.eye(2))
    H = np.kron(np.ones((1,N+1)),np.array((1,0)) )

    # Derivatives
    dF = np.empty((F.shape[0], F.shape[1], 3))
    dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
    dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))

    # Derivatives wrt self.variance
    dF[:,:,0] = np.zeros(F.shape)
    dQc[:,:,0] = np.zeros(Qc.shape)
    dP_inf[:,:,0] = P_inf / self.variance

    # Derivatives wrt self.wavelengths (d w0 / d wavelength = -w0 / wavelength)
    dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths )
    dQc[:,:,1] = np.zeros(Qc.shape)
    dP_inf[:,:,1] = np.zeros(P_inf.shape)

    # Derivatives wrt self.lengthscales
    dF[:,:,2] = np.zeros(F.shape)
    dQc[:,:,2] = np.zeros(Qc.shape)
    dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))

    return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
    r"""
    Calculate the coefficients q_j^2 for the covariance function
    approximation:

        k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)

    Reference is:

    [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
        covariance functions and state space models. In Proceedings of the
        Seventeenth International Conference on Artificial Intelligence and
        Statistics (AISTATS 2014). JMLR: W&CP, volume 33.

    Input:
    ----------------

    m: int
        Degree of approximation. Default 6.
    lengthScale: float
        Length scale parameter in the kernel
    magnSigma2: float
        Multiplier in front of the kernel.
    true_covariance: bool
        If True the coefficients are computed from the finite
        binomial/factorial series; otherwise (default) from the
        modified-Bessel-function expression.

    Output:
    -----------------

    coeffs: array(m+1)
        Covariance series coefficients

    coeffs_dl: array(m+1)
        Derivatives of the coefficients with respect to lengthscale.

    """
    inv_l2 = lengthScale**(-2)

    if true_covariance:
        # Rows of the grid index the power J, columns the harmonic M; the
        # sums over axis 0 below collapse the powers.
        M, J = np.meshgrid(range(0, m+1), range(0, m+1))

        in_range = np.array(M <= J, dtype=np.float64)
        same_parity = np.array(np.mod(J - M, 2) == 0, dtype=np.float64)

        # NumPy / scipy.special functions are used directly: the top-level
        # scipy aliases (sp.floor, sp.mod, sp.exp, sp.misc.factorial) are
        # not available in current SciPy releases.
        bb = (1.0 + np.array(J != 0, dtype=np.float64)) / (2.0**J) * \
            special.binom(J, np.floor((J - M)/2.0 * in_range)) * \
            in_range * same_parity

        coeffs = bb / special.factorial(J) * np.exp(-inv_l2) * \
            inv_l2**J * magnSigma2

        coeffs_dl = np.sum(coeffs*lengthScale**(-3)*(2.0 - 2.0*J*lengthScale**2), 0)

        coeffs = np.sum(coeffs, 0)

    else:
        # ive(j, z) = exp(-|z|) * iv(j, z). Using the scaled function avoids
        # the inf * 0 = NaN that exp(-z) * iv(j, z) produces once iv
        # overflows for small lengthscales. Order m+1 is included so the
        # j = 0 derivative is defined even when m == 0.
        scaled = special.ive(np.arange(0, m+2), inv_l2)

        coeffs = 2*magnSigma2*scaled[:m+1]
        coeffs[0] = 0.5*coeffs[0]

        # Derivatives wrt (lengthScale)
        coeffs_dl = np.zeros(m+1)
        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * \
            (-4*scaled[0:m] + 4*(1 + np.arange(1, m+1)*lengthScale**(2))*scaled[1:m+1])

        # The first element
        coeffs_dl[0] = magnSigma2*lengthScale**(-3) * \
            (2*scaled[0] - 2*scaled[1])

    return coeffs, coeffs_dl
|
||||||
|
|
|
||||||
|
|
@ -263,4 +263,82 @@ class Add(CombinationKernel):
|
||||||
i_s[k._all_dims_active] += k.input_sensitivity(summarize)
|
i_s[k._all_dims_active] += k.input_sensitivity(summarize)
|
||||||
return i_s
|
return i_s
|
||||||
else:
|
else:
|
||||||
|
|
||||||
return super(Add, self).input_sensitivity(summarize)
|
return super(Add, self).input_sensitivity(summarize)
|
||||||
|
|
||||||
|
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    `gradients` is consumed as consecutive slices, one per part, each
    `len(part.param_array)` entries long. Parts whose `is_fixed` is true do
    not receive their slice, but the slice is still skipped over so that
    the parts after them stay aligned with their own entries.
    """
    part_start_param_index = 0
    for p in self.parts:
        part_param_num = len(p.param_array) # number of parameters in the part
        if not p.is_fixed:
            p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
        # Advance for every part: sde() emits derivative slices for fixed
        # parts as well, so their entries are present in `gradients`.
        part_start_param_index += part_param_num
|
||||||
|
|
||||||
|
def sde(self):
    """
    Support adding kernels for sde representation

    The state-space models returned by the parts' own `sde()` are stacked:
    F, L, Qc and Pinf become block-diagonal, the H rows are concatenated
    side by side, and every part's derivative arrays are appended as a new
    trailing block along all three axes (zeros elsewhere).

    Returns the tuple (F, L, Qc, H, Pinf, dF, dQc, dPinf).
    """

    import scipy.linalg as la

    def _join_diag(acc, block):
        # Append `block` on the diagonal of the matrix accumulated so far.
        return block if acc is None else la.block_diag(acc, block)

    def _join_deriv(acc, block):
        # Append `block` as the trailing corner of a zero-padded 3-D stack.
        if acc is None:
            return block
        rows = block.shape[0]
        cols = block.shape[1]
        params = block.shape[2]
        acc = np.pad(acc, ((0, rows), (0, cols), (0, params)),
                     'constant', constant_values=0)
        acc[-rows:, -cols:, -params:] = block
        return acc

    F = L = Qc = H = Pinf = None
    dF = dQc = dPinf = None
    n = nq = nd = 0  # state dimension, noise dimension, parameter count

    # Assign models
    for part in self.parts:
        (Ft, Lt, Qct, Ht, Pinft, dFt, dQct, dPinft) = part.sde()

        F = _join_diag(F, Ft)
        L = _join_diag(L, Lt)
        Qc = _join_diag(Qc, Qct)
        H = Ht if H is None else np.hstack((H, Ht))
        Pinf = _join_diag(Pinf, Pinft)

        dF = _join_deriv(dF, dFt)
        dQc = _join_deriv(dQc, dQct)
        dPinf = _join_deriv(dPinf, dPinft)

        n += Ft.shape[0]
        nq += Qct.shape[0]
        nd += dFt.shape[2]

    assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
    assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
    assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
    assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
    assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
    assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
    assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
    assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"

    return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
|
||||||
|
|
|
||||||
|
|
@ -305,51 +305,6 @@ class Kern(Parameterized):
|
||||||
def _check_active_dims(self, X):
|
def _check_active_dims(self, X):
|
||||||
assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
|
assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
|
||||||
|
|
||||||
def sde(self):
|
|
||||||
# TODO: should support adding kernels together
|
|
||||||
|
|
||||||
#raise NameError('Problem')
|
|
||||||
|
|
||||||
# Find out state dimensions
|
|
||||||
n = 0
|
|
||||||
nq = 0
|
|
||||||
nd = 0
|
|
||||||
for p in self.parts:
|
|
||||||
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = p.sde()
|
|
||||||
n += F.shape[0]
|
|
||||||
nq += Qc.shape[0]
|
|
||||||
nd += dF.shape[2]
|
|
||||||
|
|
||||||
# Allocate space for the matrices
|
|
||||||
F = np.zeros((n,n))
|
|
||||||
L = np.zeros((n,nq))
|
|
||||||
Qc = np.zeros((nq,nq))
|
|
||||||
H = np.zeros((1,n))
|
|
||||||
Pinf = np.zeros((n,n))
|
|
||||||
dF = np.zeros((n,n,nd))
|
|
||||||
dQc = np.zeros((nq,nq,nd))
|
|
||||||
dPinf = np.zeros((n,n,nd))
|
|
||||||
n = 0
|
|
||||||
nq = 0
|
|
||||||
nd = 0
|
|
||||||
|
|
||||||
# Assign models
|
|
||||||
for p in self.parts:
|
|
||||||
(Ft,Lt,Qct,Ht,Pinft,dFt,dQct,dPinft) = p.sde()
|
|
||||||
F[n:n+Ft.shape[0],n:n+Ft.shape[1]] = Ft
|
|
||||||
L[n:n+Lt.shape[0],nq:nq+Lt.shape[1]] = Lt
|
|
||||||
Qc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1]] = Qct
|
|
||||||
H[0,n:n+Ht.shape[1]] = Ht
|
|
||||||
Pinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1]] = Pinft
|
|
||||||
dF[n:n+Ft.shape[0],n:n+Ft.shape[1],nd:nd+dFt.shape[2]] = dFt
|
|
||||||
dQc[nq:nq+Qct.shape[0],nq:nq+Qct.shape[1],nd:nd+dQct.shape[2]] = dQct
|
|
||||||
dPinf[n:n+Pinft.shape[0],n:n+Pinft.shape[1],nd:nd+dPinft.shape[2]] = dPinft
|
|
||||||
n += Ft.shape[0]
|
|
||||||
nq += Qct.shape[0]
|
|
||||||
nd += dFt.shape[2]
|
|
||||||
|
|
||||||
return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
|
|
||||||
|
|
||||||
class CombinationKernel(Kern):
|
class CombinationKernel(Kern):
|
||||||
"""
|
"""
|
||||||
Abstract super class for combination kernels.
|
Abstract super class for combination kernels.
|
||||||
|
|
|
||||||
|
|
@ -105,3 +105,110 @@ class Prod(CombinationKernel):
|
||||||
return i_s
|
return i_s
|
||||||
else:
|
else:
|
||||||
return super(Prod, self).input_sensitivity(summarize)
|
return super(Prod, self).input_sensitivity(summarize)
|
||||||
|
|
||||||
|
def sde_update_gradient_full(self, gradients):
    """
    Update gradient in the order in which parameters are represented in the
    kernel.

    `gradients` is consumed as consecutive slices, one per part, each
    `len(part.param_array)` entries long. Parts whose `is_fixed` is true do
    not receive their slice, but the slice is still skipped over so that
    the parts after them stay aligned with their own entries.
    """
    part_start_param_index = 0
    for p in self.parts:
        part_param_num = len(p.param_array) # number of parameters in the part
        if not p.is_fixed:
            p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
        # Advance for every part: sde() emits derivative slices for fixed
        # parts as well, so their entries are present in `gradients`.
        part_start_param_index += part_param_num
|
||||||
|
|
||||||
|
def sde(self):
    """
    Return the state space representation of the product of the parts.

    Starting from a scalar model (F = 0, L = Qc = H = Pinf = 1), each part's
    model is folded in: F by a Kronecker sum, the remaining matrices by
    Kronecker products, and the derivative stacks through `dkron`.

    Returns the tuple (F, L, Qc, H, Pinf, dF, dQc, dPinf).
    """
    F = np.array((0,), ndmin=2)
    L = np.array((1,), ndmin=2)
    Qc = np.array((1,), ndmin=2)
    H = np.array((1,), ndmin=2)
    Pinf = np.array((1,), ndmin=2)
    dF = dQc = dPinf = None

    # Assign models
    for part in self.parts:
        (Ft, Lt, Qct, Ht, P_inft, dFt, dQct, dP_inft) = part.sde()

        # Every derivative stack must carry one slice per parameter of the part.
        number_of_parameters = len(part.param_array)
        shape_checks = (
            (dFt, "Dynamic matrix derivative shape is wrong"),
            (dQct, "Diffusion matrix derivative shape is wrong"),
            (dP_inft, "Infinite covariance matrix derivative shape is wrong"),
        )
        for derivative, message in shape_checks:
            assert derivative.shape[2] == number_of_parameters, message

        # exception for periodic kernel: its stationary covariance (and the
        # derivative of it) is used in place of the diffusion matrix
        if part.name == 'std_periodic':
            Qct, dQct = P_inft, dP_inft

        # The derivatives need the matrices accumulated *before* this part
        # is folded in, so they are updated first.
        dF = dkron(F, dF, Ft, dFt, 'sum')
        dQc = dkron(Qc, dQc, Qct, dQct, 'prod')
        dPinf = dkron(Pinf, dPinf, P_inft, dP_inft, 'prod')

        accumulated_dim = F.shape[0]
        part_dim = Ft.shape[0]
        F = np.kron(F, np.eye(part_dim)) + np.kron(np.eye(accumulated_dim), Ft)
        L = np.kron(L, Lt)
        Qc = np.kron(Qc, Qct)
        Pinf = np.kron(Pinf, P_inft)
        H = np.kron(H, Ht)

    return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
|
||||||
|
|
||||||
|
def dkron(A,dA,B,dB, operation='prod'):
    """
    Function computes the derivative of Kronecker product A*B
    (or Kronecker sum A+B).

    Input:
    -----------------------

    A: 2D matrix
        Some matrix
    dA: 3D (or 2D matrix), or None
        Derivatives of A, one parameter per slice of the last axis. A 2D
        matrix is treated as the derivative w.r.t. a single parameter.
        None means A has no parameters.
    B: 2D matrix
        Some matrix
    dB: 3D (or 2D matrix), or None
        Derivatives of B, same conventions as dA.

    operation: str 'prod' or 'sum'
        Which operation is considered. If the operation is 'sum' it is assumed
        that A and B are square matrices. Any value other than 'prod' is
        handled as 'sum'.

    Output:
        dC: 3D matrix
        Derivative of Kronecker product A*B (or Kronecker sum A+B). The
        slices for the parameters of A come first, followed by those of B.
    """

    def _as_stack(derivative):
        # Normalise a derivative argument to (3-D stack or None, slice count).
        if derivative is None:
            return None, 0
        if np.ndim(derivative) == 2:
            derivative = np.asarray(derivative)[:, :, np.newaxis]
        return derivative, derivative.shape[2]

    dA, dA_param_num = _as_stack(dA)
    dB, dB_param_num = _as_stack(dB)
    is_product = (operation == 'prod')

    # Space allocation for derivative matrix
    dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num))

    # d(A (x) B)/da = dA (x) B ;  d(A (+) B)/da = dA (x) I
    right_factor = B if is_product else np.eye(B.shape[0])
    for k in range(dA_param_num):
        dC[:,:,k] = np.kron(dA[:,:,k], right_factor)

    # d(A (x) B)/db = A (x) dB ;  d(A (+) B)/db = I (x) dB
    left_factor = A if is_product else np.eye(A.shape[0])
    for k in range(dB_param_num):
        dC[:,:,dA_param_num+k] = np.kron(left_factor, dB[:,:,k])

    return dC
|
||||||
|
|
|
||||||
|
|
@ -723,7 +723,7 @@ class DescreteStateSpace(object):
|
||||||
v*v / S)
|
v*v / S)
|
||||||
log_likelihood_update = log_likelihood_update[0,0] # to make int
|
log_likelihood_update = log_likelihood_update[0,0] # to make int
|
||||||
if np.isnan(log_likelihood_update):
|
if np.isnan(log_likelihood_update):
|
||||||
pass
|
raise ValueError("Errrrr 1")
|
||||||
LL = None; islower = None
|
LL = None; islower = None
|
||||||
else:
|
else:
|
||||||
LL,islower = linalg.cho_factor(S)
|
LL,islower = linalg.cho_factor(S)
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import pylab as pb
|
||||||
from GPy.core.parameterization.param import Param
|
from GPy.core.parameterization.param import Param
|
||||||
|
|
||||||
import GPy
|
import GPy
|
||||||
|
from .. import likelihoods
|
||||||
import GPy.models.state_space_main as ssm
|
import GPy.models.state_space_main as ssm
|
||||||
#import state_space_main as ssm
|
#import state_space_main as ssm
|
||||||
reload(ssm)
|
reload(ssm)
|
||||||
|
|
@ -45,8 +46,7 @@ class StateSpace(Model):
|
||||||
self.Y = Y[sort_index]
|
self.Y = Y[sort_index]
|
||||||
|
|
||||||
# Noise variance
|
# Noise variance
|
||||||
self.sigma2 = Param('Gaussian_noise', sigma2)
|
self.likelihood = likelihoods.Gaussian()
|
||||||
self.link_parameter(self.sigma2)
|
|
||||||
|
|
||||||
# Default kernel
|
# Default kernel
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
|
|
@ -54,8 +54,8 @@ class StateSpace(Model):
|
||||||
else:
|
else:
|
||||||
self.kern = kernel
|
self.kern = kernel
|
||||||
self.link_parameter(self.kern)
|
self.link_parameter(self.kern)
|
||||||
|
self.link_parameter(self.likelihood)
|
||||||
self.sigma2.constrain_positive()
|
self.posterior = None
|
||||||
|
|
||||||
# Assert that the kernel is supported
|
# Assert that the kernel is supported
|
||||||
if not hasattr(self.kern, 'sde'):
|
if not hasattr(self.kern, 'sde'):
|
||||||
|
|
@ -98,7 +98,7 @@ class StateSpace(Model):
|
||||||
grad_calc_params['dR'] = dR
|
grad_calc_params['dR'] = dR
|
||||||
|
|
||||||
(filter_means, filter_covs, log_likelihood,
|
(filter_means, filter_covs, log_likelihood,
|
||||||
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.sigma2,P_inf,self.X,self.Y,m_init=None,
|
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,self.Gaussian_noise.variance,P_inf,self.X,self.Y,m_init=None,
|
||||||
P_init=None, calc_log_likelihood=True,
|
P_init=None, calc_log_likelihood=True,
|
||||||
calc_grad_log_likelihood=True,
|
calc_grad_log_likelihood=True,
|
||||||
grad_params_no=grad_params_no,
|
grad_params_no=grad_params_no,
|
||||||
|
|
@ -106,8 +106,9 @@ class StateSpace(Model):
|
||||||
|
|
||||||
self._log_marginal_likelihood = log_likelihood
|
self._log_marginal_likelihood = log_likelihood
|
||||||
#gradients = self.compute_gradients()
|
#gradients = self.compute_gradients()
|
||||||
self.sigma2.gradient_full[:] = grad_log_likelihood[-1,0]
|
self.likelihood.update_gradients(grad_log_likelihood[-1,0])
|
||||||
self.kern.gradient_full[:] = grad_log_likelihood[:-1,0]
|
|
||||||
|
self.kern.sde_update_gradient_full(grad_log_likelihood[:-1,0])
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return self._log_marginal_likelihood
|
return self._log_marginal_likelihood
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue