ENH: Adding SDE representation of addition, summation and standard periodic kernel.

All changes have been tested; tests are added in later commits.
This commit is contained in:
Alexander Grigorievskiy 2015-05-19 14:03:12 +03:00
parent 00e95f957d
commit 06a7fedd22
8 changed files with 477 additions and 82 deletions

View file

@ -20,7 +20,15 @@ class sde_Matern32(Matern32):
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
    """
    Propagate externally computed gradients onto the kernel parameters.

    The incoming array is ordered the same way the parameters are
    represented in the kernel: variance first, then lengthscale.
    """
    grad_variance, grad_lengthscale = gradients[0], gradients[1]
    self.variance.gradient = grad_variance
    self.lengthscale.gradient = grad_lengthscale
def sde(self):
"""
Return the state space representation of the covariance.
@ -28,6 +36,7 @@ class sde_Matern32(Matern32):
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
@ -71,16 +80,125 @@ class sde_Matern52(Matern52):
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
    """
    Write the supplied gradient values back onto the kernel parameters,
    in the order they are represented in the kernel
    (variance, lengthscale).
    """
    params = (self.variance, self.lengthscale)
    for idx, param in enumerate(params):
        param.gradient = gradients[idx]
def sde(self):
    """
    Return the state space (SDE) representation of the Matern 5/2
    covariance function.

    Returns
    -------
    (F, L, Qc, H, Pinf, dF, dQc, dPinf) : tuple of np.ndarray
        F : (3,3) feedback (drift) matrix
        L : (3,1) noise effect matrix
        Qc : (1,1) spectral density of the driving white noise
        H : (1,3) observation model
        Pinf : (3,3) stationary state covariance
        dF, dQc, dPinf : derivatives of F, Qc and Pinf with respect to
            the kernel parameters, stacked along the last axis in the
            order (variance, lengthscale).
    """
    variance = float(self.variance.values)
    lengthscale = float(self.lengthscale.values)

    # Derived constants of the Matern 5/2 state space model.
    lamda = np.sqrt(5.0)/lengthscale
    kappa = 5.0/3.0*variance/lengthscale**2

    F = np.array(((0, 1, 0),
                  (0, 0, 1),
                  (-lamda**3, -3.0*lamda**2, -3*lamda)))
    L = np.array(((0,), (0,), (1,)))
    Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
    H = np.array(((1, 0, 0),))
    Pinf = np.array(((variance, 0, -kappa),
                     (0, kappa, 0),
                     (-kappa, 0, 25.0*variance/lengthscale**4)))

    # Allocate space for the derivatives (two parameters).
    dF = np.empty((3, 3, 2))
    dQc = np.empty((1, 1, 2))
    dPinf = np.empty((3, 3, 2))

    # Partial derivatives with respect to variance and lengthscale.
    dFvariance = np.zeros((3, 3))  # F does not depend on the variance
    dFlengthscale = np.array(((0, 0, 0),
                              (0, 0, 0),
                              (15.0*np.sqrt(5.0)/lengthscale**4,
                               30.0/lengthscale**3,
                               3*np.sqrt(5.0)/lengthscale**2)))
    dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
    dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
    dPinf_variance = Pinf/variance  # Pinf is linear in the variance
    kappa2 = -2.0*kappa/lengthscale
    dPinf_lengthscale = np.array(((0, 0, -kappa2),
                                  (0, kappa2, 0),
                                  (-kappa2, 0, -100*variance/lengthscale**5)))

    # Stack the derivatives: index 0 -> variance, index 1 -> lengthscale.
    dF[:, :, 0] = dFvariance
    dF[:, :, 1] = dFlengthscale
    dQc[:, :, 0] = dQcvariance
    dQc[:, :, 1] = dQclengthscale
    dPinf[:, :, 0] = dPinf_variance
    dPinf[:, :, 1] = dPinf_lengthscale

    return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

View file

@ -6,6 +6,9 @@ Stochastic Differential Equation (SDE) functionality.
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
@ -21,20 +24,153 @@ class sde_StdPeriodic(StdPeriodic):
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
"""
def sde_update_gradient_full(self, gradients):
    """
    Copy externally computed gradients onto the kernel parameters in the
    order they are represented in the kernel:
    variance, wavelengths, lengthscales.
    """
    g_var, g_wave, g_len = gradients[0], gradients[1], gradients[2]
    self.variance.gradient = g_var
    self.wavelengths.gradient = g_wave
    self.lengthscales.gradient = g_len
def sde(self):
    """
    Return the state space representation of the standard periodic
    covariance.

    ! Note: one must constrain the lengthscale not to drop below 0.25,
    because below that the Bessel functions of the first kind grow
    very large.

    ! Note: the wavelength must also be kept from getting very low,
    because then the gradients w.r.t. the wavelength become unstable.
    This might depend on the data; for a test example with 300 data
    points the lower limit is 0.15.

    Returns
    -------
    (F, L, Qc, H, P_inf, dF, dQc, dP_inf) : tuple of np.ndarray
        State space matrices and their derivatives with respect to the
        parameters, stacked along the last axis in the order
        (variance, wavelengths, lengthscales).
    """
    N = 7  # approximation order of the periodic covariance expansion

    w0 = 2*np.pi/self.wavelengths  # angular frequency
    # The lengthscale is doubled because the standard periodic covariance
    # uses a slightly different formula than the reference paper; the
    # returned derivative is scaled by 2 for the same reason.
    [q2, dq2l] = seriescoeff(N, 2*self.lengthscales, self.variance)
    dq2l = 2*dq2l

    if np.any(np.isnan(q2)):
        raise ValueError("SDE periodic covariance error: NaN in series coefficients")
    if np.any(np.isnan(dq2l)):
        raise ValueError("SDE periodic covariance error: NaN in coefficient derivatives")

    # Block structure: one 2x2 rotation block per harmonic j = 0..N.
    F = np.kron(np.diag(range(0, N+1)), np.array(((0, -w0), (w0, 0))))
    L = np.eye(2*(N+1))
    Qc = np.zeros((2*(N+1), 2*(N+1)))
    P_inf = np.kron(np.diag(q2), np.eye(2))
    H = np.kron(np.ones((1, N+1)), np.array((1, 0)))

    # Derivatives, stacked along the last axis in parameter order.
    dF = np.empty((F.shape[0], F.shape[1], 3))
    dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
    dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))

    # Derivatives w.r.t. self.variance (only P_inf depends on it, linearly).
    dF[:, :, 0] = np.zeros(F.shape)
    dQc[:, :, 0] = np.zeros(Qc.shape)
    dP_inf[:, :, 0] = P_inf / self.variance

    # Derivatives w.r.t. self.wavelengths (w0 = 2*pi/wavelength, so
    # dF/dwavelength = -F/wavelength, written blockwise here).
    dF[:, :, 1] = np.kron(np.diag(range(0, N+1)),
                          np.array(((0, w0), (-w0, 0))) / self.wavelengths)
    dQc[:, :, 1] = np.zeros(Qc.shape)
    dP_inf[:, :, 1] = np.zeros(P_inf.shape)

    # Derivatives w.r.t. self.lengthscales (through the series coefficients).
    dF[:, :, 2] = np.zeros(F.shape)
    dQc[:, :, 2] = np.zeros(Qc.shape)
    dP_inf[:, :, 2] = np.kron(np.diag(dq2l), np.eye(2))

    return (F, L, Qc, H, P_inf, dF, dQc, dP_inf)
def seriescoeff(m=6, lengthScale=1.0, magnSigma2=1.0, true_covariance=False):
    r"""
    Calculate the coefficients q_j^2 for the covariance function
    approximation:

        k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j \omega_0 \tau)

    Reference:

    [1] Arno Solin and Simo Sarkka (2014). Explicit link between periodic
        covariance functions and state space models. In Proceedings of the
        Seventeenth International Conference on Artificial Intelligence and
        Statistics (AISTATS 2014). JMLR: W&CP, volume 33.

    Parameters
    ----------
    m : int
        Degree of approximation. Default 6.
    lengthScale : float
        Length scale parameter in the kernel.
    magnSigma2 : float
        Multiplier in front of the kernel (variance).
    true_covariance : bool
        If True, use the exact truncated-series coefficients; otherwise
        use the infinite approximation through Bessel functions.

    Returns
    -------
    coeffs : array(m+1)
        Covariance series coefficients.
    coeffs_dl : array(m+1)
        Derivatives of the coefficients with respect to the lengthscale.
    """
    if true_covariance:
        # b_j(m) coefficients of the exact (truncated) series expansion.
        # Note: scipy's deprecated aliases (sp.exp, sp.floor, sp.mod,
        # sp.misc.factorial) were replaced by their numpy/scipy.special
        # equivalents; the math is unchanged.
        bb = lambda j, m: (1.0 + np.array((j != 0), dtype=np.float64)) / (2**(j)) *\
            special.binom(j, np.floor((j-m)/2.0 * np.array(m <= j, dtype=np.float64)))*\
            np.array(m <= j, dtype=np.float64)*np.array(np.mod(j-m, 2) == 0, dtype=np.float64)

        M, J = np.meshgrid(range(0, m+1), range(0, m+1))

        coeffs = bb(J, M) / special.factorial(J) * np.exp(-lengthScale**(-2)) *\
            (lengthScale**(-2))**J * magnSigma2
        coeffs_dl = np.sum(coeffs*lengthScale**(-3)*(2.0 - 2.0*J*lengthScale**2), 0)
        coeffs = np.sum(coeffs, 0)
    else:
        # Infinite approximation via modified Bessel functions of the
        # first kind, I_j(1/lengthScale^2).
        coeffs = 2*magnSigma2*np.exp(-lengthScale**(-2)) * special.iv(range(0, m+1), 1.0/lengthScale**(2))
        coeffs[0] = 0.5*coeffs[0]  # the j = 0 term is halved

        # Derivatives w.r.t. lengthScale, using I_j'(x) = I_{j-1}(x) - (j/x) I_j(x).
        coeffs_dl = np.zeros(m+1)
        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
            (-4*special.iv(range(0, m), lengthScale**(-2)) +
             4*(1+np.arange(1, m+1)*lengthScale**(2))*special.iv(range(1, m+1), lengthScale**(-2)))
        # The first (j = 0) element has a simpler form.
        coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
            (2*special.iv(0, lengthScale**(-2)) - 2*special.iv(1, lengthScale**(-2)))

    return coeffs, coeffs_dl

View file

@ -263,4 +263,82 @@ class Add(CombinationKernel):
i_s[k._all_dims_active] += k.input_sensitivity(summarize)
return i_s
else:
return super(Add, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
    """
    Update the gradients of all (non-fixed) sub-kernels, slicing the flat
    gradient vector in the order the parts and their parameters are
    represented in the combined kernel.
    """
    offset = 0
    for part in self.parts:
        if part.is_fixed:
            continue
        n_params = len(part.param_array)  # parameters owned by this part
        part.sde_update_gradient_full(gradients[offset:offset + n_params])
        offset += n_params
def sde(self):
    """
    Build the state space representation of a sum of kernels.

    The state spaces of the parts are stacked block-diagonally (F, L, Qc,
    Pinf and the parameter derivatives) and the observation models are
    concatenated horizontally.
    """
    import scipy.linalg as la

    def _stack_3d(acc, block):
        # Append `block` on the block-diagonal of the 3-D array `acc`
        # (zeros elsewhere), growing all three axes.
        if acc is None:
            return block
        out = np.pad(acc,
                     ((0, block.shape[0]), (0, block.shape[1]), (0, block.shape[2])),
                     'constant', constant_values=0)
        out[-block.shape[0]:, -block.shape[1]:, -block.shape[2]:] = block
        return out

    F = L = Qc = H = Pinf = None
    dF = dQc = dPinf = None
    n = nq = nd = 0  # running state / noise / parameter dimensions

    for part in self.parts:
        (Ft, Lt, Qct, Ht, Pinft, dFt, dQct, dPinft) = part.sde()

        F = Ft if F is None else la.block_diag(F, Ft)
        L = Lt if L is None else la.block_diag(L, Lt)
        Qc = Qct if Qc is None else la.block_diag(Qc, Qct)
        H = Ht if H is None else np.hstack((H, Ht))
        Pinf = Pinft if Pinf is None else la.block_diag(Pinf, Pinft)

        dF = _stack_3d(dF, dFt)
        dQc = _stack_3d(dQc, dQct)
        dPinf = _stack_3d(dPinf, dPinft)

        n += Ft.shape[0]
        nq += Qct.shape[0]
        nd += dFt.shape[2]

    # Sanity checks on the assembled dimensions.
    assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
    assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
    assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
    assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
    assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
    assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
    assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
    assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"

    return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

View file

@ -305,51 +305,6 @@ class Kern(Parameterized):
def _check_active_dims(self, X):
assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
def sde(self):
    """
    Assemble the state space representation of a combination kernel by
    stacking the representations of its parts block-diagonally.

    First pass: query every part for its state dimension, noise dimension
    and number of parameters.  Second pass: write each part's matrices
    into the corresponding diagonal blocks of the pre-allocated arrays.
    """
    # Pass 1: total sizes.
    n = nq = nd = 0
    for part in self.parts:
        (F_p, L_p, Qc_p, H_p, Pinf_p, dF_p, dQc_p, dPinf_p) = part.sde()
        n += F_p.shape[0]
        nq += Qc_p.shape[0]
        nd += dF_p.shape[2]

    # Pre-allocate the combined (block-diagonal) matrices.
    F = np.zeros((n, n))
    L = np.zeros((n, nq))
    Qc = np.zeros((nq, nq))
    H = np.zeros((1, n))
    Pinf = np.zeros((n, n))
    dF = np.zeros((n, n, nd))
    dQc = np.zeros((nq, nq, nd))
    dPinf = np.zeros((n, n, nd))

    # Pass 2: fill the diagonal blocks, tracking running offsets
    # i (state), j (noise), k (parameter).
    i = j = k = 0
    for part in self.parts:
        (Ft, Lt, Qct, Ht, Pinft, dFt, dQct, dPinft) = part.sde()
        F[i:i+Ft.shape[0], i:i+Ft.shape[1]] = Ft
        L[i:i+Lt.shape[0], j:j+Lt.shape[1]] = Lt
        Qc[j:j+Qct.shape[0], j:j+Qct.shape[1]] = Qct
        H[0, i:i+Ht.shape[1]] = Ht
        Pinf[i:i+Pinft.shape[0], i:i+Pinft.shape[1]] = Pinft
        dF[i:i+Ft.shape[0], i:i+Ft.shape[1], k:k+dFt.shape[2]] = dFt
        dQc[j:j+Qct.shape[0], j:j+Qct.shape[1], k:k+dQct.shape[2]] = dQct
        dPinf[i:i+Pinft.shape[0], i:i+Pinft.shape[1], k:k+dPinft.shape[2]] = dPinft
        i += Ft.shape[0]
        j += Qct.shape[0]
        k += dFt.shape[2]

    return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
class CombinationKernel(Kern):
"""
Abstract super class for combination kernels.

View file

@ -105,3 +105,110 @@ class Prod(CombinationKernel):
return i_s
else:
return super(Prod, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
    """
    Distribute the flat gradient vector over the non-fixed sub-kernels,
    in the order the parameters are represented in the kernel.
    """
    pos = 0
    for kern_part in self.parts:
        if not kern_part.is_fixed:
            num = len(kern_part.param_array)  # parameter count of this part
            kern_part.sde_update_gradient_full(gradients[pos:pos + num])
            pos = pos + num
def sde(self):
    """
    Build the state space representation of a product of kernels via
    Kronecker products (and a Kronecker sum for the feedback matrix)
    of the parts' representations.
    """
    # Neutral starting elements so the first part's matrices pass through
    # the Kronecker operations unchanged (F starts at the 1x1 zero matrix
    # because the feedback matrices combine by Kronecker SUM).
    F = np.array((0,), ndmin=2)
    L = np.array((1,), ndmin=2)
    Qc = np.array((1,), ndmin=2)
    H = np.array((1,), ndmin=2)
    Pinf = np.array((1,), ndmin=2)
    dF = None
    dQc = None
    dPinf = None
    # Assign models
    for p in self.parts:
        (Ft,Lt,Qct,Ht,P_inft,dFt,dQct,dP_inft) = p.sde()
        # check derivative dimensions ->
        number_of_parameters = len(p.param_array)
        assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
        assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
        assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
        # check derivative dimensions <-
        # exception for periodic kernel
        if (p.name == 'std_periodic'):
            # NOTE(review): the periodic kernel's diffusion matrix is
            # substituted by its stationary covariance — presumably because
            # its sde() returns Qc = 0; confirm against sde_standard_periodic.
            Qct = P_inft
            dQct = dP_inft
        # The derivatives must be combined BEFORE F, Qc and Pinf are
        # updated: dkron needs the matrices accumulated from the
        # previous parts, not the ones including the current part.
        dF = dkron(F,dF,Ft,dFt,'sum')
        dQc = dkron(Qc,dQc,Qct,dQct,'prod')
        dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
        # Kronecker sum for the feedback matrix; plain Kronecker products
        # for the noise, diffusion, stationary covariance and observation.
        F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
        L = np.kron(L,Lt)
        Qc = np.kron(Qc,Qct)
        Pinf = np.kron(Pinf,P_inft)
        H = np.kron(H,Ht)
    return (F,L,Qc,H,Pinf,dF,dQc,dPinf)
def dkron(A, dA, B, dB, operation='prod'):
    """
    Derivative of the Kronecker product A (x) B (operation='prod') or of
    the Kronecker sum of A and B (operation='sum').

    Parameters
    ----------
    A : 2D array
    dA : 3D array or None
        Derivatives of A stacked along the last axis; None means A has
        no parameters and contributes nothing.
    B : 2D array
    dB : 3D array or None
        Derivatives of B, same convention as dA.
    operation : str, 'prod' or 'sum'
        Which combination is differentiated.  For 'sum' both A and B are
        assumed square.

    Returns
    -------
    dC : 3D array
        Derivatives of the combined matrix: A's parameters first, then
        B's parameters, along the last axis.
    """
    na = 0 if dA is None else dA.shape[2]
    nb = 0 if dB is None else dB.shape[2]

    # Space allocation for the derivative matrix.
    dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], na + nb))

    eye_b = np.eye(B.shape[0])
    for k in range(na):
        dC[:, :, k] = (np.kron(dA[:, :, k], B) if operation == 'prod'
                       else np.kron(dA[:, :, k], eye_b))

    eye_a = np.eye(A.shape[0])
    for k in range(nb):
        dC[:, :, na + k] = (np.kron(A, dB[:, :, k]) if operation == 'prod'
                            else np.kron(eye_a, dB[:, :, k]))

    return dC