mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-03 08:42:39 +02:00
Added kernels for GpGrid and GpSsm regression
This commit is contained in:
parent
9a2670b98e
commit
3d346cbdd6
9 changed files with 641 additions and 1 deletions
|
|
@ -0,0 +1,113 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
# Kurt Cutajar
|
||||
|
||||
# This implementation of converting GPs to state space models is based on the article:
|
||||
|
||||
#@article{Gilboa:2015,
|
||||
# title={Scaling multidimensional inference for structured Gaussian processes},
|
||||
# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
|
||||
# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
|
||||
# volume={37},
|
||||
# number={2},
|
||||
# pages={424--436},
|
||||
# year={2015},
|
||||
# publisher={IEEE}
|
||||
#}
|
||||
|
||||
from grid_posterior import GridPosterior
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class GaussianGridInference(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian and the inputs
    lie on a Cartesian grid.

    Exploits the Kronecker structure of the covariance matrix so that the
    per-dimension covariances are eigendecomposed instead of the full N x N
    matrix. The function self.inference returns a GridPosterior object,
    which summarizes the posterior.

    Based on Gilboa, Saatci & Cunningham (2015), "Scaling multidimensional
    inference for structured Gaussian processes", IEEE TPAMI 37(2):424-436.
    """

    def __init__(self):
        pass

    def kron_mvprod(self, A, b):
        """
        Compute (A[0] kron A[1] kron ... kron A[D-1]) . b without ever
        forming the full Kronecker product.

        :param A: sequence of D square matrices
        :param b: column vector of length prod_d len(A[d])
        :returns: the product as an (N, 1) column vector
        """
        x = b
        D = len(A)
        # Factor sizes, kept as ints so the reshape dimensions below are
        # valid (the original used a float array, which breaks np.reshape).
        G = np.array([len(A[d]) for d in range(D)], dtype=int)
        N = int(np.prod(G))
        for d in range(D - 1, -1, -1):
            X = np.reshape(x, (G[d], N // G[d]), order='F')
            Z = np.dot(A[d], X).T
            x = np.reshape(Z, (-1, 1), order='F')
        return x

    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        """
        Returns a GridPosterior class containing essential quantities of the
        posterior, the log marginal likelihood, and a dict of gradients of
        the log likelihood w.r.t. the lengthscales ('dL_dLen'), the kernel
        variance ('dL_dVar') and the likelihood noise ('dL_dthetaL').
        """
        N = X.shape[0]  # number of training points
        D = X.shape[1]  # number of dimensions

        Kds = np.zeros(D, dtype=object)  # per-dimension covariance matrices
        Qs = np.zeros(D, dtype=object)   # per-dimension eigenvectors
        QTs = np.zeros(D, dtype=object)  # per-dimension transposed eigenvectors
        V_kron = 1  # kronecker product of eigenvalues

        # Retrieve the one-dimensional variation of the designated kernel.
        # NOTE: the original called kern.getOneDimensionalKernel, which does
        # not exist -- this commit defines get_one_dimensional_kernel on
        # Stationary/RBF, so that is the name used here.
        oneDkernel = kern.get_one_dimensional_kernel(D)

        for d in range(D):
            xg = list(set(X[:, d]))  # unique values along dimension d
            xg = np.reshape(xg, (len(xg), 1))
            oneDkernel.lengthscale = kern.lengthscale[d]
            Kds[d] = oneDkernel.K(xg)
            V, Q = np.linalg.eig(Kds[d])
            V_kron = np.kron(V_kron, V)
            Qs[d] = Q
            QTs[d] = Q.T

        noise = likelihood.variance + 1e-8  # jitter for numerical stability

        # alpha = Q (V + noise)^-1 Q^T y, computed via Kronecker mat-vec products
        alpha_kron = self.kron_mvprod(QTs, Y)
        V_kron = V_kron.reshape(-1, 1)
        alpha_kron = alpha_kron / (V_kron + noise)
        alpha_kron = self.kron_mvprod(Qs, alpha_kron)

        log_likelihood = -0.5 * (np.dot(Y.T, alpha_kron)
                                 + np.sum(np.log(V_kron + noise))
                                 + N * log_2_pi)

        # Compute derivatives w.r.t. the parameters theta:
        # D lengthscales, then the variance, then the noise.
        derivs = np.zeros(D + 2, dtype=object)
        for t in range(len(derivs)):
            dKd_dTheta = np.zeros(D, dtype=object)
            gamma = np.zeros(D, dtype=object)
            gam = 1
            for d in range(D):
                xg = list(set(X[:, d]))
                xg = np.reshape(xg, (len(xg), 1))
                oneDkernel.lengthscale = kern.lengthscale[d]
                if t < D:
                    # derivative w.r.t. the t-th lengthscale
                    dKd_dTheta[d] = oneDkernel.dKd_dLen(xg, (t == d), lengthscale=kern.lengthscale[t])
                elif t == D:
                    dKd_dTheta[d] = oneDkernel.dKd_dVar(xg)  # derivative wrt variance
                else:
                    dKd_dTheta[d] = np.identity(len(xg))  # derivative wrt noise
                gamma[d] = np.diag(np.dot(np.dot(QTs[d], dKd_dTheta[d].T), Qs[d]))
                gam = np.kron(gam, gamma[d])

            gam = gam.reshape(-1, 1)
            kappa = self.kron_mvprod(dKd_dTheta, alpha_kron)
            derivs[t] = 0.5 * np.dot(alpha_kron.T, kappa) - 0.5 * np.sum(gam / (V_kron + noise))

        # separate derivatives
        dL_dLen = derivs[:D]
        dL_dVar = derivs[D]
        dL_dThetaL = derivs[D + 1]

        return (GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron),
                log_likelihood,
                {'dL_dLen': dL_dLen, 'dL_dVar': dL_dVar, 'dL_dthetaL': dL_dThetaL})
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
# Kurt Cutajar
|
||||
|
||||
# This implementation of converting GPs to state space models is based on the article:
|
||||
|
||||
#@article{Gilboa:2015,
|
||||
# title={Scaling multidimensional inference for structured Gaussian processes},
|
||||
# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
|
||||
# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
|
||||
# volume={37},
|
||||
# number={2},
|
||||
# pages={424--436},
|
||||
# year={2015},
|
||||
# publisher={IEEE}
|
||||
#}
|
||||
|
||||
from ssm_posterior import SsmPosterior
|
||||
from ...util.linalg import pdinv, dpotrs, tdot
|
||||
from ...util import diag
|
||||
import numpy as np
|
||||
import math as mt
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
|
||||
class GaussianSSMInference(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian.

    Inference is performed by converting the GP into an equivalent state
    space model and running a Kalman filter forward pass followed by a
    backwards (RTS-style) smoothing pass -- see Gilboa et al. (2015),
    cited in the file header.

    The function self.inference returns a Posterior object, which summarizes
    the posterior.

    """
    def __init__(self):
        pass#self._YYTfactor_cache = caching.cache()

    def get_YYTfactor(self, Y):
        """
        find a matrix L which satisfies LL^T = YY^T.

        Note that L may have fewer columns than Y, else L=Y.
        """
        # NOTE(review): both branches currently return Y unchanged -- the
        # N<=D factoring/caching path is a placeholder, not yet implemented.
        N, D = Y.shape
        if (N>D):
            return Y
        else:
            #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
            #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
            return Y

    def inference(self, kern, X, likelihood, Y, Y_metadata=None):

        """
        Returns a Posterior class containing essential quantities of the posterior

        :param kern: a state-space kernel exposing order, Phi_of_r and Q_of_r
            (e.g. Matern32_SSM)
        :param X: input locations, assumed sorted along the first axis so
            that X[t] - X[t-1] is a valid (positive) time step -- TODO confirm
        :param likelihood: Gaussian likelihood supplying the noise variance
        :param Y: observed outputs, one per row of X
        :returns: (SsmPosterior, log marginal likelihood)
        """
        order = kern.order           # dimension of the latent state vector
        K = X.shape[0]               # number of observations
        log_likelihood = 0
        # Per-step filter quantities: columns are [mu, V, Phi, P].
        results = np.zeros((K,4),dtype=object)
        H = np.zeros((1,order))      # observation model: picks first state component
        H[0][0] = 1
        # Negative argument is a sentinel: Phi_of_r(-1) returns the
        # stationary state covariance, used as the prior covariance v_0.
        v_0 = kern.Phi_of_r(-1)
        mu_0 = np.zeros((order, 1))  # prior state mean
        noise_var = likelihood.variance + 1e-8  # jitter for numerical stability

        # carry out forward filtering
        for t in range(K):
            if (t == 0):
                # First step: predict from the prior (no transition yet).
                prior_m = np.dot(H,mu_0)
                prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var

                # Gaussian log density of the first observation under the prior.
                # NOTE(review): mt.log is applied to a 1x1 array here.
                log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v)

                kalman_gain = np.dot(v_0, H.T) / prior_v
                mu = mu_0 + kalman_gain*(Y[0] - prior_m)


                V = np.dot(np.eye(order) - np.dot(kalman_gain,H), v_0)
                results[0][0] = mu
                results[0][1] = V
            else:
                # Transition and process covariance for the step t-1 -> t.
                delta = X[t] - X[t-1]
                Q = kern.Q_of_r(delta)
                Phi = kern.Phi_of_r(delta)
                # Predict: propagate the previous filtered state through Phi.
                P = np.dot(np.dot(Phi, V), Phi.T) + Q
                PhiMu = np.dot(Phi, mu)
                prior_m = np.dot(H, PhiMu)
                prior_v = np.dot(np.dot(H, P), H.T) + noise_var

                # Accumulate the one-step predictive log density.
                log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v)
                log_likelihood += log_likelihood_i

                # Update: standard Kalman correction with gain K_t.
                kalman_gain = np.dot(P, H.T)/prior_v
                mu = PhiMu + kalman_gain*(Y[t] - prior_m)
                V = np.dot((np.eye(order) - np.dot(kalman_gain, H)), P)

                # Phi and P describe the transition INTO step t, so they are
                # stored alongside step t-1 for use by the smoother.
                results[t-1][2] = Phi
                results[t-1][3] = P
                results[t][0] = mu
                results[t][1] = V

        # carry out backwards smoothing
        # NOTE(review): uses kalman_gain/Phi from the last filter iteration
        # and results[K-2], so at least two observations are assumed.

        W = np.dot((np.eye(order) - np.dot(kalman_gain,H)),(np.dot(Phi,results[K-2][1])))

        # Initialise the smoother with the last filtered estimate.
        mu_s = results[K-1][0]
        V_s = results[K-1][1]

        posterior_mean = np.zeros((K,1))
        posterior_var = np.zeros((K,1))
        # Per-step smoother quantities: columns are [mu_s, V_s, L, W].
        E = np.zeros((K,4), dtype='object')

        posterior_mean[K-1] = np.dot(H, mu_s)
        posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T)
        E[K-1][0] = mu_s
        E[K-1][1] = V_s

        # Backwards recursion over all earlier steps.
        for t in range(K-2, -1, -1):
            mu = results[t][0]
            V = results[t][1]
            Phi = results[t][2]
            P = results[t][3]

            # Smoother gain L = V Phi^T P^-1 (P^-1 via solving P X = I).
            L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order))) # forward substitution
            mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
            V_s = V + np.dot(np.dot(L, V_s - P), L.T)
            posterior_mean[t] = np.dot(H, mu_s)
            posterior_var[t] = np.dot(np.dot(H, V_s), H.T)

            # Lag-one covariance recursion (skipped on the first smoother
            # step, where W was initialised above).
            if (t < K-2):
                W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T))

            E[t][0] = mu_s
            E[t][1] = V_s
            E[t][2] = L
            E[t][3] = W

        return SsmPosterior(mu_f=results[:,0], V_f=results[:,1], mu_s=E[:,0], V_s=E[:,1], expectations=E), log_likelihood
|
||||
62
GPy/inference/latent_function_inference/grid_posterior.py
Normal file
62
GPy/inference/latent_function_inference/grid_posterior.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
# Kurt Cutajar
|
||||
|
||||
import numpy as np
|
||||
|
||||
class GridPosterior(object):
    """
    Specially intended for the Grid Regression case.
    An object to represent a Gaussian posterior over latent function values,
    p(f|D).

    The purpose of this class is to serve as an interface between the
    inference schemes and the model classes.
    """

    def __init__(self, alpha_kron=None, QTs=None, Qs=None, V_kron=None):
        """
        alpha_kron : the weight vector alpha = (K + noise*I)^-1 y, computed
            via the Kronecker eigendecomposition (see GaussianGridInference)
        QTs : transposed eigenvectors resulting from decomposition of the
            single dimension covariance matrices
        Qs : eigenvectors resulting from decomposition of the single
            dimension covariance matrices
        V_kron : kronecker product of eigenvalues resulting from
            decomposition of the single dimension covariance matrices

        :raises ValueError: if any of the four quantities is missing
        """
        # Guard clause: all four quantities are required for prediction
        # (replaces the original `if ...: pass / else: raise` construction).
        if (alpha_kron is None or QTs is None
                or Qs is None or V_kron is None):
            raise ValueError("insufficient information for predictions")

        self._alpha_kron = alpha_kron
        self._qTs = QTs
        self._qs = Qs
        self._v_kron = V_kron

    @property
    def alpha(self):
        """
        Weight vector alpha = (K + noise*I)^-1 y used for prediction.
        """
        return self._alpha_kron

    @property
    def QTs(self):
        """
        array of transposed eigenvectors resulting for single dimension covariance
        """
        return self._qTs

    @property
    def Qs(self):
        """
        array of eigenvectors resulting for single dimension covariance
        """
        return self._qs

    @property
    def V_kron(self):
        """
        kronecker product of eigenvalues of the single dimension covariances
        """
        return self._v_kron
|
||||
|
||||
73
GPy/inference/latent_function_inference/ssm_posterior.py
Normal file
73
GPy/inference/latent_function_inference/ssm_posterior.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
# Kurt Cutajar
|
||||
|
||||
import numpy as np
|
||||
|
||||
class SsmPosterior(object):
    """
    Specially intended for the SSM Regression case.
    An object to represent a Gaussian posterior over latent function values,
    p(f|D).

    The purpose of this class is to serve as an interface between the
    inference schemes and the model classes.
    """

    def __init__(self, mu_f=None, V_f=None, mu_s=None, V_s=None, expectations=None):
        """
        mu_f : mean values predicted during the kalman filtering step
        V_f : variances predicted during the kalman filtering step
        mu_s : mean values predicted during the backwards smoothing step
        V_s : variances predicted during the backwards smoothing step
        expectations : posterior expectations

        (The original docstring named the variance parameters var_f/var_s,
        which did not match the actual parameter names V_f/V_s.)

        :raises ValueError: if any of the five quantities is missing
        """
        # Guard clause: all five quantities are required to compute
        # predictions (replaces `if ...: pass / else: raise`).
        if (mu_f is None or V_f is None or mu_s is None
                or V_s is None or expectations is None):
            raise ValueError("insufficient information to compute predictions")

        self._mu_f = mu_f
        self._V_f = V_f
        self._mu_s = mu_s
        self._V_s = V_s
        self._expectations = expectations

    @property
    def mu_f(self):
        """
        Mean values predicted during the kalman filtering step
        """
        return self._mu_f

    @property
    def V_f(self):
        """
        Variances predicted during the kalman filtering step
        """
        return self._V_f

    @property
    def mu_s(self):
        """
        Mean values predicted during the backwards smoothing step
        """
        return self._mu_s

    @property
    def V_s(self):
        """
        Variances predicted during the backwards smoothing step
        """
        return self._V_s

    @property
    def expectations(self):
        """
        Posterior expectations
        """
        return self._expectations
|
||||
|
||||
|
|
@ -29,7 +29,6 @@ from .src.splitKern import SplitKern,DEtime
|
|||
from .src.splitKern import DEtime as DiffGenomeKern
|
||||
from .src.spline import Spline
|
||||
from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
|
||||
|
||||
from .src.sde_matern import sde_Matern32
|
||||
from .src.sde_matern import sde_Matern52
|
||||
from .src.sde_linear import sde_Linear
|
||||
|
|
@ -37,3 +36,5 @@ from .src.sde_standard_periodic import sde_StdPeriodic
|
|||
from .src.sde_static import sde_White, sde_Bias
|
||||
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
|
||||
from .src.sde_brownian import sde_Brownian
|
||||
from .src.ssmKerns import Matern32_SSM
|
||||
from .src.gridKerns import GridRBF
|
||||
|
|
|
|||
76
GPy/kern/src/gridKerns.py
Normal file
76
GPy/kern/src/gridKerns.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
# Kurt Cutajar
|
||||
|
||||
import numpy as np
|
||||
from stationary import Stationary
|
||||
from ...util.config import *
|
||||
from ...util.caching import Cache_this
|
||||
|
||||
class GridKern(Stationary):
    """
    Base class for stationary kernels augmented with the derivative helpers
    required by Gaussian grid regression. Subclasses implement
    dKdVar_of_r and dKdLen_of_r.
    """

    def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=False):
        super(GridKern, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
        # Number of dimensions of the full kernel this one-dimensional
        # kernel was derived from.
        self.originalDimensions = originalDimensions

    @Cache_this(limit=3, ignore_args=())
    def dKd_dVar(self, X, X2=None):
        """
        Derivative of the kernel wrt variance, applied on inputs X and X2.
        As in the stationary case, the result depends only on the scaled
        distances r from X to X2:

        dKd_dVar(X, X2) = dKdVar_of_r((X-X2)**2)
        """
        return self.dKdVar_of_r(self._scaled_dist(X, X2))

    @Cache_this(limit=3, ignore_args=())
    def dKd_dLen(self, X, dimension, lengthscale, X2=None):
        """
        Derivative of the kernel wrt a lengthscale, applied on inputs X and
        X2. As in the stationary case, the result depends only on the scaled
        distances r from X to X2:

        dKd_dLen(X, X2) = dKdLen_of_r((X-X2)**2)
        """
        return self.dKdLen_of_r(self._scaled_dist(X, X2), dimension, lengthscale)
|
||||
|
||||
class GridRBF(GridKern):
    """
    Similar to the regular RBF kernel but supplemented with the methods
    required for Gaussian grid regression.
    Radial Basis Function kernel, aka squared-exponential, exponentiated
    quadratic or Gaussian kernel:

    .. math::

       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)

    """
    _support_GPU = True

    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='gridRBF', originalDimensions=1, useGPU=False):
        super(GridRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=useGPU)

    def _dim_scaled_variance(self):
        """
        variance**(1/originalDimensions): the per-dimension share of the
        variance, so that the product over all originalDimensions
        one-dimensional factors recovers the full kernel variance.
        """
        return self.variance ** (1.0 / self.originalDimensions)

    def K_of_r(self, r):
        """Covariance as a function of the scaled distance r."""
        return self._dim_scaled_variance() * np.exp(-0.5 * r ** 2)

    def dKdVar_of_r(self, r):
        """
        Compute derivative of kernel wrt variance
        """
        return np.exp(-0.5 * r ** 2)

    def dKdLen_of_r(self, r, dimCheck, lengthscale):
        """
        Compute derivative of kernel for dimension wrt lengthscale.
        The derivative gains an extra r**2 factor when the lengthscale
        corresponds to the dimension of the kernel being differentiated
        (dimCheck is True).
        """
        # Common factor, hoisted out of the (originally duplicated) branches.
        base = (self._dim_scaled_variance() * np.exp(-0.5 * r ** 2)
                / (lengthscale ** (1.0 / self.originalDimensions)))
        if dimCheck:  # was `dimCheck == True` -- non-idiomatic comparison
            return base * r ** 2
        return base

    def dK_dr(self, r):
        """Derivative of the kernel wrt the scaled distance r."""
        return -r * self.K_of_r(r)
|
||||
|
|
@ -58,6 +58,14 @@ class RBF(Stationary):
|
|||
if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
|
||||
super(RBF,self).parameters_changed()
|
||||
|
||||
|
||||
def get_one_dimensional_kernel(self, dim):
|
||||
"""
|
||||
Specially intended for Grid regression.
|
||||
"""
|
||||
oneDkernel = GridRBF(input_dim=1, variance=self.variance.copy(), originalDimensions=dim)
|
||||
return oneDkernel
|
||||
|
||||
#---------------------------------------#
|
||||
# PSI statistics #
|
||||
#---------------------------------------#
|
||||
|
|
|
|||
148
GPy/kern/src/ssm_kerns.py
Normal file
148
GPy/kern/src/ssm_kerns.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
#
|
||||
# Kurt Cutajar
|
||||
|
||||
from kern import Kern
|
||||
from ... import util
|
||||
import numpy as np
|
||||
from scipy import integrate, weave
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
class SsmKerns(Kern):
    """
    Kernels suitable for GP SSM regression with one-dimensional inputs.

    Subclasses must provide the covariance function and its state space
    (SDE) representation: transition function Phi, process covariance Q,
    noise, and their derivatives.
    """

    def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False):
        super(SsmKerns, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
        # NOTE(review): lengthscale is stored as a plain positive array, not
        # a Param -- subclasses derive their own parameters from it (e.g.
        # Matern32_SSM derives lambda).
        self.lengthscale = np.abs(lengthscale)
        self.variance = Param('variance', variance, Logexp())

    def K_of_r(self, r):
        """Covariance as a function of distance r (abstract)."""
        raise NotImplementedError("implement the covariance function as a fn of r to use this class")

    def dK_dr(self, r):
        """Derivative of the covariance wrt r (abstract)."""
        raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")

    def Q_of_r(self, r):
        """Process covariance of the SDE for a step of size r (abstract)."""
        raise NotImplementedError("implement covariance function of SDE wrt r to use this class")

    def Phi_of_r(self, r):
        """Transition function of the SDE for a step of size r (abstract)."""
        raise NotImplementedError("implement transition function of SDE wrt r to use this class")

    def noise(self):
        """Process noise of the SDE (abstract)."""
        raise NotImplementedError("implement noise function of SDE to use this class")

    def dPhidLam(self, r):
        """Derivative of the transition function wrt lambda (abstract)."""
        raise NotImplementedError("implement derivative of the transition function wrt to lambda to use this class")

    def dQ(self, r):
        """Derivatives of Q wrt lambda and variance (abstract)."""
        # typo fixed in the message: "detivatives" -> "derivatives"
        raise NotImplementedError("implement derivatives of Q wrt to lambda and variance to use this class")
|
||||
|
||||
|
||||
class Matern32_SSM(SsmKerns):
    """
    Matern 3/2 kernel:

    .. math::

       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }

    Provides the state space (SDE) representation of the kernel -- a state
    of order 2 -- for use with GaussianSSMInference.
    """

    def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'):
        super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name)
        # lambda = sqrt(3)/lengthscale (sqrt(2*nu) with nu = 1.5)
        lambd = np.sqrt(2*1.5)/lengthscale # additional parameter of model (derived from lengthscale)
        self.lam = Param('lambda', lambd, Logexp())
        self.link_parameters(self.lam, self.variance)
        # dimension of the latent state in the SSM representation
        self.order = 2

    def noise(self):
        """
        Compute noise for the kernel.

        Computed in log space for numerical stability; presumably the
        spectral density q of the Matern process with p = 1 (i.e. nu = 3/2)
        -- TODO confirm against the state-space Matern literature.
        """
        p = 1
        # log(p!) and log((2p)!) via sums of logs
        lp_fact = np.sum(np.log(range(1,p+1)))
        l2p_fact = np.sum(np.log(range(1,2*p +1)))
        logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact
        q = np.exp(logq)
        return q

    def K_of_r(self, r):
        # Recover the lengthscale from lambda = sqrt(3)/lengthscale and
        # rescale r before applying the standard Matern 3/2 form.
        lengthscale = np.sqrt(3.) / self.lam
        r = r / lengthscale
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

    def dK_dr(self,r):
        # Derivative of the Matern 3/2 covariance wrt the scaled distance r.
        return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)

    def Q_of_r(self, r):
        """
        Compute process variance (Q) of the SDE for a step of size r,
        scaled by the process noise q.
        """
        q = self.noise()
        Q = np.zeros((2,2))
        Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2)
            + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam))
        Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam))
        # Q is symmetric
        Q[1][0] = Q[0][1]
        Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2)
            - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam))
        return q*Q

    def Phi_of_r(self, r):
        """
        Compute transition function (Phi) for a step of size r.

        A negative r is a sentinel: GaussianSSMInference calls Phi_of_r(-1)
        to obtain the stationary state covariance used as the prior v_0.
        """
        if r < 0:
            # stationary state covariance (prior), not a transition
            phi = np.zeros((2,2))
            phi[0][0] = self.variance
            phi[0][1] = 0
            phi[1][0] = 0
            phi[1][1] = np.power(self.lam,2)*self.variance
            return phi
        else:
            mult = np.exp(-self.lam*r)
            phi = np.zeros((2, 2))
            phi[0][0] = (1 + self.lam*r)*mult
            phi[0][1] = mult*r
            phi[1][0] = -r*mult*self.lam**2
            phi[1][1] = mult*(1 - self.lam*r)
            return phi

    def dPhidLam(self, r):
        """
        Compute derivative of transition function (Phi) wrt lambda.
        """
        # division by mult == multiplication by exp(-lam*r)
        mult = np.exp(self.lam*r)
        dPhi = np.zeros((2, 2))
        dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult
        dPhi[0][1] = (-1 * (r**2)) / mult
        dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult
        dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult
        return dPhi

    def dQ(self, r):
        """
        Compute derivatives of Q with respect to lambda and variance.

        r == -1 appears to be the same sentinel convention as Phi_of_r,
        requesting derivatives of the stationary covariance -- TODO confirm.
        """
        if (r == -1):
            dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]])
            dQdVar = None
        else:
            q = self.noise()
            Q = self.Q_of_r(r)
            dQdLam = np.zeros((2, 2))
            mult = np.exp(2*self.lam*r)
            dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult)
            dQdLam[0][1] = -(r**3)/mult
            # symmetric off-diagonal terms
            dQdLam[1][0] = dQdLam[0][1]
            dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult)
            # derivative of the noise scaling q wrt lambda: dq = (2p+1)q/lam
            dq = (q*(2+1))/self.lam
            # product rule: q * d(Q/q)/dLam + dq * (Q/q)
            dQdLam = q*dQdLam + dq*(Q/q)
            dQdVar = Q / self.variance
        return [dQdLam, dQdVar]
|
||||
|
|
@ -189,6 +189,16 @@ class Stationary(Kern):
|
|||
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
||||
|
||||
|
||||
def update_gradients_direct(self, dL_dVar, dL_dLen):
|
||||
"""
|
||||
Specially intended for the Grid regression case.
|
||||
Given the computed log likelihood derivates, update the corresponding
|
||||
kernel and likelihood gradients.
|
||||
Useful for when gradients have been computed a priori.
|
||||
"""
|
||||
self.variance.gradient = dL_dVar
|
||||
self.lengthscale.gradient = dL_dLen
|
||||
|
||||
def _inv_dist(self, X, X2=None):
|
||||
"""
|
||||
Compute the elementwise inverse of the distance matrix, expecpt on the
|
||||
|
|
@ -307,6 +317,15 @@ class Stationary(Kern):
|
|||
def input_sensitivity(self, summarize=True):
|
||||
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
|
||||
|
||||
def get_one_dimensional_kernel(self, dimensions):
|
||||
"""
|
||||
Specially intended for the grid regression case
|
||||
For a given covariance kernel, this method returns the corresponding kernel for
|
||||
a single dimension. The resulting values can then be used in the algorithm for
|
||||
reconstructing the full covariance matrix.
|
||||
"""
|
||||
raise NotImplementedError("implement one dimensional variation of kernel")
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue