Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-04 17:22:39 +02:00

Removed SSM functionality - updated Kronecker grid case

This commit is contained in: parent 08fa69fdd1, commit 8e63472237

18 changed files with 135 additions and 623 deletions
@@ -3,12 +3,11 @@
from GPy.core.model import Model
from .parameterization import Param, Parameterized
from . import parameterization
#from . import parameterization
from .gp import GP
from .svgp import SVGP
from .sparse_gp import SparseGP
from .gp_ssm import GpSSM
from .gp_grid import GpGrid
from .mapping import *
@@ -1,253 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

# This implementation of converting GPs to state space models is based on the article:

#@article{Gilboa:2015,
#   title={Scaling multidimensional inference for structured Gaussian processes},
#   author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
#   journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
#   volume={37},
#   number={2},
#   pages={424--436},
#   year={2015},
#   publisher={IEEE}
#}

import numpy as np
import scipy.linalg as sp
from gp import GP
from parameterization.param import Param
from ..inference.latent_function_inference import gaussian_ssm_inference
from .. import likelihoods
from ..inference import optimization
from parameterization.transformations import Logexp
from GPy.inference.latent_function_inference.posterior import Posterior


class GpSSM(GP):
    """
    A GP model for sorted one-dimensional inputs

    This model allows the representation of a Gaussian process as a Gauss-Markov state space model.

    :param X: inputs
    :type X: np.ndarray (num_data x input_dim)
    :param likelihood: a likelihood instance, containing the observed data
    :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
    :param kernel: the kernel (covariance function). See link kernels
    :type kernel: a GPy.kern.kern instance

    """

    def __init__(self, X, Y, kernel, likelihood, inference_method=None,
                 name='gp ssm', Y_metadata=None, normalizer=False):
        # pick a sensible inference method
        inference_method = gaussian_ssm_inference.GaussianSSMInference()

        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)

        self.posterior = None

    def optimize(self, optimizer=None, start=None, **kwargs):
        prevLikelihood = 0
        count = 0
        change = 1
        while ((change > 0.001) and (count < 50)):
            posterior, likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata)
            expectations = posterior.expectations
            self.optimize_params(expectations=expectations)
            change = np.abs(likelihood - prevLikelihood)
            prevLikelihood = likelihood
            count = count + 1

    def optimize_params(self, optimizer=None, start=None, expectations=None, **kwargs):
        if self.is_fixed:
            print("nothing to optimize")
        if self.size == 0:
            print("nothing to optimize")

        if not self.update_model():
            print("Updates were off, setting updates on again")
            self.update_model(True)

        if start == None:
            start = self.optimizer_array

        if optimizer is None:
            optimizer = self.preferred_optimizer

        if isinstance(optimizer, optimization.Optimizer):
            opt = optimizer
            opt.model = self
        else:
            optimizer = optimization.get_optimizer(optimizer)
            opt = optimizer(start, model=self, **kwargs)

        opt.max_iters = 1

        opt.run(f_fp=self.param_maximisation_step, args=(self.X, self.Y, expectations))
        self.optimization_runs.append(opt)
        self.optimizer_array = opt.x_opt

    def param_maximisation_step(self, loghyper, X, Y, E, *args):

        loghyper = np.log(np.exp(loghyper) + 1) - 1e-20
        lam = loghyper[0]
        sig = loghyper[1]
        noise = loghyper[2]

        kern = self.kern
        order = kern.order

        K = len(X)
        mu_0 = np.zeros((order, 1))
        v_0 = kern.Phi_of_r(-1)
        dvSig = v_0/sig
        dvLam = kern.dQ(-1)[0]

        mu = E[0][0]
        V = E[0][1]
        E11 = V + np.dot(mu, mu.T)
        V0_inv = np.linalg.solve(v_0, np.eye(len(mu)))
        Ub = np.log(np.linalg.det(v_0)) + np.trace(np.dot(V0_inv, E11))
        dUb_lam = np.trace(np.dot(V0_inv, dvLam.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvLam.T), V0_inv), E11))
        dUb_sig = np.trace(np.dot(V0_inv, dvSig.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvSig.T), V0_inv), E11))

        for t in range(1, K):
            delta = X[t] - X[t-1]
            Q = kern.Q_of_r(delta)
            Phi = kern.Phi_of_r(delta)
            dPhi = kern.dPhidLam(delta)
            [dLam, dSig] = kern.dQ(delta)
            mu_prev = E[t-1][0]
            V_prev = E[t-1][1]
            mu = E[t][0]
            V = E[t][1]
            Ett_prev = V_prev + np.dot(mu_prev, mu_prev.T)
            Eadj = E[t-1][3] + np.dot(mu, mu_prev.T)
            Ett = V + np.dot(mu, mu.T)
            CC = sp.cholesky(Q, lower=True)
            Q_inv = np.linalg.solve(CC.T, np.linalg.solve(CC, np.eye(len(mu))))

            Ub = Ub + np.log(np.linalg.det(Q)) + np.trace(np.dot(Q_inv, Ett)) - 2*np.trace(np.dot(np.dot(Phi.T, Q_inv), Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), Phi), Ett_prev))
            A = np.dot(dPhi.T, Q_inv) - np.dot(Phi.T, np.dot(np.dot(Q_inv, dLam), Q_inv))
            dUb_lam = dUb_lam + np.trace(np.dot(Q_inv, dLam.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dLam.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), dPhi) + np.dot(A, Phi), Ett_prev))
            A = -1 * np.dot(Phi.T, np.dot(np.dot(Q_inv, dSig), Q_inv))
            dUb_sig = dUb_sig + np.trace(np.dot(Q_inv, dSig.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dSig.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(A, Phi), Ett_prev))

        dUb_noise = 0
        for t in xrange(K):
            mu = E[t][0]
            V = E[t][1]
            Ett = V + np.dot(mu, mu.T)
            Ub = Ub + np.log(noise) + (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/noise
            dUb_noise = dUb_noise + 1/noise - (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/(noise**2)

        dUb = np.array([lam, sig, noise]) * np.array([dUb_lam.item(), dUb_sig.item(), dUb_noise.item()])

        return Ub.item(), dUb

    def parameters_changed(self):
        """
        Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
        In particular, in the GP class this method re-performs inference, recalculating the posterior and log marginal likelihood.

        .. warning::
            This method is not designed to be called manually: the framework is set up to call it automatically upon changes to parameters. If you call
            this method yourself, there may be unexpected consequences.

        We override the method in the parent class since we do not handle updates to the standard gradients.
        """
        self.posterior, self._log_marginal_likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)

    def _raw_predict(self, Xnew, full_cov=False, kern=None):
        """
        Make a prediction for the latent function values

        N.B. It is assumed that input points are sorted
        """
        if kern is None:
            kern = self.kern

        X = self.X
        K = X.shape[0]
        K_new = Xnew.shape[0]
        order = kern.order
        mean_pred = np.zeros(K_new)
        var_pred = np.zeros(K_new)
        H = np.zeros((1, order))
        H[0][0] = 1

        count = 0
        for t in xrange(K_new):
            while ((count < K) and (Xnew[t] > X[count])):
                count += 1

            if (count == 0):
                mu = np.zeros((order, 1))
                V = kern.Phi_of_r(-1)

                delta = np.abs(Xnew[t] - X[count])
                Phi = kern.Phi_of_r(delta)
                Q = kern.Q_of_r(delta)
                P = np.dot(np.dot(Phi, V), Phi.T) + Q

                mu_s = self.posterior.mu_s[count]
                V_s = self.posterior.V_s[count]
                L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P))))
                mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
                V_s = V + np.dot(np.dot(L, V_s - P), L.T)

                mean_pred[t] = np.dot(H, mu_s)
                var_pred[t] = np.dot(np.dot(H, V_s), H.T)

            elif (count == K):
                # forwards phase
                delta = np.abs(Xnew[t] - X[count-1])
                Phi = kern.Phi_of_r(delta)
                Q = kern.Q_of_r(delta)
                mu_f = self.posterior.mu_f[count-1]
                V_f = self.posterior.V_f[count-1]

                mu = np.dot(Phi, mu_f)
                P = np.dot(np.dot(Phi, V_f), Phi.T) + Q
                V = P

                mean_pred[t] = np.dot(H, mu)
                var_pred[t] = np.dot(np.dot(H, V), H.T)

            else:
                # forwards phase
                delta = np.abs(Xnew[t] - X[count-1])
                Phi = kern.Phi_of_r(delta)
                Q = kern.Q_of_r(delta)
                mu_f = self.posterior.mu_f[count-1]
                V_f = self.posterior.V_f[count-1]
                mu = np.dot(Phi, mu_f)
                P = np.dot(np.dot(Phi, V_f), Phi.T) + Q
                V = P

                delta = np.abs(Xnew[t] - X[count])
                Phi = kern.Phi_of_r(delta)
                Q = kern.Q_of_r(delta)
                P = np.dot(np.dot(Phi, V), Phi.T) + Q

                # backwards phase
                mu_s = self.posterior.mu_s[count]
                V_s = self.posterior.V_s[count]

                L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P))))
                mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
                V_s = V + np.dot(np.dot(L, V_s - P), L.T)

                mean_pred[t] = np.dot(H, mu_s)
                var_pred[t] = np.dot(np.dot(H, V_s), H.T)

        mean_pred = mean_pred.reshape(-1, 1)
        var_pred = var_pred.reshape(-1, 1)

        return mean_pred, var_pred
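Note on the removed optimiser: optimize() above alternates an E-step (Kalman smoothing via GaussianSSMInference) with an M-step (optimize_params), and the quantity Ub accumulated in param_maximisation_step appears to be, up to additive constants, the expected complete-data negative log-likelihood of the state space model evaluated at the smoothed moments E. A reconstruction from the code, with E_{t,t} = V_t + \mu_t\mu_t^{\top} and E_{t,t-1} the smoothed cross-moment, is

U_b = \log\lvert V_0\rvert + \operatorname{tr}\!\big(V_0^{-1}E_{1,1}\big)
    + \sum_{t=2}^{K}\Big[\log\lvert Q_t\rvert + \operatorname{tr}\!\big(Q_t^{-1}E_{t,t}\big)
      - 2\,\operatorname{tr}\!\big(\Phi_t^{\top}Q_t^{-1}E_{t,t-1}\big)
      + \operatorname{tr}\!\big(\Phi_t^{\top}Q_t^{-1}\Phi_t E_{t-1,t-1}\big)\Big]
    + \sum_{t=1}^{K}\Big[\log\sigma_n^{2}
      + \frac{y_t^{2} - 2\,y_t\,[\mu_t]_1 + [E_{t,t}]_{1,1}}{\sigma_n^{2}}\Big],

and the returned gradient is the elementwise product of the partial derivatives with the softplus-transformed hyperparameters (\lambda, \sigma^2, \sigma_n^2).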
@@ -69,7 +69,6 @@ from .dtc import DTC
from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch
from .var_gauss import VarGauss
from .gaussian_ssm_inference import GaussianSSMInference
from .gaussian_grid_inference import GaussianGridInference
@@ -61,7 +61,7 @@ class GaussianGridInference(LatentFunctionInference):
        V_kron = 1  # kronecker product of eigenvalues

        # retrieve the one-dimensional variation of the designated kernel
        oneDkernel = kern.getOneDimensionalKernel(D)
        oneDkernel = kern.get_one_dimensional_kernel(D)

        for d in xrange(D):
            xg = list(set(X[:,d]))  # extract unique values for a dimension
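The hunk above touches the Kronecker-structured inference: on a full Cartesian grid with a product kernel, the Gram matrix factorises as K = K_D ⊗ … ⊗ K_1, so its eigendecomposition can be assembled from per-dimension pieces. A minimal, self-contained sketch of that idea follows (the helper name grid_gp_alpha and the toy kernel are illustrative, not GPy API; dense Kronecker products are formed only to keep the example short, whereas the real code never materialises them):

import numpy as np
from functools import reduce

def grid_gp_alpha(grid_axes, y, k1d, noise_var):
    """Sketch of Kronecker-structured GP inference on a full grid
    (following Gilboa et al., 2015): eigendecompose each one-dimensional
    Gram matrix and combine, instead of factorising the full N x N kernel."""
    eigvals, eigvecs = [], []
    for xg in grid_axes:                          # unique grid values along one dimension
        K_d = k1d(np.asarray(xg).reshape(-1, 1))  # one-dimensional Gram matrix
        lam_d, Q_d = np.linalg.eigh(K_d)
        eigvals.append(lam_d)
        eigvecs.append(Q_d)
    V_kron = reduce(np.kron, eigvals)             # eigenvalues of the full kernel
    Q_kron = reduce(np.kron, eigvecs)             # eigenvectors of the full kernel
    # alpha = (K + noise*I)^{-1} y, solved in the Kronecker eigenbasis
    alpha = Q_kron @ ((Q_kron.T @ y.ravel()) / (V_kron + noise_var))
    return alpha.reshape(-1, 1)

# toy usage with a squared-exponential-like 1-D kernel on a 2x2x2 grid
rbf1d = lambda x: np.exp(-0.5 * (x - x.T) ** 2)
axes = [np.array([0., 1.]), np.array([0., 1.]), np.array([0., 1.])]
y = np.random.randn(8, 1)
alpha = grid_gp_alpha(axes, y, rbf1d, noise_var=1.0)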
@@ -1,140 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

# This implementation of converting GPs to state space models is based on the article:

#@article{Gilboa:2015,
#   title={Scaling multidimensional inference for structured Gaussian processes},
#   author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
#   journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
#   volume={37},
#   number={2},
#   pages={424--436},
#   year={2015},
#   publisher={IEEE}
#}

from ssm_posterior import SsmPosterior
from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np
import math as mt
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)


class GaussianSSMInference(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian.

    The function self.inference returns a Posterior object, which summarizes
    the posterior.

    """
    def __init__(self):
        pass  # self._YYTfactor_cache = caching.cache()

    def get_YYTfactor(self, Y):
        """
        find a matrix L which satisfies LL^T = YY^T.

        Note that L may have fewer columns than Y, else L=Y.
        """
        N, D = Y.shape
        if (N > D):
            return Y
        else:
            # if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
            # print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
            return Y

    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        """
        Returns a Posterior class containing essential quantities of the posterior
        """
        order = kern.order
        K = X.shape[0]
        log_likelihood = 0
        results = np.zeros((K, 4), dtype=object)
        H = np.zeros((1, order))
        H[0][0] = 1
        v_0 = kern.Phi_of_r(-1)
        mu_0 = np.zeros((order, 1))
        noise_var = likelihood.variance + 1e-8

        # carry out forward filtering
        for t in range(K):
            if (t == 0):
                prior_m = np.dot(H, mu_0)
                prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var

                log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v)

                kalman_gain = np.dot(v_0, H.T) / prior_v
                mu = mu_0 + kalman_gain*(Y[0] - prior_m)

                V = np.dot(np.eye(order) - np.dot(kalman_gain, H), v_0)
                results[0][0] = mu
                results[0][1] = V
            else:
                delta = X[t] - X[t-1]
                Q = kern.Q_of_r(delta)
                Phi = kern.Phi_of_r(delta)
                P = np.dot(np.dot(Phi, V), Phi.T) + Q
                PhiMu = np.dot(Phi, mu)
                prior_m = np.dot(H, PhiMu)
                prior_v = np.dot(np.dot(H, P), H.T) + noise_var

                log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v)
                log_likelihood += log_likelihood_i

                kalman_gain = np.dot(P, H.T)/prior_v
                mu = PhiMu + kalman_gain*(Y[t] - prior_m)
                V = np.dot((np.eye(order) - np.dot(kalman_gain, H)), P)

                results[t-1][2] = Phi
                results[t-1][3] = P
                results[t][0] = mu
                results[t][1] = V

        # carry out backwards smoothing

        W = np.dot((np.eye(order) - np.dot(kalman_gain, H)), (np.dot(Phi, results[K-2][1])))

        mu_s = results[K-1][0]
        V_s = results[K-1][1]

        posterior_mean = np.zeros((K, 1))
        posterior_var = np.zeros((K, 1))
        E = np.zeros((K, 4), dtype='object')

        posterior_mean[K-1] = np.dot(H, mu_s)
        posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T)
        E[K-1][0] = mu_s
        E[K-1][1] = V_s

        for t in range(K-2, -1, -1):
            mu = results[t][0]
            V = results[t][1]
            Phi = results[t][2]
            P = results[t][3]

            L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order)))  # forward substitution
            mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
            V_s = V + np.dot(np.dot(L, V_s - P), L.T)
            posterior_mean[t] = np.dot(H, mu_s)
            posterior_var[t] = np.dot(np.dot(H, V_s), H.T)

            if (t < K-2):
                W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T))

            E[t][0] = mu_s
            E[t][1] = V_s
            E[t][2] = L
            E[t][3] = W

        return SsmPosterior(mu_f=results[:,0], V_f=results[:,1], mu_s=E[:,0], V_s=E[:,1], expectations=E), log_likelihood
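For reference, the recursions implemented by the removed inference() are the standard Kalman filter and Rauch-Tung-Striebel smoother for the state space form of the GP, with observation row H = [1, 0, ...], transition \Phi_t = \Phi(x_t - x_{t-1}), process noise Q_t and observation noise \sigma_n^2:

\text{predict: } m_t^- = \Phi_t m_{t-1}, \qquad P_t^- = \Phi_t V_{t-1}\Phi_t^{\top} + Q_t
\text{update: } k_t = P_t^- H^{\top}\big(H P_t^- H^{\top} + \sigma_n^2\big)^{-1}, \qquad m_t = m_t^- + k_t\,(y_t - H m_t^-), \qquad V_t = (I - k_t H)\,P_t^-
\text{smooth: } L_t = V_t \Phi_{t+1}^{\top}(P_{t+1}^-)^{-1}, \qquad m_t^{s} = m_t + L_t\,(m_{t+1}^{s} - \Phi_{t+1} m_t), \qquad V_t^{s} = V_t + L_t\,(V_{t+1}^{s} - P_{t+1}^-)\,L_t^{\top}

The log marginal likelihood accumulated alongside the filter is \sum_t -\tfrac12\big[\log 2\pi + \log(H P_t^- H^{\top} + \sigma_n^2) + (y_t - H m_t^-)^2 / (H P_t^- H^{\top} + \sigma_n^2)\big].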
@@ -1,73 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

import numpy as np


class SsmPosterior(object):
    """
    Specially intended for the SSM regression case.
    An object to represent a Gaussian posterior over latent function values, p(f|D).

    The purpose of this class is to serve as an interface between the inference
    schemes and the model classes.

    """
    def __init__(self, mu_f=None, V_f=None, mu_s=None, V_s=None, expectations=None):
        """
        mu_f : mean values predicted during the Kalman filtering step
        V_f : variance predicted during the Kalman filtering step
        mu_s : mean values predicted during the backwards smoothing step
        V_s : variance predicted during the backwards smoothing step
        expectations : posterior expectations
        """

        if ((mu_f is not None) and (V_f is not None) and
                (mu_s is not None) and (V_s is not None) and
                (expectations is not None)):
            pass  # we have sufficient information to compute the posterior
        else:
            raise ValueError("insufficient information to compute predictions")

        self._mu_f = mu_f
        self._V_f = V_f
        self._mu_s = mu_s
        self._V_s = V_s
        self._expectations = expectations

    @property
    def mu_f(self):
        """
        Mean values predicted during the Kalman filtering step
        """
        return self._mu_f

    @property
    def V_f(self):
        """
        Variance predicted during the Kalman filtering step
        """
        return self._V_f

    @property
    def mu_s(self):
        """
        Mean values predicted during the backwards smoothing step
        """
        return self._mu_s

    @property
    def V_s(self):
        """
        Variance predicted during the backwards smoothing step
        """
        return self._V_s

    @property
    def expectations(self):
        """
        Posterior expectations
        """
        return self._expectations
@@ -29,5 +29,4 @@ from .src.splitKern import SplitKern,DEtime
from .src.splitKern import DEtime as DiffGenomeKern
from .src.spline import Spline
from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
from .src.ssmKerns import Matern32_SSM
from .src.gridKerns import GridRBF
from .src.grid_kerns import GridRBF
@@ -5,8 +5,8 @@

import numpy as np
from stationary import Stationary
from ...util.config import *
from ...util.caching import Cache_this
from paramz.caching import Cache_this


class GridKern(Stationary):
@@ -7,6 +7,7 @@ from .stationary import Stationary
from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU
from ...core import Param
from paramz.transformations import Logexp
from .gridKerns import GridRBF

class RBF(Stationary):
    """
@@ -1,148 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# Kurt Cutajar

from kern import Kern
from ... import util
import numpy as np
from scipy import integrate, weave
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp


class SsmKerns(Kern):
    """
    Kernels suitable for GP SSM regression with one-dimensional inputs

    """

    def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False):

        super(SsmKerns, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
        self.lengthscale = np.abs(lengthscale)
        self.variance = Param('variance', variance, Logexp())

    def K_of_r(self, r):
        raise NotImplementedError("implement the covariance function as a fn of r to use this class")

    def dK_dr(self, r):
        raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")

    def Q_of_r(self, r):
        raise NotImplementedError("implement covariance function of SDE wrt r to use this class")

    def Phi_of_r(self, r):
        raise NotImplementedError("implement transition function of SDE wrt r to use this class")

    def noise(self):
        raise NotImplementedError("implement noise function of SDE to use this class")

    def dPhidLam(self, r):
        raise NotImplementedError("implement derivative of the transition function wrt lambda to use this class")

    def dQ(self, r):
        raise NotImplementedError("implement derivatives of Q wrt lambda and variance to use this class")


class Matern32_SSM(SsmKerns):
    """
    Matern 3/2 kernel:

    .. math::

       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\qquad \\text{where } r = \\sqrt{\\sum_{i=1}^{\\text{input\\_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2}}

    """

    def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'):
        super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name)
        lambd = np.sqrt(2*1.5)/lengthscale  # additional parameter of the model (derived from lengthscale)
        self.lam = Param('lambda', lambd, Logexp())
        self.link_parameters(self.lam, self.variance)
        self.order = 2

    def noise(self):
        """
        Compute noise for the kernel
        """
        p = 1
        lp_fact = np.sum(np.log(range(1, p+1)))
        l2p_fact = np.sum(np.log(range(1, 2*p + 1)))
        logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact
        q = np.exp(logq)
        return q

    def K_of_r(self, r):
        lengthscale = np.sqrt(3.) / self.lam
        r = r / lengthscale
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

    def dK_dr(self, r):
        return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)

    def Q_of_r(self, r):
        """
        Compute process variance (Q)
        """
        q = self.noise()
        Q = np.zeros((2, 2))
        Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2)
                                       + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam))
        Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam))
        Q[1][0] = Q[0][1]
        Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2)
                                    - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam))
        return q*Q

    def Phi_of_r(self, r):
        """
        Compute transition function (Phi)
        """
        if r < 0:
            phi = np.zeros((2, 2))
            phi[0][0] = self.variance
            phi[0][1] = 0
            phi[1][0] = 0
            phi[1][1] = np.power(self.lam, 2)*self.variance
            return phi
        else:
            mult = np.exp(-self.lam*r)
            phi = np.zeros((2, 2))
            phi[0][0] = (1 + self.lam*r)*mult
            phi[0][1] = mult*r
            phi[1][0] = -r*mult*self.lam**2
            phi[1][1] = mult*(1 - self.lam*r)
            return phi

    def dPhidLam(self, r):
        """
        Compute derivative of transition function (Phi) wrt lambda
        """
        mult = np.exp(self.lam*r)
        dPhi = np.zeros((2, 2))
        dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult
        dPhi[0][1] = (-1 * (r**2)) / mult
        dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult
        dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult
        return dPhi

    def dQ(self, r):
        """
        Compute derivatives of Q with respect to lambda and variance
        """
        if (r == -1):
            dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]])
            dQdVar = None
        else:
            q = self.noise()
            Q = self.Q_of_r(r)
            dQdLam = np.zeros((2, 2))
            mult = np.exp(2*self.lam*r)
            dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult)
            dQdLam[0][1] = -(r**3)/mult
            dQdLam[1][0] = dQdLam[0][1]
            dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult)
            dq = (q*(2+1))/self.lam
            dQdLam = q*dQdLam + dq*(Q/q)
            dQdVar = Q / self.variance
        return [dQdLam, dQdVar]
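For reference, the 2x2 matrices hard-coded in the removed Matern32_SSM above are the standard SDE (state space) representation of the Matérn 3/2 kernel with \lambda = \sqrt{3}/\ell: the transition over a gap r is the matrix exponential

\Phi(r) = e^{F r} = e^{-\lambda r}\begin{pmatrix} 1+\lambda r & r \\ -\lambda^{2} r & 1-\lambda r \end{pmatrix},
\qquad F = \begin{pmatrix} 0 & 1 \\ -\lambda^{2} & -2\lambda \end{pmatrix},

the stationary state covariance (returned by Phi_of_r for the sentinel argument r = -1) is P_\infty = \operatorname{diag}(\sigma^{2}, \lambda^{2}\sigma^{2}), and the spectral-density constant computed by noise() reduces to q = 4\sigma^{2}\lambda^{3} for p = 1.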
@@ -317,7 +317,7 @@ class Stationary(Kern):
    def input_sensitivity(self, summarize=True):
        return self.variance*np.ones(self.input_dim)/self.lengthscale**2

    def getOneDimensionalKernel(self, dimensions):
    def get_one_dimensional_kernel(self, dimensions):
        """
        Specially intended for the grid regression case
        For a given covariance kernel, this method returns the corresponding kernel for
@@ -22,3 +22,4 @@ from .gp_var_gauss import GPVariationalGaussianApproximation
from .one_vs_all_classification import OneVsAllClassification
from .one_vs_all_sparse_classification import OneVsAllSparseClassification
from .dpgplvm import DPBayesianGPLVM
from .gp_grid_regression import GPRegressionGrid

GPy/models/gp_grid_regression.py (new file, 36 lines)
@@ -0,0 +1,36 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

from ..core import GpGrid
from .. import likelihoods
from .. import kern

class GPRegressionGrid(GpGrid):
    """
    Gaussian Process model for grid inputs using Kronecker products

    This is a thin wrapper around the models.GpGrid class, with a set of sensible defaults

    :param X: input observations
    :param Y: observed values
    :param kernel: a GPy kernel, defaults to the kron variation of SqExp
    :param Norm normalizer: [False]

        Normalize Y with the norm given.
        If normalizer is False, no normalization will be done.
        If it is None, we use GaussianNorm(alization).

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """

    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):

        if kernel is None:
            kernel = kern.RBF(1)  # no other kernels implemented so far

        likelihood = likelihoods.Gaussian()
        super(GPRegressionGrid, self).__init__(X, Y, kernel, likelihood, name='GP Grid regression', Y_metadata=Y_metadata, normalizer=normalizer)
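A minimal usage sketch for the new model class, mirroring the grid test that follows (the grid, kernel settings and query point are illustrative, not prescribed by the commit):

import numpy as np
import GPy

# inputs on a full Cartesian grid (here the corners of the unit cube), one output column
X = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
              [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]], dtype=float)
Y = np.random.randn(8, 1)

kernel = GPy.kern.RBF(input_dim=3, variance=1.0, ARD=True)
m = GPy.models.GPRegressionGrid(X, Y, kernel)   # Kronecker-structured inference

mean, var = m.predict(np.array([[0.5, 0.5, 0.5]]))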
GPy/testing/grid_tests.py (new file, 51 lines)

@@ -0,0 +1,51 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

import unittest
import numpy as np
import GPy

class GridModelTest(unittest.TestCase):
    def setUp(self):
        ######################################
        # # 3 dimensional example

        # sample inputs and outputs
        self.X = np.array([[0,0,0],[0,0,1],[0,1,0],[0,1,1],[1,0,0],[1,0,1],[1,1,0],[1,1,1]])
        self.Y = np.random.randn(8, 1) * 100
        self.dim = self.X.shape[1]

    def test_alpha_match(self):
        kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
        m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel)

        kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
        m2 = GPy.models.GPRegression(self.X, self.Y, kernel2)

        np.testing.assert_almost_equal(m.posterior.alpha, m2.posterior.woodbury_vector)

    def test_gradient_match(self):
        kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
        m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel)

        kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
        m2 = GPy.models.GPRegression(self.X, self.Y, kernel2)

        np.testing.assert_almost_equal(kernel.variance.gradient, kernel2.variance.gradient)
        np.testing.assert_almost_equal(kernel.lengthscale.gradient, kernel2.lengthscale.gradient)
        np.testing.assert_almost_equal(m.likelihood.variance.gradient, m2.likelihood.variance.gradient)

    def test_prediction_match(self):
        kernel = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
        m = GPy.models.GPRegressionGrid(self.X, self.Y, kernel)

        kernel2 = GPy.kern.RBF(input_dim=self.dim, variance=1, ARD=True)
        m2 = GPy.models.GPRegression(self.X, self.Y, kernel2)

        test = np.array([[0,0,2],[-1,3,-4]])

        np.testing.assert_almost_equal(m.predict(test), m2.predict(test))
@@ -59,6 +59,14 @@ GPy.core.svgp module
    :undoc-members:
    :show-inheritance:

GPy.core.gp_grid module
------------------------

.. automodule:: GPy.core.gp_grid
    :members:
    :undoc-members:
    :show-inheritance:

GPy.core.symbolic module
------------------------
@@ -60,6 +60,14 @@ GPy.inference.latent_function_inference.posterior module
    :undoc-members:
    :show-inheritance:

GPy.inference.latent_function_inference.grid_posterior module
---------------------------------------------------------------

.. automodule:: GPy.inference.latent_function_inference.grid_posterior
    :members:
    :undoc-members:
    :show-inheritance:

GPy.inference.latent_function_inference.svgp module
---------------------------------------------------
@@ -92,6 +100,14 @@ GPy.inference.latent_function_inference.var_gauss module
    :undoc-members:
    :show-inheritance:

GPy.inference.latent_function_inference.gaussian_grid_inference module
------------------------------------------------------------------------

.. automodule:: GPy.inference.latent_function_inference.gaussian_grid_inference
    :members:
    :undoc-members:
    :show-inheritance:


Module contents
---------------
@@ -171,6 +171,14 @@ GPy.kern.src.spline module
    :undoc-members:
    :show-inheritance:

GPy.kern.src.grid_kerns module
------------------------------

.. automodule:: GPy.kern.src.grid_kerns
    :members:
    :undoc-members:
    :show-inheritance:

GPy.kern.src.splitKern module
-----------------------------
@@ -197,6 +197,14 @@ GPy.testing.svgp_tests module
    :show-inheritance:


GPy.testing.grid_tests module
-----------------------------

.. automodule:: GPy.testing.grid_tests
    :members:
    :undoc-members:
    :show-inheritance:

Module contents
---------------