Added core code for GpSSM and GpGrid

This commit is contained in:
kcutajar 2016-03-22 08:53:39 +01:00
parent 726d76c427
commit 9a2670b98e
4 changed files with 374 additions and 0 deletions

View file

@ -8,6 +8,8 @@ from . import parameterization
from .gp import GP
from .svgp import SVGP
from .sparse_gp import SparseGP
from .gp_ssm import GpSSM
from .gp_grid import GpGrid
from .mapping import *

116
GPy/core/gp_grid.py Normal file
View file

@ -0,0 +1,116 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Kurt Cutajar
#This implementation of converting GPs to state space models is based on the article:
#@article{Gilboa:2015,
# title={Scaling multidimensional inference for structured Gaussian processes},
# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
# volume={37},
# number={2},
# pages={424--436},
# year={2015},
# publisher={IEEE}
#}
import numpy as np
import scipy.linalg as sp
from gp import GP
from parameterization.param import Param
from ..inference.latent_function_inference import gaussian_grid_inference
from .. import likelihoods
import logging
from GPy.inference.latent_function_inference.posterior import Posterior
logger = logging.getLogger("gp grid")
class GpGrid(GP):
"""
A GP model for Grid inputs
:param X: inputs
:type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
"""
def __init__(self, X, Y, kernel, likelihood, inference_method=None,
name='gp grid', Y_metadata=None, normalizer=False):
#pick a sensible inference method
inference_method = gaussian_grid_inference.GaussianGridInference()
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
self.posterior = None
def parameters_changed(self):
"""
Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood and gradients of the model
.. warning::
This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
this method yourself, there may be unexpected consequences.
"""
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_direct(self.grad_dict['dL_dVar'], self.grad_dict['dL_dLen'])
def kron_mmprod(self, A, B):
count = 0
D = len(A)
for b in (B.T):
x = b
N = 1
G = np.zeros(D)
for d in xrange(D):
G[d] = len(A[d])
N = np.prod(G)
for d in xrange(D-1, -1, -1):
X = np.reshape(x, (G[d], round(N/G[d])), order='F')
Z = np.dot(A[d], X)
Z = Z.T
x = np.reshape(Z, (-1, 1), order='F')
if (count == 0):
result = x
else:
result = np.column_stack((result, x))
count+=1
return result
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
Make a prediction for the latent function values
"""
if kern is None:
kern = self.kern
# compute mean predictions
Kmn = kern.K(Xnew, self.X)
alpha_kron = self.posterior.alpha
mu = np.dot(Kmn, alpha_kron)
mu = mu.reshape(-1,1)
# compute variance of predictions
Knm = Kmn.T
noise = self.likelihood.variance
V_kron = self.posterior.V_kron
Qs = self.posterior.Qs
QTs = self.posterior.QTs
A = self.kron_mmprod(QTs, Knm)
V_kron = V_kron.reshape(-1, 1)
A = A / (V_kron + noise)
A = self.kron_mmprod(Qs, A)
Kmm = kern.K(Xnew)
var = np.diag(Kmm - np.dot(Kmn, A)).copy()
#var = np.zeros((Xnew.shape[0]))
var = var.reshape(-1, 1)
return mu, var

253
GPy/core/gp_ssm.py Normal file
View file

@ -0,0 +1,253 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Kurt Cutajar
# This implementation of converting GPs to state space models is based on the article:
#@article{Gilboa:2015,
# title={Scaling multidimensional inference for structured Gaussian processes},
# author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
# journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
# volume={37},
# number={2},
# pages={424--436},
# year={2015},
# publisher={IEEE}
#}
import numpy as np
import scipy.linalg as sp
from gp import GP
from parameterization.param import Param
from ..inference.latent_function_inference import gaussian_ssm_inference
from .. import likelihoods
from ..inference import optimization
from parameterization.transformations import Logexp
from GPy.inference.latent_function_inference.posterior import Posterior
class GpSSM(GP):
"""
A GP model for sorted one-dimensional inputs
This model allows the representation of a Gaussian Process as a Gauss-Markov State Machine.
:param X: inputs
:type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
"""
def __init__(self, X, Y, kernel, likelihood, inference_method=None,
name='gp ssm', Y_metadata=None, normalizer=False):
#pick a sensible inference method
inference_method = gaussian_ssm_inference.GaussianSSMInference()
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
self.posterior = None
def optimize(self, optimizer=None, start=None, **kwargs):
prevLikelihood = 0
count = 0
change = 1
while ((change > 0.001) and (count < 50)):
posterior, likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata)
expectations = posterior.expectations
self.optimize_params(expectations=expectations)
change = np.abs(likelihood - prevLikelihood)
prevLikelihood = likelihood
count = count + 1
def optimize_params(self, optimizer=None, start=None, expectations=None, **kwargs):
if self.is_fixed:
print("nothing to optimize")
if self.size == 0:
print("nothing to optimize")
if not self.update_model():
print("Updates were off, setting updates on again")
self.update_model(True)
if start == None:
start = self.optimizer_array
if optimizer is None:
optimizer = self.preferred_optimizer
if isinstance(optimizer, optimization.Optimizer):
opt = optimizer
opt.model = self
else:
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
opt.max_iters = 1
opt.run(f_fp=self.param_maximisation_step, args=(self.X, self.Y, expectations))
self.optimization_runs.append(opt)
self.optimizer_array = opt.x_opt
def param_maximisation_step(self, loghyper, X, Y, E, *args):
loghyper = np.log(np.exp(loghyper) + 1) - 1e-20
lam = loghyper[0]
sig = loghyper[1]
noise = loghyper[2]
kern = self.kern
order = kern.order
K = len(X)
mu_0 = np.zeros((order, 1))
v_0 = kern.Phi_of_r(-1)
dvSig = v_0/sig
dvLam = kern.dQ(-1)[0]
mu = E[0][0]
V = E[0][1]
E11 = V + np.dot(mu, mu.T)
V0_inv = np.linalg.solve(v_0, np.eye(len(mu)))
Ub = np.log(np.linalg.det(v_0)) + np.trace(np.dot(V0_inv, E11))
dUb_lam = np.trace(np.dot(V0_inv, dvLam.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvLam.T),V0_inv), E11))
dUb_sig = np.trace(np.dot(V0_inv, dvSig.T)) - np.trace(np.dot(np.dot(np.dot(V0_inv, dvSig.T),V0_inv), E11))
for t in range(1, K):
delta = X[t] - X[t-1]
Q = kern.Q_of_r(delta)
Phi = kern.Phi_of_r(delta)
dPhi = kern.dPhidLam(delta)
[dLam, dSig] = kern.dQ(delta)
mu_prev = E[t-1][0]
V_prev = E[t-1][1]
mu = E[t][0]
V = E[t][1]
Ett_prev = V_prev + np.dot(mu_prev, mu_prev.T)
Eadj = E[t-1][3] + np.dot(mu, mu_prev.T)
Ett = V + np.dot(mu, mu.T)
CC = sp.cholesky(Q, lower=True)
Q_inv = np.linalg.solve(CC.T, np.linalg.solve(CC, np.eye(len(mu))))
Ub = Ub + np.log(np.linalg.det(Q)) + np.trace(np.dot(Q_inv, Ett)) - 2*np.trace(np.dot(np.dot(Phi.T, Q_inv), Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), Phi), Ett_prev))
A = np.dot(dPhi.T, Q_inv) - np.dot(Phi.T, np.dot(np.dot(Q_inv, dLam), Q_inv))
dUb_lam = dUb_lam + np.trace(np.dot(Q_inv, dLam.T)) - np.trace(np.dot(np.dot(np.dot(Q_inv, dLam.T), Q_inv), Ett)) - 2*np.trace(np.dot(A,Eadj)) + np.trace(np.dot(np.dot(np.dot(Phi.T, Q_inv), dPhi) + np.dot(A, Phi), Ett_prev))
A = -1 * np.dot(Phi.T, np.dot(np.dot(Q_inv, dSig), Q_inv))
dUb_sig = dUb_sig + np.trace(np.dot(Q_inv, dSig.T)) - np . trace(np.dot(np.dot(np.dot(Q_inv, dSig.T), Q_inv), Ett)) - 2*np.trace(np.dot(A, Eadj)) + np.trace(np.dot(np.dot(A, Phi), Ett_prev))
dUb_noise = 0
for t in xrange(K):
mu = E[t][0]
V = E[t][1]
Ett = V + np.dot(mu, mu.T)
Ub = Ub + np.log(noise) + (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/noise
dUb_noise = dUb_noise + 1/noise - (Y[t]**2 - 2*Y[t]*mu[0] + Ett[0][0])/(noise**2)
dUb = np.array([lam, sig, noise]) * np.array([dUb_lam.item(), dUb_sig.item(), dUb_noise.item()])
return Ub.item(), dUb
def parameters_changed(self):
"""
Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood
.. warning::
This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
this method yourself, there may be unexpected consequences.
We override the method in the parent class since we do not handle updates to the standard gradients.
"""
self.posterior, self._log_marginal_likelihood = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
Make a prediction for the latent function values
N.B. It is assumed that input points are sorted
"""
if kern is None:
kern = self.kern
X = self.X
K = X.shape[0]
K_new = Xnew.shape[0]
order = kern.order
mean_pred = np.zeros(K_new)
var_pred = np.zeros(K_new)
H = np.zeros((1,order))
H[0][0] = 1
count = 0
for t in xrange(K_new):
while ((count < K) and (Xnew[t] > X[count])):
count += 1
if (count == 0):
mu = np.zeros((order, 1))
V = kern.Phi_of_r(-1)
delta = np.abs(Xnew[t] - X[count])
Phi = kern.Phi_of_r(delta)
Q = kern.Q_of_r(delta)
P = np.dot(np.dot(Phi, V), Phi.T) + Q
mu_s = self.posterior.mu_s[count]
V_s = self.posterior.V_s[count]
L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P))))
mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
V_s = V + np.dot(np.dot(L, V_s - P), L.T)
mean_pred[t] = np.dot(H, mu_s)
var_pred[t] = np.dot(np.dot(H, V_s), H.T)
elif (count == K):
# forwards phase
delta = np.abs(Xnew[t] - X[count-1])
Phi = kern.Phi_of_r(delta)
Q = kern.Q_of_r(delta)
mu_f = self.posterior.mu_f[count-1]
V_f = self.posterior.V_f[count-1]
mu = np.dot(Phi, mu_f)
P = np.dot(np.dot(Phi, V_f), Phi.T) + Q
V = P
mean_pred[t] = np.dot(H,mu)
var_pred[t] = np.dot(np.dot(H, V), H.T)
else:
# forwards phase
delta = np.abs(Xnew[t] - X[count-1])
Phi = kern.Phi_of_r(delta)
Q = kern.Q_of_r(delta)
mu_f = self.posterior.mu_f[count-1]
V_f = self.posterior.V_f[count-1]
mu = np.dot(Phi, mu_f)
P = np.dot(np.dot(Phi, V_f), Phi.T) + Q
V = P
delta = np.abs(Xnew[t] - X[count])
Phi = kern.Phi_of_r(delta)
Q = kern.Q_of_r(delta)
P = np.dot(np.dot(Phi, V), Phi.T) + Q
# backwards phase
mu_s = self.posterior.mu_s[count]
V_s = self.posterior.V_s[count]
L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(len(P))))
mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
V_s = V + np.dot(np.dot(L, V_s - P), L.T)
mean_pred[t] = np.dot(H, mu_s)
var_pred[t] = np.dot(np.dot(H, V_s), H.T)
mean_pred = mean_pred.reshape(-1, 1)
var_pred = var_pred.reshape(-1, 1)
return mean_pred, var_pred

View file

@ -69,6 +69,9 @@ from .dtc import DTC
from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch
from .var_gauss import VarGauss
from .gaussian_ssm_inference import GaussianSSMInference
from .gaussian_grid_inference import GaussianGridInference
# class FullLatentFunctionData(object):
#