# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from .gplvm import GPLVM
from .. import kern
from ..core import SparseGP
from ..likelihoods import Gaussian
from ..inference.optimization import SCG
from ..util import linalg
from ..core.parameterization.variational import NormalPosterior, NormalPrior


class BayesianGPLVM(SparseGP):
    """
    Bayesian Gaussian Process Latent Variable Model

    :param Y: observed data (np.ndarray) or GPy.likelihood
    :type Y: np.ndarray | GPy.likelihood instance
    :param input_dim: latent dimensionality
    :type input_dim: int
    :param init: initialisation method for the latent space
    :type init: 'PCA' | 'random'
    """

    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                 Z=None, kernel=None, inference_method=None, likelihood=None,
                 name='bayesian gplvm', **kwargs):
        if X is None:
            from ..util.initialization import initialize_latent
            X = initialize_latent(init, input_dim, Y)
        self.init = init

        if X_variance is None:
            X_variance = np.random.uniform(0, .1, X.shape)

        if Z is None:
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]

        if kernel is None:
            kernel = kern.RBF(input_dim) # + kern.white(input_dim)

        if likelihood is None:
            likelihood = Gaussian()

        self.q = NormalPosterior(X, X_variance)
        self.variational_prior = NormalPrior()

        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
        self.add_parameter(self.q, index=0)
        #self.ensure_default_constraints()

    def _getstate(self):
        """
        Get the current state of the class,
        here just all the indices, rest can get recomputed
        """
        return SparseGP._getstate(self) + [self.init]

    def _setstate(self, state):
        self._const_jitter = None
        self.init = state.pop()
        SparseGP._setstate(self, state)

    def parameters_changed(self):
        super(BayesianGPLVM, self).parameters_changed()
        # subtract the KL divergence KL(q(X) || p(X)) between the variational
        # posterior and the prior from the sparse-GP bound
        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q)

        # TODO: This has to go into kern
        # maybe a update_gradients_q_variational?
        self.q.mean.gradient, self.q.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict)

        # update for the KL divergence
        self.variational_prior.update_gradients_KL(self.q)
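    # For reference (an assumption, not verified against NormalPrior's code):
    # with a standard-normal prior on X the subtracted term is
    #     KL(q(X) || p(X)) = 0.5 * sum(mu^2 + S - ln S - 1),
    # whose gradients w.r.t. mu and S are mu and 0.5 * (1 - 1/S). These are the
    # same expressions written out explicitly in
    # BayesianGPLVMWithMissingData.parameters_changed below.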

    def plot_latent(self, plot_inducing=True, *args, **kwargs):
        """
        See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent
        """
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import dim_reduction_plots

        return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)

    def do_test_latents(self, Y):
        """
        Compute the latent representation for a set of new points Y

        Notes:
        This will only work with a univariate Gaussian likelihood (for now)
        """
        assert not self.likelihood.is_heteroscedastic
        N_test = Y.shape[0]
        input_dim = self.Z.shape[1]
        means = np.zeros((N_test, input_dim))
        covars = np.zeros((N_test, input_dim))

        dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
        V = self.likelihood.precision * Y

        #compute CPsi1V
        if self.Cpsi1V is None:
            psi1V = np.dot(self.psi1.T, self.likelihood.V)
            tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
            tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
            self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)

        dpsi1 = np.dot(self.Cpsi1V, V.T)

        # starting point: zero mean and zero log-variance for each latent dimension
        start = np.zeros(self.input_dim * 2)

        # optimise the variational mean and log-variance of each test point
        # independently with scaled conjugate gradients (SCG)
        for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
            args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)

            mu, log_S = xopt.reshape(2, 1, -1)
            means[n] = mu[0].copy()
            covars[n] = np.exp(log_S[0]).copy()

        return means, covars
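
    # A usage sketch for do_test_latents (illustrative only; Ynew is assumed to
    # be an (N* x D) array on the same output space the model was trained on):
    #
    #     mu_new, S_new = m.do_test_latents(Ynew)
    #
    # mu_new and S_new hold the variational means and variances of the new
    # latent points, one row per row of Ynew.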

    def dmu_dX(self, Xnew):
        """
        Calculate the gradient of the prediction at Xnew w.r.t Xnew.
        """
        dmu_dX = np.zeros_like(Xnew)
        for i in range(self.Z.shape[0]):
            dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
        return dmu_dX

    def dmu_dXnew(self, Xnew):
        """
        Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
        """
        gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
        ones = np.ones((1, 1))
        for i in range(self.Z.shape[0]):
            gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
        return np.dot(gradients_X, self.Cpsi1Vf)

    def plot_steepest_gradient_map(self, *args, **kwargs):
        """
        See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map
        """
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import dim_reduction_plots

        return dim_reduction_plots.plot_steepest_gradient_map(self, *args, **kwargs)


class BayesianGPLVMWithMissingData(BayesianGPLVM):

    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                 Z=None, kernel=None, inference_method=None, likelihood=None,
                 name='bayesian gplvm', **kwargs):
        from ..util.subarray_and_sorting import common_subarrays
        self.subarrays = common_subarrays(Y)
        #import ipdb; ipdb.set_trace()  # debugging breakpoint left in while this class is work in progress
        BayesianGPLVM.__init__(self, Y, input_dim, X=X, X_variance=X_variance, init=init,
                               num_inducing=num_inducing, Z=Z, kernel=kernel,
                               inference_method=inference_method, likelihood=likelihood,
                               name=name, **kwargs)

    def parameters_changed(self):
        # super(BayesianGPLVM, self) skips BayesianGPLVM in the MRO, so this calls
        # SparseGP.parameters_changed directly; the KL term and the variational
        # gradients are handled explicitly below.
        super(BayesianGPLVM, self).parameters_changed()
        self._log_marginal_likelihood -= self.KL_divergence()

        dL_dmu, dL_dS = self.dL_dmuS()

        # dL:
        self.q.mean.gradient = dL_dmu
        self.q.variance.gradient = dL_dS

        # dKL: gradients of KL(q(X)||p(X)) = 0.5 * sum(mu^2 + S - ln S - 1)
        # w.r.t. the variational mean and variance, subtracted from dL
        self.q.mean.gradient -= self.X
        self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5


if __name__ == '__main__':
    # Incomplete test stub for the missing-data model: build a toy dataset with
    # a random mask of missing entries. The 2-d latent points are mapped to a
    # 10-d observation space; the original draft used (X*W).sum(1), which does
    # not broadcast for these shapes, so this mapping is one plausible reading.
    import numpy as np
    X = np.random.randn(20, 2)
    W = np.linspace(0, 1, 10)[None, :]
    Y = X.sum(1)[:, None] * W
    missing = np.random.binomial(1, .1, size=Y.shape)

    pass


def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
    objective function for fitting the latent variables for test points
    (negative log-likelihood: should be minimised!)
    """
    mu, log_S = mu_S.reshape(2, 1, -1)
    S = np.exp(log_S)

    psi0 = kern.psi0(Z, mu, S)
    psi1 = kern.psi1(Z, mu, S)
    psi2 = kern.psi2(Z, mu, S)

    # psi-statistics contribution to the bound minus the prior penalty
    # 0.5 * sum(mu^2 + S - ln S) on the test point's variational posterior
    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)

    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)

    dmu = mu0 + mu1 + mu2 - mu
    # dS = S0 + S1 + S2 - 0.5 + .5/S
    # chain rule for the log-variance parameterisation: d/dlnS = S * d/dS
    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
    return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))


def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
    objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
    This is the same as latent_cost_and_grad but only for the objective
    """
    mu, log_S = mu_S.reshape(2, 1, -1)
    S = np.exp(log_S)

    psi0 = kern.psi0(Z, mu, S)
    psi1 = kern.psi1(Z, mu, S)
    psi2 = kern.psi2(Z, mu, S)

    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
    return -float(lik)


def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
    This is the same as latent_cost_and_grad but only for the grad
    """
    mu, log_S = mu_S.reshape(2, 1, -1)
    S = np.exp(log_S)

    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)

    dmu = mu0 + mu1 + mu2 - mu
    # dS = S0 + S1 + S2 - 0.5 + .5/S
    dlnS = S * (S0 + S1 + S2 - 0.5) + .5

    return -np.hstack((dmu.flatten(), dlnS.flatten()))
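
# A minimal sanity-check sketch (illustrative, not part of the module): compare
# latent_grad against finite differences of latent_cost for a kernel `k` with
# psi-statistics, inducing points `Z` and weights dL_dpsi0/1/2 prepared as in
# do_test_latents:
#
#     from scipy.optimize import check_grad
#     x0 = np.zeros(2 * Z.shape[1])
#     err = check_grad(latent_cost, latent_grad, x0, k, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2)
#     print(err)  # should be small if the analytic gradient is consistent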