mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-04 01:02:39 +02:00
[vardtc] missing data handling and stochastic update in d
This commit is contained in:
parent
3358d06e42
commit
26396939e5
6 changed files with 124 additions and 37 deletions
|
|
@ -10,6 +10,8 @@ from parameterization.variational import VariationalPosterior
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from GPy.inference.latent_function_inference.posterior import Posterior
|
from GPy.inference.latent_function_inference.posterior import Posterior
|
||||||
|
from GPy.inference.optimization.stochastics import SparseGPStochastics,\
|
||||||
|
SparseGPMissing
|
||||||
logger = logging.getLogger("sparse gp")
|
logger = logging.getLogger("sparse gp")
|
||||||
|
|
||||||
class SparseGP(GP):
|
class SparseGP(GP):
|
||||||
|
|
@ -37,12 +39,7 @@ class SparseGP(GP):
|
||||||
|
|
||||||
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
|
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
|
||||||
name='sparse gp', Y_metadata=None, normalizer=False,
|
name='sparse gp', Y_metadata=None, normalizer=False,
|
||||||
missing_data=False):
|
missing_data=False, stochastic=False, batchsize=1):
|
||||||
|
|
||||||
self.missing_data = missing_data
|
|
||||||
if self.missing_data:
|
|
||||||
self.ninan = ~np.isnan(Y)
|
|
||||||
|
|
||||||
#pick a sensible inference method
|
#pick a sensible inference method
|
||||||
if inference_method is None:
|
if inference_method is None:
|
||||||
if isinstance(likelihood, likelihoods.Gaussian):
|
if isinstance(likelihood, likelihoods.Gaussian):
|
||||||
|
|
@ -56,6 +53,22 @@ class SparseGP(GP):
|
||||||
self.num_inducing = Z.shape[0]
|
self.num_inducing = Z.shape[0]
|
||||||
|
|
||||||
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
|
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
|
||||||
|
self.missing_data = missing_data
|
||||||
|
|
||||||
|
if stochastic and missing_data:
|
||||||
|
self.missing_data = True
|
||||||
|
self.ninan = ~np.isnan(Y)
|
||||||
|
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||||
|
elif stochastic and not missing_data:
|
||||||
|
self.missing_data = False
|
||||||
|
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||||
|
elif missing_data:
|
||||||
|
self.missing_data = True
|
||||||
|
self.ninan = ~np.isnan(Y)
|
||||||
|
self.stochastics = SparseGPMissing(self)
|
||||||
|
else:
|
||||||
|
self.stochastics = False
|
||||||
|
|
||||||
logger.info("Adding Z as parameter")
|
logger.info("Adding Z as parameter")
|
||||||
self.link_parameter(self.Z, index=0)
|
self.link_parameter(self.Z, index=0)
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
|
|
@ -71,6 +84,7 @@ class SparseGP(GP):
|
||||||
print message,
|
print message,
|
||||||
print ''
|
print ''
|
||||||
|
|
||||||
|
self.posterior = None
|
||||||
|
|
||||||
def has_uncertain_inputs(self):
|
def has_uncertain_inputs(self):
|
||||||
return isinstance(self.X, VariationalPosterior)
|
return isinstance(self.X, VariationalPosterior)
|
||||||
|
|
@ -156,7 +170,31 @@ class SparseGP(GP):
|
||||||
|
|
||||||
value_indices:
|
value_indices:
|
||||||
dictionary holding indices for the update in full_values.
|
dictionary holding indices for the update in full_values.
|
||||||
if the key exists the update rule is:
|
if the key exists the update rule is:def df(x):
|
||||||
|
m.stochastics.do_stochastics()
|
||||||
|
grads = m._grads(x)
|
||||||
|
print '\r',
|
||||||
|
message = "Lik: {: 6.4E} Grad: {: 6.4E} Dim: {} Lik: {} Len: {!s}".format(float(m.log_likelihood()), np.einsum('i,i->', grads, grads), m.stochastics.d, float(m.likelihood.variance), " ".join(["{:3.2E}".format(l) for l in m.kern.lengthscale.values]))
|
||||||
|
print message,
|
||||||
|
return grads
|
||||||
|
|
||||||
|
def grad_stop(threshold):
|
||||||
|
def inner(args):
|
||||||
|
g = args['gradient']
|
||||||
|
return np.sqrt(np.einsum('i,i->',g,g)) < threshold
|
||||||
|
return inner
|
||||||
|
|
||||||
|
def maxiter_stop(maxiter):
|
||||||
|
def inner(args):
|
||||||
|
return args['n_iter'] == maxiter
|
||||||
|
return inner
|
||||||
|
|
||||||
|
def optimize(m, maxiter=1000):
|
||||||
|
#opt = climin.RmsProp(m.optimizer_array.copy(), df, 1e-6, decay=0.9, momentum=0.9, step_adapt=1e-7)
|
||||||
|
opt = climin.Adadelta(m.optimizer_array.copy(), df, 1e-2, decay=0.9)
|
||||||
|
ret = opt.minimize_until((grad_stop(.1), maxiter_stop(maxiter)))
|
||||||
|
print
|
||||||
|
return ret
|
||||||
full_values[key][value_indices[key]] += current_values[key]
|
full_values[key][value_indices[key]] += current_values[key]
|
||||||
"""
|
"""
|
||||||
for key in current_values.keys():
|
for key in current_values.keys():
|
||||||
|
|
@ -206,19 +244,26 @@ class SparseGP(GP):
|
||||||
def _outer_loop_for_missing_data(self):
|
def _outer_loop_for_missing_data(self):
|
||||||
Lm = None
|
Lm = None
|
||||||
dL_dKmm = None
|
dL_dKmm = None
|
||||||
Kmm = None
|
|
||||||
|
|
||||||
self._log_marginal_likelihood = 0
|
self._log_marginal_likelihood = 0
|
||||||
full_values = self._outer_init_full_values()
|
full_values = self._outer_init_full_values()
|
||||||
|
|
||||||
|
if self.posterior is None:
|
||||||
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
|
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
|
||||||
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
|
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
|
||||||
m_f = lambda i: "Inference with missing data: {: >7.2%}".format(float(i+1)/self.output_dim)
|
else:
|
||||||
|
woodbury_inv = self.posterior._woodbury_inv
|
||||||
|
woodbury_vector = self.posterior._woodbury_vector
|
||||||
|
|
||||||
|
if not self.stochastics:
|
||||||
|
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
|
||||||
message = m_f(-1)
|
message = m_f(-1)
|
||||||
print message,
|
print message,
|
||||||
for d in xrange(self.output_dim):
|
|
||||||
|
for d in self.stochastics.d:
|
||||||
ninan = self.ninan[:, d]
|
ninan = self.ninan[:, d]
|
||||||
|
|
||||||
|
if not self.stochastics:
|
||||||
print ' '*(len(message)) + '\r',
|
print ' '*(len(message)) + '\r',
|
||||||
message = m_f(d)
|
message = m_f(d)
|
||||||
print message,
|
print message,
|
||||||
|
|
@ -236,19 +281,50 @@ class SparseGP(GP):
|
||||||
|
|
||||||
Lm = posterior.K_chol
|
Lm = posterior.K_chol
|
||||||
dL_dKmm = grad_dict['dL_dKmm']
|
dL_dKmm = grad_dict['dL_dKmm']
|
||||||
Kmm = posterior._K
|
|
||||||
woodbury_inv[:, :, d] = posterior.woodbury_inv
|
woodbury_inv[:, :, d] = posterior.woodbury_inv
|
||||||
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
|
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
|
||||||
self._log_marginal_likelihood += log_marginal_likelihood
|
self._log_marginal_likelihood += log_marginal_likelihood
|
||||||
|
if not self.stochastics:
|
||||||
print ''
|
print ''
|
||||||
|
|
||||||
|
if self.posterior is None:
|
||||||
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
|
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
|
||||||
K=Kmm, mean=None, cov=None, K_chol=Lm)
|
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
|
||||||
self._outer_values_update(full_values)
|
self._outer_values_update(full_values)
|
||||||
|
|
||||||
|
def _outer_loop_without_missing_data(self):
|
||||||
|
self._log_marginal_likelihood = 0
|
||||||
|
|
||||||
|
if self.posterior is None:
|
||||||
|
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
|
||||||
|
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
|
||||||
|
else:
|
||||||
|
woodbury_inv = self.posterior._woodbury_inv
|
||||||
|
woodbury_vector = self.posterior._woodbury_vector
|
||||||
|
|
||||||
|
d = self.stochastics.d
|
||||||
|
posterior, log_marginal_likelihood, \
|
||||||
|
grad_dict, current_values, _ = self._inner_parameters_changed(
|
||||||
|
self.kern, self.X,
|
||||||
|
self.Z, self.likelihood,
|
||||||
|
self.Y_normalized[:, d], self.Y_metadata)
|
||||||
|
self.grad_dict = grad_dict
|
||||||
|
|
||||||
|
self._log_marginal_likelihood += log_marginal_likelihood
|
||||||
|
|
||||||
|
self._outer_values_update(current_values)
|
||||||
|
|
||||||
|
woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
|
||||||
|
woodbury_vector[:, d] = posterior.woodbury_vector
|
||||||
|
if self.posterior is None:
|
||||||
|
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
|
||||||
|
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
self._outer_loop_for_missing_data()
|
self._outer_loop_for_missing_data()
|
||||||
|
elif self.stochastics:
|
||||||
|
self._outer_loop_without_missing_data()
|
||||||
else:
|
else:
|
||||||
self.posterior, self._log_marginal_likelihood, self.grad_dict, full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
|
self.posterior, self._log_marginal_likelihood, self.grad_dict, full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
|
||||||
self._outer_values_update(full_values)
|
self._outer_values_update(full_values)
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,8 @@ class SparseGP_MPI(SparseGP):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False, missing_data=False):
|
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False,
|
||||||
|
missing_data=False, stochastic=False, batchsize=1):
|
||||||
self._IN_OPTIMIZATION_ = False
|
self._IN_OPTIMIZATION_ = False
|
||||||
if mpi_comm != None:
|
if mpi_comm != None:
|
||||||
if inference_method is None:
|
if inference_method is None:
|
||||||
|
|
@ -42,7 +43,8 @@ class SparseGP_MPI(SparseGP):
|
||||||
else:
|
else:
|
||||||
assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
|
assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
|
||||||
|
|
||||||
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer, missing_data=missing_data)
|
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer,
|
||||||
|
missing_data=missing_data, stochastic=stochastic, batchsize=batchsize)
|
||||||
self.update_model(False)
|
self.update_model(False)
|
||||||
self.link_parameter(self.X, index=0)
|
self.link_parameter(self.X, index=0)
|
||||||
if variational_prior is not None:
|
if variational_prior is not None:
|
||||||
|
|
|
||||||
|
|
@ -208,12 +208,12 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
|
||||||
Q_signal = 4
|
Q_signal = 4
|
||||||
import GPy
|
import GPy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(0)
|
np.random.seed(3000)
|
||||||
|
|
||||||
k = GPy.kern.Matern32(Q_signal, 1., lengthscale=np.random.uniform(1,6,Q_signal), ARD=1)
|
k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
|
||||||
t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
|
t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
|
||||||
K = k.K(t)
|
K = k.K(t)
|
||||||
s1, s2, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
|
s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
|
||||||
|
|
||||||
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
|
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
|
||||||
|
|
||||||
|
|
@ -360,7 +360,6 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
):
|
):
|
||||||
from GPy import kern
|
from GPy import kern
|
||||||
from GPy.models import BayesianGPLVM
|
from GPy.models import BayesianGPLVM
|
||||||
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
|
|
||||||
|
|
||||||
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
|
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
|
||||||
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
|
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ class BayesianGPLVM(SparseGP_MPI):
|
||||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||||
Z=None, kernel=None, inference_method=None, likelihood=None,
|
Z=None, kernel=None, inference_method=None, likelihood=None,
|
||||||
name='bayesian gplvm', mpi_comm=None, normalizer=None,
|
name='bayesian gplvm', mpi_comm=None, normalizer=None,
|
||||||
missing_data=False):
|
missing_data=False, stochastic=False, batchsize=1):
|
||||||
self.mpi_comm = mpi_comm
|
self.mpi_comm = mpi_comm
|
||||||
self.__IN_OPTIMIZATION__ = False
|
self.__IN_OPTIMIZATION__ = False
|
||||||
|
|
||||||
|
|
@ -77,7 +77,8 @@ class BayesianGPLVM(SparseGP_MPI):
|
||||||
name=name, inference_method=inference_method,
|
name=name, inference_method=inference_method,
|
||||||
normalizer=normalizer, mpi_comm=mpi_comm,
|
normalizer=normalizer, mpi_comm=mpi_comm,
|
||||||
variational_prior=self.variational_prior,
|
variational_prior=self.variational_prior,
|
||||||
missing_data=missing_data)
|
missing_data=missing_data, stochastic=stochastic,
|
||||||
|
batchsize=batchsize)
|
||||||
|
|
||||||
def set_X_gradients(self, X, X_grad):
|
def set_X_gradients(self, X, X_grad):
|
||||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||||
|
|
@ -90,7 +91,12 @@ class BayesianGPLVM(SparseGP_MPI):
|
||||||
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
|
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
|
||||||
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
|
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
|
||||||
|
|
||||||
log_marginal_likelihood -= self.variational_prior.KL_divergence(X)
|
kl_fctr = 1.
|
||||||
|
if self.missing_data:
|
||||||
|
d = self.output_dim
|
||||||
|
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
|
||||||
|
else:
|
||||||
|
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
|
||||||
|
|
||||||
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
|
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
|
||||||
variational_posterior=X,
|
variational_posterior=X,
|
||||||
|
|
@ -104,8 +110,12 @@ class BayesianGPLVM(SparseGP_MPI):
|
||||||
X.variance.gradient[:] = 0
|
X.variance.gradient[:] = 0
|
||||||
|
|
||||||
self.variational_prior.update_gradients_KL(X)
|
self.variational_prior.update_gradients_KL(X)
|
||||||
current_values['meangrad'] += X.mean.gradient
|
if self.missing_data:
|
||||||
current_values['vargrad'] += X.variance.gradient
|
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
|
||||||
|
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
|
||||||
|
else:
|
||||||
|
current_values['meangrad'] += kl_fctr*X.mean.gradient
|
||||||
|
current_values['vargrad'] += kl_fctr*X.variance.gradient
|
||||||
|
|
||||||
if subset_indices is not None:
|
if subset_indices is not None:
|
||||||
value_indices['meangrad'] = subset_indices['samples']
|
value_indices['meangrad'] = subset_indices['samples']
|
||||||
|
|
|
||||||
|
|
@ -139,16 +139,16 @@ class MiscTests(unittest.TestCase):
|
||||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
||||||
m.kern.randomize()
|
m.kern.randomize()
|
||||||
m2.kern[''] = m.kern[:]
|
m2.kern[''] = m.kern[:]
|
||||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
|
||||||
m.kern.randomize()
|
m.kern.randomize()
|
||||||
m2.kern[:] = m.kern[:]
|
m2.kern[:] = m.kern[:]
|
||||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
|
||||||
m.kern.randomize()
|
m.kern.randomize()
|
||||||
m2.kern[''] = m.kern['']
|
m2.kern[''] = m.kern['']
|
||||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
|
||||||
m.kern.randomize()
|
m.kern.randomize()
|
||||||
m2.kern[:] = m.kern[''].values()
|
m2.kern[:] = m.kern[''].values()
|
||||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
|
||||||
|
|
||||||
def test_big_model(self):
|
def test_big_model(self):
|
||||||
m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)
|
m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ def initialize_latent(init, input_dim, Y):
|
||||||
p = pca(Y)
|
p = pca(Y)
|
||||||
PC = p.project(Y, min(input_dim, Y.shape[1]))
|
PC = p.project(Y, min(input_dim, Y.shape[1]))
|
||||||
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
||||||
var = p.fracs[:input_dim]
|
var = .1*p.fracs[:input_dim]
|
||||||
else:
|
else:
|
||||||
var = Xr.var(0)
|
var = Xr.var(0)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue