From 26396939e530ff39ce3cc5e3077c094eef8871b0 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Thu, 16 Oct 2014 12:52:17 +0100
Subject: [PATCH] [vardtc] missing data handling and stochastic update in d

---
 GPy/core/sparse_gp.py                    | 118 +++++++++++++++++++----
 GPy/core/sparse_gp_mpi.py                |   6 +-
 GPy/examples/dimensionality_reduction.py |   7 +-
 GPy/models/bayesian_gplvm.py             |  20 +++-
 GPy/testing/model_tests.py               |   8 +-
 GPy/util/initialization.py               |   2 +-
 6 files changed, 124 insertions(+), 37 deletions(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 8ea0d4c6..120f0d94 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -10,6 +10,8 @@
 from parameterization.variational import VariationalPosterior
 import logging
 from GPy.inference.latent_function_inference.posterior import Posterior
+from GPy.inference.optimization.stochastics import SparseGPStochastics,\
+    SparseGPMissing
 logger = logging.getLogger("sparse gp")
 
 class SparseGP(GP):
@@ -37,12 +39,7 @@ class SparseGP(GP):
 
     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
                  name='sparse gp', Y_metadata=None, normalizer=False,
-                 missing_data=False):
-
-        self.missing_data = missing_data
-        if self.missing_data:
-            self.ninan = ~np.isnan(Y)
-
+                 missing_data=False, stochastic=False, batchsize=1):
         #pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
@@ -56,6 +53,22 @@ class SparseGP(GP):
         self.num_inducing = Z.shape[0]
 
         GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
+        self.missing_data = missing_data
+
+        if stochastic and missing_data:
+            self.missing_data = True
+            self.ninan = ~np.isnan(Y)
+            self.stochastics = SparseGPStochastics(self, batchsize)
+        elif stochastic and not missing_data:
+            self.missing_data = False
+            self.stochastics = SparseGPStochastics(self, batchsize)
+        elif missing_data:
+            self.missing_data = True
+            self.ninan = ~np.isnan(Y)
+            self.stochastics = SparseGPMissing(self)
+        else:
+            self.stochastics = False
+
         logger.info("Adding Z as parameter")
         self.link_parameter(self.Z, index=0)
         if self.missing_data:
@@ -71,6 +84,7 @@ class SparseGP(GP):
             print message,
             print ''
 
+        self.posterior = None
 
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)
@@ -156,7 +170,31 @@ class SparseGP(GP):
         value_indices: dictionary holding indices for the update in full_values.
 
-        if the key exists the update rule is:
+        if the key exists the update rule is:def df(x):
+    m.stochastics.do_stochastics()
+    grads = m._grads(x)
+    print '\r',
+    message = "Lik: {: 6.4E} Grad: {: 6.4E} Dim: {} Lik: {} Len: {!s}".format(float(m.log_likelihood()), np.einsum('i,i->', grads, grads), m.stochastics.d, float(m.likelihood.variance), " ".join(["{:3.2E}".format(l) for l in m.kern.lengthscale.values]))
+    print message,
+    return grads
+
+def grad_stop(threshold):
+    def inner(args):
+        g = args['gradient']
+        return np.sqrt(np.einsum('i,i->',g,g)) < threshold
+    return inner
+
+def maxiter_stop(maxiter):
+    def inner(args):
+        return args['n_iter'] == maxiter
+    return inner
+
+def optimize(m, maxiter=1000):
+    #opt = climin.RmsProp(m.optimizer_array.copy(), df, 1e-6, decay=0.9, momentum=0.9, step_adapt=1e-7)
+    opt = climin.Adadelta(m.optimizer_array.copy(), df, 1e-2, decay=0.9)
+    ret = opt.minimize_until((grad_stop(.1), maxiter_stop(maxiter)))
+    print
+    return ret
         full_values[key][value_indices[key]] += current_values[key]
         """
         for key in current_values.keys():
@@ -206,22 +244,29 @@ class SparseGP(GP):
     def _outer_loop_for_missing_data(self):
         Lm = None
         dL_dKmm = None
-        Kmm = None
 
         self._log_marginal_likelihood = 0
         full_values = self._outer_init_full_values()
 
-        woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
-        woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
-        m_f = lambda i: "Inference with missing data: {: >7.2%}".format(float(i+1)/self.output_dim)
-        message = m_f(-1)
-        print message,
-        for d in xrange(self.output_dim):
+        if self.posterior is None:
+            woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
+            woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
+        else:
+            woodbury_inv = self.posterior._woodbury_inv
+            woodbury_vector = self.posterior._woodbury_vector
+
+        if not self.stochastics:
+            m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
+            message = m_f(-1)
+            print message,
+
+        for d in self.stochastics.d:
             ninan = self.ninan[:, d]
-            print ' '*(len(message)) + '\r',
-            message = m_f(d)
-            print message,
+            if not self.stochastics:
+                print ' '*(len(message)) + '\r',
+                message = m_f(d)
+                print message,
 
             posterior, log_marginal_likelihood, \
                 grad_dict, current_values, value_indices = self._inner_parameters_changed(
@@ -236,19 +281,50 @@ class SparseGP(GP):
                 Lm = posterior.K_chol
                 dL_dKmm = grad_dict['dL_dKmm']
-                Kmm = posterior._K
 
             woodbury_inv[:, :, d] = posterior.woodbury_inv
             woodbury_vector[:, d:d+1] = posterior.woodbury_vector
 
             self._log_marginal_likelihood += log_marginal_likelihood
 
-        print ''
+        if not self.stochastics:
+            print ''
 
-        self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
-                                   K=Kmm, mean=None, cov=None, K_chol=Lm)
+        if self.posterior is None:
+            self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
+                                       K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
         self._outer_values_update(full_values)
 
+    def _outer_loop_without_missing_data(self):
+        self._log_marginal_likelihood = 0
+
+        if self.posterior is None:
+            woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
+            woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
+        else:
+            woodbury_inv = self.posterior._woodbury_inv
+            woodbury_vector = self.posterior._woodbury_vector
+
+        d = self.stochastics.d
+        posterior, log_marginal_likelihood, \
+            grad_dict, current_values, _ = self._inner_parameters_changed(
+                self.kern, self.X,
+                self.Z, self.likelihood,
+                self.Y_normalized[:, d], self.Y_metadata)
+        self.grad_dict = grad_dict
+
+        self._log_marginal_likelihood += log_marginal_likelihood
+
+        self._outer_values_update(current_values)
+
+        woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
+        woodbury_vector[:, d] = posterior.woodbury_vector
+        if self.posterior is None:
+            self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
+                                       K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
+
     def parameters_changed(self):
         if self.missing_data:
             self._outer_loop_for_missing_data()
+        elif self.stochastics:
+            self._outer_loop_without_missing_data()
         else:
             self.posterior, self._log_marginal_likelihood, self.grad_dict, full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
             self._outer_values_update(full_values)
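Note: the df / grad_stop / maxiter_stop / optimize helpers that end up inside the docstring in the hunk above read like a climin-driven training loop for the new stochastic mode, in which m.stochastics.do_stochastics() re-draws the minibatch of output dimensions before every gradient evaluation. Below is a cleaned-up sketch of that loop, assuming the optional climin package and a model m constructed with stochastic=True; the helper names are illustrative and not part of the patch.

    import numpy as np
    import climin

    def make_df(m):
        # gradient callback for climin: resample the minibatch of output
        # dimensions, then return the gradient at the current parameters x
        def df(x):
            m.stochastics.do_stochastics()
            return m._grads(x)
        return df

    def grad_stop(threshold):
        # stop criterion: Euclidean norm of the gradient below `threshold`
        def inner(info):
            g = info['gradient']
            return np.sqrt(np.einsum('i,i->', g, g)) < threshold
        return inner

    def maxiter_stop(maxiter):
        # stop criterion: fixed number of iterations reached
        def inner(info):
            return info['n_iter'] == maxiter
        return inner

    def optimize_stochastic(m, maxiter=1000, step_rate=1e-2):
        # Adadelta with the same settings as the helper in the patch
        opt = climin.Adadelta(m.optimizer_array.copy(), make_df(m), step_rate, decay=0.9)
        return opt.minimize_until((grad_stop(.1), maxiter_stop(maxiter)))

Adadelta with step rate 1e-2 mirrors the pasted helper; any climin optimizer exposing the same minimize_until interface could be swapped in.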
diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py
index d0008e58..52f75d45 100644
--- a/GPy/core/sparse_gp_mpi.py
+++ b/GPy/core/sparse_gp_mpi.py
@@ -34,7 +34,8 @@ class SparseGP_MPI(SparseGP):
 
     """
 
-    def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False, missing_data=False):
+    def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False,
+                 missing_data=False, stochastic=False, batchsize=1):
         self._IN_OPTIMIZATION_ = False
         if mpi_comm != None:
             if inference_method is None:
@@ -42,7 +43,8 @@ class SparseGP_MPI(SparseGP):
             else:
                 assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
 
-        super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer, missing_data=missing_data)
+        super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer,
+                                           missing_data=missing_data, stochastic=stochastic, batchsize=batchsize)
         self.update_model(False)
         self.link_parameter(self.X, index=0)
         if variational_prior is not None:
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 94e83ff1..615e215c 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -208,12 +208,12 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
     Q_signal = 4
     import GPy
     import numpy as np
-    np.random.seed(0)
+    np.random.seed(3000)
 
-    k = GPy.kern.Matern32(Q_signal, 1., lengthscale=np.random.uniform(1,6,Q_signal), ARD=1)
+    k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
     t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
     K = k.K(t)
-    s1, s2, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
+    s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
 
     Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
 
@@ -360,7 +360,6 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, ):
     from GPy import kern
     from GPy.models import BayesianGPLVM
-    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
 
     D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
     _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing,
                                    plot_sim)
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 67cb6e62..44dc7f35 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -27,7 +27,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None,
                  name='bayesian gplvm', mpi_comm=None, normalizer=None,
-                 missing_data=False):
+                 missing_data=False, stochastic=False, batchsize=1):
         self.mpi_comm = mpi_comm
         self.__IN_OPTIMIZATION__ = False
 
@@ -77,7 +77,8 @@ class BayesianGPLVM(SparseGP_MPI):
                                             name=name, inference_method=inference_method,
                                             normalizer=normalizer, mpi_comm=mpi_comm,
                                             variational_prior=self.variational_prior,
-                                            missing_data=missing_data)
+                                            missing_data=missing_data, stochastic=stochastic,
+                                            batchsize=batchsize)
 
     def set_X_gradients(self, X, X_grad):
         """Set the gradients of the posterior distribution of X in its specific form."""
@@ -90,7 +91,12 @@ class BayesianGPLVM(SparseGP_MPI):
 
     def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
         posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
-        log_marginal_likelihood -= self.variational_prior.KL_divergence(X)
+        kl_fctr = 1.
+        if self.missing_data:
+            d = self.output_dim
+            log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
+        else:
+            log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
 
         current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
                                             variational_posterior=X,
@@ -104,8 +110,12 @@ class BayesianGPLVM(SparseGP_MPI):
             X.variance.gradient[:] = 0
 
         self.variational_prior.update_gradients_KL(X)
-        current_values['meangrad'] += X.mean.gradient
-        current_values['vargrad'] += X.variance.gradient
+        if self.missing_data:
+            current_values['meangrad'] += kl_fctr*X.mean.gradient/d
+            current_values['vargrad'] += kl_fctr*X.variance.gradient/d
+        else:
+            current_values['meangrad'] += kl_fctr*X.mean.gradient
+            current_values['vargrad'] += kl_fctr*X.variance.gradient
 
         if subset_indices is not None:
             value_indices['meangrad'] = subset_indices['samples']
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index c87a44b1..d05e33de 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -139,16 +139,16 @@ class MiscTests(unittest.TestCase):
         np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
         m.kern.randomize()
         m2.kern[''] = m.kern[:]
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
         m.kern.randomize()
         m2.kern[:] = m.kern[:]
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
         m.kern.randomize()
         m2.kern[''] = m.kern['']
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
         m.kern.randomize()
         m2.kern[:] = m.kern[''].values()
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
 
     def test_big_model(self):
         m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)
diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py
index 908da023..61837606 100644
--- a/GPy/util/initialization.py
+++ b/GPy/util/initialization.py
@@ -13,7 +13,7 @@ def initialize_latent(init, input_dim, Y):
         p = pca(Y)
         PC = p.project(Y, min(input_dim, Y.shape[1]))
         Xr[:PC.shape[0], :PC.shape[1]] = PC
-        var = p.fracs[:input_dim]
+        var = .1*p.fracs[:input_dim]
     else:
         var = Xr.var(0)
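Usage note: a minimal sketch of how the keyword arguments introduced by this patch might be passed through BayesianGPLVM. The data, sizes and settings below are illustrative only, and the latent space is initialised explicitly because PCA initialisation is not meaningful on data containing NaNs.

    import numpy as np
    import GPy

    N, D, Q = 100, 20, 4
    Y = np.random.randn(N, D)
    Y[np.random.rand(N, D) < 0.3] = np.nan      # simulate missing observations

    X_init = np.random.randn(N, Q)              # explicit init, avoids PCA on NaNs

    # missing_data=True switches on the per-output-dimension inference loop,
    # which uses only the observed rows (ninan) of each column of Y
    m = GPy.models.BayesianGPLVM(Y, Q, X=X_init, num_inducing=10, missing_data=True)
    m.optimize(messages=True, max_iters=50)

    # stochastic=True additionally restricts each gradient evaluation to
    # `batchsize` output dimensions; this mode is meant to be driven by an
    # external loop that calls m2.stochastics.do_stochastics() between
    # evaluations, as in the climin sketch above
    m2 = GPy.models.BayesianGPLVM(Y, Q, X=X_init, num_inducing=10,
                                  missing_data=True, stochastic=True, batchsize=2)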