[missing data] general implementation for subsetting data

This commit is contained in:
Max Zwiessele 2014-10-08 12:03:51 +01:00
parent d93fac8c13
commit fa7807ee6f
7 changed files with 329 additions and 108 deletions


@@ -9,6 +9,7 @@ from .. import likelihoods
from parameterization.variational import VariationalPosterior
import logging
from GPy.inference.latent_function_inference.posterior import Posterior
logger = logging.getLogger("sparse gp")
class SparseGP(GP):
@@ -34,12 +35,18 @@ class SparseGP(GP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False):
self.missing_data = missing_data
if self.missing_data:
self.ninan = ~np.isnan(Y)
#pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC()
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
@@ -51,44 +58,200 @@ class SparseGP(GP):
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print message,
for d in xrange(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print ' '*(len(message)+1) + '\r',
message = m_f(d)
print message,
print ''
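As a standalone sketch of what this precomputation does (toy data invented for illustration, not from the commit), each output dimension keeps only its observed rows:

import numpy as np

# Toy data with missing entries.
Y = np.array([[1.0, np.nan],
              [2.0, 3.0],
              [np.nan, 4.0]])
ninan = ~np.isnan(Y)  # True where Y is observed, as in self.ninan

# Per-output-dimension subsets, as precomputed into self.Ylist above.
Ylist = [Y[ninan[:, d], d][:, None] for d in range(Y.shape[1])]
print([y.shape for y in Ylist])  # [(2, 1), (2, 1)]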
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior):
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
"""
This is the standard part, which usually belongs in parameters_changed.
For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner
loop, in order to ensure a different handling of gradients etc of different
subsets of data.
The dict in current_values will be passed aroung as current_values for
the rest of the algorithm, so this is the place to store current values,
such as subsets etc, if necessary.
If Lm and dL_dKmm can be precomputed (or only need to be computed once)
pass them in here, so they will be passed to the inference_method.
subset_indices is a dictionary of indices. you can put the indices however you
like them into this dictionary for inner use of the indices inside the
algorithm.
"""
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = self.grad_dict['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = self.kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X,
Z=self.Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.gradient += target
dL_dKmm = grad_dict['dL_dKmm']
kern.update_gradients_full(dL_dKmm, Z, None)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_expectations(variational_posterior=X,
Z=Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi0'],
self.grad_dict['dL_dpsi1'],
self.grad_dict['dL_dpsi2'],
Z=self.Z,
variational_posterior=self.X)
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
current_values['Zgrad'] += kern.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
Z=Z,
variational_posterior=X)
else:
#gradients wrt kernel
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
target = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
target += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += target
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
current_values['kerngrad'] += kern.gradient
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
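Note that both branches fill the same keys ('likgrad' via the likelihood update above, plus 'kerngrad' and 'Zgrad'), which is what lets _inner_take_over_or_update below accumulate the results uniformly across data subsets.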
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
"""
This is for automatic updates of values in the inner loop of missing
data handling. Both arguments are dictionaries and the values in
full_values will be updated by the current_gradients.
If a key from current_values does not exist in full_values, it will be
initialized to the value in current_values.
If there is indices needed for the update, value_indices can be used for
that. If value_indices has the same key, as current_values, the update
in full_values will be indexed by the indices in value_indices.
grads:
dictionary of standing gradients (you will have to carefully make sure, that
the ordering is right!). The values in here will be updated such that
full_values[key] += current_values[key] forall key in full_gradients.keys()
gradients:
dictionary of gradients in the current set of parameters.
value_indices:
dictionary holding indices for the update in full_values.
if the key exists the update rule is:
full_values[key][value_indices[key]] += current_values[key]
"""
for key in current_values.keys():
if value_indices is not None and value_indices.has_key(key):
index = value_indices[key]
else:
index = slice(None)
if full_values.has_key(key):
full_values[key][index] += current_values[key]
else:
full_values[key] = current_values[key]
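A small runnable example of this update rule (keys, shapes and indices invented for illustration):

import numpy as np

full_values = {'Xgrad': np.zeros(4)}
current_values = {'Xgrad': np.ones(2), 'likgrad': np.ones(3)}
value_indices = {'Xgrad': np.array([0, 2])}  # where this subset lands

for key in current_values.keys():
    index = value_indices[key] if key in value_indices else slice(None)
    if key in full_values:
        full_values[key][index] += current_values[key]
    else:
        full_values[key] = current_values[key]

print(full_values['Xgrad'])    # [ 1.  0.  1.  0.]
print(full_values['likgrad'])  # [ 1.  1.  1.]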
def _inner_values_update(self, current_values):
"""
This exists if there is more to do with the current values.
It will be called allways in the inner loop, so that
you can do additional inner updates for the inside of the missing data
loop etc. This can also be used for stochastic updates, when only working on
one dimension of the output.
"""
pass
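As the docstring suggests, this hook can carry stochastic updates. A hypothetical override (the subclass and the rescaling are invented for illustration, not part of this commit) could turn a single sampled output dimension into an unbiased estimate of the sum over all outputs:

class StochasticSparseGP(SparseGP):
    # Hypothetical subclass, for illustration only.
    def _inner_values_update(self, current_values):
        # If one output dimension is sampled uniformly at random,
        # scaling by output_dim keeps the summed gradient unbiased.
        for key in current_values.keys():
            current_values[key] *= self.output_dim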
def _outer_values_update(self, full_values):
"""
Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc.
"""
self.likelihood.gradient = full_values['likgrad']
self.kern.gradient = full_values['kerngrad']
self.Z.gradient = full_values['Zgrad']
def _outer_init_full_values(self):
"""
If full_values has indices in values_indices, we might want to initialize
the full_values differently, so that subsetting is possible.
Here you can initialize the full_values for the values needed.
Keep in mind, that if a key does not exist in full_values when updating
values, it will be set (so e.g. for Z there is no need to initialize Zgrad,
as there is no subsetting needed. For X in BGPLVM on the other hand we probably need
to initialize the gradients for the mean and the variance in order to
have the full gradient for indexing)
"""
return {}
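Following the BGPLVM hint in the docstring, an override might pre-allocate full-size gradients for the variational parameters (a sketch; the attribute names self.X.mean and self.X.variance are assumed, not taken from this commit):

def _outer_init_full_values(self):
    # Pre-allocate, so per-subset gradients can be indexed into the
    # full arrays via value_indices (attribute names assumed).
    return dict(meangrad=np.zeros(self.X.mean.shape),
                vargrad=np.zeros(self.X.variance.shape))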
def _outer_loop_for_missing_data(self):
Lm = None
dL_dKmm = None
Kmm = None
self._log_marginal_likelihood = 0
full_values = self._outer_init_full_values()
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
m_f = lambda i: "Inference with missing data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1)
print message,
for d in xrange(self.output_dim):
ninan = self.ninan[:, d]
print ' '*(len(message)) + '\r',
message = m_f(d)
print message,
posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
self._inner_take_over_or_update(full_values, current_values, value_indices)
self._inner_values_update(current_values)
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
Kmm = posterior._K
woodbury_inv[:, :, d] = posterior.woodbury_inv
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
print ''
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=Kmm, mean=None, cov=None, K_chol=Lm)
self._outer_values_update(full_values)
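Note the design choice in this loop: Lm, dL_dKmm and Kmm depend only on Z, so they are computed on the first pass and fed back into _inner_parameters_changed for every further output dimension, while the Woodbury quantities, which do differ per output, are stacked into (num_inducing, num_inducing, output_dim) and (num_inducing, output_dim) arrays and combined into a single Posterior at the end.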
def parameters_changed(self):
if self.missing_data:
self._outer_loop_for_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, gradients, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self.kern.gradient = gradients['kerngrad']
self.Z.gradient = gradients['Zgrad']
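Putting the pieces together, a usage sketch of the new flag (toy data, kernel and inducing inputs are placeholders, not from the commit):

import numpy as np
import GPy

# Toy data; NaNs mark missing observations.
X = np.random.randn(50, 2)
Y = np.random.randn(50, 3)
Y[np.random.rand(50, 3) < 0.2] = np.nan

m = GPy.core.SparseGP(X, Y, np.random.randn(10, 2),
                      kernel=GPy.kern.RBF(2),
                      likelihood=GPy.likelihoods.Gaussian(),
                      missing_data=True)
m.optimize()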
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""