Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-09 12:02:38 +02:00)

Commit 9ddec5bc70: [merge] for spgp minibatch and psi NxMxM
11 changed files with 324 additions and 257 deletions
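The thread running through the diff below: the psi statistics are now computed once per parameters_changed call with the data dimension N kept in psi2 (shape N x M x M, via kern.psi2n), minibatches slice them by sample index, and N is summed out only immediately before the call into the inference method. A minimal numpy sketch of that shape bookkeeping (illustrative names, not the GPy API):

import numpy as np

N, M = 100, 10                       # data points, inducing points
psi2_n = np.random.rand(N, M, M)     # per-point psi2: the "psi NxMxM" of the title

ninan = np.arange(25)                # sample indices of one minibatch
psi2_batch = psi2_n[ninan]           # slice per-point statistics first, shape (25, M, M)

psi2_sum_n = psi2_batch.sum(axis=0)  # sum out N only when inference needs the M x M matrix
assert psi2_sum_n.shape == (M, M)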
|
|
@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
|
|||
import logging
|
||||
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
|
||||
from GPy.core.parameterization.param import Param
|
||||
from GPy.core.parameterization.observable_array import ObsAr
|
||||
|
||||
class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||
"""
|
||||
|
|
@@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient

-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
-        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)
-
-        if self.has_uncertain_inputs():
-            current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
-                                            variational_posterior=X,
-                                            Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
-                                            dL_dpsi1=grad_dict['dL_dpsi1'],
-                                            dL_dpsi2=grad_dict['dL_dpsi2'])
-        else:
-            current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
-            current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
-            if subset_indices is not None:
-                value_indices['Xgrad'] = subset_indices['samples']
-
-        kl_fctr = self.kl_factr
-        if self.has_uncertain_inputs():
-            if self.missing_data:
-                d = self.output_dim
-                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
-            else:
-                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
-
-            # Subsetting Variational Posterior objects, makes the gradients
-            # empty. We need them to be 0 though:
-            X.mean.gradient[:] = 0
-            X.variance.gradient[:] = 0
-
-            self.variational_prior.update_gradients_KL(X)
-            if self.missing_data:
-                current_values['meangrad'] += kl_fctr*X.mean.gradient/d
-                current_values['vargrad'] += kl_fctr*X.variance.gradient/d
-            else:
-                current_values['meangrad'] += kl_fctr*X.mean.gradient
-                current_values['vargrad'] += kl_fctr*X.variance.gradient
-
-            if subset_indices is not None:
-                value_indices['meangrad'] = subset_indices['samples']
-                value_indices['vargrad'] = subset_indices['samples']
-        return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
+        posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
+                                                                                                                      psi0=psi0, psi1=psi1, psi2=psi2, **kw)
+        return posterior, log_marginal_likelihood, grad_dict

     def _outer_values_update(self, full_values):
         """
@@ -128,20 +93,46 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         """
         super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
         if self.has_uncertain_inputs():
-            self.X.mean.gradient = full_values['meangrad']
-            self.X.variance.gradient = full_values['vargrad']
+            meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
+                                            variational_posterior=self.X,
+                                            Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
+                                            dL_dpsi1=full_values['dL_dpsi1'],
+                                            dL_dpsi2=full_values['dL_dpsi2'],
+                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+
+            kl_fctr = self.kl_factr
+
+            self.X.mean.gradient[:] = 0
+            self.X.variance.gradient[:] = 0
+            self.variational_prior.update_gradients_KL(self.X)
+
+            if self.missing_data or not self.stochastics:
+                self.X.mean.gradient = kl_fctr*self.X.mean.gradient
+                self.X.variance.gradient = kl_fctr*self.X.variance.gradient
+            else:
+                d = self.output_dim
+                self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
+                self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
+            self.X.mean.gradient += meangrad_tmp
+            self.X.variance.gradient += vargrad_tmp
+
         else:
-            self.X.gradient = full_values['Xgrad']
+            self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
+            self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

     def _outer_init_full_values(self):
-        if self.has_uncertain_inputs():
-            return dict(meangrad=np.zeros(self.X.mean.shape),
-                        vargrad=np.zeros(self.X.variance.shape))
-        else:
-            return dict(Xgrad=np.zeros(self.X.shape))
+        full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
+        return full_values

     def parameters_changed(self):
         super(BayesianGPLVMMiniBatch,self).parameters_changed()
+        kl_fctr = self.kl_factr
+        if self.missing_data or not self.stochastics:
+            self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
+        elif self.stochastics:
+            d = self.output_dim
+            self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
+
         if isinstance(self.inference_method, VarDTC_minibatch):
             return
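My reading of the kl_fctr bookkeeping above (an editorial gloss, not text from the commit): when stochastics batches over the d output dimensions, each update sees only a batch B of them, so the KL term is rescaled by batchsize/d to keep the bound correctly weighted over a full sweep:

\[
\mathcal{L} = \sum_{j=1}^{d} \mathcal{L}_j - \mathrm{KL}\left(q(X)\,\|\,p(X)\right)
\quad\rightsquigarrow\quad
\mathcal{L}_B = \sum_{j \in B} \mathcal{L}_j - \frac{|B|}{d}\,\mathrm{KL}\left(q(X)\,\|\,p(X)\right)
\]

with |B| the batchsize, so the d/|B| batches of one sweep contribute the KL exactly once in total.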
GPy/models/sparse_gp_minibatch.py (path inferred from the import of SparseGPMiniBatch above):
@@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):

         if stochastic and missing_data:
             self.missing_data = True
-            self.stochastics = SparseGPStochastics(self, batchsize)
+            self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
         elif stochastic and not missing_data:
             self.missing_data = False
-            self.stochastics = SparseGPStochastics(self, batchsize)
+            self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
         elif missing_data:
             self.missing_data = True
             self.stochastics = SparseGPMissing(self)
@@ -81,7 +81,7 @@ class SparseGPMiniBatch(SparseGP):
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)

-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs):
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
         """
         This is the standard part, which usually belongs in parameters_changed.
@@ -100,47 +100,13 @@ class SparseGPMiniBatch(SparseGP):
         like them into this dictionary for inner use of the indices inside the
         algorithm.
         """
-        try:
-            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
-        except:
-            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
-        current_values = {}
-        likelihood.update_gradients(grad_dict['dL_dthetaL'])
-        current_values['likgrad'] = likelihood.gradient.copy()
-        if subset_indices is None:
-            subset_indices = {}
-        if isinstance(X, VariationalPosterior):
-            #gradients wrt kernel
-            dL_dKmm = grad_dict['dL_dKmm']
-            kern.update_gradients_full(dL_dKmm, Z, None)
-            current_values['kerngrad'] = kern.gradient.copy()
-            kern.update_gradients_expectations(variational_posterior=X,
-                                               Z=Z,
-                                               dL_dpsi0=grad_dict['dL_dpsi0'],
-                                               dL_dpsi1=grad_dict['dL_dpsi1'],
-                                               dL_dpsi2=grad_dict['dL_dpsi2'])
-            current_values['kerngrad'] += kern.gradient
-
-            #gradients wrt Z
-            current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
-            current_values['Zgrad'] += kern.gradients_Z_expectations(
-                               grad_dict['dL_dpsi0'],
-                               grad_dict['dL_dpsi1'],
-                               grad_dict['dL_dpsi2'],
-                               Z=Z,
-                               variational_posterior=X)
-        else:
-            #gradients wrt kernel
-            kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
-            current_values['kerngrad'] = kern.gradient.copy()
-            kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
-            current_values['kerngrad'] += kern.gradient
-            kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
-            current_values['kerngrad'] += kern.gradient
-            #gradients wrt Z
-            current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
-            current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
-        return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
+        if psi2 is None:
+            psi2_sum_n = None
+        else:
+            psi2_sum_n = psi2.sum(axis=0)
+        posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
+                                                                                        dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
+        return posterior, log_marginal_likelihood, grad_dict

     def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
         """
@@ -174,7 +140,10 @@ class SparseGPMiniBatch(SparseGP):
                 else:
                     index = slice(None)
                 if key in full_values:
-                    full_values[key][index] += current_values[key]
+                    try:
+                        full_values[key][index] += current_values[key]
+                    except:
+                        full_values[key] += current_values[key]
                 else:
                     full_values[key] = current_values[key]
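The try/except added here covers entries whose full-size accumulator does not support indexed assignment. A self-contained sketch of the scatter-add pattern (a hypothetical standalone function, not the class method itself):

import numpy as np

def take_over_or_update(full_values, current_values, value_indices):
    # Scatter-add a minibatch's contribution into the full-size arrays when an
    # index is given; fall back to plain += for entries that reject indexing.
    for key, value in current_values.items():
        index = value_indices.get(key, slice(None)) if value_indices else slice(None)
        if key in full_values:
            try:
                full_values[key][index] += value
            except (TypeError, IndexError):   # e.g. scalar accumulators
                full_values[key] += value
        else:
            full_values[key] = value

full = {'dL_dpsi0': np.zeros(5)}
take_over_or_update(full, {'dL_dpsi0': np.ones(2)}, {'dL_dpsi0': np.array([1, 3])})
print(full['dL_dpsi0'])   # [0. 1. 0. 1. 0.]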
@@ -193,9 +162,43 @@ class SparseGPMiniBatch(SparseGP):
        Here you put the values, which were collected before in the right places.
        E.g. set the gradients of parameters, etc.
        """
-        self.likelihood.gradient = full_values['likgrad']
-        self.kern.gradient = full_values['kerngrad']
-        self.Z.gradient = full_values['Zgrad']
+        if self.has_uncertain_inputs():
+            #gradients wrt kernel
+            dL_dKmm = full_values['dL_dKmm']
+            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
+            kgrad = self.kern.gradient.copy()
+            self.kern.update_gradients_expectations(
+                variational_posterior=self.X,
+                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
+                dL_dpsi1=full_values['dL_dpsi1'],
+                dL_dpsi2=full_values['dL_dpsi2'],
+                psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+            self.kern.gradient += kgrad
+
+
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient += self.kern.gradients_Z_expectations(
+                variational_posterior=self.X,
+                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
+                dL_dpsi1=full_values['dL_dpsi1'],
+                dL_dpsi2=full_values['dL_dpsi2'],
+                psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+        else:
+            #gradients wrt kernel
+            self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
+            kgrad = self.kern.gradient.copy()
+            self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
+            kgrad += self.kern.gradient
+            self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
+            self.kern.gradient += kgrad
+            #kgrad += self.kern.gradient
+
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
+            self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
+
+        self.likelihood.update_gradients(full_values['dL_dthetaL'])

     def _outer_init_full_values(self):
         """
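The kgrad copy-out dance above exists because each kern.update_gradients_* call overwrites kern.gradient rather than accumulating into it. A toy illustration of the pattern (a stand-in class, not GPy's Kern):

class ToyKern:
    def __init__(self):
        self.gradient = 0.0
    def update_gradients_diag(self, dL_dKdiag):
        self.gradient = dL_dKdiag     # overwrites the previous value
    def update_gradients_full(self, dL_dK):
        self.gradient = dL_dK         # overwrites the previous value

kern = ToyKern()
kern.update_gradients_diag(1.0)
kgrad = kern.gradient                 # copy the partial result out first
kern.update_gradients_full(2.0)
kern.gradient += kgrad                # total = diag part + full part = 3.0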
@@ -210,7 +213,15 @@ class SparseGPMiniBatch(SparseGP):
         to initialize the gradients for the mean and the variance in order to
         have the full gradient for indexing)
         """
-        return {}
+        retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
+        if self.has_uncertain_inputs():
+            retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
+                             dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
+                             dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
+        else:
+            retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
+                         'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
+        return retd

     def _outer_loop_for_missing_data(self):
         Lm = None
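For concreteness, the accumulators set up above with illustrative sizes (N data points, M inducing points); note that dL_dpsi2 keeps the per-point N axis, matching the NxMxM of the commit title:

import numpy as np

N, M = 50, 8
full_values = dict(
    dL_dKmm=np.zeros((M, M)),        # inducing-inducing gradient, summed over batches
    dL_dpsi0=np.zeros(N),            # one entry per data point
    dL_dpsi1=np.zeros((N, M)),
    dL_dpsi2=np.zeros((N, M, M)),    # per-point psi2 gradient: N x M x M
)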
@@ -232,28 +243,36 @@ class SparseGPMiniBatch(SparseGP):
             print(message, end=' ')

         for d, ninan in self.stochastics.d:
-
             if not self.stochastics:
                 print(' '*(len(message)) + '\r', end=' ')
                 message = m_f(d)
                 print(message, end=' ')

-            posterior, log_marginal_likelihood, \
-                grad_dict, current_values, value_indices = self._inner_parameters_changed(
+            psi0ni = self.psi0[ninan]
+            psi1ni = self.psi1[ninan]
+            if self.has_uncertain_inputs():
+                psi2ni = self.psi2[ninan]
+                value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
+            else:
+                psi2ni = None
+                value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
+
+            posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
                 self.kern, self.X[ninan],
                 self.Z, self.likelihood,
                 self.Y_normalized[ninan][:, d], self.Y_metadata,
                 Lm, dL_dKmm,
-                subset_indices=dict(outputs=d, samples=ninan))
+                psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)

-            self._inner_take_over_or_update(self.full_values, current_values, value_indices)
-            self._inner_values_update(current_values)
+            # Fill out the full values by adding in the appropriate grad_dict
+            # values
+            self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
+            self._inner_values_update(grad_dict) # What is this for? -> MRD

             Lm = posterior.K_chol
             dL_dKmm = grad_dict['dL_dKmm']
             woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
             woodbury_vector[:, d] = posterior.woodbury_vector
             self._log_marginal_likelihood += log_marginal_likelihood

         if not self.stochastics:
             print('')
@@ -261,10 +280,10 @@ class SparseGPMiniBatch(SparseGP):
         self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                                    K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
         self._outer_values_update(self.full_values)
+        if self.has_uncertain_inputs():
+            self.kern.return_psi2_n = False

     def _outer_loop_without_missing_data(self):
         self._log_marginal_likelihood = 0

         if self.posterior is None:
             woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
             woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
@@ -272,17 +291,16 @@ class SparseGPMiniBatch(SparseGP):
             woodbury_inv = self.posterior._woodbury_inv
             woodbury_vector = self.posterior._woodbury_vector

-        d = self.stochastics.d
-        posterior, log_marginal_likelihood, \
-            grad_dict, self.full_values, _ = self._inner_parameters_changed(
+        d = self.stochastics.d[0][0]
+        posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
             self.kern, self.X,
             self.Z, self.likelihood,
             self.Y_normalized[:, d], self.Y_metadata)
+        self.grad_dict = grad_dict

-        self._log_marginal_likelihood += log_marginal_likelihood
+        self._log_marginal_likelihood = log_marginal_likelihood

-        self._outer_values_update(self.full_values)
+        self._outer_values_update(self.grad_dict)

         woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
         woodbury_vector[:, d] = posterior.woodbury_vector
@@ -291,10 +309,23 @@ class SparseGPMiniBatch(SparseGP):
                                    K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)

     def parameters_changed(self):
+        #Compute the psi statistics for N once, but don't sum out N in psi2
+        if self.has_uncertain_inputs():
+            #psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
+            #psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
+            #psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
+            self.psi0 = self.kern.psi0(self.Z, self.X)
+            self.psi1 = self.kern.psi1(self.Z, self.X)
+            self.psi2 = self.kern.psi2n(self.Z, self.X)
+        else:
+            self.psi0 = self.kern.Kdiag(self.X)
+            self.psi1 = self.kern.K(self.X, self.Z)
+            self.psi2 = None
+
         if self.missing_data:
             self._outer_loop_for_missing_data()
         elif self.stochastics:
             self._outer_loop_without_missing_data()
         else:
-            self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
-            self._outer_values_update(self.full_values)
+            self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
+            self._outer_values_update(self.grad_dict)
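A small sketch of the else-branch statistics using a GPy RBF kernel (assumes GPy is installed; the uncertain-inputs branch additionally needs a variational posterior over X and is omitted here):

import numpy as np
import GPy

X = np.random.randn(50, 2)        # certain inputs -> the Kdiag/K branch
Z = np.random.randn(8, 2)         # inducing inputs
kern = GPy.kern.RBF(input_dim=2)

psi0 = kern.Kdiag(X)              # shape (50,)
psi1 = kern.K(X, Z)               # shape (50, 8)
psi2 = None                       # psi2 only exists for uncertain inputs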