From 5f119d09185ae64bd5474935ceffca644d598881 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Thu, 27 Aug 2015 19:08:11 +0300
Subject: [PATCH] Tidied but broken

---
 GPy/models/bayesian_gplvm_minibatch.py |  46 +++-------
 GPy/models/sparse_gp_minibatch.py      | 115 ++++++++++++-------
 2 files changed, 69 insertions(+), 92 deletions(-)

diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py
index 47c425a4..b7e0c9b2 100644
--- a/GPy/models/bayesian_gplvm_minibatch.py
+++ b/GPy/models/bayesian_gplvm_minibatch.py
@@ -80,10 +80,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient
 
-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, psi0=None, psi1=None, psi2=None, **kw):
-        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
-                                                                                          psi0=psi0, psi1=psi1, psi2=psi2, subset_indices=subset_indices, **kw)
-        return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
+        posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
+                                                                                          psi0=psi0, psi1=psi1, psi2=psi2, **kw)
+        return posterior, log_marginal_likelihood, grad_dict
 
     def _outer_values_update(self, full_values):
         """
@@ -91,27 +91,19 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         E.g. set the gradients of parameters, etc.
""" super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) - current_values = full_values - grad_dict = current_values if self.has_uncertain_inputs(): - current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( + full_values['meangrad'], full_values['vargrad'] = self.kern.gradients_qX_expectations( variational_posterior=self.X, - Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2'], + Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], + dL_dpsi1=full_values['dL_dpsi1'], + dL_dpsi2=full_values['dL_dpsi2'], psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) else: - current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], self.X, self.Z) - current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], self.X) - if subset_indices is not None: - value_indices['Xgrad'] = subset_indices['samples'] + full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) + full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) kl_fctr = self.kl_factr if self.has_uncertain_inputs(): - #if self.missing_data: - #d = self.output_dim - #log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)/d - #else: self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) # Subsetting Variational Posterior objects, makes the gradients @@ -120,18 +112,9 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): self.X.variance.gradient[:] = 0 self.variational_prior.update_gradients_KL(self.X) - #if self.missing_data: - #current_values['meangrad'] += kl_fctr*self.X.mean.gradient/d - #current_values['vargrad'] += kl_fctr*self.X.variance.gradient/d - #else: - current_values['meangrad'] += kl_fctr*self.X.mean.gradient - current_values['vargrad'] += kl_fctr*self.X.variance.gradient + full_values['meangrad'] += kl_fctr*self.X.mean.gradient + full_values['vargrad'] += kl_fctr*self.X.variance.gradient - #if subset_indices is not None: - #value_indices['meangrad'] = subset_indices['samples'] - #value_indices['vargrad'] = subset_indices['samples'] - - full_values = current_values if self.has_uncertain_inputs(): self.X.mean.gradient = full_values['meangrad'] self.X.variance.gradient = full_values['vargrad'] @@ -140,11 +123,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): def _outer_init_full_values(self): full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() - #if self.has_uncertain_inputs(): - #return dict(meangrad=np.zeros(self.X.mean.shape), - #vargrad=np.zeros(self.X.variance.shape)) - #else: - #return dict(Xgrad=np.zeros(self.X.shape)) return full_values def parameters_changed(self): diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index f8322b4e..eab8d8f3 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, psi0=None, psi1=None, psi2=None, missing_inds=None, **kwargs): + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs): """ This is the standard part, which usually belongs in parameters_changed. 
@@ -103,19 +103,10 @@ class SparseGPMiniBatch(SparseGP):
             psi2_sum_n = None
         else:
             psi2_sum_n = psi2.sum(axis=0)
-        posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
-        current_values = {}
-        likelihood.update_gradients(grad_dict['dL_dthetaL'])
-        current_values['likgrad'] = likelihood.gradient.copy()
-        if subset_indices is None:
-            subset_indices = {}
-        current_values['dL_dpsi0'] = grad_dict['dL_dpsi0']
-        current_values['dL_dpsi1'] = grad_dict['dL_dpsi1']
-        current_values['dL_dpsi2'] = grad_dict['dL_dpsi2']
-        current_values['dL_dKmm'] = grad_dict['dL_dKmm']
+        posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
+                                                            dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
 
-        #current_values = grad_dict
-        return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
+        return posterior, log_marginal_likelihood, grad_dict
 
     def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
         """
@@ -149,7 +140,10 @@ class SparseGPMiniBatch(SparseGP):
                 else:
                     index = slice(None)
                 if key in full_values:
-                    full_values[key][index] += current_values[key]
+                    if np.isscalar(current_values[key]):
+                        full_values[key] += current_values[key]
+                    else:
+                        full_values[key][index] += current_values[key]
                 else:
                     full_values[key] = current_values[key]
 
@@ -168,43 +162,44 @@ class SparseGPMiniBatch(SparseGP):
         Here you put the values, which were collected before in the right places.
         E.g. set the gradients of parameters, etc.
         """
-        grad_dict = full_values
-        current_values = full_values
-        #current_values = {}
-        if isinstance(self.X, VariationalPosterior):
+        if self.has_uncertain_inputs():
             #gradients wrt kernel
-            dL_dKmm = grad_dict['dL_dKmm']
+            dL_dKmm = full_values['dL_dKmm']
             self.kern.update_gradients_full(dL_dKmm, self.Z, None)
-            current_values['kerngrad'] = self.kern.gradient.copy()
+            full_values['kerngrad'] = self.kern.gradient.copy()
             self.kern.update_gradients_expectations(variational_posterior=self.X,
                                                     Z=self.Z,
-                                                    dL_dpsi0=grad_dict['dL_dpsi0'],
-                                                    dL_dpsi1=grad_dict['dL_dpsi1'],
-                                                    dL_dpsi2=grad_dict['dL_dpsi2'])
-            current_values['kerngrad'] += self.kern.gradient
+                                                    dL_dpsi0=full_values['dL_dpsi0'],
+                                                    dL_dpsi1=full_values['dL_dpsi1'],
+                                                    dL_dpsi2=full_values['dL_dpsi2'])
+            full_values['kerngrad'] += self.kern.gradient
             #gradients wrt Z
-            current_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z)
-            current_values['Zgrad'] += self.kern.gradients_Z_expectations(
-                               grad_dict['dL_dpsi0'],
-                               grad_dict['dL_dpsi1'],
-                               grad_dict['dL_dpsi2'],
+            full_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z)
+            full_values['Zgrad'] += self.kern.gradients_Z_expectations(
+                               full_values['dL_dpsi0'],
+                               full_values['dL_dpsi1'],
+                               full_values['dL_dpsi2'],
                                Z=self.Z,
                                variational_posterior=self.X)
         else:
             #gradients wrt kernel
-            kern.update_gradients_diag(grad_dict['dL_dKdiag'], self.X)
-            current_values['kerngrad'] = self.kern.gradient.copy()
-            kern.update_gradients_full(grad_dict['dL_dKnm'], self.X, self.Z)
-            current_values['kerngrad'] += kern.gradient
-            kern.update_gradients_full(grad_dict['dL_dKmm'], self.Z, None)
-            current_values['kerngrad'] += kern.gradient
+            self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
+            full_values['kerngrad'] = self.kern.gradient.copy()
+            self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
+            full_values['kerngrad'] += self.kern.gradient
+            self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
+            full_values['kerngrad'] += self.kern.gradient
             #gradients wrt Z
-            current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], self.Z)
-            current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, self.Z, self.X)
-        self.likelihood.gradient = current_values['likgrad']
-        self.kern.gradient = current_values['kerngrad']
-        self.Z.gradient = current_values['Zgrad']
+            full_values['Zgrad'] = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
+            full_values['Zgrad'] += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
+
+        self.likelihood.update_gradients(full_values['dL_dthetaL'])
+        full_values['likgrad'] = self.likelihood.gradient.copy()
+
+        self.likelihood.gradient = full_values['likgrad']
+        self.kern.gradient = full_values['kerngrad']
+        self.Z.gradient = full_values['Zgrad']
 
     def _outer_init_full_values(self):
         """
@@ -242,10 +237,18 @@ class SparseGPMiniBatch(SparseGP):
             print(message, end=' ')
 
         #Compute the psi statistics for N once, but don't sum out N in psi2
-        self.kern.return_psi2_n = True
-        psi0 = self.kern.psi0(self.Z, self.X)
-        psi1 = self.kern.psi1(self.Z, self.X)
-        psi2 = self.kern.psi2(self.Z, self.X)
+        if self.has_uncertain_inputs():
+            self.kern.return_psi2_n = True
+            psi0 = self.kern.psi0(self.Z, self.X)
+            psi1 = self.kern.psi1(self.Z, self.X)
+            psi2 = self.kern.psi2(self.Z, self.X)
+        else:
+            # Certain inputs: the psi statistics collapse to the standard
+            # covariances, and psi2 is not needed.
+            psi0 = self.kern.Kdiag(self.X)
+            psi1 = self.kern.K(self.X, self.Z)
+            psi2 = None
+
         self.psi0 = psi0
         self.psi1 = psi1
         self.psi2 = psi2
@@ -259,23 +262,20 @@ class SparseGPMiniBatch(SparseGP):
                 psi0ni = psi0[ninan]
                 psi1ni = psi1[ninan]
                 psi2ni = psi2[ninan]
+                value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan)
 
-                posterior, log_marginal_likelihood, \
-                    grad_dict, current_values, value_indices = self._inner_parameters_changed(
+                posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
                         self.kern, self.X[ninan], self.Z,
                         self.likelihood,
                         self.Y_normalized[ninan][:, d], self.Y_metadata,
                         Lm, dL_dKmm,
-                        subset_indices=dict(outputs=d, samples=ninan,
-                                            dL_dpsi0=ninan,
-                                            dL_dpsi1=ninan),
                         psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)
-                self._inner_take_over_or_update(self.full_values, current_values, value_indices)
-                self._inner_values_update(current_values)
+                # Fill out the full values by adding in the appropriate
+                # grad_dict values
+                self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
+                self._inner_values_update(grad_dict)
 
                 # What is this for?
-                Lm = posterior.K_chol
-                dL_dKmm = grad_dict['dL_dKmm']
                 woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
                 woodbury_vector[:, d] = posterior.woodbury_vector
                 self._log_marginal_likelihood += log_marginal_likelihood
@@ -299,8 +299,7 @@ class SparseGPMiniBatch(SparseGP):
         woodbury_vector = self.posterior._woodbury_vector
         d = self.stochastics.d
 
-        posterior, log_marginal_likelihood, \
-            grad_dict, self.full_values, _ = self._inner_parameters_changed(
+        posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
             self.kern, self.X, self.Z,
             self.likelihood, self.Y_normalized[:, d],
             self.Y_metadata)
@@ -308,7 +307,7 @@ class SparseGPMiniBatch(SparseGP):
 
         self._log_marginal_likelihood += log_marginal_likelihood
 
-        self._outer_values_update(self.full_values)
+        self._outer_values_update(grad_dict)
         woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
         woodbury_vector[:, d] = posterior.woodbury_vector
@@ -322,5 +321,5 @@ class SparseGPMiniBatch(SparseGP):
         elif self.stochastics:
             self._outer_loop_without_missing_data()
         else:
-            self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
-            self._outer_values_update(self.full_values)
+            self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
+            self._outer_values_update(self.grad_dict)
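-- 

Reviewer notes (annotations on the patch above, not part of the diff).
The sketches below are illustrative Python; helper names and toy shapes
are made up, not GPy API unless noted.

1. The np.isscalar branch added to _inner_take_over_or_update is the core
of the minibatch bookkeeping: each inner call returns gradient blocks for
a subset of rows, and those blocks are summed into full-size arrays. A
minimal standalone sketch of the accumulation pattern (take_over_or_update
is a hypothetical stand-in for the method):

    import numpy as np

    def take_over_or_update(full_values, current_values, value_indices):
        # Accumulate one minibatch's gradient blocks into full-size arrays.
        # value_indices maps a key to the rows its block was computed on;
        # keys without an entry (e.g. dL_dKmm) span the whole array.
        for key, val in current_values.items():
            if value_indices is not None and key in value_indices:
                index = value_indices[key]
            else:
                index = slice(None)
            if key in full_values:
                if np.isscalar(val):
                    full_values[key] += val         # scalar terms sum directly
                else:
                    full_values[key][index] += val  # row-subset accumulation
            else:
                full_values[key] = val

    # Two "minibatches" covering rows 0-1 and 2-3 of a four-point dataset:
    full = {'dL_dpsi1': np.zeros((4, 3))}
    take_over_or_update(full, {'dL_dpsi1': np.ones((2, 3))},
                        {'dL_dpsi1': np.array([0, 1])})
    take_over_or_update(full, {'dL_dpsi1': np.ones((2, 3))},
                        {'dL_dpsi1': np.array([2, 3])})
    assert (full['dL_dpsi1'] == 1).all()

The scalar branch matters because some entries (a fixed likelihood's
dL_dthetaL, say) can come back as plain scalars, where the indexed
in-place add would fail.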
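2. The new uncertain/certain split around the psi statistics mirrors the
two forms the kernel statistics take. A sketch against a stock GPy
install (GPy.kern.RBF; NormalPosterior lives in
GPy.core.parameterization.variational); the shapes in the comments assume
50 data points, 10 inducing inputs, 2 latent dimensions:

    import numpy as np
    import GPy
    from GPy.core.parameterization.variational import NormalPosterior

    X = np.random.randn(50, 2)
    Z = np.random.randn(10, 2)
    kern = GPy.kern.RBF(2)

    # Certain inputs: psi statistics collapse to ordinary covariances.
    psi0 = kern.Kdiag(X)   # (50,)  the diagonal of K(X, X)
    psi1 = kern.K(X, Z)    # (50, 10)
    psi2 = None            # not needed on this branch

    # Uncertain inputs: expectations of the same quantities under q(X).
    qX = NormalPosterior(np.random.randn(50, 2), 0.1 * np.ones((50, 2)))
    psi0_u = kern.psi0(Z, qX)  # E_q[k(x_n, x_n)], (50,)
    psi1_u = kern.psi1(Z, qX)  # E_q[k(x_n, Z)],  (50, 10)
    psi2_u = kern.psi2(Z, qX)  # E_q[k(Z, x_n) k(x_n, Z)], summed over n
                               # unless return_psi2_n is set, which is what
                               # the patch does before the missing-data loop

Note that the missing-data loop still indexes psi2[ninan] unconditionally,
which crashes on the new certain-inputs branch where psi2 is None;
presumably part of what "broken" in the subject line refers to.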
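3. On the bayesian_gplvm_minibatch.py side, the quantity being assembled
is the bound L = (data terms) - kl_factr * KL(q(X) || p(X)); that is why,
after the variational gradients are zeroed, the KL contribution is added
onto meangrad/vargrad. For the standard choice p(X) = N(0, I) and
q(X) = N(mu, diag(S)), the KL term and its derivatives are easy to
hand-check (a sketch, not code from the patch):

    import numpy as np

    def kl_q_p(mu, S):
        # KL( N(mu, diag(S)) || N(0, I) ), summed over points and dims.
        return 0.5 * np.sum(S + mu**2 - 1.0 - np.log(S))

    def kl_gradients(mu, S):
        # What the variational prior pushes into X.mean.gradient and
        # X.variance.gradient (up to sign convention and the kl_factr scale).
        return mu, 0.5 * (1.0 - 1.0 / S)

    # quick finite-difference check of the mean gradient
    mu, S = np.random.randn(5, 2), np.full((5, 2), 0.3)
    eps = 1e-6
    g = kl_gradients(mu, S)[0][0, 0]
    mu2 = mu.copy(); mu2[0, 0] += eps
    assert abs((kl_q_p(mu2, S) - kl_q_p(mu, S)) / eps - g) < 1e-4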