diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 00a80c7b..7677fea2 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -58,11 +58,33 @@ class SparseGP(GP):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
         self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
         if isinstance(self.X, VariationalPosterior):
-            self.kern.update_gradients_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
-            self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
+            #gradients wrt kernel
+            dL_dKmm = self.grad_dict.pop('dL_dKmm')
+            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
+            target = np.zeros(self.kern.size)
+            self.kern._collect_gradient(target)
+            self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
+            self.kern._collect_gradient(target)
+            self.kern._set_gradient(target)
+
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient += self.kern.gradients_Z_expectations(
+                self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
         else:
-            self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
-            self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
+            #gradients wrt kernel
+            target = np.zeros(self.kern.size)
+            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
+            self.kern._collect_gradient(target)
+            self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
+            self.kern._collect_gradient(target)
+            self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
+            self.kern._collect_gradient(target)
+            self.kern._set_gradient(target)
+
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
+            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
 
     def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 1022124d..3e52d5af 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -101,7 +101,7 @@ class Add(Kern):
             raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return psi2
 
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         from white import White
         from rbf import RBF
         #from rbf_inv import RBFInv
@@ -124,10 +124,10 @@ class Add(Kern):
 
             eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
 
-            p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+            p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
 
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         from white import White
         from rbf import RBF
         #from rbf_inv import RBFInv
@@ -151,10 +151,10 @@ class Add(Kern):
 
             eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
 
-            target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+            target += p1.gradients_Z_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
         return target
 
-    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         from white import White
         from rbf import RBF
         #from rbf_inv import RBFInv
@@ -179,7 +179,7 @@ class Add(Kern):
 
             eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
 
-            a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+            a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
             target_mu += a
             target_S += b
         return target_mu, target_S
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 6b23a69e..2e412688 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -39,28 +39,21 @@ class Kern(Parameterized):
     def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        target = np.zeros(self.size)
-        self.update_gradients_diag(dL_dKdiag, X)
-        self._collect_gradient(target)
-        self.update_gradients_full(dL_dKnm, X, Z)
-        self._collect_gradient(target)
-        self.update_gradients_full(dL_dKmm, Z, None)
-        self._collect_gradient(target)
-        self._set_gradient(target)
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        """
+        Set the gradients of all parameters when doing inference with
+        uncertain inputs, using expectations of the kernel.
+        """
+        raise NotImplementedError
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        raise NotImplementedError
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        """
+        Compute the gradients wrt the parameters of the variational
+        distribution q(X), chain-ruling via the expectations of the kernel.
+        """
+        raise NotImplementedError
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
-        raise NotImplementedError
-    def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        grad = self.gradients_X(dL_dKmm, Z)
-        grad += self.gradients_X(dL_dKnm.T, Z, X)
-        return grad
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        raise NotImplementedError
-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        raise NotImplementedError
-
     def plot_ARD(self, *args, **kw):
         if "matplotlib" in sys.modules:
             from ...plotting.matplot_dep import kernel_plots
@@ -68,13 +61,13 @@ class Kern(Parameterized):
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ...plotting.matplot_dep import kernel_plots
         return kernel_plots.plot_ARD(self,*args,**kw)
-    
+
     def input_sensitivity(self):
         """
         Returns the sensitivity for each dimension of this kernel.
         """
         return np.zeros(self.input_dim)
-    
+
     def __add__(self, other):
         """ Overloading of the '+' operator. for more control, see self.add """
         return self.add(other)
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index 1d4f4611..e503180a 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -117,7 +117,7 @@ class Linear(Kern):
         ZAinner = self._ZAinner(variational_posterior, Z)
         return np.dot(ZAinner, ZA.T)
 
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         mu, S = variational_posterior.mean, variational_posterior.variance
         # psi0:
         tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
@@ -130,20 +130,15 @@ class Linear(Kern):
         tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
         if self.ARD:
             grad += tmp.sum(0).sum(0).sum(0)
         else:
             grad += tmp.sum()
-        #from Kmm
-        self.update_gradients_full(dL_dKmm, Z, None)
-
         self.variances.gradient += grad
 
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
-        # Kmm
-        grad = self.gradients_X(dL_dKmm, Z, None)
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         #psi1
-        grad += self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
+        grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
         #psi2
         self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
         return grad
 
-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
         # psi0
         grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index c80fb646..7c43b18d 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -55,10 +55,7 @@ class RBF(Stationary):
         self._psi_computations(Z, mu, S)
         return self._psi2
 
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        #contributions from Kmm
-        sself.update_gradients_full(dL_dKmm, Z)
-
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
@@ -87,7 +84,7 @@ class RBF(Stationary):
         else:
             self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
 
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
@@ -104,11 +101,9 @@ class RBF(Stationary):
             dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
             grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
 
-        grad += self.gradients_X(dL_dKmm, Z, None)
-
         return grad
 
-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py
index f4400ed7..135e3f9e 100644
--- a/GPy/kern/_src/static.py
+++ b/GPy/kern/_src/static.py
@@ -25,10 +25,10 @@ class Static(Kern):
     def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)
 
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return np.zeros(Z.shape)
 
-    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
 
     def psi0(self, Z, variational_posterior):
@@ -61,8 +61,8 @@
     def update_gradients_diag(self, dL_dKdiag, X):
         self.variance.gradient = dL_dKdiag.sum()
 
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum()
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum()
 
 
 class Bias(Static):
@@ -86,6 +86,6 @@ class Bias(Static):
         ret[:] = self.variance**2
         return ret
 
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index b998969c..2d0d284a 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -312,4 +312,8 @@ class RatQuad(Stationary):
         grad = np.sum(dL_dK*dK_dpow)
         self.power.gradient = grad
 
+    def update_gradients_diag(self, dL_dKdiag, X):
+        super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
+        self.power.gradient = 0.
+
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 50fc2810..366995dc 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -66,7 +66,7 @@ class BayesianGPLVM(SparseGP):
         super(BayesianGPLVM, self).parameters_changed()
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
 
-        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
+        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
 
         # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)
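
Reviewer note (not part of the patch): the kernels can drop their Kmm terms because SparseGP.parameters_changed now accumulates two separate passes, update_gradients_full(dL_dKmm, Z, None) and update_gradients_expectations(...), into one target vector via _collect_gradient/_set_gradient. The sketch below is a standalone NumPy illustration of that split for the White kernel, where dKmm/dvariance = I and dpsi0/dvariance = 1; the sizes N, M and the random arrays are made up for the example and are not taken from the patch.

import numpy as np

# Hypothetical sizes: N data points, M inducing points.
N, M = 5, 3
rng = np.random.RandomState(0)
dL_dKmm = rng.randn(M, M)   # gradient of the objective wrt Kmm
dL_dpsi0 = rng.randn(N)     # gradient of the objective wrt psi0

# Old behaviour: White.update_gradients_variational folded both terms into one call.
old_grad = np.trace(dL_dKmm) + dL_dpsi0.sum()

# New behaviour: two passes collected into a target vector of size kern.size
# (White has a single parameter, its variance), mirroring sparse_gp.py above.
target = np.zeros(1)
target[0] += np.trace(dL_dKmm)   # pass 1: update_gradients_full(dL_dKmm, Z, None); dKmm/dvariance = I
target[0] += dL_dpsi0.sum()      # pass 2: update_gradients_expectations(...); dpsi0/dvariance = 1
new_grad = target[0]

# The two-pass accumulation reproduces the old single-pass gradient.
assert np.isclose(old_grad, new_grad)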