diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py
index 603a64a5..42044b1b 100644
--- a/GPy/core/svgp.py
+++ b/GPy/core/svgp.py
@@ -36,12 +36,12 @@ class SVGP(SparseGP):
         KL_scale = 1.0
 
         import climin.util
-        #Make a climin slicer to make drawing minibatches much quicker
+        #Make a climin slicer to make drawing minibatches much quicker. Annoyingly, this doesn't pickle.
         self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
         X_batch, Y_batch = self.new_batch()
 
         #create the SVI inference method
-        inf_method = svgp_inf(KL_scale=KL_scale, batch_scale=batch_scale)
+        inf_method = svgp_inf()
 
         SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
          name=name, Y_metadata=Y_metadata, normalizer=False)
@@ -53,7 +53,7 @@ class SVGP(SparseGP):
         self.link_parameter(self.m)
 
     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
 
         #update the kernel gradients
         self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py
index ba36b74b..52db242c 100644
--- a/GPy/inference/latent_function_inference/svgp.py
+++ b/GPy/inference/latent_function_inference/svgp.py
@@ -5,11 +5,8 @@ import numpy as np
 from posterior import Posterior
 
 class SVGP(LatentFunctionInference):
-    def __init__(self, KL_scale=1., batch_scale=1.):
-        self.KL_scale = KL_scale
-        self.batch_scale = batch_scale
 
-    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None):
+    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
         num_inducing = Z.shape[0]
 
         num_data, num_outputs = Y.shape
@@ -44,9 +41,6 @@ class SVGP(LatentFunctionInference):
         dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
         dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
-        KL_scale = self.KL_scale
-        batch_scale = self.batch_scale
-
         KL, dKL_dKmm, dKL_dS, dKL_dm = KL_scale*KL, KL_scale*dKL_dKmm, KL_scale*dKL_dS, KL_scale*dKL_dm
 
         #quadrature for the likelihood
         F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v)
diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index dd9a5fe4..bff6d841 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -42,25 +42,41 @@ class Prod(CombinationKernel):
         return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            p.update_gradients_full(k/p.K(X,X2),X,X2)
+        if len(self.parts)==2:
+            self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
+            self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
+        else:
+            k = self.K(X,X2)*dL_dK
+            for p in self.parts:
+                p.update_gradients_full(k/p.K(X,X2),X,X2)
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        k = self.Kdiag(X)*dL_dKdiag
-        for p in self.parts:
-            p.update_gradients_diag(k/p.Kdiag(X),X)
+        if len(self.parts)==2:
+            self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
+            self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
+        else:
+            k = self.Kdiag(X)*dL_dKdiag
+            for p in self.parts:
+                p.update_gradients_diag(k/p.Kdiag(X),X)
 
     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
-        k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            target += p.gradients_X(k/p.K(X,X2),X,X2)
+        if len(self.parts)==2:
+            target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
+            target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
+        else:
+            k = self.K(X,X2)*dL_dK
+            for p in self.parts:
+                target += p.gradients_X(k/p.K(X,X2),X,X2)
         return target
 
     def gradients_X_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape)
-        k = self.Kdiag(X)*dL_dKdiag
-        for p in self.parts:
-            target += p.gradients_X_diag(k/p.Kdiag(X),X)
+        if len(self.parts)==2:
+            target += self.parts[0].gradients_X_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
+            target += self.parts[1].gradients_X_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
+        else:
+            k = self.Kdiag(X)*dL_dKdiag
+            for p in self.parts:
+                target += p.gradients_X_diag(k/p.Kdiag(X),X)
         return target
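Note on the first two files: `KL_scale` and `batch_scale` move from constructor state on the inference object into keyword arguments of `inference()`, so the model recomputes `batch_scale = N/batchsize` from the current minibatch on every call to `parameters_changed()` (and the now-stateless inference object is one less thing that fails to pickle). A rough sketch of the rescaling this applies to the stochastic ELBO estimate; the values below are made-up stand-ins, not GPy quantities:

```python
import numpy as np

# The minibatch likelihood term is scaled up by N/B so its expectation
# matches the full-data sum, while the KL term is counted once (KL_scale=1.0).
N, B = 1000, 20                     # full dataset size, minibatch size
per_point_ell = np.random.randn(B)  # stand-in for the E_q[log p(y_i|f_i)] terms
KL = 3.7                            # stand-in for KL[q(u) || p(u)]

batch_scale = float(N) / float(B)   # mirrors float(self.X_all.shape[0])/float(self.X.shape[0])
elbo_estimate = batch_scale * per_point_ell.sum() - 1.0 * KL
```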
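Note on `prod.py`: for exactly two parts the gradients now come straight from the product rule, dL/dK1 = dL_dK * K2 (elementwise), instead of forming the full product kernel and dividing back out by each part's covariance. The divide-through fallback, still used for three or more parts, hits 0/0 wherever a part's covariance is exactly zero. A standalone NumPy sketch of the difference (the matrices are arbitrary stand-ins, not GPy kernel objects):

```python
import numpy as np

# Stand-ins for two parts' covariance matrices; K2 contains exact zeros.
K1 = np.array([[1.0, 0.5], [0.5, 1.0]])
K2 = np.array([[1.0, 0.0], [0.0, 1.0]])
dL_dK = np.ones((2, 2))          # upstream gradient w.r.t. K = K1 * K2

# Product rule (the new 2-part branch): dL/dK1 = dL_dK * K2, elementwise.
dL_dK1_direct = dL_dK * K2

# Divide-through shortcut (the general n-part branch): k = K * dL_dK, then k / part.
k = (K1 * K2) * dL_dK
dL_dK1_divide = k / K1           # fine here: K1 has no zeros
print(np.allclose(dL_dK1_direct, dL_dK1_divide))   # True

# But dividing by K2 gives 0/0 = nan where K2 == 0, while dL_dK * K1 stays well defined.
with np.errstate(invalid='ignore'):
    print(k / K2)                # contains nan entries
```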