reconfigured svgp inference a little

This commit is contained in:
James Hensman 2015-02-11 11:49:45 +00:00
parent 393b9e94ba
commit 482bd1472c
3 changed files with 32 additions and 22 deletions

View file

@@ -36,12 +36,12 @@ class SVGP(SparseGP):
KL_scale = 1.0 KL_scale = 1.0
import climin.util import climin.util
#Make a climin slicer to make drawing minibatches much quicker #Make a climin slicer to make drawing minibatches much quicker. Annoyingly, this doesn't pickle.
self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize) self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
X_batch, Y_batch = self.new_batch() X_batch, Y_batch = self.new_batch()
#create the SVI inference method #create the SVI inference method
inf_method = svgp_inf(KL_scale=KL_scale, batch_scale=batch_scale) inf_method = svgp_inf()
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method, SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
name=name, Y_metadata=Y_metadata, normalizer=False) name=name, Y_metadata=Y_metadata, normalizer=False)
@@ -53,7 +53,7 @@ class SVGP(SparseGP):
self.link_parameter(self.m) self.link_parameter(self.m)
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
#update the kernel gradients #update the kernel gradients
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z) self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)

View file

@@ -5,11 +5,8 @@ import numpy as np
from posterior import Posterior from posterior import Posterior
class SVGP(LatentFunctionInference): class SVGP(LatentFunctionInference):
def __init__(self, KL_scale=1., batch_scale=1.):
self.KL_scale = KL_scale
self.batch_scale = batch_scale
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
num_data, num_outputs = Y.shape num_data, num_outputs = Y.shape
@@ -44,9 +41,6 @@ class SVGP(LatentFunctionInference):
dKL_dS = 0.5*(Kmmi[:,:,None] - Si) dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T) dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
KL_scale = self.KL_scale
batch_scale = self.batch_scale
KL, dKL_dKmm, dKL_dS, dKL_dm = KL_scale*KL, KL_scale*dKL_dKmm, KL_scale*dKL_dS, KL_scale*dKL_dm
#quadrature for the likelihood #quadrature for the likelihood
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v) F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v)

View file

@@ -42,17 +42,29 @@ class Prod(CombinationKernel):
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
if len(self.parts)==2:
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
else:
k = self.K(X,X2)*dL_dK k = self.K(X,X2)*dL_dK
for p in self.parts: for p in self.parts:
p.update_gradients_full(k/p.K(X,X2),X,X2) p.update_gradients_full(k/p.K(X,X2),X,X2)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
if len(self.parts)==2:
self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
else:
k = self.Kdiag(X)*dL_dKdiag k = self.Kdiag(X)*dL_dKdiag
for p in self.parts: for p in self.parts:
p.update_gradients_diag(k/p.Kdiag(X),X) p.update_gradients_diag(k/p.Kdiag(X),X)
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
if len(self.parts)==2:
target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
else:
k = self.K(X,X2)*dL_dK k = self.K(X,X2)*dL_dK
for p in self.parts: for p in self.parts:
target += p.gradients_X(k/p.K(X,X2),X,X2) target += p.gradients_X(k/p.K(X,X2),X,X2)
@@ -60,6 +72,10 @@ class Prod(CombinationKernel):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)
if len(self.parts)==2:
target += self.parts[0].gradients_X_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
target += self.parts[1].gradients_X_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
else:
k = self.Kdiag(X)*dL_dKdiag k = self.Kdiag(X)*dL_dKdiag
for p in self.parts: for p in self.parts:
target += p.gradients_X_diag(k/p.Kdiag(X),X) target += p.gradients_X_diag(k/p.Kdiag(X),X)