Mirror of https://github.com/SheffieldML/GPy.git
Synced 2026-05-08 03:22:38 +02:00
reconfigured svgp inference a little
commit 482bd1472c
parent 393b9e94ba
3 changed files with 32 additions and 22 deletions
File 1 of 3: the SVGP model (class SVGP(SparseGP)).

@@ -36,12 +36,12 @@ class SVGP(SparseGP):
         KL_scale = 1.0
 
         import climin.util
-        #Make a climin slicer to make drawing minibatches much quicker
+        #Make a climin slicer to make drawing minibatches much quicker. Annoyingly, this doesn't pickle.
         self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
         X_batch, Y_batch = self.new_batch()
 
         #create the SVI inference method
-        inf_method = svgp_inf(KL_scale=KL_scale, batch_scale=batch_scale)
+        inf_method = svgp_inf()
 
         SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
                 name=name, Y_metadata=Y_metadata, normalizer=False)
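For context, this is roughly how the slicer is consumed; a hedged sketch, since only climin.util.draw_mini_slices and the attribute names appear in the diff, while new_batch's internals and the slicer's exact return type are assumptions:

import numpy as np
import climin.util

X_all = np.random.randn(100, 2)    # stand-ins for the model's X_all / Y_all
Y_all = np.random.randn(100, 1)
batchsize = 10

# draw_mini_slices yields one batch of indices at a time, indefinitely
slicer = climin.util.draw_mini_slices(X_all.shape[0], batchsize)

i = next(slicer)                   # presumably what new_batch() does internally
X_batch, Y_batch = X_all[i], Y_all[i]
assert X_batch.shape[0] == batchsize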
@@ -53,7 +53,7 @@ class SVGP(SparseGP):
         self.link_parameter(self.m)
 
     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
 
         #update the kernel gradients
         self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
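The batch_scale passed here, float(self.X_all.shape[0])/float(self.X.shape[0]), is the usual stochastic variational inference correction: inflating the minibatch likelihood term by N/M makes it an unbiased estimate of the full-data term, while the KL penalty is applied exactly once (KL_scale=1.0). A toy numerical check of that unbiasedness (stand-in numbers, not GPy code):

import numpy as np

rng = np.random.RandomState(0)
N, M = 100, 10
f = rng.randn(N)                   # stand-in for per-point likelihood terms

full_sum = f.sum()
scaled = [(float(N) / M) * f[rng.choice(N, M, replace=False)].sum()
          for _ in range(20000)]
print(full_sum, np.mean(scaled))   # the two agree closely: the estimator is unbiased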
File 2 of 3: the SVGP inference method (class SVGP(LatentFunctionInference)).

@@ -5,11 +5,8 @@ import numpy as np
 from posterior import Posterior
 
 class SVGP(LatentFunctionInference):
-    def __init__(self, KL_scale=1., batch_scale=1.):
-        self.KL_scale = KL_scale
-        self.batch_scale = batch_scale
 
-    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None):
+    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
         num_inducing = Z.shape[0]
         num_data, num_outputs = Y.shape
@@ -44,9 +41,6 @@ class SVGP(LatentFunctionInference):
         dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
         dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
 
-        KL_scale = self.KL_scale
-        batch_scale = self.batch_scale
-        KL, dKL_dKmm, dKL_dS, dKL_dm = KL_scale*KL, KL_scale*dKL_dKmm, KL_scale*dKL_dS, KL_scale*dKL_dm
 
         #quadrature for the likelihood
         F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v)
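With the scaling lines deleted below the KL gradients, the two new arguments presumably enter where the bound is assembled. A minimal sketch of that combination, assuming the standard SVI objective (a toy function, not this file's actual code):

import numpy as np

def scaled_bound(F, KL, KL_scale=1.0, batch_scale=1.0):
    # minibatch data term inflated by batch_scale (= N/M in the model above),
    # KL(q(u)||p(u)) weighted by KL_scale and subtracted once
    return batch_scale * np.sum(F) - KL_scale * KL

print(scaled_bound(np.array([-1.3, -0.9, -1.1]), KL=0.4, batch_scale=100.0 / 3.0))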
File 3 of 3: the Prod kernel (class Prod(CombinationKernel)).

@@ -42,17 +42,29 @@ class Prod(CombinationKernel):
         return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            p.update_gradients_full(k/p.K(X,X2),X,X2)
+        if len(self.parts)==2:
+            self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
+            self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
+        else:
+            k = self.K(X,X2)*dL_dK
+            for p in self.parts:
+                p.update_gradients_full(k/p.K(X,X2),X,X2)
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        k = self.Kdiag(X)*dL_dKdiag
-        for p in self.parts:
-            p.update_gradients_diag(k/p.Kdiag(X),X)
+        if len(self.parts)==2:
+            self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
+            self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
+        else:
+            k = self.Kdiag(X)*dL_dKdiag
+            for p in self.parts:
+                p.update_gradients_diag(k/p.Kdiag(X),X)
 
     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
-        k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            target += p.gradients_X(k/p.K(X,X2),X,X2)
+        if len(self.parts)==2:
+            target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
+            target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
+        else:
+            k = self.K(X,X2)*dL_dK
+            for p in self.parts:
+                target += p.gradients_X(k/p.K(X,X2),X,X2)
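The new two-part branches apply the product rule directly: each factor receives dL_dK multiplied by the other factor's covariance. This sidesteps the generic branch's division k/p.K(X,X2), which is a 0/0 wherever a factor's covariance contains zeros. A quick check that the two forms agree when the division is safe (toy matrices):

import numpy as np

rng = np.random.RandomState(1)
K1 = np.exp(rng.randn(4, 4))       # strictly positive stand-in covariances
K2 = np.exp(rng.randn(4, 4))
dL_dK = rng.randn(4, 4)

via_division = (K1 * K2 * dL_dK) / K1   # generic branch: divide the product back out
via_product = dL_dK * K2                # two-part branch: multiply by the other factor
print(np.allclose(via_division, via_product))   # True; only the second survives zeros in K1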
@@ -60,6 +72,10 @@ class Prod(CombinationKernel):
 
     def gradients_X_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape)
-        k = self.Kdiag(X)*dL_dKdiag
-        for p in self.parts:
-            target += p.gradients_X_diag(k/p.Kdiag(X),X)
+        if len(self.parts)==2:
+            target += self.parts[0].gradients_X_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
+            target += self.parts[1].gradients_X_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
+        else:
+            k = self.Kdiag(X)*dL_dKdiag
+            for p in self.parts:
+                target += p.gradients_X_diag(k/p.Kdiag(X),X)
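A usage sketch that exercises the new branches; hedged, since the kernel names and checkgrad are GPy API of roughly this era rather than anything shown in the diff:

import numpy as np
import GPy

X = np.random.randn(20, 1)
Y = np.sin(X) + 0.05 * np.random.randn(20, 1)

k = GPy.kern.RBF(1) * GPy.kern.Bias(1)   # '*' builds a two-part Prod kernel
m = GPy.models.GPRegression(X, Y, k)
print(m.checkgrad())                      # numerically verifies the gradient paths above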