mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-04 17:22:39 +02:00
Try calculating dL_dpsi1*psi1 individually for each dimension as we go along
This commit is contained in:
parent
c83f56723e
commit
3818aa3745
7 changed files with 47 additions and 31 deletions
|
|
@ -117,7 +117,7 @@ class Kern(Parameterized):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
"""
|
"""
|
||||||
Set the gradients of all parameters when doing inference with
|
Set the gradients of all parameters when doing inference with
|
||||||
uncertain inputs, using expectations of the kernel.
|
uncertain inputs, using expectations of the kernel.
|
||||||
|
|
@ -129,26 +129,26 @@ class Kern(Parameterized):
|
||||||
dL_dpsi2 * dpsi2_d{theta_i}
|
dL_dpsi2 * dpsi2_d{theta_i}
|
||||||
"""
|
"""
|
||||||
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2)[0]
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[0]
|
||||||
self.gradient[:] = dtheta
|
self.gradient[:] = dtheta
|
||||||
|
|
||||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
"""
|
"""
|
||||||
Returns the derivative of the objective wrt Z, using the chain rule
|
Returns the derivative of the objective wrt Z, using the chain rule
|
||||||
through the expectation variables.
|
through the expectation variables.
|
||||||
"""
|
"""
|
||||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2)[1]
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[1]
|
||||||
|
|
||||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
"""
|
"""
|
||||||
Compute the gradients wrt the parameters of the variational
|
Compute the gradients wrt the parameters of the variational
|
||||||
distribution q(X), chain-ruling via the expectations of the kernel
|
distribution q(X), chain-ruling via the expectations of the kernel
|
||||||
"""
|
"""
|
||||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2)[2:]
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[2:]
|
||||||
|
|
||||||
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
|
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -117,30 +117,30 @@ def _slice_psi(f):
|
||||||
def _slice_update_gradients_expectations(f):
|
def _slice_update_gradients_expectations(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||||
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
|
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2)
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
def _slice_gradients_Z_expectations(f):
|
def _slice_gradients_Z_expectations(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||||
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
|
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2))
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2))
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
def _slice_gradients_qX_expectations(f):
|
def _slice_gradients_qX_expectations(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
with _Slice_wrap(self, variational_posterior, Z) as s:
|
with _Slice_wrap(self, variational_posterior, Z) as s:
|
||||||
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X,
|
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2))
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2))
|
||||||
r2 = ret[:2]
|
r2 = ret[:2]
|
||||||
ret[0] = s.handle_return_array(r2[0])
|
ret[0] = s.handle_return_array(r2[0])
|
||||||
ret[1] = s.handle_return_array(r2[1])
|
ret[1] = s.handle_return_array(r2[1])
|
||||||
|
|
|
||||||
|
|
@ -24,10 +24,10 @@ class PSICOMP_RBF(Pickleable):
|
||||||
|
|
||||||
@Cache_this(limit=10, ignore_args=(0,1,2,3))
|
@Cache_this(limit=10, ignore_args=(0,1,2,3))
|
||||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
|
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
if isinstance(variational_posterior, variational.NormalPosterior):
|
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||||
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
|
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
|
||||||
psi0=psi0, psi1=psi1, psi2=psi2)
|
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
|
||||||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||||
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
|
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -69,11 +69,11 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
|
||||||
return _psi2
|
return _psi2
|
||||||
|
|
||||||
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
|
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
ARD = (len(lengthscale)!=1)
|
ARD = (len(lengthscale)!=1)
|
||||||
|
|
||||||
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1)
|
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1, Lpsi1=Lpsi1)
|
||||||
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2)
|
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2, Lpsi2=Lpsi2)
|
||||||
|
|
||||||
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
||||||
|
|
||||||
|
|
@ -87,7 +87,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
|
||||||
|
|
||||||
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
||||||
|
|
||||||
def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None):
|
def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=None):
|
||||||
"""
|
"""
|
||||||
dL_dpsi1 - NxM
|
dL_dpsi1 - NxM
|
||||||
Z - MxQ
|
Z - MxQ
|
||||||
|
|
@ -108,7 +108,8 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None):
|
||||||
|
|
||||||
if psi1 is None:
|
if psi1 is None:
|
||||||
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
|
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
|
||||||
Lpsi1 = dL_dpsi1*psi1
|
if Lpsi1 is None:
|
||||||
|
Lpsi1 = dL_dpsi1*psi1
|
||||||
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
|
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
|
||||||
denom = 1./(S+lengthscale2)
|
denom = 1./(S+lengthscale2)
|
||||||
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
|
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
|
||||||
|
|
@ -120,7 +121,7 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None):
|
||||||
|
|
||||||
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||||
|
|
||||||
def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None):
|
def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None):
|
||||||
"""
|
"""
|
||||||
Z - MxQ
|
Z - MxQ
|
||||||
mu - NxQ
|
mu - NxQ
|
||||||
|
|
@ -143,7 +144,8 @@ def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None):
|
||||||
|
|
||||||
if psi2 is None:
|
if psi2 is None:
|
||||||
psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
|
psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
|
||||||
Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
|
if Lpsi2 is None:
|
||||||
|
Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
|
||||||
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
|
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
|
||||||
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
|
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
|
||||||
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
|
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
|
||||||
|
|
|
||||||
|
|
@ -59,16 +59,16 @@ class RBF(Stationary):
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2]
|
||||||
|
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[:2]
|
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[:2]
|
||||||
self.variance.gradient = dL_dvar
|
self.variance.gradient = dL_dvar
|
||||||
self.lengthscale.gradient = dL_dlengscale
|
self.lengthscale.gradient = dL_dlengscale
|
||||||
|
|
||||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[2]
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[2]
|
||||||
|
|
||||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||||
psi0=None, psi1=None, psi2=None):
|
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[3:]
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[3:]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||||
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
||||||
dL_dpsi1=full_values['dL_dpsi1'],
|
dL_dpsi1=full_values['dL_dpsi1'],
|
||||||
dL_dpsi2=full_values['dL_dpsi2'],
|
dL_dpsi2=full_values['dL_dpsi2'],
|
||||||
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
|
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
|
||||||
|
Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
|
||||||
full_values['meangrad'] += meangrad_tmp
|
full_values['meangrad'] += meangrad_tmp
|
||||||
full_values['vargrad'] += vargrad_tmp
|
full_values['vargrad'] += vargrad_tmp
|
||||||
else:
|
else:
|
||||||
|
|
@ -156,6 +157,11 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||||
full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
|
full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
|
||||||
full_values['dL_dpsi0'] = np.zeros(self.X.shape[0])
|
full_values['dL_dpsi0'] = np.zeros(self.X.shape[0])
|
||||||
full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0]))
|
full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0]))
|
||||||
|
full_values['dL_dpsi2'] = np.zeros((self.Z.shape[0], self.Z.shape[0]))
|
||||||
|
|
||||||
|
full_values['Lpsi0'] = np.zeros(self.X.shape[0])
|
||||||
|
full_values['Lpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0]))
|
||||||
|
full_values['Lpsi2'] = np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))
|
||||||
return full_values
|
return full_values
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,10 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
|
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
|
||||||
dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
|
dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
|
||||||
|
|
||||||
|
if self.has_uncertain_inputs():
|
||||||
|
grad_dict['Lpsi0'] = grad_dict['dL_dpsi0']*psi0
|
||||||
|
grad_dict['Lpsi1'] = grad_dict['dL_dpsi1']*psi1
|
||||||
|
grad_dict['Lpsi2'] = grad_dict['dL_dpsi2']*psi2
|
||||||
return posterior, log_marginal_likelihood, grad_dict
|
return posterior, log_marginal_likelihood, grad_dict
|
||||||
|
|
||||||
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
|
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
|
||||||
|
|
@ -172,7 +176,8 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
||||||
dL_dpsi1=full_values['dL_dpsi1'],
|
dL_dpsi1=full_values['dL_dpsi1'],
|
||||||
dL_dpsi2=full_values['dL_dpsi2'],
|
dL_dpsi2=full_values['dL_dpsi2'],
|
||||||
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
|
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
|
||||||
|
Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
|
||||||
#self.kern.update_gradients_expectations(variational_posterior=self.X,
|
#self.kern.update_gradients_expectations(variational_posterior=self.X,
|
||||||
#Z=self.Z,
|
#Z=self.Z,
|
||||||
#dL_dpsi0=full_values['dL_dpsi0'],
|
#dL_dpsi0=full_values['dL_dpsi0'],
|
||||||
|
|
@ -187,7 +192,8 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
||||||
dL_dpsi1=full_values['dL_dpsi1'],
|
dL_dpsi1=full_values['dL_dpsi1'],
|
||||||
dL_dpsi2=full_values['dL_dpsi2'],
|
dL_dpsi2=full_values['dL_dpsi2'],
|
||||||
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
|
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
|
||||||
|
Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
|
||||||
else:
|
else:
|
||||||
#gradients wrt kernel
|
#gradients wrt kernel
|
||||||
self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
|
self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
|
||||||
|
|
@ -267,7 +273,9 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
psi1ni = psi1[ninan]
|
psi1ni = psi1[ninan]
|
||||||
if self.has_uncertain_inputs():
|
if self.has_uncertain_inputs():
|
||||||
psi2ni = psi2[ninan]
|
psi2ni = psi2[ninan]
|
||||||
value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan)
|
#value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan)
|
||||||
|
value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan,
|
||||||
|
Lpsi0=ninan, Lpsi1=ninan, Lpsi2=ninan)
|
||||||
else:
|
else:
|
||||||
psi2ni = None
|
psi2ni = None
|
||||||
value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
|
value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue