Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-09 12:02:38 +02:00
[spgp minibatch] added new routine for psi NxMxM, much faster, slightly bigger mem footprint
This commit is contained in:
parent
b73932c350
commit
cb024f0bf8
5 changed files with 101 additions and 146 deletions
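For orientation (not part of the diff): the psi2 statistic of the variational sparse GP bound can be returned either summed over the N data points (an MxM array) or per data point (an NxMxM array). The minibatch and missing-data paths below need the per-point form so they can slice out rows for each block of outputs without recomputing the kernel expectations, which is what makes them faster at the price of storing N MxM blocks. A minimal NumPy sketch of the relationship, with made-up shapes only:

    import numpy as np

    # Illustrative shapes only: N data points, M inducing inputs.
    N, M = 100, 10
    rng = np.random.default_rng(0)

    # Stand-in for the per-point statistic psi2n, shape (N, M, M).
    psi2n = rng.normal(size=(N, M, M))

    # The summed statistic (M, M) is the sum over the data axis,
    # i.e. the form the return_psi2_n=False path hands back.
    psi2 = psi2n.sum(axis=0)

    # Keeping the N axis lets the missing-data loop slice rows instead of
    # recomputing expectations for every block of output dimensions:
    rows = np.array([3, 17, 42])              # hypothetical subset of rows
    psi2_subset = psi2n[rows].sum(axis=0)     # (M, M) statistic for that subset
    print(psi2.shape, psi2_subset.shape)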
@@ -43,7 +43,7 @@ class SparseGPMissing(StochasticStorage):
         np.set_printoptions(threshold='nan')
         for d in range(self.Y.shape[1]):
             inan = np.isnan(self.Y)[:, d]
-            arr_str = np.array2string(~inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
+            arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
             try:
                 bdict[arr_str][0].append(d)
             except:
@@ -56,35 +56,36 @@ class SparseGPStochastics(StochasticStorage):
     For the sparse gp we need to store the dimension we are in,
     and the indices corresponding to those
     """
-    def __init__(self, model, batchsize=1):
+    def __init__(self, model, batchsize=1, missing_data=True):
         self.batchsize = batchsize
         self.output_dim = model.Y.shape[1]
         self.Y = model.Y_normalized
+        self.missing_data = missing_data
         self.reset()
         self.do_stochastics()

     def do_stochastics(self):
+        import numpy as np
         if self.batchsize == 1:
             self.current_dim = (self.current_dim+1)%self.output_dim
-            self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
+            self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
         else:
-            import numpy as np
             self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
             bdict = {}
-            opt = np.get_printoptions()
-            np.set_printoptions(threshold='nan')
-            for d in self.d:
-                inan = np.isnan(self.Y[:, d])
-                arr_str = int(np.array2string(inan,
-                                              np.inf, 0,
-                                              True, '',
-                                              formatter={'bool':lambda x: '1' if x else '0'}), 2)
-                try:
-                    bdict[arr_str][0].append(d)
-                except:
-                    bdict[arr_str] = [[d], ~inan]
-            np.set_printoptions(**opt)
-            self.d = bdict.values()
+            if self.missing_data:
+                opt = np.get_printoptions()
+                np.set_printoptions(threshold='nan')
+                for d in self.d:
+                    inan = np.isnan(self.Y[:, d])
+                    arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
+                    try:
+                        bdict[arr_str][0].append(d)
+                    except:
+                        bdict[arr_str] = [[d], ~inan]
+                np.set_printoptions(**opt)
+                self.d = bdict.values()
+            else:
+                self.d = [[self.d, None]]

     def reset(self):
         self.current_dim = -1
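The dict-keying above groups the sampled output dimensions by their missing-data pattern, so columns that share the same observed rows can be pushed through the inner inference together. A rough standalone sketch of the same idea (simplified: a tuple key instead of np.array2string, and the names are illustrative):

    import numpy as np

    def group_by_missing_pattern(Y, dims):
        """Group output dimensions that share the same pattern of NaNs.

        Returns a list of [dims_with_this_pattern, observed_row_mask] pairs,
        mirroring the [[d], ~inan] entries built in do_stochastics.
        """
        groups = {}
        for d in dims:
            inan = np.isnan(Y[:, d])
            key = tuple(inan)            # simplified stand-in for the string key
            if key in groups:
                groups[key][0].append(d)
            else:
                groups[key] = [[d], ~inan]
        return list(groups.values())

    # Tiny example: columns 0 and 1 share a missing-data pattern.
    Y = np.array([[1.0, 2.0, np.nan],
                  [np.nan, np.nan, 3.0],
                  [4.0, 5.0, 6.0]])
    for dims, observed in group_by_missing_pattern(Y, range(Y.shape[1])):
        print(dims, observed)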
@@ -58,24 +58,10 @@ class Kern(Parameterized):

         self._sliced_X = 0
         self.useGPU = self._support_GPU and useGPU
-        self._return_psi2_n_flag = ObsAr(np.zeros(1).astype(bool))
-
         from .psi_comp import PSICOMP_GH
         self.psicomp = PSICOMP_GH()

-    @property
-    def return_psi2_n(self):
-        """
-        Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
-        """
-        return self._return_psi2_n_flag[0]
-    @return_psi2_n.setter
-    def return_psi2_n(self, val):
-        def visit(self):
-            if isinstance(self, Kern):
-                self._return_psi2_n_flag[0]=val
-        self.traverse(visit)

     @Cache_this(limit=20)
     def _slice_X(self, X):
         return X[:, self.active_dims]
@@ -97,7 +83,9 @@ class Kern(Parameterized):
     def psi1(self, Z, variational_posterior):
         return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
     def psi2(self, Z, variational_posterior):
-        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2]
+        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
+    def psi2n(self, Z, variational_posterior):
+        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
     def gradients_X(self, dL_dK, X, X2):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dKdiag, X):
@@ -56,7 +56,10 @@ class RBF(Stationary):
         return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]

     def psi2(self, Z, variational_posterior):
-        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2]
+        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=False)[2]
+
+    def psi2n(self, Z, variational_posterior):
+        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=True)[2]

     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
                                       psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
@@ -81,38 +81,9 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient

-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None,psi0=None, psi1=None, psi2=None, **kw):
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
         posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
                                                                                                                       psi0=psi0, psi1=psi1, psi2=psi2, **kw)
-        kl_fctr = self.kl_factr
-        if self.has_uncertain_inputs():
-            if self.missing_data:
-                d = self.output_dim
-                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
-            else:
-                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
-
-            X.mean.gradient[:] = 0
-            X.variance.gradient[:] = 0
-
-            self.variational_prior.update_gradients_KL(X)
-            if self.missing_data:
-                grad_dict['meangrad'] = kl_fctr*X.mean.gradient/d
-                grad_dict['vargrad'] = kl_fctr*X.variance.gradient/d
-            else:
-                grad_dict['meangrad'] = kl_fctr*X.mean.gradient
-                grad_dict['vargrad'] = kl_fctr*X.variance.gradient
-
-            #if subset_indices is not None:
-                #value_indices['meangrad'] = subset_indices['samples']
-                #value_indices['vargrad'] = subset_indices['samples']
-
-            #grad_dict['meangrad'] = kl_fctr*self.X.mean.gradient
-            #grad_dict['vargrad'] = kl_fctr*self.X.variance.gradient
-            #self.X.mean.gradient[:] = 0
-            #self.X.variance.gradient[:] = 0
-            #import ipdb; ipdb.set_trace() # XXX BREAKPOINT
-
         return posterior, log_marginal_likelihood, grad_dict

     def _outer_values_update(self, full_values):
@@ -127,36 +98,41 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
                                             Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                                             dL_dpsi1=full_values['dL_dpsi1'],
                                             dL_dpsi2=full_values['dL_dpsi2'],
-                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
-                                            Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
-            full_values['meangrad'] += meangrad_tmp
-            full_values['vargrad'] += vargrad_tmp
-        else:
-            full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
-            full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)
-
-        if self.has_uncertain_inputs():
-            self.X.mean.gradient = full_values['meangrad']
-            self.X.variance.gradient = full_values['vargrad']
+                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+
+            kl_fctr = self.kl_factr
+
+            self.X.mean.gradient[:] = 0
+            self.X.variance.gradient[:] = 0
+            self.variational_prior.update_gradients_KL(self.X)
+
+            if self.missing_data or not self.stochastics:
+                self.X.mean.gradient = kl_fctr*self.X.mean.gradient
+                self.X.variance.gradient = kl_fctr*self.X.variance.gradient
+            else:
+                d = self.output_dim
+                self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
+                self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
+            self.X.mean.gradient += meangrad_tmp
+            self.X.variance.gradient += vargrad_tmp
+
         else:
-            self.X.gradient = full_values['Xgrad']
+            self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
+            self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

     def _outer_init_full_values(self):
         full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
-        full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
-        full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
-
-        full_values['dL_dpsi0'] = ObsAr(np.zeros(self.X.shape[0]))
-        full_values['dL_dpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0])))
-        full_values['dL_dpsi2'] = ObsAr(np.zeros((self.Z.shape[0], self.Z.shape[0])))
-
-        full_values['Lpsi0'] = ObsAr(np.zeros(self.X.shape[0]))
-        full_values['Lpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0])))
-        full_values['Lpsi2'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])))
         return full_values

     def parameters_changed(self):
         super(BayesianGPLVMMiniBatch,self).parameters_changed()
+
+        kl_fctr = self.kl_factr
+        if self.missing_data or not self.stochastics:
+            self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
+        elif self.stochastics:
+            d = self.output_dim
+            self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
+
         if isinstance(self.inference_method, VarDTC_minibatch):
             return
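The KL handling moves out of _inner_parameters_changed: the variational KL and its gradients are now applied once per outer update and, when running stochastically over output dimensions, scaled by batchsize/output_dim so that one full sweep over the outputs accounts for the KL exactly once. A hedged sketch of that bookkeeping (function and argument names are illustrative, not the model's API):

    def kl_weight(kl_factor, batchsize, output_dim, stochastic, missing_data):
        """Weight applied to the KL term for a single parameter update.

        With minibatches of `batchsize` output dimensions out of `output_dim`,
        the KL is down-weighted so a full pass contributes kl_factor * KL.
        """
        if missing_data or not stochastic:
            return kl_factor
        return kl_factor * batchsize / output_dim

    # Example: batches of 2 dimensions out of 10, so each update carries 1/5 of the KL.
    print(kl_weight(1.0, batchsize=2, output_dim=10, stochastic=True, missing_data=False))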
@@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):

         if stochastic and missing_data:
             self.missing_data = True
-            self.stochastics = SparseGPStochastics(self, batchsize)
+            self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
         elif stochastic and not missing_data:
             self.missing_data = False
-            self.stochastics = SparseGPStochastics(self, batchsize)
+            self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
         elif missing_data:
             self.missing_data = True
             self.stochastics = SparseGPMissing(self)
@@ -105,11 +105,6 @@ class SparseGPMiniBatch(SparseGP):
             psi2_sum_n = psi2.sum(axis=0)
         posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
                                                                                         dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)

-        if self.has_uncertain_inputs():
-            grad_dict['Lpsi0'] = grad_dict['dL_dpsi0']*psi0
-            grad_dict['Lpsi1'] = grad_dict['dL_dpsi1']*psi1
-            grad_dict['Lpsi2'] = grad_dict['dL_dpsi2']*psi2
         return posterior, log_marginal_likelihood, grad_dict

     def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
@@ -144,14 +139,10 @@ class SparseGPMiniBatch(SparseGP):
             else:
                 index = slice(None)
             if key in full_values:
-                if np.isscalar(current_values[key]):
+                try:
+                    full_values[key][index] += current_values[key]
+                except:
                     full_values[key] += current_values[key]
-                else:
-                    from ..core.parameterization.observable_array import ObsAr
-                    if isinstance(full_values[key], ObsAr):
-                        full_values[key].values[index] += current_values[key]
-                    else:
-                        full_values[key][index] += current_values[key]
             else:
                 full_values[key] = current_values[key]
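The branching above collapses to a try/except: attempt an indexed in-place add into the full-size accumulator and fall back to a plain add for scalars or anything that cannot be sliced. A minimal sketch of the same pattern with hypothetical names:

    import numpy as np

    def accumulate(full_values, current_values, value_indices=None):
        """Add per-minibatch values into full-size accumulators.

        value_indices maps a key to the row index (e.g. the observed-row mask)
        under which that contribution is added; keys without an index, and
        scalars, are added in full.
        """
        for key, val in current_values.items():
            index = value_indices.get(key, slice(None)) if value_indices else slice(None)
            if key in full_values:
                try:
                    full_values[key][index] += val
                except (TypeError, IndexError):
                    full_values[key] += val      # scalar or non-indexable value
            else:
                full_values[key] = val

    full = {'dL_dpsi0': np.zeros(5), 'lml': 0.0}
    accumulate(full, {'dL_dpsi0': np.ones(3), 'lml': -1.2},
               {'dL_dpsi0': np.array([0, 2, 4])})
    print(full['dL_dpsi0'], full['lml'])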
@@ -174,43 +165,39 @@ class SparseGPMiniBatch(SparseGP):
             #gradients wrt kernel
             dL_dKmm = full_values['dL_dKmm']
             self.kern.update_gradients_full(dL_dKmm, self.Z, None)
-            full_values['kerngrad'] = self.kern.gradient.copy()
+            kgrad = self.kern.gradient.copy()
             self.kern.update_gradients_expectations(
                                             variational_posterior=self.X,
                                             Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                                             dL_dpsi1=full_values['dL_dpsi1'],
                                             dL_dpsi2=full_values['dL_dpsi2'],
-                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
-                                            Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
-            full_values['kerngrad'] += self.kern.gradient
+                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+            self.kern.gradient += kgrad

             #gradients wrt Z
-            full_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z)
-            full_values['Zgrad'] += self.kern.gradients_Z_expectations(
+            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient += self.kern.gradients_Z_expectations(
                                             variational_posterior=self.X,
                                             Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                                             dL_dpsi1=full_values['dL_dpsi1'],
                                             dL_dpsi2=full_values['dL_dpsi2'],
-                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
-                                            Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
+                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
         else:
             #gradients wrt kernel
             self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
-            full_values['kerngrad'] = self.kern.gradient.copy()
+            kgrad = self.kern.gradient.copy()
             self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
-            full_values['kerngrad'] += self.kern.gradient
+            kgrad += self.kern.gradient
             self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
-            full_values['kerngrad'] += self.kern.gradient
+            self.kern.gradient += kgrad
+            #kgrad += self.kern.gradient

             #gradients wrt Z
-            full_values['Zgrad'] = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
-            full_values['Zgrad'] += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
+            self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
+            self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)

         self.likelihood.update_gradients(full_values['dL_dthetaL'])
-        full_values['likgrad'] = self.likelihood.gradient.copy()
-
-        self.likelihood.gradient = full_values['likgrad']
-        self.kern.gradient = full_values['kerngrad']
-        self.Z.gradient = full_values['Zgrad']

     def _outer_init_full_values(self):
         """
@@ -225,8 +212,15 @@ class SparseGPMiniBatch(SparseGP):
        to initialize the gradients for the mean and the variance in order to
        have the full gradient for indexing)
        """
-        return {'dL_dKdiag': np.zeros(self.X.shape[0]),
-                'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))}
+        retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
+        if self.has_uncertain_inputs():
+            retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
+                             dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
+                             dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
+        else:
+            retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
+                         'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
+        return retd

     def _outer_loop_for_missing_data(self):
         Lm = None
@@ -247,35 +241,17 @@ class SparseGPMiniBatch(SparseGP):
             message = m_f(-1)
             print(message, end=' ')

-        #Compute the psi statistics for N once, but don't sum out N in psi2
-        if self.has_uncertain_inputs():
-            self.kern.return_psi2_n = True
-            from ..core.parameterization.observable_array import ObsAr
-            psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
-            psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
-            psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
-        else:
-            psi0 = self.kern.Kdiag(self.X)
-            psi1 = self.kern.K(self.X, self.Z)
-            psi2 = None
-
-        self.psi0 = psi0
-        self.psi1 = psi1
-        self.psi2 = psi2
-
         for d, ninan in self.stochastics.d:
             if not self.stochastics:
                 print(' '*(len(message)) + '\r', end=' ')
                 message = m_f(d)
                 print(message, end=' ')

-            psi0ni = psi0[ninan]
-            psi1ni = psi1[ninan]
+            psi0ni = self.psi0[ninan]
+            psi1ni = self.psi1[ninan]
             if self.has_uncertain_inputs():
-                psi2ni = psi2[ninan]
-                #value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan)
-                value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan,
-                                     Lpsi0=ninan, Lpsi1=ninan, Lpsi2=ninan)
+                psi2ni = self.psi2[ninan]
+                value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
             else:
                 psi2ni = None
                 value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
@@ -290,7 +266,7 @@ class SparseGPMiniBatch(SparseGP):
             # Fill out the full values by adding in the apporpriate grad_dict
             # values
             self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
-            self._inner_values_update(grad_dict) # What is this for?
+            self._inner_values_update(grad_dict) # What is this for? -> MRD

             woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
             woodbury_vector[:, d] = posterior.woodbury_vector
@@ -307,8 +283,6 @@ class SparseGPMiniBatch(SparseGP):
-        self.kern.return_psi2_n = False

     def _outer_loop_without_missing_data(self):
-        self._log_marginal_likelihood = 0
-
         if self.posterior is None:
             woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
             woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
@@ -316,14 +290,14 @@ class SparseGPMiniBatch(SparseGP):
             woodbury_inv = self.posterior._woodbury_inv
             woodbury_vector = self.posterior._woodbury_vector

-        d = self.stochastics.d
+        d = self.stochastics.d[0][0]
         posterior, log_marginal_likelihood, grad_dict= self._inner_parameters_changed(
                                     self.kern, self.X,
                                     self.Z, self.likelihood,
                                     self.Y_normalized[:, d], self.Y_metadata)
         self.grad_dict = grad_dict

-        self._log_marginal_likelihood += log_marginal_likelihood
+        self._log_marginal_likelihood = log_marginal_likelihood

         self._outer_values_update(self.grad_dict)
@@ -334,6 +308,19 @@ class SparseGPMiniBatch(SparseGP):
                                        K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)

     def parameters_changed(self):
+        #Compute the psi statistics for N once, but don't sum out N in psi2
+        if self.has_uncertain_inputs():
+            #psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
+            #psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
+            #psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
+            self.psi0 = self.kern.psi0(self.Z, self.X)
+            self.psi1 = self.kern.psi1(self.Z, self.X)
+            self.psi2 = self.kern.psi2n(self.Z, self.X)
+        else:
+            self.psi0 = self.kern.Kdiag(self.X)
+            self.psi1 = self.kern.K(self.X, self.Z)
+            self.psi2 = None
+
         if self.missing_data:
             self._outer_loop_for_missing_data()
         elif self.stochastics:
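With parameters_changed computing psi0, psi1 and the per-point psi2n once per update, the missing-data loop only slices rows for each block of output dimensions sharing an observation pattern, rather than re-evaluating the kernel expectations per block; the inner inference then sums the sliced psi2 over its remaining data axis. Roughly, and only as an illustration of the control flow:

    import numpy as np

    def outer_loop_sketch(psi0, psi1, psi2n, blocks):
        """Slice precomputed psi statistics per block of output dimensions.

        psi0: (N,), psi1: (N, M), psi2n: (N, M, M); blocks is a list of
        (dims, observed_mask) pairs like those produced by the stochastic storage.
        """
        for dims, observed in blocks:
            psi0_b = psi0[observed]                  # (n_b,)
            psi1_b = psi1[observed]                  # (n_b, M)
            psi2_b = psi2n[observed].sum(axis=0)     # (M, M), summed only now
            yield dims, psi0_b, psi1_b, psi2_b

    N, M = 6, 3
    rng = np.random.default_rng(1)
    psi0, psi1, psi2n = rng.random(N), rng.random((N, M)), rng.random((N, M, M))
    blocks = [([0, 2], np.array([True, False, True, True, False, True]))]
    for dims, p0, p1, p2 in outer_loop_sketch(psi0, psi1, psi2n, blocks):
        print(dims, p0.shape, p1.shape, p2.shape)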