[inference] minibatch inference needed polishing

This commit is contained in:
mzwiessele 2015-06-29 10:19:36 +02:00
parent 61ef9912cb
commit f46b23d16f
2 changed files with 38 additions and 23 deletions

View file

@ -5,6 +5,10 @@ class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.
'''
def __init__(self, model):
"""
@ -28,9 +32,23 @@ class SparseGPMissing(StochasticStorage):
"""
Here we want to loop over all dimensions every time.
Thus, we can just make sure the loop goes over self.d every
time.
time. We will try to get batches which look the same together
which speeds up calculations significantly.
"""
self.d = range(model.Y_normalized.shape[1])
import numpy as np
self.Y = model.Y_normalized
bdict = {}
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
"""
@ -40,16 +58,29 @@ class SparseGPStochastics(StochasticStorage):
def __init__(self, model, batchsize=1):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.reset()
self.do_stochastics()
def do_stochastics(self):
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [self.current_dim]
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
else:
import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
def reset(self):
self.current_dim = -1

View file

@ -63,33 +63,18 @@ class SparseGPMiniBatch(SparseGP):
if stochastic and missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPStochastics(self, batchsize)
elif stochastic and not missing_data:
self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize)
elif missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPMissing(self)
else:
self.stochastics = False
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print(message, end=' ')
for d in range(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print(' '*(len(message)+1) + '\r', end=' ')
message = m_f(d)
print(message, end=' ')
print('')
self.posterior = None
def has_uncertain_inputs(self):
@ -245,8 +230,7 @@ class SparseGPMiniBatch(SparseGP):
message = m_f(-1)
print(message, end=' ')
for d in self.stochastics.d:
ninan = self.ninan[:, d]
for d, ninan in self.stochastics.d:
if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ')
@ -257,7 +241,7 @@ class SparseGPMiniBatch(SparseGP):
grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata,
self.Y_normalized[ninan][:, d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
@ -266,8 +250,8 @@ class SparseGPMiniBatch(SparseGP):
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print('')