[inference] minibatch inference needed polishing

This commit is contained in:
mzwiessele 2015-06-29 10:19:36 +02:00
parent 61ef9912cb
commit f46b23d16f
2 changed files with 38 additions and 23 deletions

View file

@ -5,6 +5,10 @@ class StochasticStorage(object):
''' '''
This is a container for holding the stochastic parameters, This is a container for holding the stochastic parameters,
such as subset indices or step length and so on. such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.
''' '''
def __init__(self, model): def __init__(self, model):
""" """
@ -28,9 +32,23 @@ class SparseGPMissing(StochasticStorage):
""" """
Here we want to loop over all dimensions everytime. Here we want to loop over all dimensions everytime.
Thus, we can just make sure the loop goes over self.d every Thus, we can just make sure the loop goes over self.d every
time. time. We will try to get batches which look the same together
which speeds up calculations significantly.
""" """
self.d = range(model.Y_normalized.shape[1]) import numpy as np
self.Y = model.Y_normalized
bdict = {}
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage): class SparseGPStochastics(StochasticStorage):
""" """
@ -40,16 +58,29 @@ class SparseGPStochastics(StochasticStorage):
def __init__(self, model, batchsize=1): def __init__(self, model, batchsize=1):
self.batchsize = batchsize self.batchsize = batchsize
self.output_dim = model.Y.shape[1] self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.reset() self.reset()
self.do_stochastics() self.do_stochastics()
def do_stochastics(self): def do_stochastics(self):
if self.batchsize == 1: if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [self.current_dim] self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
else: else:
import numpy as np import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False) self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
def reset(self): def reset(self):
self.current_dim = -1 self.current_dim = -1

View file

@ -63,33 +63,18 @@ class SparseGPMiniBatch(SparseGP):
if stochastic and missing_data: if stochastic and missing_data:
self.missing_data = True self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPStochastics(self, batchsize) self.stochastics = SparseGPStochastics(self, batchsize)
elif stochastic and not missing_data: elif stochastic and not missing_data:
self.missing_data = False self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize) self.stochastics = SparseGPStochastics(self, batchsize)
elif missing_data: elif missing_data:
self.missing_data = True self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPMissing(self) self.stochastics = SparseGPMissing(self)
else: else:
self.stochastics = False self.stochastics = False
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print(message, end=' ')
for d in range(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print(' '*(len(message)+1) + '\r', end=' ')
message = m_f(d)
print(message, end=' ')
print('')
self.posterior = None self.posterior = None
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
@ -245,8 +230,7 @@ class SparseGPMiniBatch(SparseGP):
message = m_f(-1) message = m_f(-1)
print(message, end=' ') print(message, end=' ')
for d in self.stochastics.d: for d, ninan in self.stochastics.d:
ninan = self.ninan[:, d]
if not self.stochastics: if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ') print(' '*(len(message)) + '\r', end=' ')
@ -257,7 +241,7 @@ class SparseGPMiniBatch(SparseGP):
grad_dict, current_values, value_indices = self._inner_parameters_changed( grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan], self.kern, self.X[ninan],
self.Z, self.likelihood, self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata, self.Y_normalized[ninan][:, d], self.Y_metadata,
Lm, dL_dKmm, Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan)) subset_indices=dict(outputs=d, samples=ninan))
@ -266,8 +250,8 @@ class SparseGPMiniBatch(SparseGP):
Lm = posterior.K_chol Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm'] dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
woodbury_vector[:, d:d+1] = posterior.woodbury_vector woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics: if not self.stochastics:
print('') print('')