mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-10 04:22:38 +02:00
[inference] minibatch inference needed polishing
This commit is contained in:
parent
61ef9912cb
commit
f46b23d16f
2 changed files with 38 additions and 23 deletions
|
|
@ -5,6 +5,10 @@ class StochasticStorage(object):
|
||||||
'''
|
'''
|
||||||
This is a container for holding the stochastic parameters,
|
This is a container for holding the stochastic parameters,
|
||||||
such as subset indices or step length and so on.
|
such as subset indices or step length and so on.
|
||||||
|
|
||||||
|
self.d has to be a list of lists:
|
||||||
|
[dimension indices, nan indices for those dimensions]
|
||||||
|
so that the minibatches can be used as efficiently as possible.
|
||||||
'''
|
'''
|
||||||
def __init__(self, model):
|
def __init__(self, model):
|
||||||
"""
|
"""
|
||||||
|
|
@ -28,9 +32,23 @@ class SparseGPMissing(StochasticStorage):
|
||||||
"""
|
"""
|
||||||
Here we want to loop over all dimensions every time.
|
Here we want to loop over all dimensions every time.
|
||||||
Thus, we can just make sure the loop goes over self.d every
|
Thus, we can just make sure the loop goes over self.d every
|
||||||
time.
|
time. We will try to batch together dimensions that share the same missing-data (NaN) pattern,
|
||||||
|
which speeds up calculations significantly.
|
||||||
"""
|
"""
|
||||||
self.d = range(model.Y_normalized.shape[1])
|
import numpy as np
|
||||||
|
self.Y = model.Y_normalized
|
||||||
|
bdict = {}
|
||||||
|
for d in range(self.Y.shape[1]):
|
||||||
|
inan = np.isnan(self.Y[:, d])
|
||||||
|
arr_str = np.array2string(inan,
|
||||||
|
np.inf, 0,
|
||||||
|
True, '',
|
||||||
|
formatter={'bool':lambda x: '1' if x else '0'})
|
||||||
|
try:
|
||||||
|
bdict[arr_str][0].append(d)
|
||||||
|
except:
|
||||||
|
bdict[arr_str] = [[d], ~inan]
|
||||||
|
self.d = bdict.values()
|
||||||
|
|
||||||
class SparseGPStochastics(StochasticStorage):
|
class SparseGPStochastics(StochasticStorage):
|
||||||
"""
|
"""
|
||||||
|
|
@ -40,16 +58,29 @@ class SparseGPStochastics(StochasticStorage):
|
||||||
def __init__(self, model, batchsize=1):
|
def __init__(self, model, batchsize=1):
|
||||||
self.batchsize = batchsize
|
self.batchsize = batchsize
|
||||||
self.output_dim = model.Y.shape[1]
|
self.output_dim = model.Y.shape[1]
|
||||||
|
self.Y = model.Y_normalized
|
||||||
self.reset()
|
self.reset()
|
||||||
self.do_stochastics()
|
self.do_stochastics()
|
||||||
|
|
||||||
def do_stochastics(self):
|
def do_stochastics(self):
|
||||||
if self.batchsize == 1:
|
if self.batchsize == 1:
|
||||||
self.current_dim = (self.current_dim+1)%self.output_dim
|
self.current_dim = (self.current_dim+1)%self.output_dim
|
||||||
self.d = [self.current_dim]
|
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
|
||||||
else:
|
else:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
|
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
|
||||||
|
bdict = {}
|
||||||
|
for d in self.d:
|
||||||
|
inan = np.isnan(self.Y[:, d])
|
||||||
|
arr_str = np.array2string(inan,
|
||||||
|
np.inf, 0,
|
||||||
|
True, '',
|
||||||
|
formatter={'bool':lambda x: '1' if x else '0'})
|
||||||
|
try:
|
||||||
|
bdict[arr_str][0].append(d)
|
||||||
|
except:
|
||||||
|
bdict[arr_str] = [[d], ~inan]
|
||||||
|
self.d = bdict.values()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.current_dim = -1
|
self.current_dim = -1
|
||||||
|
|
|
||||||
|
|
@ -63,33 +63,18 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
|
|
||||||
if stochastic and missing_data:
|
if stochastic and missing_data:
|
||||||
self.missing_data = True
|
self.missing_data = True
|
||||||
self.ninan = ~np.isnan(Y)
|
|
||||||
self.stochastics = SparseGPStochastics(self, batchsize)
|
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||||
elif stochastic and not missing_data:
|
elif stochastic and not missing_data:
|
||||||
self.missing_data = False
|
self.missing_data = False
|
||||||
self.stochastics = SparseGPStochastics(self, batchsize)
|
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||||
elif missing_data:
|
elif missing_data:
|
||||||
self.missing_data = True
|
self.missing_data = True
|
||||||
self.ninan = ~np.isnan(Y)
|
|
||||||
self.stochastics = SparseGPMissing(self)
|
self.stochastics = SparseGPMissing(self)
|
||||||
else:
|
else:
|
||||||
self.stochastics = False
|
self.stochastics = False
|
||||||
|
|
||||||
logger.info("Adding Z as parameter")
|
logger.info("Adding Z as parameter")
|
||||||
self.link_parameter(self.Z, index=0)
|
self.link_parameter(self.Z, index=0)
|
||||||
if self.missing_data:
|
|
||||||
self.Ylist = []
|
|
||||||
overall = self.Y_normalized.shape[1]
|
|
||||||
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
|
|
||||||
message = m_f(-1)
|
|
||||||
print(message, end=' ')
|
|
||||||
for d in range(overall):
|
|
||||||
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
|
|
||||||
print(' '*(len(message)+1) + '\r', end=' ')
|
|
||||||
message = m_f(d)
|
|
||||||
print(message, end=' ')
|
|
||||||
print('')
|
|
||||||
|
|
||||||
self.posterior = None
|
self.posterior = None
|
||||||
|
|
||||||
def has_uncertain_inputs(self):
|
def has_uncertain_inputs(self):
|
||||||
|
|
@ -245,8 +230,7 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
message = m_f(-1)
|
message = m_f(-1)
|
||||||
print(message, end=' ')
|
print(message, end=' ')
|
||||||
|
|
||||||
for d in self.stochastics.d:
|
for d, ninan in self.stochastics.d:
|
||||||
ninan = self.ninan[:, d]
|
|
||||||
|
|
||||||
if not self.stochastics:
|
if not self.stochastics:
|
||||||
print(' '*(len(message)) + '\r', end=' ')
|
print(' '*(len(message)) + '\r', end=' ')
|
||||||
|
|
@ -257,7 +241,7 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
grad_dict, current_values, value_indices = self._inner_parameters_changed(
|
grad_dict, current_values, value_indices = self._inner_parameters_changed(
|
||||||
self.kern, self.X[ninan],
|
self.kern, self.X[ninan],
|
||||||
self.Z, self.likelihood,
|
self.Z, self.likelihood,
|
||||||
self.Ylist[d], self.Y_metadata,
|
self.Y_normalized[ninan][:, d], self.Y_metadata,
|
||||||
Lm, dL_dKmm,
|
Lm, dL_dKmm,
|
||||||
subset_indices=dict(outputs=d, samples=ninan))
|
subset_indices=dict(outputs=d, samples=ninan))
|
||||||
|
|
||||||
|
|
@ -266,8 +250,8 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
|
|
||||||
Lm = posterior.K_chol
|
Lm = posterior.K_chol
|
||||||
dL_dKmm = grad_dict['dL_dKmm']
|
dL_dKmm = grad_dict['dL_dKmm']
|
||||||
woodbury_inv[:, :, d] = posterior.woodbury_inv
|
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
|
||||||
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
|
woodbury_vector[:, d] = posterior.woodbury_vector
|
||||||
self._log_marginal_likelihood += log_marginal_likelihood
|
self._log_marginal_likelihood += log_marginal_likelihood
|
||||||
if not self.stochastics:
|
if not self.stochastics:
|
||||||
print('')
|
print('')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue