[var dtc missing] memory efficiency greatly improved

This commit is contained in:
mzwiessele 2014-07-02 09:17:04 -07:00
parent 89f7f2dc73
commit eb9fb180fb
2 changed files with 14 additions and 9 deletions

View file

@@ -296,15 +296,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9 D1, D2, D3, N, num_inducing, Q = 6, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
Y[inan] = _np.nan Ymissing = Y.copy()
Ymissing[inan] = _np.nan
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k) inference_method=VarDTCMissingData(inan=inan), kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)

View file

@@ -237,10 +237,13 @@ class VarDTCMissingData(LatentFunctionInference):
ind = slice(None) ind = slice(None)
self._subarray_indices.append([v,ind]) self._subarray_indices.append([v,ind])
logger.info('preparing subarrays Y') logger.info('preparing subarrays Y')
Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices] #Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
logger.info('preparing traces Y') logger.info('preparing traces Y')
traces = [np.einsum('ij,ij->', y,y) for y in Ys] def trace(y, v, ind):
return Ys, traces y = y[v,:][:,ind]
return np.einsum('ij,ij->', y,y)
traces = [trace(Y, v, ind) for v, ind in self._subarray_indices]
return traces
else: else:
self._subarray_indices = [[slice(None),slice(None)]] self._subarray_indices = [[slice(None),slice(None)]]
return [Y], [(Y**2).sum()] return [Y], [(Y**2).sum()]
@@ -257,7 +260,7 @@ class VarDTCMissingData(LatentFunctionInference):
psi1_all = kern.K(X, Z) psi1_all = kern.K(X, Z)
psi2_all = None psi2_all = None
Ys, traces = self._Y(Y) traces = self._Y(Y)
beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
het_noise = beta_all.size != 1 het_noise = beta_all.size != 1
@@ -288,7 +291,8 @@ class VarDTCMissingData(LatentFunctionInference):
#logger.info('computing dimension-wise likelihood and derivatives') #logger.info('computing dimension-wise likelihood and derivatives')
#size = len(Ys) #size = len(Ys)
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): for trYYT, [v, ind] in itertools.izip(traces, self._subarray_indices):
y = Y[v,:][:,ind]
#logger.info('{:.3%} dimensions:{}'.format((i+1.)/size, ind)) #logger.info('{:.3%} dimensions:{}'.format((i+1.)/size, ind))
if het_noise: beta = beta_all[ind] if het_noise: beta = beta_all[ind]
else: beta = beta_all else: beta = beta_all