merge the current devel into psi2

This commit is contained in:
Zhenwen Dai 2014-08-11 18:01:23 +01:00
commit 785c580032
49 changed files with 1839 additions and 581 deletions

View file

@ -32,7 +32,7 @@ class EP(LatentFunctionInference):
pass
def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
num_data, output_dim = X.shape
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
K = kern.K(X)

View file

@ -56,7 +56,7 @@ class EPDTC(LatentFunctionInference):
self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_data, output_dim = X.shape
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
Kmm = kern.K(Z)

View file

@ -9,6 +9,8 @@ import numpy as np
from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
import logging, itertools
logger = logging.getLogger('vardtc')
class VarDTC(LatentFunctionInference):
"""
@ -180,11 +182,12 @@ class VarDTC(LatentFunctionInference):
return post, log_marginal, grad_dict
class VarDTCMissingData(LatentFunctionInference):
const_jitter = 1e-6
const_jitter = 1e-10
def __init__(self, limit=1, inan=None):
from ...util.caching import Cacher
self._Y = Cacher(self._subarray_computations, limit)
self._inan = inan
if inan is not None: self._inan = ~inan
else: self._inan = None
pass
def set_limit(self, limit):
@ -205,21 +208,35 @@ class VarDTCMissingData(LatentFunctionInference):
if self._inan is None:
inan = np.isnan(Y)
has_none = inan.any()
self._inan = ~inan
else:
inan = self._inan
has_none = True
if has_none:
from ...util.subarray_and_sorting import common_subarrays
self._subarray_indices = []
for v,ind in common_subarrays(inan, 1).iteritems():
if not np.all(v):
v = ~np.array(v, dtype=bool)
ind = np.array(ind, dtype=int)
if ind.size == Y.shape[1]:
ind = slice(None)
self._subarray_indices.append([v,ind])
Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
traces = [(y**2).sum() for y in Ys]
#print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..."
#csa = common_subarrays(inan, 1)
size = Y.shape[1]
#logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size))
Ys = []
next_ten = [0.]
count = itertools.count()
for v, y in itertools.izip(inan.T, Y.T[:,:,None]):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
Ys.append(y[v,:])
next_ten = [0.]
count = itertools.count()
def trace(y):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing traces {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
y = y[inan[:,i],i:i+1]
return np.einsum('ij,ij->', y,y)
traces = [trace(Y) for _ in xrange(size)]
return Ys, traces
else:
self._subarray_indices = [[slice(None),slice(None)]]
@ -241,7 +258,6 @@ class VarDTCMissingData(LatentFunctionInference):
beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
het_noise = beta_all.size != 1
import itertools
num_inducing = Z.shape[0]
dL_dpsi0_all = np.zeros(Y.shape[0])
@ -261,22 +277,17 @@ class VarDTCMissingData(LatentFunctionInference):
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm)
VVT_factor_all = np.empty(Y.shape)
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
if not full_VVT_factor:
psi1V = np.dot(Y.T*beta_all, psi1_all).T
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
if het_noise: beta = beta_all[ind]
size = Y.shape[1]
next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten:
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
next_ten += .1
if het_noise: beta = beta_all[i]
else: beta = beta_all
VVT_factor = (beta*y)
try:
VVT_factor_all[v, ind].flat = VVT_factor.flat
except ValueError:
mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
VVT_factor_all.flat[mult] = VVT_factor
output_dim = y.shape[1]
VVT_factor = (y*beta)
output_dim = 1#len(ind)
psi0 = psi0_all[v]
psi1 = psi1_all[v, :]
@ -318,7 +329,6 @@ class VarDTCMissingData(LatentFunctionInference):
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
#import ipdb;ipdb.set_trace()
dL_dpsi0_all[v] += dL_dpsi0
dL_dpsi1_all[v, :] += dL_dpsi1
if uncertain_inputs:
@ -335,19 +345,20 @@ class VarDTCMissingData(LatentFunctionInference):
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, Y)
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
else:
print 'foobar'
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#if full_VVT_factor:
woodbury_vector[:, i:i+1] = Cpsi1Vf
#else:
# print 'foobar'
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB, tmp, lower=1)
# woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#import ipdb;ipdb.set_trace()
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None]
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
@ -364,23 +375,6 @@ class VarDTCMissingData(LatentFunctionInference):
'dL_dKnm':dL_dpsi1_all,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
#if not full_VVT_factor:
# print 'foobar'
# psi1V = np.dot(Y.T*beta_all, psi1_all).T
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB_all, tmp, lower=1)
# woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
#import ipdb;ipdb.set_trace()
#Bi, _ = dpotri(LB_all, lower=1)
#symmetrify(Bi)
#Bi = -dpotri(LB_all, lower=1)[0]
#from ...util import diag
#diag.add(Bi, 1)
#woodbury_inv = backsub_both_sides(Lm, Bi)
post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict

View file

@ -1,2 +1,3 @@
from scg import SCG
from optimization import *
from hmc import HMC,HMC_shortcut

View file

@ -0,0 +1,157 @@
"""HMC implementation"""
import numpy as np
class HMC:
def __init__(self,model,M=None,stepsize=1e-1):
self.model = model
self.stepsize = stepsize
self.p = np.empty_like(model.optimizer_array.copy())
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
theta_old = self.model.optimizer_array.copy()
params[i] = self.model.unfixed_param_array
#Matropolis
self._update(hmc_iters)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters):
for i in xrange(hmc_iters):
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
class HMC_shortcut:
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
self.model = model
self.stepsize_range = np.log(stepsize_range)
self.p = np.empty_like(model.optimizer_array.copy())
self.groupsize = groupsize
self.Hstd_th = Hstd_th
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
# sample a stepsize from the uniform distribution
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
params[i] = self.model.unfixed_param_array
theta_old = self.model.optimizer_array.copy()
#Matropolis
self._update(hmc_iters, stepsize)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters, stepsize):
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
p_buf = np.empty((2*hmc_iters+1,self.p.size))
H_buf = np.empty((2*hmc_iters+1,))
# Set initial position
theta_buf[hmc_iters] = self.model.optimizer_array
p_buf[hmc_iters] = self.p
H_buf[hmc_iters] = self._computeH()
reversal = []
pos = 1
i=0
while i<hmc_iters:
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
theta_buf[hmc_iters+pos] = self.model.optimizer_array
p_buf[hmc_iters+pos] = self.p
H_buf[hmc_iters+pos] = self._computeH()
i+=1
if i<self.groupsize:
pos += 1
continue
else:
if len(reversal)==0:
Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
if self._testH(H_buf[Hlist]):
pos += 1
else:
# Reverse the trajectory for the 1st time
reversal.append(pos)
if hmc_iters-i>pos:
pos = -1
i += pos
self.model.optimizer_array = theta_buf[hmc_iters]
self.p[:] = -p_buf[hmc_iters]
else:
pos_new = pos-hmc_iters+i
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = -p_buf[hmc_iters+pos_new]
break
else:
Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
# print Hlist
# print self._testH(H_buf[Hlist])
if self._testH(H_buf[Hlist]):
pos += -1
else:
# Reverse the trajectory for the 2nd time
r = (hmc_iters - i)%((reversal[0]-pos)*2)
if r>(reversal[0]-pos):
pos_new = 2*reversal[0] - r - pos
else:
pos_new = pos + r
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
# print reversal[0],pos,pos_new
# print H_buf
break
def _testH(self, Hlist):
Hstd = np.std(Hlist)
# print Hlist
# print Hstd
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
return False
else:
return True
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.

View file

@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-8
sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
#if any(np.isnan(gradnew)):
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.5 * beta, betamin)
beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
# beta = 1. # TODO: betareset!!
beta = 1. # This is not in the original paper
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)