Committing code for some changes to the API for calculating ep_gradients; also fixing some issues with Gauss-Hermite quadrature. Now we have both quadrature modes available ...

Akash Kumar Dhaka 2017-07-03 18:24:02 +03:00
parent ae27e1225d
commit 8b621a409c
3 changed files with 37 additions and 54 deletions
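Summary of the API change (a sketch, not part of the diff): the Gaussian/non-Gaussian branch in EP._inference is replaced by a single call, likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh'), where quad_mode selects Gauss-Kronrod ('gk') or Gauss-Hermite ('gh') quadrature for the gradient integral against the cavity Gaussian. A standalone toy check (not GPy code; a Gaussian likelihood with variance theta, for which the integral has a closed form) that the two rules agree:

import numpy as np
from scipy import integrate

# Toy check, independent of GPy: both quadrature modes should agree on
# dZ/dtheta = int p(y|f) * dlogp(y|f)/dtheta * N(f; mu, s2) df,
# which has a closed form when p(y|f) = N(y; f, theta).
theta = 0.5                  # likelihood variance (the hyperparameter)
y, mu, s2 = 1.2, 0.3, 0.8    # observation and cavity moments

logp = lambda f: -0.5*np.log(2.*np.pi*theta) - 0.5*(y - f)**2/theta
dlogp_dtheta = lambda f: -0.5/theta + (y - f)**2/(2.*theta**2)
g = lambda f: np.exp(logp(f))*dlogp_dtheta(f)

# 'gk': adaptive Gauss-Kronrod (QUADPACK) against the cavity density
cavity = lambda f: np.exp(-0.5*(f - mu)**2/s2)/np.sqrt(2.*np.pi*s2)
gk, _ = integrate.quad(lambda f: g(f)*cavity(f), -np.inf, np.inf)

# 'gh': Gauss-Hermite with f = sqrt(2)*sigma*x + mu, the same change of
# variables as the corrected line X = gh_x[None,:]*np.sqrt(2.)*sigma + mu
gh_x, gh_w = np.polynomial.hermite.hermgauss(32)
gh = np.dot(g(np.sqrt(2.*s2)*gh_x + mu), gh_w)/np.sqrt(np.pi)

s = theta + s2               # closed form: Z = N(y; mu, theta + s2)
Z = np.exp(-0.5*(y - mu)**2/s)/np.sqrt(2.*np.pi*s)
exact = Z*0.5*((y - mu)**2/s**2 - 1./s)
print(gk, gh, exact)         # all three should agree closely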

Changed file 1 of 3 (EP inference: class EP(EPBase, ExactGaussianInference))

@@ -179,14 +179,14 @@ class EP(EPBase, ExactGaussianInference):
         elif self.ep_mode=="alternated":
             if getattr(self, '_ep_approximation', None) is None:
                 #if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
-                post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
+                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
             else:
                 #if we've already run EP, just use the existing approximation stored in self._ep_approximation
-                post_params, ga_approx, log_Z_tilde = self._ep_approximation
+                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
         else:
             raise ValueError("ep_mode value not valid")
-        return self._inference(Y, K, ga_approx, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
+        return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)

     def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@@ -221,7 +221,7 @@ class EP(EPBase, ExactGaussianInference):
         # This terms cancel with the coreresponding terms in the marginal loglikelihood
         log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
         # - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
-        return (post_params, ga_approx, log_Z_tilde)
+        return (post_params, ga_approx, cav_params, log_Z_tilde)

     def _init_approximations(self, K, num_data):
         #initial values - Gaussian factors
@@ -281,7 +281,7 @@ class EP(EPBase, ExactGaussianInference):
         return log_marginal, post_params

-    def _inference(self, Y, K, ga_approx, likelihood, Z_tilde, Y_metadata=None):
+    def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
         log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
         tau_tilde_root = np.sqrt(ga_approx.tau)
@@ -294,10 +294,7 @@ class EP(EPBase, ExactGaussianInference):
         symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
         dL_dK = 0.5 * (tdot(alpha) - Wi)
-        if not isinstance(likelihood, Gaussian):
-            dL_dthetaL = likelihood.ep_gradients(Y, ga_approx.tau, ga_approx.v, Y_metadata=Y_metadata)
-        else:
-            dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
+        dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
         return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
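Note that the gradient call above now passes the cavity parameters (cav_params.tau, cav_params.v) rather than the site approximation ga_approx: the quadrature in ep_gradients integrates against the cavity distribution, whose moments it rebuilds from the natural parameters as mu = v/tau and sigma2 = 1./tau (see the likelihood file below). A toy illustration of that conversion, with made-up numbers:

import numpy as np
cav_tau = np.array([2., 4.])            # cavity precisions tau_i
cav_v = np.array([1., -2.])             # cavity precision-times-mean v_i
mu, sigma2 = cav_v/cav_tau, 1./cav_tau  # moments of the cavity N(mu, sigma2)
print(mu, sigma2)                       # [ 0.5 -0.5] [0.5  0.25]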

Changed file 2 of 3 (Gaussian likelihood: class Gaussian(Likelihood))

@@ -57,7 +57,7 @@ class Gaussian(Likelihood):
     def update_gradients(self, grad):
         self.variance.gradient = grad

-    def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
+    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag,Y_metadata=None):
         return dL_dKdiag.sum()

     def _preprocess_values(self, Y):
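For the Gaussian likelihood the gradient stays exact, so the override ignores the cavity arguments: the noise variance sigma_n^2 enters the marginal covariance as K + sigma_n^2*I, hence d(K + sigma_n^2*I)/d sigma_n^2 = I and dL/d sigma_n^2 reduces to the sum of the diagonal of dL_dK, which is exactly the np.diag(dL_dK) vector that _inference now passes in. A toy check:

import numpy as np
dL_dK = np.array([[0.5, 0.1],
                  [0.1, 0.3]])   # toy gradient of the log marginal w.r.t. K
dL_dKdiag = np.diag(dL_dK)       # what ep_gradients receives
print(dL_dKdiag.sum())           # 0.8, i.e. sum_i (dL/dK)_ii since dK/dsigma_n^2 = I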

Changed file 3 of 3 (likelihood base class: class Likelihood(Parameterized))

@@ -227,10 +227,10 @@ class Likelihood(Parameterized):
             self.__gh_points = np.polynomial.hermite.hermgauss(T)
         return self.__gh_points

-    def ep_gradients(self, Y, tau, v, Y_metadata=None, gh_points=None, approx=False, boost_grad=1.):
+    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
         if self.size > 0:
             shape = Y.shape
-            tau,v,Y = tau.flatten(), v.flatten(),Y.flatten()
+            tau,v,Y = cav_tau.flatten(), cav_v.flatten(),Y.flatten()
             mu = v/tau
             sigma2 = 1./tau
@@ -245,14 +245,19 @@ class Likelihood(Parameterized):
                     Y_metadata_i[key] = Y_metadata[key][index,:]
                 Y_metadata_list.append(Y_metadata_i)
-                val = self.site_derivatives_ep(Y[index], tau[index], v[index], Y_metadata_i)
-                dlik_dtheta[:, index] = val.ravel()
-            f = partial(self.integrate)
-            quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
-            quads = np.vstack(quads)
-            quads.reshape(self.size, shape[0], shape[1])
-            # do a gaussian-hermite integration
+            if quad_mode == 'gk':
+                f = partial(self.integrate_gk)
+                quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten()), Y_metadata_list))
+                quads = np.vstack(quads)
+                quads.reshape(self.size, shape[0], shape[1])
+            elif quad_mode == 'gh':
+                f = partial(self.integrate_gh)
+                quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
+                quads = np.hstack(quads)
+                quads = quads.T
+            else:
+                raise Exception("no other quadrature mode available")
             dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
             dL_dtheta = boost_grad * np.nansum(quads, axis=1)
             # dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
@@ -261,23 +266,23 @@ class Likelihood(Parameterized):
         return dL_dtheta

-    def integrate(self, Y, mu, sigma):
+    def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
+        # gaussian-kronrod integration.
         fmin = -np.inf
         fmax = np.inf
         SQRT_2PI = np.sqrt(2.*np.pi)
         def generate_integral(f):
-            a = np.exp(self.logpdf_link(f, Y)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
+            a = np.exp(self.logpdf_link(f, Y, Y_metadata_i)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
                 SQRT_2PI * sigma)
-            fn1 = a * self.dlogpdf_dtheta(f, Y)
+            fn1 = a * self.dlogpdf_dtheta(f, Y, Y_metadata_i)
             fn = fn1
             return fn
         dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
         return dF_dtheta_i

-    def site_derivatives_ep(self,obs,tau,v,Y_metadata_i=None, approx=False, gh_points=None):
+    def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
+        # gaussian-hermite quadrature.
         # "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
         # and the expectation is over the exact marginal posterior, which is not gaussian- and is
         # unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
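For one observation, integrate_gk above builds the integrand exp(logpdf_link(f, Y)) * N(f; mu, sigma^2) * dlogpdf_dtheta(f, Y) and hands it to quadgk_int over the whole real line. A minimal stand-in sketch, using scipy.integrate.quad (adaptive Gauss-Kronrod from QUADPACK) in place of GPy's quadgk_int, with a hypothetical one-parameter likelihood p(y|f) = N(y; f + theta, 1) evaluated at theta = 0:

import numpy as np
from scipy import integrate

y, mu, sigma = 1.0, 0.2, 0.7
SQRT_2PI = np.sqrt(2.*np.pi)

# Stand-ins for logpdf_link and dlogpdf_dtheta (hypothetical likelihood):
logpdf_link = lambda f: -0.5*np.log(2.*np.pi) - 0.5*(y - f)**2
dlogpdf_dtheta = lambda f: (y - f)   # d/dtheta of log N(y; f+theta, 1) at theta=0

def generate_integral(f):
    # likelihood term times the cavity Gaussian, as in integrate_gk
    a = np.exp(logpdf_link(f)) * np.exp(-0.5*np.square((f - mu)/sigma))/(SQRT_2PI*sigma)
    return a * dlogpdf_dtheta(f)

dF_dtheta, abserr = integrate.quad(generate_integral, -np.inf, np.inf)
print(dF_dtheta, abserr)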
@@ -288,42 +293,23 @@ class Likelihood(Parameterized):
         # "writing it explicitly "
         # use them for gaussian-hermite quadrature
-        mu = v/tau
-        sigma2 = 1./tau
-        sigma = np.sqrt(sigma2)
-        # yc = 1 - Y_metadata_i['censored']
-        fmin = -np.inf
-        fmax = np.inf
         SQRT_2PI = np.sqrt(2.*np.pi)
-        def get_integral_limits(obs, tau, v, yc):
-            # find the mode fi, fmin, and fmax - limit the range of integration to gather better weights
-            pass
         if gh_points is None:
             gh_x, gh_w = self._gh_points(32)
         else:
             gh_x, gh_w = gh_points
-        X = gh_x[None,:]*np.sqrt(2.*sigma2) + mu
-        # X = gh_x[None,:]
-        logp = self.logpdf(X, obs, Y_metadata=Y_metadata_i)
-        dlogp_dtheta = self.dlogpdf_dtheta(X, obs, Y_metadata=Y_metadata_i)
-        if not approx:
-            def generate_integral(x):
-                a = np.exp(self.logpdf_link(x, obs, Y_metadata=Y_metadata_i)) * np.exp(-0.5 * np.square((x - mu) / sigma)) / (
-                    SQRT_2PI * sigma)
-                fn1 = a * self.dlogpdf_dtheta(x, obs, Y_metadata=Y_metadata_i)
-                fn = fn1
-                return fn
-            dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin,fmax=fmax)
-            return dF_dtheta_i
-        dF_dtheta_i = np.dot(dlogp_dtheta, gh_w)/np.sqrt(np.pi)
+        X = gh_x[None,:]*np.sqrt(2.)*sigma + mu
+        # Here X is a grid vector of possible fi values, while Y is just a single value which will be broadcasted.
+        a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
+        a = a.repeat(self.num_params,0)
+        b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
+        old_shape = b.shape
+        fn = np.array([i*j for i,j in zip(a.flatten(), b.flatten())])
+        fn = fn.reshape(old_shape)
+        dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
         return dF_dtheta_i

     def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
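In integrate_gh, a holds exp(logpdf_link) on the (1, N) quadrature grid repeated to (num_params, N), b holds dlogpdf_dtheta with the matching shape, and the zip-over-flattened-arrays loop produces the same result as an elementwise product, so the quadrature reduces to a weighted dot along the grid axis. A shape-level sketch with stand-in values (the density and derivative below are placeholders, not a real likelihood):

import numpy as np

num_params, mu, sigma = 2, 0.3, 0.9
gh_x, gh_w = np.polynomial.hermite.hermgauss(32)
X = gh_x[None, :]*np.sqrt(2.)*sigma + mu   # (1, 32) grid, the corrected scaling

a = np.exp(-0.5*X**2)                      # stand-in for np.exp(logpdf_link(X, Y))
a = a.repeat(num_params, 0)                # (2, 32), one row per parameter
b = np.vstack([X, X**2])                   # stand-in for dlogpdf_dtheta(X, Y)

fn = a*b                                   # elementwise; same as the zip loop
dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
print(dF_dtheta_i.shape)                   # (2,): one gradient per parameter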