mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-11 04:52:37 +02:00
committing code for some changes to the API for calculating ep_gradients, also fixing some issues with Gauss–Hermite quadrature; now we have both available ...
This commit is contained in:
parent
ae27e1225d
commit
8b621a409c
3 changed files with 37 additions and 54 deletions
|
|
@ -179,14 +179,14 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
elif self.ep_mode=="alternated":
|
elif self.ep_mode=="alternated":
|
||||||
if getattr(self, '_ep_approximation', None) is None:
|
if getattr(self, '_ep_approximation', None) is None:
|
||||||
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
|
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
|
||||||
post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
||||||
else:
|
else:
|
||||||
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
|
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
|
||||||
post_params, ga_approx, log_Z_tilde = self._ep_approximation
|
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
|
||||||
else:
|
else:
|
||||||
raise ValueError("ep_mode value not valid")
|
raise ValueError("ep_mode value not valid")
|
||||||
|
|
||||||
return self._inference(Y, K, ga_approx, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
|
return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
|
||||||
|
|
||||||
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
|
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
|
||||||
|
|
||||||
|
|
@ -221,7 +221,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
# These terms cancel with the corresponding terms in the marginal loglikelihood
|
# These terms cancel with the corresponding terms in the marginal loglikelihood
|
||||||
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
|
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
|
||||||
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
|
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
|
||||||
return (post_params, ga_approx, log_Z_tilde)
|
return (post_params, ga_approx, cav_params, log_Z_tilde)
|
||||||
|
|
||||||
def _init_approximations(self, K, num_data):
|
def _init_approximations(self, K, num_data):
|
||||||
#initial values - Gaussian factors
|
#initial values - Gaussian factors
|
||||||
|
|
@ -281,7 +281,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
|
|
||||||
return log_marginal, post_params
|
return log_marginal, post_params
|
||||||
|
|
||||||
def _inference(self, Y, K, ga_approx, likelihood, Z_tilde, Y_metadata=None):
|
def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
|
||||||
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
|
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
|
||||||
|
|
||||||
tau_tilde_root = np.sqrt(ga_approx.tau)
|
tau_tilde_root = np.sqrt(ga_approx.tau)
|
||||||
|
|
@ -294,10 +294,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
|
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
|
||||||
|
|
||||||
dL_dK = 0.5 * (tdot(alpha) - Wi)
|
dL_dK = 0.5 * (tdot(alpha) - Wi)
|
||||||
if not isinstance(likelihood, Gaussian):
|
dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
|
||||||
dL_dthetaL = likelihood.ep_gradients(Y, ga_approx.tau, ga_approx.v, Y_metadata=Y_metadata)
|
|
||||||
else:
|
|
||||||
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
|
|
||||||
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
|
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ class Gaussian(Likelihood):
|
||||||
def update_gradients(self, grad):
|
def update_gradients(self, grad):
|
||||||
self.variance.gradient = grad
|
self.variance.gradient = grad
|
||||||
|
|
||||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag,Y_metadata=None):
|
||||||
return dL_dKdiag.sum()
|
return dL_dKdiag.sum()
|
||||||
|
|
||||||
def _preprocess_values(self, Y):
|
def _preprocess_values(self, Y):
|
||||||
|
|
|
||||||
|
|
@ -227,10 +227,10 @@ class Likelihood(Parameterized):
|
||||||
self.__gh_points = np.polynomial.hermite.hermgauss(T)
|
self.__gh_points = np.polynomial.hermite.hermgauss(T)
|
||||||
return self.__gh_points
|
return self.__gh_points
|
||||||
|
|
||||||
def ep_gradients(self, Y, tau, v, Y_metadata=None, gh_points=None, approx=False, boost_grad=1.):
|
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
|
||||||
if self.size > 0:
|
if self.size > 0:
|
||||||
shape = Y.shape
|
shape = Y.shape
|
||||||
tau,v,Y = tau.flatten(), v.flatten(),Y.flatten()
|
tau,v,Y = cav_tau.flatten(), cav_v.flatten(),Y.flatten()
|
||||||
mu = v/tau
|
mu = v/tau
|
||||||
sigma2 = 1./tau
|
sigma2 = 1./tau
|
||||||
|
|
||||||
|
|
@ -245,14 +245,19 @@ class Likelihood(Parameterized):
|
||||||
Y_metadata_i[key] = Y_metadata[key][index,:]
|
Y_metadata_i[key] = Y_metadata[key][index,:]
|
||||||
Y_metadata_list.append(Y_metadata_i)
|
Y_metadata_list.append(Y_metadata_i)
|
||||||
|
|
||||||
val = self.site_derivatives_ep(Y[index], tau[index], v[index], Y_metadata_i)
|
if quad_mode == 'gk':
|
||||||
dlik_dtheta[:, index] = val.ravel()
|
f = partial(self.integrate_gk)
|
||||||
|
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten()), Y_metadata_list))
|
||||||
f = partial(self.integrate)
|
quads = np.vstack(quads)
|
||||||
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
|
quads.reshape(self.size, shape[0], shape[1])
|
||||||
quads = np.vstack(quads)
|
elif quad_mode == 'gh':
|
||||||
quads.reshape(self.size, shape[0], shape[1])
|
f = partial(self.integrate_gh)
|
||||||
|
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
|
||||||
|
quads = np.hstack(quads)
|
||||||
|
quads = quads.T
|
||||||
|
else:
|
||||||
|
raise Exception("no other quadrature mode available")
|
||||||
|
# do a gaussian-hermite integration
|
||||||
dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
|
dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
|
||||||
dL_dtheta = boost_grad * np.nansum(quads, axis=1)
|
dL_dtheta = boost_grad * np.nansum(quads, axis=1)
|
||||||
# dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
|
# dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
|
||||||
|
|
@ -261,23 +266,23 @@ class Likelihood(Parameterized):
|
||||||
return dL_dtheta
|
return dL_dtheta
|
||||||
|
|
||||||
|
|
||||||
def integrate(self, Y, mu, sigma):
|
def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
|
||||||
|
# gaussian-kronrod integration.
|
||||||
fmin = -np.inf
|
fmin = -np.inf
|
||||||
fmax = np.inf
|
fmax = np.inf
|
||||||
SQRT_2PI = np.sqrt(2.*np.pi)
|
SQRT_2PI = np.sqrt(2.*np.pi)
|
||||||
def generate_integral(f):
|
def generate_integral(f):
|
||||||
a = np.exp(self.logpdf_link(f, Y)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
|
a = np.exp(self.logpdf_link(f, Y, Y_metadata_i)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
|
||||||
SQRT_2PI * sigma)
|
SQRT_2PI * sigma)
|
||||||
fn1 = a * self.dlogpdf_dtheta(f, Y)
|
fn1 = a * self.dlogpdf_dtheta(f, Y, Y_metadata_i)
|
||||||
fn = fn1
|
fn = fn1
|
||||||
return fn
|
return fn
|
||||||
|
|
||||||
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
|
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
|
||||||
return dF_dtheta_i
|
return dF_dtheta_i
|
||||||
|
|
||||||
|
def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
|
||||||
|
# gaussian-hermite quadrature.
|
||||||
def site_derivatives_ep(self,obs,tau,v,Y_metadata_i=None, approx=False, gh_points=None):
|
|
||||||
# "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
|
# "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
|
||||||
# and the expectation is over the exact marginal posterior, which is not gaussian- and is
|
# and the expectation is over the exact marginal posterior, which is not gaussian- and is
|
||||||
# unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
|
# unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
|
||||||
|
|
@ -288,42 +293,23 @@ class Likelihood(Parameterized):
|
||||||
# "writing it explicitly "
|
# "writing it explicitly "
|
||||||
# use them for gaussian-hermite quadrature
|
# use them for gaussian-hermite quadrature
|
||||||
|
|
||||||
mu = v/tau
|
|
||||||
sigma2 = 1./tau
|
|
||||||
sigma = np.sqrt(sigma2)
|
|
||||||
|
|
||||||
# yc = 1 - Y_metadata_i['censored']
|
|
||||||
fmin = -np.inf
|
|
||||||
fmax = np.inf
|
|
||||||
SQRT_2PI = np.sqrt(2.*np.pi)
|
SQRT_2PI = np.sqrt(2.*np.pi)
|
||||||
|
|
||||||
def get_integral_limits(obs, tau, v, yc):
|
|
||||||
# find the mode fi, fmin, and fmax - limit the range of integration to gather better weights
|
|
||||||
pass
|
|
||||||
|
|
||||||
if gh_points is None:
|
if gh_points is None:
|
||||||
gh_x, gh_w = self._gh_points(32)
|
gh_x, gh_w = self._gh_points(32)
|
||||||
else:
|
else:
|
||||||
gh_x, gh_w = gh_points
|
gh_x, gh_w = gh_points
|
||||||
|
|
||||||
X = gh_x[None,:]*np.sqrt(2.*sigma2) + mu
|
X = gh_x[None,:]*np.sqrt(2.)*sigma + mu
|
||||||
# X = gh_x[None,:]
|
|
||||||
|
|
||||||
logp = self.logpdf(X, obs, Y_metadata=Y_metadata_i)
|
# Here X is a grid vector of possible fi values, while Y is just a single value which will be broadcasted.
|
||||||
dlogp_dtheta = self.dlogpdf_dtheta(X, obs, Y_metadata=Y_metadata_i)
|
a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
|
||||||
|
a = a.repeat(self.num_params,0)
|
||||||
|
b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
|
||||||
|
old_shape = b.shape
|
||||||
|
fn = np.array([i*j for i,j in zip(a.flatten(), b.flatten())])
|
||||||
|
fn = fn.reshape(old_shape)
|
||||||
|
|
||||||
|
dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
|
||||||
if not approx:
|
|
||||||
def generate_integral(x):
|
|
||||||
a = np.exp(self.logpdf_link(x, obs, Y_metadata=Y_metadata_i)) * np.exp(-0.5 * np.square((x - mu) / sigma)) / (
|
|
||||||
SQRT_2PI * sigma)
|
|
||||||
fn1 = a * self.dlogpdf_dtheta(x, obs, Y_metadata=Y_metadata_i)
|
|
||||||
fn = fn1
|
|
||||||
return fn
|
|
||||||
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin,fmax=fmax)
|
|
||||||
return dF_dtheta_i
|
|
||||||
|
|
||||||
dF_dtheta_i = np.dot(dlogp_dtheta, gh_w)/np.sqrt(np.pi)
|
|
||||||
return dF_dtheta_i
|
return dF_dtheta_i
|
||||||
|
|
||||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue