Merge pull request #542 from SheffieldML/EP_refactor

Merge in the EP refactoring
This commit is contained in:
Zhenwen Dai 2017-09-06 10:08:08 +01:00 committed by GitHub
commit 0fb273a41c
9 changed files with 721 additions and 202 deletions

View file

@ -6,9 +6,111 @@ from paramz import ObsAr
from . import ExactGaussianInference, VarDTC
from ...util import diag
from .posterior import PosteriorEP as Posterior
from ...likelihoods import Gaussian
log_2_pi = np.log(2*np.pi)
#Four wrapper classes to help modularisation of different EP versions
class marginalMoments(object):
    """Per-data-point tilted-distribution moments collected during EP:
    normalising constant Z_hat, mean mu_hat and variance sigma2_hat."""
    def __init__(self, num_data):
        # One uninitialised float64 buffer per tracked moment.
        for attr in ('Z_hat', 'mu_hat', 'sigma2_hat'):
            setattr(self, attr, np.empty(num_data, dtype=np.float64))
class cavityParams(object):
    """Cavity (leave-one-out) distributions in natural parameters:
    precision tau and precision-times-mean v, one entry per data point."""
    def __init__(self, num_data):
        self.tau = np.empty(num_data, dtype=np.float64)
        self.v = np.empty(num_data, dtype=np.float64)

    def _update_i(self, eta, ga_approx, post_params, i):
        """Form cavity i by removing a fraction eta of site i from the
        posterior marginal (natural-parameter subtraction)."""
        sd = post_params.Sigma_diag[i]
        self.tau[i] = 1. / sd - eta * ga_approx.tau[i]
        self.v[i] = post_params.mu[i] / sd - eta * ga_approx.v[i]
class gaussianApproximation(object):
    """Gaussian site approximations in natural parameters (precision tau,
    precision-times-mean v), one entry per data point."""
    def __init__(self, v, tau):
        self.tau = tau
        self.v = v

    def _update_i(self, eta, delta, post_params, marg_moments, i):
        """Damped (delta) fractional (eta) update of site i.

        Returns the (delta_tau, delta_v) actually applied, so the caller can
        propagate the change into the posterior with a rank-1 update.
        """
        damping = delta / eta
        delta_tau = damping * (1. / marg_moments.sigma2_hat[i]
                               - 1. / post_params.Sigma_diag[i])
        delta_v = damping * (marg_moments.mu_hat[i] / marg_moments.sigma2_hat[i]
                             - post_params.mu[i] / post_params.Sigma_diag[i])
        previous_tau = self.tau[i]
        updated_tau = previous_tau + delta_tau
        # Clamp tau_tilde to stay positive: log-concave sites guarantee this in
        # exact arithmetic, but rounding can push it negative, which would make
        # the marginal-likelihood update numerically unstable.
        tiny = np.finfo(float).eps
        if updated_tau < tiny:
            updated_tau = tiny
            delta_tau = updated_tau - previous_tau
        self.tau[i] = updated_tau
        self.v[i] += delta_v
        return (delta_tau, delta_v)
class posteriorParamsBase(object):
    """Common interface for EP posterior parameterisations.

    Stores the posterior mean and marginal variances; the rank-1 update and
    full recompute hooks are no-ops here and are overridden by subclasses.
    """
    def __init__(self, mu, Sigma_diag):
        self.mu = mu
        self.Sigma_diag = Sigma_diag

    def _update_rank1(self, *args):
        """No-op placeholder; subclasses apply a rank-1 covariance update."""

    def _recompute(self, *args):
        """No-op placeholder; subclasses rebuild the posterior from scratch."""
class posteriorParams(posteriorParamsBase):
    """Full-covariance posterior q(f) = N(mu, Sigma) for vanilla EP.

    Holds the dense covariance `Sigma` and, optionally, the Cholesky factor
    `L` of B = I + S^(1/2) K S^(1/2) produced by :meth:`_recompute`.
    """
    def __init__(self, mu, Sigma, L=None):
        self.Sigma = Sigma
        self.L = L
        # NOTE(review): np.diag of a 2-D array returns a view in recent NumPy,
        # so Sigma_diag may track in-place updates of Sigma made by
        # _update_rank1 — confirm this coupling is intended.
        Sigma_diag = np.diag(self.Sigma)
        super(posteriorParams, self).__init__(mu, Sigma_diag)
    def _update_rank1(self, delta_tau, ga_approx, i):
        # Rank-1 downdate of Sigma after site i's precision changed by
        # delta_tau, applied in place via BLAS dsyr (DSYR mutates self.Sigma).
        ci = delta_tau/(1.+ delta_tau*self.Sigma_diag[i])
        DSYR(self.Sigma, self.Sigma[:,i].copy(), -ci)
        # Refresh the mean from the updated covariance and the site parameters.
        self.mu = np.dot(self.Sigma, ga_approx.v)
    @staticmethod
    def _recompute(K, ga_approx):
        """Rebuild (mu, Sigma, L) from scratch via a numerically stable
        Cholesky of B = I + S^(1/2) K S^(1/2), S = diag(tau_tilde)."""
        num_data = len(ga_approx.tau)
        tau_tilde_root = np.sqrt(ga_approx.tau)
        Sroot_tilde_K = tau_tilde_root[:,None] * K
        B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
        L = jitchol(B)
        V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
        Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1)
        mu = np.dot(Sigma,ga_approx.v)
        return posteriorParams(mu=mu, Sigma=Sigma, L=L)
class posteriorParamsDTC(posteriorParamsBase):
    """Posterior parameters for EPDTC (sparse EP with inducing points).

    Only the mean and the marginal variances (Sigma diagonal) are stored;
    the covariance is represented implicitly through LLT = Kmm + Kmn S Knm,
    which the caller keeps alongside this object.
    """
    def __init__(self, mu, Sigma_diag):
        super(posteriorParamsDTC, self).__init__(mu, Sigma_diag)
    def _update_rank1(self, LLT, Kmn, delta_v, delta_tau, i):
        # Rank-1 update of LLT after site i changed; DSYR mutates LLT in place,
        # then the marginal variances and mean are refreshed from it.
        #DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
        DSYR(LLT,Kmn[:,i].copy(),delta_tau)
        L = jitchol(LLT)
        V,info = dtrtrs(L,Kmn,lower=1)
        # Floor at machine epsilon to keep the variances strictly positive.
        self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
        si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
        self.mu += (delta_v-delta_tau*self.mu[i])*si
        #mu = np.dot(Sigma, v_tilde)
    @staticmethod
    def _recompute(LLT0, Kmn, ga_approx):
        """Rebuild the posterior from scratch; returns (params, LLT) where
        LLT = Kmm + Kmn diag(tau_tilde) Knm."""
        LLT = LLT0 + np.dot(Kmn*ga_approx.tau[None,:],Kmn.T)
        L = jitchol(LLT)
        V, _ = dtrtrs(L,Kmn,lower=1)
        #Sigma_diag = np.sum(V*V,-2)
        #Knmv_tilde = np.dot(Kmn,v_tilde)
        #mu = np.dot(V2.T,Knmv_tilde)
        Sigma = np.dot(V.T,V)
        mu = np.dot(Sigma, ga_approx.v)
        Sigma_diag = np.diag(Sigma).copy()
        return posteriorParamsDTC(mu, Sigma_diag), LLT
class EPBase(object):
def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False):
"""
@ -28,6 +130,7 @@ class EPBase(object):
:parallel_updates: boolean. If true, updates of the parameters of the sites in parallel
"""
super(EPBase, self).__init__()
self.always_reset = always_reset
self.epsilon, self.eta, self.delta, self.max_iters = epsilon, eta, delta, max_iters
self.ep_mode = ep_mode
@ -35,7 +138,7 @@ class EPBase(object):
self.reset()
def reset(self):
self.old_mutilde, self.old_vtilde = None, None
self.ga_approx_old = None
self._ep_approximation = None
def on_optimization_start(self):
@ -45,6 +148,11 @@ class EPBase(object):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def _stop_criteria(self, ga_approx):
tau_diff = np.mean(np.square(ga_approx.tau-self.ga_approx_old.tau))
v_diff = np.mean(np.square(ga_approx.v-self.ga_approx_old.v))
return ((tau_diff < self.epsilon) and (v_diff < self.epsilon))
def __setstate__(self, state):
super(EPBase, self).__setstate__(state[0])
self.epsilon, self.eta, self.delta = state[1]
@ -67,19 +175,18 @@ class EP(EPBase, ExactGaussianInference):
if self.ep_mode=="nested":
#Force EP at each step of the optimization
self._ep_approximation = None
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
elif self.ep_mode=="alternated":
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
else:
raise ValueError("ep_mode value not valid")
v_tilde = mu_tilde * tau_tilde
return self._inference(K, tau_tilde, v_tilde, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde.sum())
return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@ -90,41 +197,57 @@ class EP(EPBase, ExactGaussianInference):
# than ObsArrays
Y = Y.values.copy()
#Initial values - Marginal moments
Z_hat = np.empty(num_data,dtype=np.float64)
mu_hat = np.empty(num_data,dtype=np.float64)
sigma2_hat = np.empty(num_data,dtype=np.float64)
#Initial values - Marginal moments, cavity params, gaussian approximation params and posterior params
marg_moments = marginalMoments(num_data)
cav_params = cavityParams(num_data)
ga_approx, post_params = self._init_approximations(K, num_data)
tau_cav = np.empty(num_data,dtype=np.float64)
v_cav = np.empty(num_data,dtype=np.float64)
#Approximation
stop = False
iterations = 0
while not stop and (iterations < self.max_iters):
self._local_updates(num_data, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata)
#(re) compute Sigma and mu using full Cholesky decomposition
post_params = posteriorParams._recompute(K, ga_approx)
#monitor convergence
if iterations > 0:
stop = self._stop_criteria(ga_approx)
self.ga_approx_old = gaussianApproximation(ga_approx.v.copy(), ga_approx.tau.copy())
iterations += 1
# Z_tilde after removing the terms that can lead to infinite terms due to tau_tilde close to zero.
# These terms cancel with the corresponding terms in the marginal loglikelihood
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
return (post_params, ga_approx, cav_params, log_Z_tilde)
def _init_approximations(self, K, num_data):
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
if self.ga_approx_old is None:
v_tilde, tau_tilde = np.zeros((2, num_data))
ga_approx = gaussianApproximation(v_tilde, tau_tilde)
Sigma = K.copy()
diag.add(Sigma, 1e-7)
mu = np.zeros(num_data)
post_params = posteriorParams(mu, Sigma)
else:
assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
mu, Sigma, _ = self._ep_compute_posterior(K, tau_tilde, v_tilde)
diag.add(Sigma, 1e-7)
assert self.ga_approx_old.v.size == num_data, "data size mis-match: did you change the data? try resetting!"
ga_approx = gaussianApproximation(self.ga_approx_old.v, self.ga_approx_old.tau)
post_params = posteriorParams._recompute(K, ga_approx)
diag.add(post_params.Sigma, 1e-7)
# TODO: Check the log-marginal under both conditions and choose the best one
return (ga_approx, post_params)
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
tau_tilde_old = np.nan
v_tilde_old = np.nan
iterations = 0
while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
update_order = np.random.permutation(num_data)
def _local_updates(self, num_data, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata, update_order=None):
if update_order is None:
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav[i] = 1./Sigma[i,i] - self.eta*tau_tilde[i]
v_cav[i] = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
cav_params._update_i(self.eta, ga_approx, post_params, i)
if Y_metadata is not None:
# Pick out the relevant metadata for Yi
Y_metadata_i = {}
@ -133,91 +256,45 @@ class EP(EPBase, ExactGaussianInference):
else:
Y_metadata_i = None
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
tau_tilde_prev = tau_tilde[i]
tau_tilde[i] += delta_tau
marg_moments.Z_hat[i], marg_moments.mu_hat[i], marg_moments.sigma2_hat[i] = likelihood.moments_match_ep(Y[i], cav_params.tau[i], cav_params.v[i], Y_metadata_i=Y_metadata_i)
# Enforce positivity of tau_tilde. Even though this is guaranteed for logconcave sites, it is still possible
# to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
# update the marginal likelihood without running into instability issues.
if tau_tilde[i] < np.finfo(float).eps:
tau_tilde[i] = np.finfo(float).eps
delta_tau = tau_tilde[i] - tau_tilde_prev
v_tilde[i] += delta_v
#Site parameters update
delta_tau, delta_v = ga_approx._update_i(self.eta, self.delta, post_params, marg_moments, i)
if self.parallel_updates == False:
#Posterior distribution parameters update
ci = delta_tau/(1.+ delta_tau*Sigma[i,i])
DSYR(Sigma, Sigma[:,i].copy(), -ci)
mu = np.dot(Sigma, v_tilde)
post_params._update_rank1(delta_tau, ga_approx, i)
#(re) compute Sigma and mu using full Cholesky decomposition
mu, Sigma, _ = self._ep_compute_posterior(K, tau_tilde, v_tilde)
def _log_Z_tilde(self, marg_moments, ga_approx, cav_params):
return np.sum((np.log(marg_moments.Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(1+ga_approx.tau/cav_params.tau) - 0.5 * ((ga_approx.v)**2 * 1./(cav_params.tau + ga_approx.tau))
+ 0.5*(cav_params.v * ( ( (ga_approx.tau/cav_params.tau) * cav_params.v - 2.0 * ga_approx.v ) * 1./(cav_params.tau + ga_approx.tau)))))
#monitor convergence
if iterations > 0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
iterations += 1
mu_tilde = v_tilde/tau_tilde
mu_cav = v_cav/tau_cav
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
# Z_tilde after removing the terms that can lead to infinite terms due to tau_tilde close to zero.
# These terms cancel with the corresponding terms in the marginal loglikelihood
log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(1+tau_tilde/tau_cav)
- 0.5 * ((v_tilde)**2 * 1./(tau_cav + tau_tilde)) + 0.5*(v_cav * ( ( (tau_tilde/tau_cav) * v_cav - 2.0 * v_tilde ) * 1./(tau_cav + tau_tilde))))
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
self.old_mutilde = mu_tilde
self.old_vtilde = v_tilde
return mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde
def _ep_compute_posterior(self, K, tau_tilde, v_tilde):
num_data = len(tau_tilde)
tau_tilde_root = np.sqrt(tau_tilde)
Sroot_tilde_K = tau_tilde_root[:,None] * K
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B)
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1)
mu = np.dot(Sigma,v_tilde)
return (mu, Sigma, L)
def _ep_marginal(self, K, tau_tilde, v_tilde, Z_tilde):
mu, Sigma, L = self._ep_compute_posterior(K, tau_tilde, v_tilde)
def _ep_marginal(self, K, ga_approx, Z_tilde):
post_params = posteriorParams._recompute(K, ga_approx)
# Gaussian log marginal excluding terms that can go to infinity due to arbitrarily small tau_tilde.
# These terms cancel out with the terms excluded from Z_tilde
B_logdet = np.sum(2.0*np.log(np.diag(L)))
log_marginal = 0.5*(-len(tau_tilde) * log_2_pi - B_logdet + np.sum(v_tilde * np.dot(Sigma,v_tilde)))
B_logdet = np.sum(2.0*np.log(np.diag(post_params.L)))
log_marginal = 0.5*(-len(ga_approx.tau) * log_2_pi - B_logdet + np.sum(ga_approx.v * np.dot(post_params.Sigma,ga_approx.v)))
log_marginal += Z_tilde
return log_marginal, mu, Sigma, L
return log_marginal, post_params
def _inference(self, K, tau_tilde, v_tilde, likelihood, Z_tilde, Y_metadata=None):
log_marginal, mu, Sigma, L = self._ep_marginal(K, tau_tilde, v_tilde, Z_tilde)
def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
tau_tilde_root = np.sqrt(tau_tilde)
tau_tilde_root = np.sqrt(ga_approx.tau)
Sroot_tilde_K = tau_tilde_root[:,None] * K
aux_alpha , _ = dpotrs(L, np.dot(Sroot_tilde_K, v_tilde), lower=1)
alpha = (v_tilde - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) /mu^(/tilde)
LWi, _ = dtrtrs(L, np.diag(tau_tilde_root), lower=1)
aux_alpha , _ = dpotrs(post_params.L, np.dot(Sroot_tilde_K, ga_approx.v), lower=1)
alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) /mu^(/tilde)
LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
Wi = np.dot(LWi.T,LWi)
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
dL_dK = 0.5 * (tdot(alpha) - Wi)
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
@ -244,24 +321,25 @@ class EPDTC(EPBase, VarDTC):
if self.ep_mode=="nested":
#Force EP at each step of the optimization
self._ep_approximation = None
mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
elif self.ep_mode=="alternated":
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation
post_params, ga_approx, log_Z_tilde = self._ep_approximation
else:
raise ValueError("ep_mode value not valid")
return super(EPDTC, self).inference(kern, X, Z, likelihood, mu_tilde,
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
return super(EPDTC, self).inference(kern, X, Z, likelihood, ObsAr(mu_tilde[:,None]),
mean_function=mean_function,
Y_metadata=Y_metadata,
precision=tau_tilde,
precision=ga_approx.tau,
Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde.sum())
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde)
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
@ -272,119 +350,91 @@ class EPDTC(EPBase, VarDTC):
# than ObsArrays
Y = Y.values.copy()
#Initial values - Marginal moments
Z_hat = np.zeros(num_data,dtype=np.float64)
mu_hat = np.zeros(num_data,dtype=np.float64)
sigma2_hat = np.zeros(num_data,dtype=np.float64)
#Initial values - Marginal moments, cavity params, gaussian approximation params and posterior params
marg_moments = marginalMoments(num_data)
cav_params = cavityParams(num_data)
ga_approx, post_params, LLT0, LLT = self._init_approximations(Kmm, Kmn, num_data)
tau_cav = np.empty(num_data,dtype=np.float64)
v_cav = np.empty(num_data,dtype=np.float64)
#Approximation
stop = False
iterations = 0
while not stop and (iterations < self.max_iters):
self._local_updates(num_data, LLT0, LLT, Kmn, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata)
#(re) compute Sigma, Sigma_diag and mu using full Cholesky decomposition
post_params, LLT = posteriorParamsDTC._recompute(LLT0, Kmn, ga_approx)
post_params.Sigma_diag = np.maximum(post_params.Sigma_diag, np.finfo(float).eps)
#monitor convergence
if iterations > 0:
stop = self._stop_criteria(ga_approx)
self.ga_approx_old = gaussianApproximation(ga_approx.v.copy(), ga_approx.tau.copy())
iterations += 1
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
return post_params, ga_approx, log_Z_tilde
def _log_Z_tilde(self, marg_moments, ga_approx, cav_params):
mu_tilde = ga_approx.v/ga_approx.tau
mu_cav = cav_params.v/cav_params.tau
sigma2_sigma2tilde = 1./cav_params.tau + 1./ga_approx.tau
return np.sum((np.log(marg_moments.Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde)))
def _init_approximations(self, Kmm, Kmn, num_data):
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT0 = Kmm.copy()
Lm = jitchol(LLT0) #K_m = L_m L_m^\top
Vm,info = dtrtrs(Lm,Kmn,lower=1)
Vm,info = dtrtrs(Lm, Kmn,lower=1)
# Lmi = dtrtri(Lm)
# Kmmi = np.dot(Lmi.T,Lmi)
# KmmiKmn = np.dot(Kmmi,Kmn)
# Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
Qnn_diag = np.sum(Vm*Vm,-2) #diag(Knm Kmm^(-1) Kmn)
#diag.add(LLT0, 1e-8)
if self.old_mutilde is None:
if self.ga_approx_old is None:
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT = LLT0.copy() #Sigma = K.copy()
mu = np.zeros(num_data)
Sigma_diag = Qnn_diag.copy() + 1e-8
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
v_tilde, tau_tilde = np.zeros((2, num_data))
ga_approx = gaussianApproximation(v_tilde, tau_tilde)
post_params = posteriorParamsDTC(mu, Sigma_diag)
else:
assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
Sigma_diag += 1e-8
assert self.ga_approx_old.v.size == num_data, "data size mis-match: did you change the data? try resetting!"
ga_approx = gaussianApproximation(self.ga_approx_old.v, self.ga_approx_old.tau)
post_params, LLT = posteriorParamsDTC._recompute(LLT0, Kmn, ga_approx)
post_params.Sigma_diag += 1e-8
# TODO: Check the log-marginal under both conditions and choose the best one
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
tau_tilde_old = np.nan
v_tilde_old = np.nan
iterations = 0
while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
return (ga_approx, post_params, LLT0, LLT)
def _local_updates(self, num_data, LLT0, LLT, Kmn, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata, update_order=None):
if update_order is None:
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
if Y_metadata is not None:
# Pick out the relevant metadata for Yi
Y_metadata_i = {}
for key in Y_metadata.keys():
Y_metadata_i[key] = Y_metadata[key][i, :]
else:
Y_metadata_i = None
for i in update_order:
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde_prev = tau_tilde[i]
tau_tilde[i] += delta_tau
#Cavity distribution parameters
cav_params._update_i(self.eta, ga_approx, post_params, i)
# Enforce positivity of tau_tilde. Even though this is guaranteed for logconcave sites, it is still possible
# to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
# update the marginal likelihood without instability issues.
if tau_tilde[i] < np.finfo(float).eps:
tau_tilde[i] = np.finfo(float).eps
delta_tau = tau_tilde[i] - tau_tilde_prev
v_tilde[i] += delta_v
#Posterior distribution parameters update
if self.parallel_updates == False:
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
mu += (delta_v-delta_tau*mu[i])*si
#mu = np.dot(Sigma, v_tilde)
if Y_metadata is not None:
# Pick out the relevant metadata for Yi
Y_metadata_i = {}
for key in Y_metadata.keys():
Y_metadata_i[key] = Y_metadata[key][i, :]
else:
Y_metadata_i = None
#(re) compute Sigma, Sigma_diag and mu using full Cholesky decomposition
mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
Sigma_diag = np.maximum(Sigma_diag, np.finfo(float).eps)
#Marginal moments
marg_moments.Z_hat[i], marg_moments.mu_hat[i], marg_moments.sigma2_hat[i] = likelihood.moments_match_ep(Y[i], cav_params.tau[i], cav_params.v[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau, delta_v = ga_approx._update_i(self.eta, self.delta, post_params, marg_moments, i)
#monitor convergence
if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
iterations += 1
mu_tilde = v_tilde/tau_tilde
mu_cav = v_cav/tau_cav
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
self.old_mutilde = mu_tilde
self.old_vtilde = v_tilde
return mu, Sigma_diag, ObsAr(mu_tilde[:,None]), tau_tilde, log_Z_tilde
def _ep_compute_posterior(self, LLT0, Kmn, tau_tilde, v_tilde):
LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V, _ = dtrtrs(L,Kmn,lower=1)
#Sigma_diag = np.sum(V*V,-2)
#Knmv_tilde = np.dot(Kmn,v_tilde)
#mu = np.dot(V2.T,Knmv_tilde)
Sigma = np.dot(V.T,V)
mu = np.dot(Sigma,v_tilde)
Sigma_diag = np.diag(Sigma).copy()
return (mu, Sigma_diag, LLT)
#Posterior distribution parameters update
if self.parallel_updates == False:
post_params._update_rank1(LLT, Kmn, delta_v, delta_tau, i)

View file

@ -66,7 +66,14 @@ class Binomial(Likelihood):
np.testing.assert_array_equal(N.shape, y.shape)
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = y[y>0]*np.log(inv_link_f[y>0])
t2[Ny>0] = Ny[Ny>0]*np.log(1.-inv_link_f[Ny>0])
return nchoosey + t1 + t2
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
@ -86,7 +93,13 @@ class Binomial(Likelihood):
N = Y_metadata['trials']
np.testing.assert_array_equal(N.shape, y.shape)
return y/inv_link_f - (N-y)/(1.-inv_link_f)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = y[y>0]/inv_link_f[y>0]
t2[Ny>0] = (Ny[Ny>0])/(1.-inv_link_f[Ny>0])
return t1 - t2
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
@ -111,7 +124,13 @@ class Binomial(Likelihood):
"""
N = Y_metadata['trials']
np.testing.assert_array_equal(N.shape, y.shape)
return -y/np.square(inv_link_f) - (N-y)/np.square(1.-inv_link_f)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = -y[y>0]/np.square(inv_link_f[y>0])
t2[Ny>0] = -(Ny[Ny>0])/np.square(1.-inv_link_f[Ny>0])
return t1+t2
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
"""
@ -135,8 +154,14 @@ class Binomial(Likelihood):
N = Y_metadata['trials']
np.testing.assert_array_equal(N.shape, y.shape)
inv_link_f2 = np.square(inv_link_f)
return 2*y/inv_link_f**3 - 2*(N-y)/(1.-inv_link_f)**3
#inv_link_f2 = np.square(inv_link_f) #TODO Remove. Why is this here?
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = 2*y[y>0]/inv_link_f[y>0]**3
t2[Ny>0] = - 2*(Ny[Ny>0])/(1.-inv_link_f[Ny>0])**3
return t1 + t2
def samples(self, gp, Y_metadata=None, **kw):
"""

View file

@ -57,7 +57,10 @@ class Gaussian(Likelihood):
def update_gradients(self, grad):
    # Route the incoming gradient straight to the noise-variance parameter.
    self.variance.gradient = grad
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
    # For the Gaussian likelihood the EP gradient w.r.t. the noise variance
    # reduces to the exact-inference gradient; the cavity parameters and
    # quadrature settings are accepted only for interface compatibility.
    return self.exact_inference_gradients(dL_dKdiag)
def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
    """Gradient of the log marginal w.r.t. the noise variance: the sum of
    the per-point diagonal kernel gradients."""
    return np.sum(dL_dKdiag)
def _preprocess_values(self, Y):

View file

@ -6,8 +6,12 @@ from scipy import stats,special
import scipy as sp
from . import link_functions
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
from ..util.quad_integrate import quadgk_int
from scipy.integrate import quad
from functools import partial
import warnings
from ..core.parameterization import Parameterized
class Likelihood(Parameterized):
@ -223,6 +227,91 @@ class Likelihood(Parameterized):
self.__gh_points = np.polynomial.hermite.hermgauss(T)
return self.__gh_points
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
    """Gradients of the EP marginal likelihood w.r.t. the likelihood parameters.

    Integrates dlogpdf_dtheta against each point's tilted distribution
    (cavity Gaussian times the exact likelihood term), then sums over points.

    :param Y: observed outputs, one row per data point
    :param cav_tau: cavity precisions
    :param cav_v: cavity precision-means (v = tau * mu)
    :param dL_dKdiag: unused here; kept for interface compatibility with
        subclasses that override this method (e.g. Gaussian).
    :param quad_mode: 'gk' (Gauss-Kronrod) or 'gh' (Gauss-Hermite)
    :param boost_grad: scalar multiplier applied to the summed gradient
    :returns: gradient array, one entry per likelihood parameter
    """
    if self.size == 0:
        # No likelihood parameters to differentiate.
        return np.zeros(self.num_params)
    tau, v, Y = cav_tau.flatten(), cav_v.flatten(), Y.flatten()
    mu = v / tau          # cavity means
    sigma2 = 1. / tau     # cavity variances
    # Per-point metadata dictionaries (empty dicts when no metadata given).
    Y_metadata_list = []
    for index in range(len(Y)):
        Y_metadata_i = {}
        if Y_metadata is not None:
            for key in Y_metadata.keys():
                Y_metadata_i[key] = Y_metadata[key][index, :]
        Y_metadata_list.append(Y_metadata_i)
    if quad_mode == 'gk':
        f = partial(self.integrate_gk)
        quads = zip(*map(f, Y, mu, np.sqrt(sigma2), Y_metadata_list))
        quads = np.vstack(quads)
        # NOTE(review): a discarded quads.reshape(...) call was removed here —
        # ndarray.reshape is not in-place, so it had no effect.
    elif quad_mode == 'gh':
        f = partial(self.integrate_gh)
        quads = zip(*map(f, Y, mu, np.sqrt(sigma2)))
        quads = np.hstack(quads)
        quads = quads.T
    else:
        raise Exception("no other quadrature mode available")
    # Sum the per-point contributions; NaNs from failed quadratures are skipped.
    dL_dtheta = boost_grad * np.nansum(quads, axis=1)
    return dL_dtheta
def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
    """Gauss-Kronrod integration of dlogpdf_dtheta weighted by the tilted
    density p(y|f) N(f | mu, sigma^2), over the whole real line."""
    norm_const = np.sqrt(2. * np.pi) * sigma

    def integrand(f):
        # Unnormalised tilted weight at f times the parameter gradient.
        weight = (np.exp(self.logpdf_link(f, Y, Y_metadata_i))
                  * np.exp(-0.5 * np.square((f - mu) / sigma)) / norm_const)
        return weight * self.dlogpdf_dtheta(f, Y, Y_metadata_i)

    return quadgk_int(integrand, fmin=-np.inf, fmax=np.inf)
def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
    """Gauss-Hermite estimate of E[dlogpdf_dtheta] under a Gaussian over f_i.

    Computes the site derivatives E_f{d log p(y_i|f_i)/d a} (a a likelihood
    parameter) needed for the gradients of the EP marginal-likelihood
    estimate Z_EP, using the approximate (Gaussian) marginal posterior.

    :param Y: single observed output y_i
    :param mu: mean of the Gaussian over f_i
    :param sigma: standard deviation of that Gaussian
    :param gh_points: optional (locations, weights) tuple; defaults to the
        cached 32-point rule from self._gh_points
    :returns: one gradient value per likelihood parameter
    """
    if gh_points is None:
        gh_x, gh_w = self._gh_points(32)
    else:
        gh_x, gh_w = gh_points
    # Change of variables f = sqrt(2)*sigma*x + mu maps the Hermite grid onto
    # the Gaussian; Y broadcasts against the row vector X of grid points.
    X = gh_x[None, :] * np.sqrt(2.) * sigma + mu
    a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
    a = a.repeat(self.num_params, 0)
    b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
    old_shape = b.shape
    # Elementwise product, vectorised (the original built this with a
    # Python-level list comprehension over the flattened arrays; slicing to
    # b.size preserves its zip-truncation semantics).
    fn = (a.ravel()[:b.size] * b.ravel()).reshape(old_shape)
    # The 1/sqrt(pi) factor normalises the Gauss-Hermite weights.
    dF_dtheta_i = np.dot(fn, gh_w) / np.sqrt(np.pi)
    return dF_dtheta_i
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
"""
Use Gauss-Hermite Quadrature to compute

View file

@ -0,0 +1,147 @@
import numpy as np
import unittest
import GPy
from GPy.models import GradientChecker
fixed_seed = 10
from nose.tools import with_setup, nottest
# this file will contain some high level tests, this is not unit testing, but will give us a higher level estimate
# if things are going well under the hood.
class TestObservationModels(unittest.TestCase):
def setUp(self):
    """Build shared fixtures: 2-D inputs X, noisy sine targets, binary labels,
    censoring metadata and an RBF+White kernel."""
    np.random.seed(fixed_seed)  # deterministic fixtures across tests
    self.N = 100
    self.D = 2
    self.X = np.random.rand(self.N, self.D)
    self.real_noise_std = 0.05
    noise = np.random.randn(*self.X[:, 0].shape) * self.real_noise_std
    self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None]
    self.num_points = self.X.shape[0]
    self.f = np.random.rand(self.N, 1)
    # np.int was removed in NumPy 1.24; the builtin int is the documented replacement.
    self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=int)[:, None]
    # self.binary_Y[self.binary_Y == 0.0] = -1.0
    self.positive_Y = np.exp(self.Y.copy())
    self.Y_noisy = self.Y.copy()
    self.Y_verynoisy = self.Y.copy()
    self.Y_noisy[75] += 1.3  # inject a single outlier for robust-likelihood tests
    self.init_var = 0.15
    self.deg_free = 4.
    censored = np.zeros_like(self.Y)
    # NOTE(review): replace=True can draw the same index twice, so fewer than
    # N/2 distinct points may end up censored — confirm this is intended.
    random_inds = np.random.choice(self.N, int(self.N / 2), replace=True)
    censored[random_inds] = 1
    self.Y_metadata = dict()
    self.Y_metadata['censored'] = censored
    self.kernel1 = GPy.kern.RBF(self.X.shape[1]) + GPy.kern.White(self.X.shape[1])
def tearDown(self):
    """Drop references to the fixtures so they can be garbage collected."""
    for name in ('Y', 'X', 'binary_Y', 'positive_Y', 'kernel1'):
        setattr(self, name, None)
@with_setup(setUp, tearDown)
def testEPClassification(self):
    """Compare EP (alternated / nested / fractional) against Laplace on
    Bernoulli-GP classification: the marginal likelihoods should be close."""
    bernoulli = GPy.likelihoods.Bernoulli()
    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    ep_inf_alt = GPy.inference.latent_function_inference.EP(ep_mode='alternated')
    ep_inf_nested = GPy.inference.latent_function_inference.EP(ep_mode='nested')
    # eta < 1 gives fractional (power) EP updates.
    ep_inf_fractional = GPy.inference.latent_function_inference.EP(ep_mode='nested', eta=0.9)
    m1 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=laplace_inf)
    m1.randomize()
    m2 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_alt)
    m2.randomize()
    m3 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_nested)
    m3.randomize()
    #
    m4 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_fractional)
    m4.randomize()
    optimizer = 'bfgs'
    #do gradcheck here ...
    # self.assertTrue(m1.checkgrad())
    # self.assertTrue(m2.checkgrad())
    # self.assertTrue(m3.checkgrad())
    # self.assertTrue(m4.checkgrad())
    m1.optimize(optimizer=optimizer, max_iters=300)
    m2.optimize(optimizer=optimizer, max_iters=300)
    m3.optimize(optimizer=optimizer, max_iters=300)
    m4.optimize(optimizer=optimizer, max_iters=300)
    # taking laplace predictions as the ground truth
    probs_mean_lap, probs_var_lap = m1.predict(self.X)
    probs_mean_ep_alt, probs_var_ep_alt = m2.predict(self.X)
    probs_mean_ep_nested, probs_var_ep_nested = m3.predict(self.X)
    # for simple single dimension data , marginal likelihood for laplace and EP approximations should not be so far apart.
    self.assertAlmostEqual(m1.log_likelihood(), m2.log_likelihood(), delta=1)
    self.assertAlmostEqual(m1.log_likelihood(), m3.log_likelihood(), delta=1)
    self.assertAlmostEqual(m1.log_likelihood(), m4.log_likelihood(), delta=5)
    # Confusion matrices are computed only as a smoke check on the predictions.
    GPy.util.classification.conf_matrix(probs_mean_lap, self.binary_Y)
    GPy.util.classification.conf_matrix(probs_mean_ep_alt, self.binary_Y)
    GPy.util.classification.conf_matrix(probs_mean_ep_nested, self.binary_Y)
@nottest
def rmse(self, Y, Ystar):
    """Root-mean-square error between targets Y and predictions Ystar."""
    residuals = Y - Ystar
    return np.sqrt(np.square(residuals).mean())
@with_setup(setUp, tearDown)
def test_EP_with_StudentT(self):
    """Compare EP (alternated) against Laplace under a Student-t likelihood.

    The heavy-tailed Student-t likelihood should make both approximations
    robust to the outlier injected into self.Y_noisy: their log marginal
    likelihoods should be within a loose tolerance of each other and the
    EP predictive RMSE should not be much worse than the Laplace one.
    """
    studentT = GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.init_var)
    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    ep_inf_alt = GPy.inference.latent_function_inference.EP(ep_mode='alternated')
    m1 = GPy.core.GP(self.X.copy(), self.Y_noisy.copy(), kernel=self.kernel1.copy(),
                     likelihood=studentT.copy(), inference_method=laplace_inf)
    # Pin the white-noise kernel variance so the Student-t likelihood has to
    # absorb the observation noise itself.
    m1['.*white'].constrain_fixed(1e-5)
    m1.randomize()
    m2 = GPy.core.GP(self.X.copy(), self.Y_noisy.copy(), kernel=self.kernel1.copy(),
                     likelihood=studentT.copy(), inference_method=ep_inf_alt)
    m2['.*white'].constrain_fixed(1e-5)
    m2.randomize()
    optimizer = 'bfgs'
    m1.optimize(optimizer=optimizer, max_iters=400)
    m2.optimize(optimizer=optimizer, max_iters=400)
    self.assertAlmostEqual(m1.log_likelihood(), m2.log_likelihood(), delta=200)
    preds_mean_lap, preds_var_lap = m1.predict(self.X)
    preds_mean_alt, preds_var_alt = m2.predict(self.X)
    # RMSE is measured against the noise-free self.Y, not the corrupted data.
    rmse_lap = self.rmse(preds_mean_lap, self.Y)
    rmse_alt = self.rmse(preds_mean_alt, self.Y)
    # EP is allowed to be somewhat worse than Laplace, but not wildly so.
    if rmse_alt > rmse_lap:
        self.assertAlmostEqual(rmse_lap, rmse_alt, delta=1.5)
# Allow running this test module directly, outside of a test runner.
if __name__ == "__main__":
    unittest.main()

View file

@ -61,6 +61,18 @@ class InferenceGPEP(unittest.TestCase):
Y = lik.samples(f).reshape(-1,1)
return X, Y
def genNoisyData(self):
    """Generate a noisy 1-D sine regression set plus an outlier-corrupted copy.

    Seeds NumPy's RNG for reproducibility, stores the noise level in
    self.real_std and a random latent sample in self.f, and returns
    (X, Y, Y_extra_noisy) where Y_extra_noisy is Y with +4 added at row 50.
    """
    np.random.seed(1)
    inputs = np.random.rand(100, 1)
    self.real_std = 0.1
    jitter = self.real_std * np.random.randn(*inputs[:, 0].shape)
    targets = (np.sin(inputs[:, 0] * 2 * np.pi) + jitter)[:, None]
    self.f = np.random.rand(inputs.shape[0], 1)
    corrupted = targets.copy()
    corrupted[50] += 4.
    return inputs, targets, corrupted
def test_inference_EP(self):
from paramz import ObsAr
X, Y = self.genData()
@ -73,11 +85,45 @@ class InferenceGPEP(unittest.TestCase):
inference_method=inf,
likelihood=lik)
K = self.model.kern.K(X)
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
v_tilde = mu_tilde * tau_tilde
p, m, d = self.model.inference_method._inference(K, tau_tilde, v_tilde, lik, Y_metadata=None, Z_tilde=log_Z_tilde.sum())
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X,lik ,mu_tilde[:,None], mean_function=None, variance=1./tau_tilde, K=K, Z_tilde=log_Z_tilde.sum() + np.sum(- 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)))
post_params, ga_approx, cav_params, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
p, m, d = self.model.inference_method._inference(Y, K, ga_approx, cav_params, lik, Y_metadata=None, Z_tilde=log_Z_tilde)
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X,lik ,mu_tilde[:,None], mean_function=None, variance=1./ga_approx.tau, K=K, Z_tilde=log_Z_tilde + np.sum(- 0.5*np.log(ga_approx.tau) + 0.5*(ga_approx.v*ga_approx.v*1./ga_approx.tau)))
assert (np.sum(np.array([m - m0,
np.sum(d['dL_dK'] - d0['dL_dK']),
np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
np.sum(d['dL_dm'] - d0['dL_dm']),
np.sum(p._woodbury_vector - p0._woodbury_vector),
np.sum(p.woodbury_inv - p0.woodbury_inv)])) < 1e6)
# NOTE: adding a test like above for parameterized likelihood- the above test is
# only for probit likelihood which does not have any tunable hyperparameter which is why
# the term in dictionary of gradients: dL_dthetaL will always be zero. So here we repeat tests for
# student-t likelihood and heteroscedastic Gaussian noise case. This test simply checks if the posterior
# and gradients of log marginal are roughly the same for inference through EP and exact gaussian inference using
# the gaussian approximation for the individual likelihood site terms. For probit likelihood, it is possible to
# calculate moments analytically, but for other likelihoods, we will need to use numerical quadrature techniques,
# and it is possible that errors might creep in because of the quadrature implementation.
def test_inference_EP_non_classification(self):
from paramz import ObsAr
X, Y, Y_extra_noisy = self.genNoisyData()
deg_freedom = 5.
init_noise_var = 0.08
lik_studentT = GPy.likelihoods.StudentT(deg_free=deg_freedom, sigma2=init_noise_var)
# like_gaussian_noise = GPy.likelihoods.MixedNoise()
k = GPy.kern.RBF(1, variance=2., lengthscale=1.1)
ep_inf_alt = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=4, delta=0.5)
# ep_inf_nested = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode='nested', max_iters=100, delta=0.5)
m = GPy.core.GP(X=X,Y=Y_extra_noisy,kernel=k,likelihood=lik_studentT,inference_method=ep_inf_alt)
K = m.kern.K(X)
post_params, ga_approx, cav_params, log_Z_tilde = m.inference_method.expectation_propagation(K, ObsAr(Y_extra_noisy), lik_studentT, None)
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
p, m, d = m.inference_method._inference(Y_extra_noisy, K, ga_approx, cav_params, lik_studentT, Y_metadata=None, Z_tilde=log_Z_tilde)
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, ep_inf_alt).inference(k, X,lik_studentT ,mu_tilde[:,None], mean_function=None, variance=1./ga_approx.tau, K=K, Z_tilde=log_Z_tilde + np.sum(- 0.5*np.log(ga_approx.tau) + 0.5*(ga_approx.v*ga_approx.v*1./ga_approx.tau)))
assert (np.sum(np.array([m - m0,
np.sum(d['dL_dK'] - d0['dL_dK']),

View file

@ -0,0 +1,39 @@
from __future__ import print_function, division

import unittest
import warnings

import numpy as np

import GPy

from ..util.quad_integrate import quadgk_int, quadvgk
class QuadTests(unittest.TestCase):
    """
    Test file for checking the implementation of Gauss-Kronrod quadrature.

    Each test integrates a function with a known closed-form value and checks
    that the quadrature result matches it, bounding the numerical error of
    the implementation.

    NOTE(review): the base class was ``np.testing.TestCase``, which has been
    removed from modern NumPy (it was only an alias for unittest.TestCase);
    subclassing ``unittest.TestCase`` directly is equivalent and portable.
    """

    def setUp(self):
        pass

    def test_infinite_quad(self):
        # The zeroth moment of exp(-x^2/2) over the real line is sqrt(2*pi);
        # f returns the first three moments stacked row-wise.
        def f(x):
            return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])
        quad_int_val = quadgk_int(f)
        real_val = np.sqrt(np.pi * 2)
        np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)

    def test_finite_quad(self):
        # Integral of x^2 from 1 to 2 is 7/3.
        def f2(x):
            return x ** 2
        quad_int_val = quadvgk(f2, 1., 2.)
        real_val = 7 / 3.
        np.testing.assert_almost_equal(real_val, quad_int_val, decimal=5)
if __name__ == '__main__':
    # Manual smoke test: the zeroth Gaussian moment over the real line must
    # equal sqrt(2*pi) to seven decimal places.
    def f(x):
        return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])

    quad_int_val = quadgk_int(f)
    real_val = np.sqrt(np.pi * 2)
    np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)
    # Print the value already computed instead of integrating a second time.
    print(quad_int_val)

View file

@ -17,3 +17,4 @@ from . import multioutput
from . import parallel
from . import functions
from . import cluster_with_offset
from . import quad_integrate

119
GPy/util/quad_integrate.py Normal file
View file

@ -0,0 +1,119 @@
"""
The file for utilities related to integration by quadrature methods
- will contain implementation for gaussian-kronrod integration.
"""
import numpy as np
def getSubs(Subs, XK, NK=1):
    """Split every subinterval at the mapped quadrature nodes.

    ``Subs`` is a (2, n) array whose rows hold the lower and upper endpoints
    of n subintervals. Each one is mapped onto the nodes ``XK`` and replaced
    by the consecutive pieces between those node positions, so the returned
    array has one column per new, finer subinterval.
    """
    half_width = (Subs[1, :] - Subs[0, :]) / 2
    centre = (Subs[1, :] + Subs[0, :]) / 2
    # Node positions of XK rescaled into every subinterval (one column each).
    nodes = XK[:, None] * half_width + np.ones((NK, 1)) * centre
    # Pair each lower edge with the next node up: lower endpoints come from
    # the old lower bounds followed by the nodes, upper endpoints from the
    # nodes followed by the old upper bounds.
    lower = np.vstack((Subs[0, :], nodes)).flatten()
    upper = np.vstack((nodes, Subs[1, :])).flatten()
    return np.vstack((lower, upper))
def quadvgk(feval, fmin, fmax, tol1=1e-5, tol2=1e-5):
    """Adaptive vectorised Gauss-Kronrod quadrature over [fmin, fmax].

    NumPy port of http://se.mathworks.com/matlabcentral/fileexchange/18801-quadvgk
    using the 15-point Kronrod rule with its embedded 7-point Gauss rule (as
    used in GPstuff). The quadrature is vectorised: ``feval`` may return
    several stacked integrands evaluated at all supplied points, so multiple
    one-dimensional integrals are computed in a single adaptive pass.

    :param feval: vectorised callable; for input of shape (m,) it returns an
        array with m values per integrand (NF*m values in total).
    :param fmin: lower integration limit (finite).
    :param fmax: upper integration limit (finite).
    :param tol1: absolute tolerance on the Kronrod-vs-Gauss error estimate.
        NOTE(review): previously this parameter was silently overwritten by a
        hard-coded 1e-4 inside the body; it is now honoured.
    :param tol2: subintervals narrower than this are accepted unconditionally
        (also previously overwritten by a hard-coded 1e-4).
    :return: array of shape (NF,), one integral per stacked integrand.
    """
    # 15-point Kronrod abscissae and weights.
    XK = np.array([-0.991455371120813, -0.949107912342759, -0.864864423359769, -0.741531185599394,
                   -0.586087235467691, -0.405845151377397, -0.207784955007898, 0.,
                   0.207784955007898, 0.405845151377397, 0.586087235467691,
                   0.741531185599394, 0.864864423359769, 0.949107912342759, 0.991455371120813])
    WK = np.array([0.022935322010529, 0.063092092629979, 0.104790010322250, 0.140653259715525,
                   0.169004726639267, 0.190350578064785, 0.204432940075298, 0.209482141084728,
                   0.204432940075298, 0.190350578064785, 0.169004726639267,
                   0.140653259715525, 0.104790010322250, 0.063092092629979, 0.022935322010529])
    # 7-point Gaussian weightings (applied at every other Kronrod node).
    WG = np.array([0.129484966168870, 0.279705391489277, 0.381830050505119, 0.417959183673469,
                   0.381830050505119, 0.279705391489277, 0.129484966168870])
    NK = WK.size
    Subs = np.array([[fmin], [fmax]])
    # Number of integrands stacked in feval's output.
    NF = feval(np.zeros(1)).size
    Q = np.zeros(NF)
    while Subs.size > 0:
        # Refine every remaining subinterval, then evaluate all integrands at
        # the Kronrod nodes of every subinterval in one vectorised call.
        Subs = getSubs(Subs, XK)
        M = (Subs[1, :] - Subs[0, :]) / 2
        C = (Subs[1, :] + Subs[0, :]) / 2
        NM = M.size
        x = (XK[:, None] * M + C).flatten()
        FV = feval(x)
        # Kronrod (Q1) and embedded Gauss (Q2) estimates per subinterval; the
        # Gauss rule reuses the odd-indexed Kronrod nodes.
        Q1 = np.dot(FV.reshape(NF, NK, NM).swapaxes(2, 1), WK) * M
        Q2 = np.dot(FV.reshape(NF, NK, NM).swapaxes(2, 1)[:, :, 1::2], WG) * M
        # Accept subintervals whose error estimate is below tol1 or whose
        # width has dropped below tol2; keep refining the rest.
        ind = np.nonzero(np.logical_or(np.max(np.abs(Q1 - Q2), 0) < tol1,
                                       (Subs[1, :] - Subs[0, :]) < tol2))[0]
        Q = Q + np.sum(Q1[:, ind], axis=1)
        Subs = np.delete(Subs, ind, axis=1)
    return Q
def quadgk_int(f, fmin=-np.inf, fmax=np.inf, difftol=1e-4):
    """Integrate the vectorised function ``f`` from ``fmin`` to ``fmax``.

    For the doubly-infinite range the substitution x = r / (1 - r**2) maps
    r in (-1, 1) onto x in (-inf, inf), so the finite-interval Gauss-Kronrod
    rule ``quadvgk`` can be applied; the Jacobian
    dx/dr = (1 + r**2) / (1 - r**2)**2 reweights the integrand. Finite limits
    are forwarded to ``quadvgk`` directly with no change of variables.

    NOTE(review): previously ``fmin``/``fmax`` were silently ignored (the
    integral was always taken over the whole real line) and ``difftol`` was
    overwritten by a hard-coded 1e-4. Finite limits are now handled, the
    tolerance argument is honoured, and its default is 1e-4 to match the
    previously effective value.

    :param f: vectorised integrand; may return several stacked functions.
    :param fmin: lower integration limit (default -inf).
    :param fmax: upper integration limit (default +inf).
    :param difftol: tolerance forwarded to quadvgk.
    :return: array with one integral value per stacked integrand.
    :raises NotImplementedError: if exactly one limit is infinite.
    """
    if np.isfinite(fmin) and np.isfinite(fmax):
        # Finite interval: no substitution required.
        return quadvgk(f, fmin, fmax, difftol, difftol)
    if not (np.isneginf(fmin) and np.isposinf(fmax)):
        raise NotImplementedError("semi-infinite integration limits are not supported")

    def trans_func(r):
        # Substituted integrand including the Jacobian factor dx/dr.
        r2 = np.square(r)
        x = r / (1 - r2)
        dx_dr = (1 + r2) / (1 - r2) ** 2
        return f(x) * dx_dr

    return quadvgk(trans_func, -1., 1., difftol, difftol)