Changes in EP/EPDTC to fix numerical issues and increase the flexibility of the inference.

Changes to avoid numerical issues and improve the performance:
    - Keep value of the EP parameters between calls
    - Enforce positivity of tau_tilde
    - Stable computation of the EP moments for the Bernoulli likelihood
    - Compute marginal in the GP model without directly inverting tau_tilde

    Changes to improve the flexibility:
    - Add parameter for maximum number of iterations
    - Distinguish between alternated/nested mode
    - Distinguish between sequential/parallel updates in EP
This commit is contained in:
Moreno 2017-02-17 11:35:30 +00:00 committed by Akash Kumar Dhaka
parent 113f84d6a7
commit 63751de912
7 changed files with 522 additions and 117 deletions

View file

@ -1,15 +1,16 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import jitchol, DSYR, dtrtrs, dtrtri
from ...util.linalg import jitchol, DSYR, dtrtrs, dtrtri, pdinv, dpotrs, tdot, symmetrify
from paramz import ObsAr
from . import ExactGaussianInference, VarDTC
from ...util import diag
from .posterior import PosteriorEP as Posterior
log_2_pi = np.log(2*np.pi)
class EPBase(object):
def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False):
def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
@ -22,11 +23,15 @@ class EPBase(object):
:type delta: float64
:param always_reset: setting to always reset the approximation at the beginning of every inference call.
:type always_reset: boolean
:max_iters: int
:ep_mode: string. It can be "nested" (EP is run every time the Hyperparameters change) or "alternated" (It runs EP at the beginning and then optimize the Hyperparameters).
:parallel_updates: boolean. If true, updates of the parameters of the sites in parallel
"""
super(EPBase, self).__init__()
self.always_reset = always_reset
self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.epsilon, self.eta, self.delta, self.max_iters = epsilon, eta, delta, max_iters
self.ep_mode = ep_mode
self.parallel_updates = parallel_updates
self.reset()
def reset(self):
@ -59,25 +64,28 @@ class EP(EPBase, ExactGaussianInference):
if K is None:
K = kern.K(X)
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
if self.ep_mode=="nested":
#Force EP at each step of the optimization
self._ep_approximation = None
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
elif self.ep_mode=="alternated":
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
raise ValueError("ep_mode value not valid")
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, variance=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())
v_tilde = mu_tilde * tau_tilde
return self._inference(K, tau_tilde, v_tilde, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde.sum())
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape
assert data_dim == 1, "This EP methods only works for 1D outputs"
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
Sigma = K.copy()
diag.add(Sigma, 1e-7)
# Makes computing the sign quicker if we work with numpy arrays rather
# than ObsArrays
Y = Y.values.copy()
@ -91,12 +99,19 @@ class EP(EPBase, ExactGaussianInference):
v_cav = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
Sigma = K.copy()
diag.add(Sigma, 1e-7)
mu = np.zeros(num_data)
else:
assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
mu, Sigma, _ = self._ep_compute_posterior(K, tau_tilde, v_tilde)
diag.add(Sigma, 1e-7)
# TODO: Check the log-marginal under both conditions and choose the best one
#Approximation
tau_diff = self.epsilon + 1.
@ -104,7 +119,7 @@ class EP(EPBase, ExactGaussianInference):
tau_tilde_old = np.nan
v_tilde_old = np.nan
iterations = 0
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
@ -122,21 +137,25 @@ class EP(EPBase, ExactGaussianInference):
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
tau_tilde_prev = tau_tilde[i]
tau_tilde[i] += delta_tau
# Enforce positivity of tau_tilde. Even though this is guaranteed for logconcave sites, it is still possible
# to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
# update the marginal likelihood without instability issues.
if tau_tilde[i] < np.finfo(float).eps:
tau_tilde[i] = np.finfo(float).eps
delta_tau = tau_tilde[i] - tau_tilde_prev
v_tilde[i] += delta_v
#Posterior distribution parameters update
ci = delta_tau/(1.+ delta_tau*Sigma[i,i])
DSYR(Sigma, Sigma[:,i].copy(), -ci)
mu = np.dot(Sigma, v_tilde)
if self.parallel_updates == False:
#Posterior distribution parameters update
ci = delta_tau/(1.+ delta_tau*Sigma[i,i])
DSYR(Sigma, Sigma[:,i].copy(), -ci)
mu = np.dot(Sigma, v_tilde)
#(re) compute Sigma and mu using full Cholesky decompy
tau_tilde_root = np.sqrt(tau_tilde)
Sroot_tilde_K = tau_tilde_root[:,None] * K
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B)
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,v_tilde)
mu, Sigma, _ = self._ep_compute_posterior(K, tau_tilde, v_tilde)
#monitor convergence
if iterations > 0:
@ -150,15 +169,70 @@ class EP(EPBase, ExactGaussianInference):
mu_tilde = v_tilde/tau_tilde
mu_cav = v_cav/tau_cav
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
return mu, Sigma, mu_tilde, tau_tilde, Z_tilde
# Z_tilde after removing the terms that can lead to infinite terms due to tau_tilde close to zero.
# These terms cancel with the corresponding terms in the marginal loglikelihood
log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(1+tau_tilde/tau_cav)
- 0.5 * ((v_tilde)**2 * 1./(tau_cav + tau_tilde)) + 0.5*(v_cav * ( ( (tau_tilde/tau_cav) * v_cav - 2.0 * v_tilde ) * 1./(tau_cav + tau_tilde))))
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
self.old_mutilde = mu_tilde
self.old_vtilde = v_tilde
return mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde
def _ep_compute_posterior(self, K, tau_tilde, v_tilde):
    """
    Recompute the EP posterior q(f) = N(mu, Sigma) from the site parameters.

    Uses the numerically stable formulation based on the Cholesky factor of
    B = I + S^(1/2) K S^(1/2), with S = diag(tau_tilde), so tau_tilde is
    never inverted directly.

    :param K: prior covariance matrix (n x n)
    :param tau_tilde: site precisions, shape (n,)
    :param v_tilde: site natural means (precision-scaled), shape (n,)
    :returns: tuple (mu, Sigma, L) with L = chol(B)
    """
    n = len(tau_tilde)
    sqrt_tau = np.sqrt(tau_tilde)
    # S^(1/2) K via row scaling
    scaled_K = sqrt_tau[:, None] * K
    # B = I + S^(1/2) K S^(1/2)
    B = np.eye(n) + scaled_K * sqrt_tau[None, :]
    L = jitchol(B)
    # V = L^(-1) S^(1/2) K, so V^T V = K S^(1/2) B^(-1) S^(1/2) K
    V, _ = dtrtrs(L, scaled_K, lower=1)
    # Sigma = K - V^T V = (K^(-1) + S)^(-1)
    Sigma = K - np.dot(V.T, V)
    mu = np.dot(Sigma, v_tilde)
    return (mu, Sigma, L)
def _ep_marginal(self, K, tau_tilde, v_tilde, Z_tilde):
    """
    EP approximation to the log marginal likelihood.

    The Gaussian part deliberately excludes the terms that diverge for
    arbitrarily small tau_tilde; those cancel against the terms the caller
    omitted when computing Z_tilde.

    :returns: (log_marginal, mu, Sigma, L)
    """
    mu, Sigma, L = self._ep_compute_posterior(K, tau_tilde, v_tilde)
    # log|B| from the Cholesky factor of B
    logdet_B = np.sum(2.0 * np.log(np.diag(L)))
    quad_term = np.sum(v_tilde * np.dot(Sigma, v_tilde))
    log_marginal = 0.5 * (-len(tau_tilde) * log_2_pi - logdet_B + quad_term)
    log_marginal += Z_tilde
    return log_marginal, mu, Sigma, L
def _inference(self, K, tau_tilde, v_tilde, likelihood, Z_tilde, Y_metadata=None):
    """
    Assemble the posterior object, the log marginal likelihood and the
    gradients from the EP site parameters.

    :param K: prior covariance matrix (n x n)
    :param tau_tilde: site precisions, shape (n,)
    :param v_tilde: site natural means (precision-scaled), shape (n,)
    :param likelihood: likelihood object providing exact_inference_gradients
    :param Z_tilde: scalar log-normalizer contribution from the sites
    :param Y_metadata: optional metadata forwarded to the likelihood gradients
    :returns: (Posterior, log_marginal, gradient dict with dL_dK/dL_dthetaL/dL_dm)
    """
    log_marginal, mu, Sigma, L = self._ep_marginal(K, tau_tilde, v_tilde, Z_tilde)
    tau_tilde_root = np.sqrt(tau_tilde)
    Sroot_tilde_K = tau_tilde_root[:,None] * K
    # Solve with the Cholesky factor L of B = I + S^(1/2) K S^(1/2)
    aux_alpha , _ = dpotrs(L, np.dot(Sroot_tilde_K, v_tilde), lower=1)
    alpha = (v_tilde - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) /mu^(/tilde)
    LWi, _ = dtrtrs(L, np.diag(tau_tilde_root), lower=1)
    Wi = np.dot(LWi.T,LWi)
    symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
    dL_dK = 0.5 * (tdot(alpha) - Wi)
    # Likelihood-hyperparameter gradients only need the diagonal of dL_dK
    dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
    return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
class EPDTC(EPBase, VarDTC):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
assert Y.shape[1]==1, "ep in 1D only (for now!)"
if self.always_reset:
self.reset()
num_data, output_dim = Y.shape
assert output_dim == 1, "ep in 1D only (for now!)"
if Lm is None:
Kmm = kern.K(Z)
Lm = jitchol(Kmm)
Kmm = kern.K(Z)
if psi1 is None:
try:
Kmn = kern.K(Z, X)
@ -167,35 +241,36 @@ class EPDTC(EPBase, VarDTC):
else:
Kmn = psi1.T
if getattr(self, '_ep_approximation', None) is None:
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
if self.ep_mode=="nested":
#Force EP at each step of the optimization
self._ep_approximation = None
mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
elif self.ep_mode=="alternated":
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation
else:
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
raise ValueError("ep_mode value not valid")
return super(EPDTC, self).inference(kern, X, Z, likelihood, mu_tilde,
mean_function=mean_function,
Y_metadata=Y_metadata,
precision=tau_tilde,
Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=np.log(Z_tilde).sum())
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde.sum())
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
num_data, output_dim = Y.shape
assert output_dim == 1, "This EP methods only works for 1D outputs"
LLT0 = Kmm.copy()
#diag.add(LLT0, 1e-8)
Lm = jitchol(LLT0)
Lmi = dtrtri(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
LLT = Kmm.copy() #Sigma = K.copy()
Sigma_diag = Qnn_diag.copy() + 1e-8
# Makes computing the sign quicker if we work with numpy arrays rather
# than ObsArrays
Y = Y.values.copy()
#Initial values - Marginal moments
Z_hat = np.zeros(num_data,dtype=np.float64)
@ -206,73 +281,110 @@ class EPDTC(EPBase, VarDTC):
v_cav = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT0 = Kmm.copy()
Lm = jitchol(LLT0) #K_m = L_m L_m^\top
Vm,info = dtrtrs(Lm,Kmn,lower=1)
# Lmi = dtrtri(Lm)
# Kmmi = np.dot(Lmi.T,Lmi)
# KmmiKmn = np.dot(Kmmi,Kmn)
# Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
Qnn_diag = np.sum(Vm*Vm,-2) #diag(Knm Kmm^(-1) Kmn)
#diag.add(LLT0, 1e-8)
if self.old_mutilde is None:
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT = LLT0.copy() #Sigma = K.copy()
mu = np.zeros(num_data)
Sigma_diag = Qnn_diag.copy() + 1e-8
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else:
assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
Sigma_diag += 1e-8
# TODO: Check the log-marginal under both conditions and choose the best one
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
tau_tilde_old = np.nan
v_tilde_old = np.nan
iterations = 0
tau_tilde_old = 0.
v_tilde_old = 0.
update_order = np.random.permutation(num_data)
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
if Y_metadata is not None:
# Pick out the relavent metadata for Yi
Y_metadata_i = {}
for key in Y_metadata.keys():
Y_metadata_i[key] = Y_metadata[key][i, :]
else:
Y_metadata_i = None
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i])#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde_prev = tau_tilde[i]
tau_tilde[i] += delta_tau
# Enforce positivity of tau_tilde. Even though this is guaranteed for logconcave sites, it is still possible
# to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
# update the marginal likelihood without inestability issues.
if tau_tilde[i] < np.finfo(float).eps:
tau_tilde[i] = np.finfo(float).eps
delta_tau = tau_tilde[i] - tau_tilde_prev
v_tilde[i] += delta_v
#Posterior distribution parameters update
if self.parallel_updates == False:
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
mu += (delta_v-delta_tau*mu[i])*si
#mu = np.dot(Sigma, v_tilde)
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT+np.eye(LLT.shape[0])*1e-7)
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (delta_v-delta_tau*mu[i])*si
#mu = np.dot(Sigma, v_tilde)
#(re) compute Sigma and mu using full Cholesky decompy
LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
#diag.add(LLT, 1e-8)
L = jitchol(LLT)
V, _ = dtrtrs(L,Kmn,lower=1)
V2, _ = dtrtrs(L.T,V,lower=0)
#Sigma_diag = np.sum(V*V,-2)
#Knmv_tilde = np.dot(Kmn,v_tilde)
#mu = np.dot(V2.T,Knmv_tilde)
Sigma = np.dot(V2.T,V2)
mu = np.dot(Sigma,v_tilde)
#(re) compute Sigma, Sigma_diag and mu using full Cholesky decompy
mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
Sigma_diag = np.maximum(Sigma_diag, np.finfo(float).eps)
#monitor convergence
#if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
# Only to while loop once:?
tau_diff = 0
v_diff = 0
iterations += 1
mu_tilde = v_tilde/tau_tilde
mu_cav = v_cav/tau_cav
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_tilde
self.old_mutilde = mu_tilde
self.old_vtilde = v_tilde
return mu, Sigma_diag, ObsAr(mu_tilde[:,None]), tau_tilde, log_Z_tilde
def _ep_compute_posterior(self, LLT0, Kmn, tau_tilde, v_tilde):
    """
    Recompute posterior mean and marginal variances for the sparse (DTC)
    EP approximation from the site parameters.

    :param LLT0: prior inducing covariance Kmm (m x m)
    :param Kmn: cross covariance between inducing points and data (m x n)
    :param tau_tilde: site precisions, shape (n,)
    :param v_tilde: site natural means (precision-scaled), shape (n,)
    :returns: (mu, Sigma_diag, LLT) where LLT = Kmm + Kmn diag(tau_tilde) Knm

    PERF: the previous version materialized the full n x n matrix
    Sigma = V^T V only to read its diagonal and multiply it by v_tilde.
    Both are obtained directly from V (m x n), which avoids the O(n^2 m)
    product and the O(n^2) temporary.
    """
    # LLT = Kmm + Kmn diag(tau_tilde) Knm
    LLT = LLT0 + np.dot(Kmn * tau_tilde[None, :], Kmn.T)
    L = jitchol(LLT)
    # V = L^(-1) Kmn, so Sigma = Knm LLT^(-1) Kmn = V^T V
    V, _ = dtrtrs(L, Kmn, lower=1)
    # diag(V^T V) without forming the full matrix
    Sigma_diag = np.sum(V * V, axis=0)
    # mu = Sigma v_tilde = V^T (V v_tilde), again without forming Sigma
    mu = np.dot(V.T, np.dot(V, v_tilde))
    return (mu, Sigma_diag, LLT)

View file

@ -239,6 +239,7 @@ class Posterior(object):
var = np.clip(var,1e-15,np.inf)
return mu, var
class PosteriorExact(Posterior):
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
@ -270,3 +271,37 @@ class PosteriorExact(Posterior):
var[:, i] = (Kxx - np.square(tmp).sum(0))
var = var
return mu, var
class PosteriorEP(Posterior):
    """
    Posterior to be used with EP inference.

    Predictions use the stored woodbury vector and woodbury inverse as
    supplied by the EP inference method; the 3-dimensional woodbury-inverse
    case handles missing-data models with one inverse per output.
    """

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        # Cross-covariance between the stored prediction inputs and Xnew
        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
        if mu.ndim == 1:
            mu = mu.reshape(-1, 1)
        W = self._woodbury_inv
        if full_cov:
            Kxx = kern.K(Xnew)
            if W.ndim == 2:
                var = Kxx - np.dot(Kx.T, np.dot(W, Kx))
            elif W.ndim == 3:  # Missing data: one covariance per output
                var = np.empty((Kxx.shape[0], Kxx.shape[1], W.shape[2]))
                for i in range(var.shape[2]):
                    var[:, :, i] = Kxx - np.dot(Kx.T, np.dot(W[:, :, i], Kx))
        else:
            Kxx = kern.Kdiag(Xnew)
            if W.ndim == 2:
                var = (Kxx - (np.dot(W, Kx) * Kx).sum(0))[:, None]
            elif W.ndim == 3:  # Missing data: one variance column per output
                var = np.empty((Kxx.shape[0], W.shape[2]))
                for i in range(var.shape[1]):
                    var[:, i] = Kxx - (Kx * np.dot(W[:, :, i], Kx)).sum(0)
        return mu, var

View file

@ -88,6 +88,10 @@ class VarDTC(LatentFunctionInference):
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
else:
Kmm = tdot(Lm)
symmetrify(Kmm)
# The rather complex computations of A, and the psi stats
if uncertain_inputs:

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf, derivLogCdfNormal, logCdfNormal
from . import link_functions
from .likelihood import Likelihood
@ -59,24 +59,24 @@ class Bernoulli(Likelihood):
raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
Z_hat = np.where(Z_hat==0, 1e-15, Z_hat)
phi = std_norm_pdf(z)
phi_div_Phi = derivLogCdfNormal(z)
log_Z_hat = logCdfNormal(z)
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
mu_hat = v_i/tau_i + sign*phi_div_Phi/np.sqrt(tau_i**2 + tau_i)
sigma2_hat = 1./tau_i - (phi_div_Phi/(tau_i**2+tau_i))*(z+phi_div_Phi)
elif isinstance(self.gp_link, link_functions.Heaviside):
a = sign*v_i/np.sqrt(tau_i)
Z_hat = np.max(1e-13, std_norm_cdf(z))
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
z = sign*v_i/np.sqrt(tau_i)
phi_div_Phi = derivLogCdfNormal(z)
log_Z_hat = logCdfNormal(z)
mu_hat = v_i/tau_i + sign*phi_div_Phi/np.sqrt(tau_i)
sigma2_hat = (1. - a*phi_div_Phi - np.square(phi_div_Phi))/tau_i
else:
#TODO: do we want to revert to numerical quadrature here?
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.__name__))
return Z_hat, mu_hat, sigma2_hat
# TODO: Output log_Z_hat instead of Z_hat (needs to be change in all others likelihoods)
return np.exp(log_Z_hat), mu_hat, sigma2_hat
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):

View file

@ -49,6 +49,43 @@ class InferenceXTestCase(unittest.TestCase):
m.optimize()
x, mi = m.infer_newX(m.Y, optimize=True)
np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)
class InferenceGPEP(unittest.TestCase):
    """
    Checks that dense EP inference (EP._inference) agrees with the generic
    exact-Gaussian inference path when both are fed the same site parameters.
    """

    def genData(self):
        """Generate Bernoulli observations from a latent GP draw (fixed seed)."""
        np.random.seed(1)
        k = GPy.kern.RBF(1, variance=7., lengthscale=0.2)
        X = np.random.rand(200, 1)
        f = np.random.multivariate_normal(np.zeros(200), k.K(X) + 1e-5 * np.eye(X.shape[0]))
        lik = GPy.likelihoods.Bernoulli()
        Y = lik.samples(f).reshape(-1, 1)
        return X, Y

    def test_inference_EP(self):
        from paramz import ObsAr
        X, Y = self.genData()
        lik = GPy.likelihoods.Bernoulli()
        k = GPy.kern.RBF(1, variance=7., lengthscale=0.2)
        inf = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=30, delta=0.5)
        self.model = GPy.core.GP(X=X,
                                 Y=Y,
                                 kernel=k,
                                 inference_method=inf,
                                 likelihood=lik)
        K = self.model.kern.K(X)
        mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = \
            self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
        v_tilde = mu_tilde * tau_tilde
        p, m, d = self.model.inference_method._inference(
            K, tau_tilde, v_tilde, lik, Y_metadata=None, Z_tilde=log_Z_tilde.sum())
        # The generic path needs the tau_tilde-dependent terms that
        # _inference's Z_tilde deliberately omits (they cancel there).
        Z0 = log_Z_tilde.sum() + np.sum(-0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde))
        p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(
            k, X, lik, mu_tilde[:, None], mean_function=None, variance=1./tau_tilde, K=K, Z_tilde=Z0)
        # BUG FIX: the original assertion compared a sum of *signed*
        # differences (which can cancel) against 1e6 -- a bound that can
        # essentially never fail. Compare each quantity elementwise with a
        # tight tolerance instead.
        np.testing.assert_allclose(m, m0, rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(d['dL_dK'], d0['dL_dK'], rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(d['dL_dthetaL'], d0['dL_dthetaL'], rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(d['dL_dm'], d0['dL_dm'], rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(p._woodbury_vector, p0._woodbury_vector, rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(p.woodbury_inv, p0.woodbury_inv, rtol=1e-6, atol=1e-6)
class HMCSamplerTest(unittest.TestCase):

View file

@ -155,3 +155,59 @@ class TestDebug(unittest.TestCase):
clusters = set([frozenset(cluster) for cluster in active])
assert set([1,2]) in clusters, "Offset Clustering algorithm failed"
assert set([0,3]) in clusters, "Offset Clustering algoirthm failed"
class TestUnivariateGaussian(unittest.TestCase):
    """Checks the robust univariate Gaussian helpers against precomputed values."""

    def setUp(self):
        # Evaluation points covering both tails, the origin and the central region
        self.zz = [-5.0, -0.8, 0.0, 0.5, 2.0, 10.0]

    def _total_abs_error(self, fn, expected):
        # Accumulated |fn(z) - expected| over all evaluation points
        return sum(abs(fn(z) - e) for z, e in zip(self.zz, expected))

    def test_logPdfNormal(self):
        from GPy.util.univariate_Gaussian import logPdfNormal
        expected = [-13.4189385332,
                    -1.2389385332,
                    -0.918938533205,
                    -1.0439385332,
                    -2.9189385332,
                    -50.9189385332]
        self.assertTrue(self._total_abs_error(logPdfNormal, expected) < 1e-10)

    def test_cdfNormal(self):
        from GPy.util.univariate_Gaussian import cdfNormal
        expected = [2.86651571879e-07,
                    0.211855398583,
                    0.5,
                    0.691462461274,
                    0.977249868052,
                    1.0]
        self.assertTrue(self._total_abs_error(cdfNormal, expected) < 1e-10)

    def test_logCdfNormal(self):
        from GPy.util.univariate_Gaussian import logCdfNormal
        expected = [-15.064998394,
                    -1.55185131919,
                    -0.69314718056,
                    -0.368946415289,
                    -0.023012909329,
                    0.0]
        self.assertTrue(self._total_abs_error(logCdfNormal, expected) < 1e-10)

    def test_derivLogCdfNormal(self):
        from GPy.util.univariate_Gaussian import derivLogCdfNormal
        expected = [5.18650396941,
                    1.3674022693,
                    0.79788456081,
                    0.50916043387,
                    0.0552478626962,
                    0.0]
        self.assertTrue(self._total_abs_error(derivLogCdfNormal, expected) < 1e-8)

View file

@ -23,3 +23,164 @@ def inv_std_norm_cdf(x):
inv_erf = np.sign(z) * np.sqrt( np.sqrt(b**2 - ln1z2/a) - b )
return np.sqrt(2) * inv_erf
def logPdfNormal(z):
    """
    Robust log-density of a standard normal at z: -(z^2 + log(2*pi)) / 2.

    @see [[https://github.com/mseeger/apbsint/blob/master/src/eptools/potentials/SpecfunServices.h original implementation]]
    in C from Matthias Seeger.
    """
    squared = z * z
    return -0.5 * (M_LN2PI + squared)
def cdfNormal(z):
    """
    Robust implementation of the cdf of a standard normal.

    Branches between the central rational expansion R_3 and the tail
    expansion Q so that very negative z does not lose accuracy.

    @see [[https://github.com/mseeger/apbsint/blob/master/src/eptools/potentials/SpecfunServices.h original implementation]]
    in C from Matthias Seeger.
    """
    if (abs(z) < ERF_CODY_LIMIT1):
        # Phi(z) approx (1+y R_3(y^2))/2, y=z/sqrt(2)
        return 0.5 * (1.0 + (z / M_SQRT2) * _erfRationalHelperR3(0.5 * z * z))
    elif (z < 0.0):
        # Phi(z) approx N(z)Q(-z)/(-z), z<0
        return np.exp(logPdfNormal(z)) * _erfRationalHelper(-z) / (-z)
    else:
        # Upper branch: Phi(z) = 1 - N(z) Q(z)/z
        return 1.0 - np.exp(logPdfNormal(z)) * _erfRationalHelper(z) / z
def logCdfNormal(z):
    """
    Robust implementation of the log cdf of a standard normal.

    Keeps accuracy in both tails: near the origin it uses log1p on the
    R_3 expansion; for z<0 it stays on the log scale via
    log N(z) + log Q(-z) - log(-z); for large z it evaluates
    log(1 - small) with log1p.

    @see [[https://github.com/mseeger/apbsint/blob/master/src/eptools/potentials/SpecfunServices.h original implementation]]
    in C from Matthias Seeger.
    """
    if (abs(z) < ERF_CODY_LIMIT1):
        # Phi(z) approx (1+y R_3(y^2))/2, y=z/sqrt(2)
        return np.log1p((z / M_SQRT2) * _erfRationalHelperR3(0.5 * z * z)) - M_LN2
    elif (z < 0.0):
        # Phi(z) approx N(z)Q(-z)/(-z), z<0
        return logPdfNormal(z) - np.log(-z) + np.log(_erfRationalHelper(-z))
    else:
        # log(1 - N(z) Q(z)/z) via log1p for accuracy when the term is tiny
        return np.log1p(-(np.exp(logPdfNormal(z))) * _erfRationalHelper(z) / z)
def derivLogCdfNormal(z):
    """
    Robust implementation of the derivative of the log cdf of a standard
    normal, i.e. the ratio N(z)/Phi(z).

    @see [[https://github.com/mseeger/apbsint/blob/master/src/eptools/potentials/SpecfunServices.h original implementation]]
    in C from Matthias Seeger.
    """
    if (abs(z) < ERF_CODY_LIMIT1):
        # Phi(z) approx (1 + y R_3(y^2))/2, y = z/sqrt(2)
        return 2.0 * np.exp(logPdfNormal(z)) / (1.0 + (z / M_SQRT2) * _erfRationalHelperR3(0.5 * z * z))
    elif (z < 0.0):
        # Phi(z) approx N(z) Q(-z)/(-z), z<0, so N(z)/Phi(z) approx -z/Q(-z)
        return -z / _erfRationalHelper(-z)
    else:
        # t = N(z); Phi(z) approx 1 - t Q(z)/z
        t = np.exp(logPdfNormal(z))
        return t / (1.0 - t * _erfRationalHelper(z) / z)
def _erfRationalHelper(x):
    """
    Rational-polynomial approximation of Q(x) (Cody-style erfc helper).

    For x >= ERF_CODY_LIMIT2 (x/sqrt(2) >= 4):
        Q(x) = 1 + sqrt(pi) y R_1(y), y = 2/(x*x),
        where R_1 = poly(P1_ERF, y)/poly(Q1_ERF, y) and the P1_ERF
        entries are stored *negated* (hence the leading minus below).
    Otherwise (0.469 <= x/sqrt(2) < 4):
        Q(x) = sqrt(pi) y R_2(y), y = x/sqrt(2),
        with R_2 = poly(P2_ERF, y)/poly(Q2_ERF, y).
    """
    assert x > 0.0, "Arg of erfRationalHelper should be >0.0; was {}".format(x)
    if x >= ERF_CODY_LIMIT2:
        y = 2.0 / (x * x)
        num = y * P1_ERF[5]
        den = y
        # Horner-style evaluation over coefficients ordered 4,3,2,1,0,5
        for p, q in zip(P1_ERF[:4], Q1_ERF[:4]):
            num = (num + p) * y
            den = (den + q) * y
        # Minus, because the P1_ERF values are the negated coefficients
        return 1.0 - M_SQRTPI * y * (num + P1_ERF[4]) / (den + Q1_ERF[4])
    y = x / M_SQRT2
    num = y * P2_ERF[8]
    den = y
    # Horner-style evaluation over coefficients ordered 7,...,0,8
    for p, q in zip(P2_ERF[:7], Q2_ERF[:7]):
        num = (num + p) * y
        den = (den + q) * y
    return M_SQRTPI * y * (num + P2_ERF[7]) / (den + Q2_ERF[7])
def _erfRationalHelperR3(y):
    """
    Rational function R_3(y) = poly(P3_ERF, y)/poly(Q3_ERF, y) used for
    Phi(z) in the central region (|z| < ERF_CODY_LIMIT1), where y = z^2/2.
    """
    assert y >= 0.0, "Arg of erfRationalHelperR3 should be >=0.0; was {}".format(y)
    num = y * P3_ERF[4]
    den = y
    # Horner-style evaluation over coefficients ordered 2,1,0 then 3
    for p, q in zip(P3_ERF[:3], Q3_ERF[:3]):
        num = (num + p) * y
        den = (den + q) * y
    return (num + P3_ERF[3]) / (den + Q3_ERF[3])
# Branch thresholds for the Cody-style erf approximations above:
# |z| < ERF_CODY_LIMIT1 selects the central R_3 expansion; x >= ERF_CODY_LIMIT2
# (i.e. x/sqrt(2) >= 4, per the comment in _erfRationalHelper) selects the
# asymptotic tail expansion R_1.
ERF_CODY_LIMIT1 = 0.6629
ERF_CODY_LIMIT2 = 5.6569
# Precomputed constants: log(2*pi), log(2), sqrt(pi) and sqrt(2).
M_LN2PI = 1.83787706640934533908193770913
M_LN2 = 0.69314718055994530941723212146
M_SQRTPI = 1.77245385090551602729816748334
M_SQRT2 = 1.41421356237309504880168872421
#weights for the erfHelpers (defined here to avoid redefinitions at every call)
# R_1 numerator (entries stored negated; see _erfRationalHelper) and denominator
P1_ERF = [
    3.05326634961232344e-1, 3.60344899949804439e-1,
    1.25781726111229246e-1, 1.60837851487422766e-2,
    6.58749161529837803e-4, 1.63153871373020978e-2]
Q1_ERF = [
    2.56852019228982242e+0, 1.87295284992346047e+0,
    5.27905102951428412e-1, 6.05183413124413191e-2,
    2.33520497626869185e-3]
# R_2 numerator and denominator coefficients
P2_ERF = [
    5.64188496988670089e-1, 8.88314979438837594e+0,
    6.61191906371416295e+1, 2.98635138197400131e+2,
    8.81952221241769090e+2, 1.71204761263407058e+3,
    2.05107837782607147e+3, 1.23033935479799725e+3,
    2.15311535474403846e-8]
Q2_ERF = [
    1.57449261107098347e+1, 1.17693950891312499e+2,
    5.37181101862009858e+2, 1.62138957456669019e+3,
    3.29079923573345963e+3, 4.36261909014324716e+3,
    3.43936767414372164e+3, 1.23033935480374942e+3]
# R_3 numerator and denominator coefficients
P3_ERF = [
    3.16112374387056560e+0, 1.13864154151050156e+2,
    3.77485237685302021e+2, 3.20937758913846947e+3,
    1.85777706184603153e-1]
Q3_ERF = [
    2.36012909523441209e+1, 2.44024637934444173e+2,
    1.28261652607737228e+3, 2.84423683343917062e+3]