Add Gauss–Kronrod quadrature utilities and test cases, and compute gradients of the log marginal likelihood w.r.t. the likelihood (theta) parameters

This commit is contained in:
Akash Kumar Dhaka 2017-07-03 13:30:39 +03:00
parent 19598cf807
commit ae27e1225d
5 changed files with 269 additions and 4 deletions

View file

@ -6,6 +6,7 @@ from paramz import ObsAr
from . import ExactGaussianInference, VarDTC
from ...util import diag
from .posterior import PosteriorEP as Posterior
from ...likelihoods import Gaussian
log_2_pi = np.log(2*np.pi)
@ -185,7 +186,7 @@ class EP(EPBase, ExactGaussianInference):
else:
raise ValueError("ep_mode value not valid")
return self._inference(K, ga_approx, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
return self._inference(Y, K, ga_approx, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@ -280,7 +281,7 @@ class EP(EPBase, ExactGaussianInference):
return log_marginal, post_params
def _inference(self, K, ga_approx, likelihood, Z_tilde, Y_metadata=None):
def _inference(self, Y, K, ga_approx, likelihood, Z_tilde, Y_metadata=None):
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
tau_tilde_root = np.sqrt(ga_approx.tau)
@ -293,8 +294,10 @@ class EP(EPBase, ExactGaussianInference):
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
dL_dK = 0.5 * (tdot(alpha) - Wi)
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
if not isinstance(likelihood, Gaussian):
dL_dthetaL = likelihood.ep_gradients(Y, ga_approx.tau, ga_approx.v, Y_metadata=Y_metadata)
else:
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}

View file

@ -6,8 +6,12 @@ from scipy import stats,special
import scipy as sp
from . import link_functions
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
from ..util.quad_integrate import quadgk_int
from scipy.integrate import quad
from functools import partial
import warnings
from ..core.parameterization import Parameterized
class Likelihood(Parameterized):
@ -223,6 +227,105 @@ class Likelihood(Parameterized):
self.__gh_points = np.polynomial.hermite.hermgauss(T)
return self.__gh_points
def ep_gradients(self, Y, tau, v, Y_metadata=None, gh_points=None, approx=False, boost_grad=1.):
    """
    Gradients of the EP log marginal likelihood w.r.t. the likelihood
    parameters (theta).

    For each data point, d log p(y_i|f_i)/dtheta is integrated against the
    approximate marginal posterior N(f | mu_i, sigma2_i) with mu = v/tau and
    sigma2 = 1/tau; the per-point contributions are then summed.

    :param Y: observed outputs, flattened to shape (N,)
    :param tau: site precision parameters of the Gaussian approximation
    :param v: site natural-mean parameters (v = tau * mu)
    :param Y_metadata: optional per-point metadata; currently not forwarded
        to the per-point integrand — TODO confirm whether `integrate` should
        receive it
    :param gh_points: unused; kept for interface compatibility
    :param approx: unused; kept for interface compatibility
    :param boost_grad: scalar multiplier applied to the summed gradient
    :returns: array of shape (self.size,) with dL/dtheta, or zeros when the
        likelihood has no tunable parameters
    """
    if self.size > 0:
        tau, v, Y = tau.flatten(), v.flatten(), Y.flatten()
        # moments of the approximate marginal posterior per data point
        mu = v / tau
        sigma2 = 1. / tau
        # one Gauss-Kronrod quadrature per data point; each call returns a
        # per-parameter gradient vector, transposed/stacked to (self.size, N)
        quads = list(zip(*map(self.integrate, Y, mu, np.sqrt(sigma2))))
        quads = np.vstack(quads)
        # nansum guards against individual quadratures that failed (NaN)
        dL_dtheta = boost_grad * np.nansum(quads, axis=1)
    else:
        dL_dtheta = np.zeros(self.num_params)
    return dL_dtheta
def integrate(self, Y, mu, sigma):
    """
    Expectation of d log p(Y|f)/dtheta under the unnormalised product of
    the likelihood p(Y|f) and a Gaussian N(f | mu, sigma^2), computed by
    Gauss-Kronrod quadrature over the whole real line.

    :param Y: single observed output
    :param mu: mean of the Gaussian weight
    :param sigma: standard deviation of the Gaussian weight
    :returns: quadrature result, one value per likelihood parameter
    """
    norm_const = np.sqrt(2. * np.pi) * sigma

    def integrand(f):
        # Gaussian density N(f | mu, sigma^2) times the likelihood p(Y|f)
        gauss = np.exp(-0.5 * np.square((f - mu) / sigma)) / norm_const
        weight = np.exp(self.logpdf_link(f, Y)) * gauss
        return weight * self.dlogpdf_dtheta(f, Y)

    return quadgk_int(integrand, fmin=-np.inf, fmax=np.inf)
def site_derivatives_ep(self, obs, tau, v, Y_metadata_i=None, approx=False, gh_points=None):
    """
    Site derivative E_f[ d log p(y_i|f_i)/dtheta ] where theta is a
    likelihood parameter. The exact expectation is over the (non-Gaussian)
    unnormalised product of the cavity distribution and the likelihood; here
    it is taken over the approximate Gaussian marginal posterior
    N(mu, sigma2) with mu = v/tau, sigma2 = 1/tau, which should be close.
    This term is needed for the gradients of the EP marginal likelihood
    estimate Z_EP w.r.t. the likelihood parameters.

    :param obs: observed output y_i
    :param tau: site precision
    :param v: site natural mean (v = tau * mu)
    :param Y_metadata_i: per-point metadata forwarded to the likelihood
    :param approx: if True use Gauss-Hermite quadrature, otherwise exact
        Gauss-Kronrod integration over the real line
    :param gh_points: optional precomputed (nodes, weights) for Gauss-Hermite
    :returns: quadrature estimate, one value per likelihood parameter
    """
    mu = v / tau
    sigma2 = 1. / tau
    sigma = np.sqrt(sigma2)

    if not approx:
        # exact path: Gauss-Kronrod over (-inf, inf); the Gauss-Hermite
        # machinery below is not needed here
        SQRT_2PI = np.sqrt(2. * np.pi)

        def generate_integral(x):
            a = np.exp(self.logpdf_link(x, obs, Y_metadata=Y_metadata_i)) \
                * np.exp(-0.5 * np.square((x - mu) / sigma)) / (SQRT_2PI * sigma)
            return a * self.dlogpdf_dtheta(x, obs, Y_metadata=Y_metadata_i)

        return quadgk_int(generate_integral, fmin=-np.inf, fmax=np.inf)

    # approximate path: Gauss-Hermite quadrature with 32 points by default
    if gh_points is None:
        gh_x, gh_w = self._gh_points(32)
    else:
        gh_x, gh_w = gh_points
    # map the Hermite nodes onto the posterior N(mu, sigma2)
    X = gh_x[None, :] * np.sqrt(2. * sigma2) + mu
    dlogp_dtheta = self.dlogpdf_dtheta(X, obs, Y_metadata=Y_metadata_i)
    return np.dot(dlogp_dtheta, gh_w) / np.sqrt(np.pi)
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
"""
Use Gauss-Hermite Quadrature to compute

View file

@ -0,0 +1,39 @@
from __future__ import print_function, division

import unittest
import warnings

import numpy as np

import GPy
from ..util.quad_integrate import quadgk_int, quadvgk
class QuadTests(unittest.TestCase):
    """
    Tests for the Gauss-Kronrod quadrature implementation.

    Functions with known closed-form integrals are integrated numerically
    and the results compared against the analytic values, checking how
    accurate the native-numpy quadrature implementation is.
    """
    # NOTE: np.testing.TestCase was removed from modern numpy releases;
    # unittest.TestCase is the supported base class and behaves identically.

    def test_infinite_quad(self):
        # integral over the real line of exp(-x^2/2) * x^k for k = 0, 1, 2;
        # the k = 0 component equals sqrt(2*pi)
        def f(x):
            return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])
        quad_int_val = quadgk_int(f)
        real_val = np.sqrt(np.pi * 2)
        np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)

    def test_finite_quad(self):
        # integral of x^2 over [1, 2] is 7/3
        def f2(x):
            return x ** 2
        quad_int_val = quadvgk(f2, 1., 2.)
        real_val = 7 / 3.
        np.testing.assert_almost_equal(real_val, quad_int_val, decimal=5)
if __name__ == '__main__':
    # quick manual check: the zeroth moment of the (unnormalised) standard
    # Gaussian kernel over the real line equals sqrt(2*pi)
    def integrand(x):
        return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])

    result = quadgk_int(integrand)
    expected = np.sqrt(np.pi * 2)
    np.testing.assert_almost_equal(expected, result[0], decimal=7)
    print(quadgk_int(integrand))

View file

@ -17,3 +17,4 @@ from . import multioutput
from . import parallel
from . import functions
from . import cluster_with_offset
from . import quad_integrate

119
GPy/util/quad_integrate.py Normal file
View file

@ -0,0 +1,119 @@
"""
The file for utilities related to integration by quadrature methods
- will contain implementation for gaussian-kronrod integration.
"""
import numpy as np
def getSubs(Subs, XK, NK=1):
    """
    Split every integration subinterval at the mapped quadrature nodes.

    ``Subs`` is a (2, M) array whose columns are [lower; upper] interval
    bounds. Each column is subdivided at the points obtained by mapping the
    nodes ``XK`` from [-1, 1] onto that interval, giving a (2, M*(len(XK)+1))
    array of smaller subintervals covering the same range.

    :param Subs: (2, M) array of interval bounds
    :param XK: 1-d array of quadrature nodes in (-1, 1), sorted ascending
    :param NK: unused; kept for interface compatibility (the original
        ``np.ones((NK, 1)) * C`` factor was a no-op under broadcasting)
    :returns: (2, M*(len(XK)+1)) array of refined subintervals
    """
    half_widths = (Subs[1, :] - Subs[0, :]) / 2
    midpoints = (Subs[1, :] + Subs[0, :]) / 2
    # interior split points: one row per node, one column per interval
    split_points = XK[:, None] * half_widths + midpoints
    # pair each split point with its neighbours to form the new intervals
    lowers = np.vstack((Subs[0, :], split_points)).flatten()
    uppers = np.vstack((split_points, Subs[1, :])).flatten()
    return np.vstack((lowers, uppers))
def quadvgk(feval, fmin, fmax, tol1=1e-4, tol2=1e-4):
    """
    Adaptive vectorised Gauss-Kronrod quadrature on a finite interval.

    Numpy port of http://se.mathworks.com/matlabcentral/fileexchange/18801-quadvgk
    (the Gauss-Kronrod integration also used in GPstuff). A 15-point Kronrod
    rule gives the integral estimate and the embedded 7-point Gauss rule the
    error estimate; subintervals whose error exceeds the tolerance are
    subdivided at the Kronrod nodes and re-evaluated. ``feval`` is vectorised:
    given x of shape (K,) it returns (NF, K), so NF integrands are handled in
    a single pass.

    Note: the original implementation silently overrode ``tol1``/``tol2``
    with 1e-4; the defaults now make that explicit and caller-supplied
    tolerances are honoured.

    :param feval: vectorised integrand, maps x of shape (K,) to (NF, K)
    :param fmin: finite lower integration limit
    :param fmax: finite upper integration limit
    :param tol1: absolute tolerance on |Q_kronrod - Q_gauss| per subinterval
    :param tol2: minimum subinterval width; narrower intervals are accepted
    :returns: array of shape (NF,) of integral values
    """
    # 15-point Kronrod nodes and weights on [-1, 1]
    XK = np.array([-0.991455371120813, -0.949107912342759, -0.864864423359769, -0.741531185599394,
                   -0.586087235467691, -0.405845151377397, -0.207784955007898, 0.,
                   0.207784955007898, 0.405845151377397, 0.586087235467691,
                   0.741531185599394, 0.864864423359769, 0.949107912342759, 0.991455371120813])
    WK = np.array([0.022935322010529, 0.063092092629979, 0.104790010322250, 0.140653259715525,
                   0.169004726639267, 0.190350578064785, 0.204432940075298, 0.209482141084728,
                   0.204432940075298, 0.190350578064785, 0.169004726639267,
                   0.140653259715525, 0.104790010322250, 0.063092092629979, 0.022935322010529])
    # embedded 7-point Gauss weights (nodes are XK[1::2])
    WG = np.array([0.129484966168870, 0.279705391489277, 0.381830050505119, 0.417959183673469,
                   0.381830050505119, 0.279705391489277, 0.129484966168870])
    NK = WK.size

    def _split(subs):
        # subdivide every column [a; b] of subs at the mapped Kronrod nodes
        half = (subs[1, :] - subs[0, :]) / 2
        mid = (subs[1, :] + subs[0, :]) / 2
        pts = XK[:, None] * half + mid
        lowers = np.vstack((subs[0, :], pts)).flatten()
        uppers = np.vstack((pts, subs[1, :])).flatten()
        return np.vstack((lowers, uppers))

    Subs = np.array([[fmin], [fmax]])
    # number of integrand components in the feval vector of functions
    NF = feval(np.zeros(1)).size
    Q = np.zeros(NF)
    while Subs.size > 0:
        Subs = _split(Subs)
        M = (Subs[1, :] - Subs[0, :]) / 2   # half-widths
        C = (Subs[1, :] + Subs[0, :]) / 2   # midpoints
        NM = M.size
        # all quadrature abscissae for all subintervals, flattened
        x = (XK[:, None] * M + C).flatten()
        FV = feval(x)
        # (NF, NM, NK): per integrand, per subinterval, per node
        F = FV.reshape(NF, NK, NM).swapaxes(2, 1)
        Q1 = np.dot(F, WK) * M              # 15-point Kronrod estimate
        Q2 = np.dot(F[:, :, 1::2], WG) * M  # 7-point Gauss estimate
        # accept subintervals that converged or are already narrower than tol2
        ind = np.nonzero(np.logical_or(np.max(np.abs(Q1 - Q2), 0) < tol1,
                                       (Subs[1, :] - Subs[0, :]) < tol2))[0]
        Q = Q + np.sum(Q1[:, ind], axis=1)
        Subs = np.delete(Subs, ind, axis=1)
    return Q
def quadgk_int(f, fmin=-np.inf, fmax=np.inf, difftol=1e-4):
    """
    Integrate the vectorised function ``f`` from ``fmin`` to ``fmax`` with
    Gauss-Kronrod quadrature.

    For the doubly-infinite case the substitution x = r / (1 - r**2) maps
    r in (-1, 1) onto x in (-inf, inf), so quadvgk can work on a finite
    interval; dx/dr = (1 + r**2) / (1 - r**2)**2 is the Jacobian of that
    substitution. Finite limits are passed straight through to quadvgk
    (previously they were silently ignored and the whole real line was
    integrated regardless).

    :param f: vectorised integrand, see quadvgk
    :param fmin: lower limit, -inf or finite
    :param fmax: upper limit, +inf or finite
    :param difftol: tolerance forwarded to quadvgk (previously this argument
        was silently overridden with 1e-4; that value is now the default and
        caller-supplied values are honoured)
    :returns: array of integral values, one per component of f
    :raises NotImplementedError: for semi-infinite limits, which the
        substitution does not cover
    """
    if np.isfinite(fmin) and np.isfinite(fmax):
        # finite interval: no change of variables needed
        return quadvgk(f, fmin, fmax, difftol, difftol)
    if not (fmin == -np.inf and fmax == np.inf):
        raise NotImplementedError("semi-infinite integration limits are not supported")

    def trans_func(r):
        r2 = np.square(r)
        x = r / (1 - r2)
        dx_dr = (1 + r2) / (1 - r2) ** 2
        return f(x) * dx_dr

    return quadvgk(trans_func, -1., 1., difftol, difftol)