Merge branch 'params' of github.com:SheffieldML/GPy into params

This commit is contained in:
Max Zwiessele 2014-03-31 12:45:17 +01:00
commit b8566e551e
8 changed files with 562 additions and 134 deletions

View file

@ -3,7 +3,7 @@ from _src.rbf import RBF
from _src.linear import Linear, LinearFull from _src.linear import Linear, LinearFull
from _src.static import Bias, White from _src.static import Bias, White
from _src.brownian import Brownian from _src.brownian import Brownian
from _src.sympykern import Sympykern from _src.symbolic import Symbolic
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52

View file

@ -11,7 +11,7 @@ from kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
class Sympykern(Kern): class Symbolic(Kern):
""" """
A kernel object, where all the hard work in done by sympy. A kernel object, where all the hard work in done by sympy.
@ -26,10 +26,8 @@ class Sympykern(Kern):
- to handle multiple inputs, call them x_1, z_1, etc - to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
""" """
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):
if name is None:
name='sympykern'
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError, "You must provide an argument for the covariance function."
super(Sympykern, self).__init__(input_dim, active_dims, name) super(Sympykern, self).__init__(input_dim, active_dims, name)
@ -60,7 +58,6 @@ class Sympykern(Kern):
# extract parameter names from the covariance # extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs. # Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1: if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
@ -117,6 +114,12 @@ class Sympykern(Kern):
self.arg_list += self._sp_theta_i + self._sp_theta_j self.arg_list += self._sp_theta_i + self._sp_theta_j
self.diag_arg_list += self._sp_theta_i self.diag_arg_list += self._sp_theta_i
# Check if there are additional linear operators on the covariance.
self._sp_operators = operators
# TODO: Deal with linear operators
#if self._sp_operators:
# for operator in self._sp_operators:
# psi_stats aren't yet implemented. # psi_stats aren't yet implemented.
if False: if False:
self.compute_psi_stats() self.compute_psi_stats()
@ -254,3 +257,176 @@ class Sympykern(Kern):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
if False:
class Symcombine(CombinationKernel):
"""
Combine list of given sympy covariances together with the provided operations.
"""
def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations):
self._sp_k += self._k_double_operate(subkern._sp_k, operation)
#def _double_operate(self, k, operation):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Combine covariances with a linear operator.
"""
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1 in self._parameters_:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self._parameters_:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu += a
target_S += b
return target_mu, target_S
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Add, self)._getstate()
def _setstate(self, state):
super(Add, self)._setstate(state)
def add(self, other, name='sum'):
if isinstance(other, Add):
other_params = other._parameters_.copy()
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self

Binary file not shown.

View file

@ -6,3 +6,4 @@ from poisson import Poisson
from student_t import StudentT from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise from mixed_noise import MixedNoise
from symbolic import Symbolic

View file

@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
.. Note:: .. Note::
Y is expected to take values in {-1, 1} TODO: {0, 1}?? Y takes values in either {-1, 1} or {0, 1}.
link function should have the domain [0, 1], e.g. probit (default) or Heaviside link function should have the domain [0, 1], e.g. probit (default) or Heaviside
.. See also:: .. See also::
@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
""" """
if Y_i == 1: if Y_i == 1:
sign = 1. sign = 1.
elif Y_i == 0: elif Y_i == 0 or Y_i == -1:
sign = -1 sign = -1
else: else:
raise ValueError("bad value for Bernouilli observation (0, 1)") raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i) z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z) Z_hat = std_norm_cdf(z)
@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
else: else:
return np.nan return np.nan
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given inverse link of f.
.. math:: .. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
@ -113,102 +113,106 @@ class Bernoulli(Likelihood):
.. Note: .. Note:
Each y_i must be in {0, 1} Each y_i must be in {0, 1}
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = (link_f**y) * ((1.-link_f)**(1.-y)) #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
objective = np.where(y, link_f, 1.-link_f) objective = np.where(y, inv_link_f, 1.-inv_link_f)
return np.exp(np.sum(np.log(objective))) return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Log Likelihood function given link(f) Log Likelihood function given inverse link of f.
.. math:: .. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i}) \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: log likelihood evaluated at points link(f) :returns: log likelihood evaluated at points inverse link of f.
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f) #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) # TODO check y \in {0, 1} or {-1, 1}
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
np.seterr(**state) np.seterr(**state)
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the pdf at y, given link(f) w.r.t link(f) Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
.. math:: .. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))} \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#grad = (y/link_f) - (1.-y)/(1-link_f) #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
grad = np.where(y, 1./link_f, -1./(1-link_f)) # TODO check y \in {0, 1} or {-1, 1}
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
np.seterr(**state) np.seterr(**state)
return grad return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
.. math:: .. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array :rtype: Nx1 array
.. Note:: .. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f)) # TODO check y \in {0, 1} or {-1, 1}
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
np.seterr(**state) np.seterr(**state)
return d2logpdf_dlink2 return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}} \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables passed through inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points inverse_link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3)) # TODO check y \in {0, 1} or {-1, 1}
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
np.seterr(**state) np.seterr(**state)
return d3logpdf_dlink3 return d3logpdf_dlink3

View file

@ -16,20 +16,20 @@ class Likelihood(Parameterized):
Likelihood base class, used to defing p(y|f). Likelihood base class, used to defing p(y|f).
All instances use _inverse_ link functions, which can be swapped out. It is All instances use _inverse_ link functions, which can be swapped out. It is
expected that inherriting classes define a default inverse link function expected that inheriting classes define a default inverse link function
To use this class, inherrit and define missing functionality. To use this class, inherit and define missing functionality.
Inherriting classes *must* implement: Inheriting classes *must* implement:
pdf_link : a bound method which turns the output of the link function into the pdf pdf_link : a bound method which turns the output of the link function into the pdf
logpdf_link : the logarithm of the above logpdf_link : the logarithm of the above
To enable use with EP, inherriting classes *must* define: To enable use with EP, inheriting classes *must* define:
TODO: a suitable derivative function for any parameters of the class TODO: a suitable derivative function for any parameters of the class
It is also desirable to define: It is also desirable to define:
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature. moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
To enable use with Laplace approximation, inherriting classes *must* define: To enable use with Laplace approximation, inheriting classes *must* define:
Some derivative functions *AS TODO* Some derivative functions *AS TODO*
For exact Gaussian inference, define *JH TODO* For exact Gaussian inference, define *JH TODO*
@ -159,7 +159,7 @@ class Likelihood(Parameterized):
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
""" """
Numerical approximation to the predictive variance: V(Y_star) Approximation to the predictive variance: V(Y_star)
The following variance decomposition is used: The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
@ -208,28 +208,28 @@ class Likelihood(Parameterized):
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
return exp_var + var_exp return exp_var + var_exp
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def pdf(self, f, y, Y_metadata=None): def pdf(self, f, y, Y_metadata=None):
@ -247,8 +247,8 @@ class Likelihood(Parameterized):
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, Y_metadata=Y_metadata) return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def logpdf(self, f, y, Y_metadata=None): def logpdf(self, f, y, Y_metadata=None):
""" """
@ -265,8 +265,8 @@ class Likelihood(Parameterized):
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata) return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, Y_metadata=None): def dlogpdf_df(self, f, y, Y_metadata=None):
""" """
@ -284,8 +284,8 @@ class Likelihood(Parameterized):
:returns: derivative of log likelihood evaluated for this point :returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df) return chain_1(dlogpdf_dlink, dlink_df)
@ -305,10 +305,10 @@ class Likelihood(Parameterized):
:returns: second derivative of log likelihood evaluated for this point (diagonal only) :returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
@ -328,12 +328,12 @@ class Likelihood(Parameterized):
:returns: third derivative of log likelihood evaluated for this point :returns: third derivative of log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata) d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f) d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
@ -342,10 +342,10 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata) return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([1, 0]) return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
@ -353,12 +353,12 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df) return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
@ -366,14 +366,14 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, Y_metadata=None): def _laplace_gradients(self, f, y, Y_metadata=None):
@ -411,7 +411,10 @@ class Likelihood(Parameterized):
#compute the quantiles by sampling!!! #compute the quantiles by sampling!!!
N_samp = 1000 N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata) ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]

View file

@ -26,8 +26,8 @@ class StudentT(Likelihood):
gp_link = link_functions.Identity() gp_link = link_functions.Identity()
super(StudentT, self).__init__(gp_link, name='Student_T') super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_noise', float(sigma2), Logexp()) self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free)) self.v = Param('deg_free', float(deg_free))
self.add_parameter(self.sigma2) self.add_parameter(self.sigma2)
self.add_parameter(self.v) self.add_parameter(self.v)
@ -46,23 +46,23 @@ class StudentT(Likelihood):
self.sigma2.gradient = grads[0] self.sigma2.gradient = grads[0]
self.v.gradient = grads[1] self.v.gradient = grads[1]
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
.. math:: .. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
#Careful gamma(big_number) is infinity! #Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2))) / (np.sqrt(self.v * np.pi * self.sigma2)))
@ -70,15 +70,15 @@ class StudentT(Likelihood):
) )
return np.prod(objective) return np.prod(objective)
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
.. math:: .. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param link_f: latent variables (link(f)) :param inv_link_f: latent variables (link(f))
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@ -86,11 +86,11 @@ class StudentT(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
#FIXME: #FIXME:
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?! #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly #But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
@ -99,15 +99,15 @@ class StudentT(Likelihood):
) )
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param link_f: latent variables (f) :param inv_link_f: latent variables (f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@ -115,12 +115,12 @@ class StudentT(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -129,8 +129,8 @@ class StudentT(Likelihood):
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inv_link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@ -141,90 +141,90 @@ class StudentT(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3} \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
) )
return d3lik_dlink3 return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar) return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param link_f: latent variables link_f :param inv_link_f: latent variables inv_link_f
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
) )
@ -246,11 +246,12 @@ class StudentT(Likelihood):
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
return self.gp_link.transf(mu) # only true in link is monotoci, which it is. # The comment here confuses mean and median.
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
if self.deg_free <2.: if self.deg_free<=2.:
return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
else: else:
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata) return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)

243
GPy/likelihoods/symbolic.py Normal file
View file

@ -0,0 +1,243 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import sympy as sp
from sympy.utilities.lambdify import lambdify
import link_functions
from scipy import stats, integrate
from scipy.special import gammaln, gamma, erf
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
class Symbolic(Likelihood):
"""
Symbolic likelihood.
Likelihood where the form of the likelihood is provided by a sympy expression.
"""
def __init__(self, likelihood=None, log_likelihood=None, cdf=None, logZ=None, gp_link=None, name='symbolic', log_concave=False, param=None):
if gp_link is None:
gp_link = link_functions.Identity()
if likelihood is None and log_likelihood is None and cdf is None:
raise ValueError, "You must provide an argument for the likelihood or the log likelihood."
super(Symbolic, self).__init__(gp_link, name=name)
if likelihood is None and log_likelihood:
self._sp_likelihood = sp.exp(log_likelihood).simplify()
self._sp_log_likelihood = log_likelihood
if log_likelihood is None and likelihood:
self._sp_likelihood = likelihood
self._sp_log_likelihood = sp.log(likelihood).simplify()
# TODO: build likelihood and log likelihood from CDF or
# compute CDF given likelihood/log-likelihood. Also check log
# likelihood, likelihood and CDF are consistent.
# pull the variable names out of the symbolic likelihood
sp_vars = [e for e in self._sp_likelihood.atoms() if e.is_Symbol]
self._sp_f = [e for e in sp_vars if e.name=='f']
if not self._sp_f:
raise ValueError('No variable f in likelihood or log likelihood.')
self._sp_y = [e for e in sp_vars if e.name=='y']
if not self._sp_f:
raise ValueError('No variable y in likelihood or log likelihood.')
self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name)
# These are all the arguments need to compute likelihoods.
self.arg_list = self._sp_y + self._sp_f + self._sp_theta
# these are arguments for computing derivatives.
derivative_arguments = self._sp_f + self._sp_theta
# Do symbolic work to compute derivatives.
self._log_likelihood_derivatives = {theta.name : sp.diff(self._sp_log_likelihood,theta).simplify() for theta in derivative_arguments}
self._log_likelihood_second_derivatives = {theta.name : sp.diff(self._log_likelihood_derivatives['f'],theta).simplify() for theta in derivative_arguments}
self._log_likelihood_third_derivatives = {theta.name : sp.diff(self._log_likelihood_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
# Add parameters to the model.
for theta in self._sp_theta:
val = 1.0
# TODO: need to decide how to handle user passing values for the se parameter vectors.
if param is not None:
if param.has_key(theta):
val = param[theta]
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))
# Is there some way to check whether the likelihood is log
# concave? For the moment, need user to specify.
self.log_concave = log_concave
# initialise code arguments
self._arguments = {}
# generate the code for the likelihood and derivatives
self._gen_code()
def _gen_code(self):
"""Generate the code from the symbolic parts that will be used for likleihod computation."""
# TODO: Check here whether theano is available and set up
# functions accordingly.
self._likelihood_function = lambdify(self.arg_list, self._sp_likelihood, 'numpy')
self._log_likelihood_function = lambdify(self.arg_list, self._sp_log_likelihood, 'numpy')
# compute code for derivatives (for implicit likelihood terms
# we need up to 3rd derivatives)
setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_derivatives[key], 'numpy') for key in self._log_likelihood_derivatives.keys()})
setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_second_derivatives[key], 'numpy') for key in self._log_likelihood_second_derivatives.keys()})
setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_likelihood_third_derivatives[key], 'numpy') for key in self._log_likelihood_third_derivatives.keys()})
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
def parameters_changed(self):
pass
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
# The way the Laplace approximation is run requires the
# covariance function to compute the true gradient (because it
# is dependent on the mode). This means we actually compute
# the gradient outside this object. This function would
# normally ask the object to update its gradients internally,
# but here it provides them externally, because they are
# computed in the inference code. TODO: Thought: How does this
# effect EP? Shouldn't this be done by a separate
# Laplace-approximation specific call?
for grad, theta in zip(grads, self._sp_theta):
parameter = getattr(self, theta.name)
setattr(parameter, 'gradient', grad)
def _arguments_update(self, f, y):
"""Set up argument lists for the derivatives."""
# If we do make use of Theano, then at this point we would
# need to do a lot of precomputation to ensure that the
# likelihoods and gradients are computed together, then check
# for parameter changes before updating.
for i, fvar in enumerate(self._sp_f):
self._arguments[fvar.name] = f
for i, yvar in enumerate(self._sp_y):
self._arguments[yvar.name] = y
for theta in self._sp_theta:
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
def pdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Likelihood function given inverse link of f.
:param inv_link_f: inverse link of latent variables.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
l = self._likelihood_function(**self._arguments)
return np.prod(l)
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Log Likelihood Function given inverse link of latent variables.
:param inv_inv_link_f: latent variables (inverse link of f)
:type inv_inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
ll = self._log_likelihood_function(**self._arguments)
return np.sum(ll)
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of log likelihood with respect to the inverse link function.
:param inv_inv_link_f: latent variables (inverse link of f)
:type inv_inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: gradient of likelihood with respect to each point.
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
return self._first_derivative_code['f'](**self._arguments)
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
:param inv_link_f: inverse link of the latent variables.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Returns diagonal of Hessian, since every where else it is
0, as the likelihood factorizes over cases (the
distribution for y_i depends only on link(f_i) not on
link(f_(j!=i))
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
return self._second_derivative_code['f'](**self._arguments)
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
return self._third_derivative_code['f'](**self._arguments)
raise NotImplementedError
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
return np.asarray([self._first_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta])
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
return np.asarray([self._second_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta])
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
return np.asarray([self._third_derivative_code[theta.name](**self._arguments).sum() for theta in self._sp_theta])
def predictive_mean(self, mu, sigma, Y_metadata=None):
raise NotImplementedError
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
raise NotImplementedError
def conditional_mean(self, gp):
raise NotImplementedError
def conditional_variance(self, gp):
raise NotImplementedError
def samples(self, gp, Y_metadata=None):
raise NotImplementedError