Update of symbolic likelihoods.

This commit is contained in:
Neil Lawrence 2014-03-31 09:18:31 +01:00
parent 60a071f18f
commit 6f86b9b0fa
6 changed files with 319 additions and 134 deletions

View file

@ -3,7 +3,7 @@ from _src.rbf import RBF
from _src.linear import Linear, LinearFull from _src.linear import Linear, LinearFull
from _src.static import Bias, White from _src.static import Bias, White
from _src.brownian import Brownian from _src.brownian import Brownian
from _src.sympykern import Sympykern from _src.symbolic import Symbolic
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52

View file

@ -11,7 +11,7 @@ from kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
class Sympykern(Kern): class Symbolic(Kern):
""" """
A kernel object, where all the hard work is done by sympy. A kernel object, where all the hard work is done by sympy.
@ -26,10 +26,8 @@ class Sympykern(Kern):
- to handle multiple inputs, call them x_1, z_1, etc - to handle multiple inputs, call them x_1, z_1, etc
- to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
""" """
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):
if name is None:
name='sympykern'
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError, "You must provide an argument for the covariance function."
super(Sympykern, self).__init__(input_dim, active_dims, name) super(Sympykern, self).__init__(input_dim, active_dims, name)
@ -60,7 +58,6 @@ class Sympykern(Kern):
# extract parameter names from the covariance # extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs. # Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1: if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
@ -117,6 +114,12 @@ class Sympykern(Kern):
self.arg_list += self._sp_theta_i + self._sp_theta_j self.arg_list += self._sp_theta_i + self._sp_theta_j
self.diag_arg_list += self._sp_theta_i self.diag_arg_list += self._sp_theta_i
# Check if there are additional linear operators on the covariance.
self._sp_operators = operators
# TODO: Deal with linear operators
#if self._sp_operators:
# for operator in self._sp_operators:
# psi_stats aren't yet implemented. # psi_stats aren't yet implemented.
if False: if False:
self.compute_psi_stats() self.compute_psi_stats()
@ -254,3 +257,176 @@ class Sympykern(Kern):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
if False:
class Symcombine(CombinationKernel):
"""
Combine list of given sympy covariances together with the provided operations.
"""
def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations):
self._sp_k += self._k_double_operate(subkern._sp_k, operation)
#def _double_operate(self, k, operation):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Combine covariances with a linear operator.
"""
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1 in self._parameters_:
#compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self._parameters_:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu += a
target_S += b
return target_mu, target_S
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Add, self)._getstate()
def _setstate(self, state):
super(Add, self)._setstate(state)
def add(self, other, name='sum'):
if isinstance(other, Add):
other_params = other._parameters_.copy()
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self

View file

@ -6,3 +6,4 @@ from poisson import Poisson
from student_t import StudentT from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise from mixed_noise import MixedNoise
from symbolic import Symbolic

View file

@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}} p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
.. Note:: .. Note::
Y is expected to take values in {-1, 1} TODO: {0, 1}?? Y takes values in either {-1, 1} or {0, 1}.
link function should have the domain [0, 1], e.g. probit (default) or Heaviside link function should have the domain [0, 1], e.g. probit (default) or Heaviside
.. See also:: .. See also::
@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
""" """
if Y_i == 1: if Y_i == 1:
sign = 1. sign = 1.
elif Y_i == 0: elif Y_i == 0 or Y_i == -1:
sign = -1 sign = -1
else: else:
raise ValueError("bad value for Bernouilli observation (0, 1)") raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i) z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z) Z_hat = std_norm_cdf(z)
@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
else: else:
return np.nan return np.nan
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given inverse link of f.
.. math:: .. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}} p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
@ -113,102 +113,106 @@ class Bernoulli(Likelihood):
.. Note: .. Note:
Each y_i must be in {0, 1} Each y_i must be in {0, 1}
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = (link_f**y) * ((1.-link_f)**(1.-y)) #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
objective = np.where(y, link_f, 1.-link_f) objective = np.where(y, inv_link_f, 1.-inv_link_f)
return np.exp(np.sum(np.log(objective))) return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Log Likelihood function given link(f) Log Likelihood function given inverse link of f.
.. math:: .. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i})) \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i}))
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: log likelihood evaluated at points link(f) :returns: log likelihood evaluated at points inverse link of f.
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f) #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) # TODO check y \in {0, 1} or {-1, 1}
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
np.seterr(**state) np.seterr(**state)
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the pdf at y, given link(f) w.r.t link(f) Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
.. math:: .. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))} \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#grad = (y/link_f) - (1.-y)/(1-link_f) #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
grad = np.where(y, 1./link_f, -1./(1-link_f)) # TODO check y \in {0, 1} or {-1, 1}
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
np.seterr(**state) np.seterr(**state)
return grad return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
.. math:: .. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array :rtype: Nx1 array
.. Note:: .. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f)) # TODO check y \in {0, 1} or {-1, 1}
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
np.seterr(**state) np.seterr(**state)
return d2logpdf_dlink2 return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}} \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables passed through inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points inverse_link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3)) # TODO check y \in {0, 1} or {-1, 1}
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
np.seterr(**state) np.seterr(**state)
return d3logpdf_dlink3 return d3logpdf_dlink3

View file

@ -16,20 +16,20 @@ class Likelihood(Parameterized):
Likelihood base class, used to define p(y|f). Likelihood base class, used to define p(y|f).
All instances use _inverse_ link functions, which can be swapped out. It is All instances use _inverse_ link functions, which can be swapped out. It is
expected that inherriting classes define a default inverse link function expected that inheriting classes define a default inverse link function
To use this class, inherrit and define missing functionality. To use this class, inherit and define missing functionality.
Inherriting classes *must* implement: Inheriting classes *must* implement:
pdf_link : a bound method which turns the output of the link function into the pdf pdf_link : a bound method which turns the output of the link function into the pdf
logpdf_link : the logarithm of the above logpdf_link : the logarithm of the above
To enable use with EP, inherriting classes *must* define: To enable use with EP, inheriting classes *must* define:
TODO: a suitable derivative function for any parameters of the class TODO: a suitable derivative function for any parameters of the class
It is also desirable to define: It is also desirable to define:
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature. moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
To enable use with Laplace approximation, inherriting classes *must* define: To enable use with Laplace approximation, inheriting classes *must* define:
Some derivative functions *AS TODO* Some derivative functions *AS TODO*
For exact Gaussian inference, define *JH TODO* For exact Gaussian inference, define *JH TODO*
@ -159,7 +159,7 @@ class Likelihood(Parameterized):
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
""" """
Numerical approximation to the predictive variance: V(Y_star) Approximation to the predictive variance: V(Y_star)
The following variance decomposition is used: The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
@ -208,28 +208,28 @@ class Likelihood(Parameterized):
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
return exp_var + var_exp return exp_var + var_exp
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def pdf(self, f, y, Y_metadata=None): def pdf(self, f, y, Y_metadata=None):
@ -247,8 +247,8 @@ class Likelihood(Parameterized):
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, Y_metadata=Y_metadata) return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def logpdf(self, f, y, Y_metadata=None): def logpdf(self, f, y, Y_metadata=None):
""" """
@ -265,8 +265,8 @@ class Likelihood(Parameterized):
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata) return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, Y_metadata=None): def dlogpdf_df(self, f, y, Y_metadata=None):
""" """
@ -284,8 +284,8 @@ class Likelihood(Parameterized):
:returns: derivative of log likelihood evaluated for this point :returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df) return chain_1(dlogpdf_dlink, dlink_df)
@ -305,10 +305,10 @@ class Likelihood(Parameterized):
:returns: second derivative of log likelihood evaluated for this point (diagonal only) :returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
@ -328,12 +328,12 @@ class Likelihood(Parameterized):
:returns: third derivative of log likelihood evaluated for this point :returns: third derivative of log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata) d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f) d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
@ -342,10 +342,10 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata) return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([1, 0]) return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
@ -353,12 +353,12 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df) return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
@ -366,14 +366,14 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, Y_metadata=None): def _laplace_gradients(self, f, y, Y_metadata=None):
@ -411,7 +411,10 @@ class Likelihood(Parameterized):
#compute the quantiles by sampling!!! #compute the quantiles by sampling!!!
N_samp = 1000 N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata) ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]

View file

@ -26,8 +26,8 @@ class StudentT(Likelihood):
gp_link = link_functions.Identity() gp_link = link_functions.Identity()
super(StudentT, self).__init__(gp_link, name='Student_T') super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_noise', float(sigma2), Logexp()) self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free)) self.v = Param('deg_free', float(deg_free))
self.add_parameter(self.sigma2) self.add_parameter(self.sigma2)
self.add_parameter(self.v) self.add_parameter(self.v)
@ -46,23 +46,23 @@ class StudentT(Likelihood):
self.sigma2.gradient = grads[0] self.sigma2.gradient = grads[0]
self.v.gradient = grads[1] self.v.gradient = grads[1]
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
.. math:: .. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
#Careful gamma(big_number) is infinity! #Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2))) / (np.sqrt(self.v * np.pi * self.sigma2)))
@ -70,15 +70,15 @@ class StudentT(Likelihood):
) )
return np.prod(objective) return np.prod(objective)
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
.. math:: .. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param link_f: latent variables (link(f)) :param inv_link_f: latent variables (link(f))
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@ -86,11 +86,11 @@ class StudentT(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
#FIXME: #FIXME:
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?! #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly #But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
@ -99,15 +99,15 @@ class StudentT(Likelihood):
) )
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param link_f: latent variables (f) :param inv_link_f: latent variables (f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@ -115,12 +115,12 @@ class StudentT(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -129,8 +129,8 @@ class StudentT(Likelihood):
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inv_link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@ -141,90 +141,90 @@ class StudentT(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3} \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
) )
return d3lik_dlink3 return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar) return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param link_f: latent variables link_f :param inv_link_f: latent variables inv_link_f
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
) )
@ -246,11 +246,12 @@ class StudentT(Likelihood):
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
return self.gp_link.transf(mu) # only true in link is monotoci, which it is. # The comment here confuses mean and median.
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
if self.deg_free <2.: if self.deg_free<=2.:
return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
else: else:
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata) return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)