mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-30 14:35:15 +02:00
Update of symbolic likelihoods.
This commit is contained in:
parent
60a071f18f
commit
6f86b9b0fa
6 changed files with 319 additions and 134 deletions
|
|
@ -6,3 +6,4 @@ from poisson import Poisson
|
|||
from student_t import StudentT
|
||||
from likelihood import Likelihood
|
||||
from mixed_noise import MixedNoise
|
||||
from symbolic import Symbolic
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
|
|||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||
|
||||
.. Note::
|
||||
Y is expected to take values in {-1, 1} TODO: {0, 1}??
|
||||
Y takes values in either {-1, 1} or {0, 1}.
|
||||
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
|
||||
|
||||
.. See also::
|
||||
|
|
@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
|
|||
"""
|
||||
if Y_i == 1:
|
||||
sign = 1.
|
||||
elif Y_i == 0:
|
||||
elif Y_i == 0 or Y_i == -1:
|
||||
sign = -1
|
||||
else:
|
||||
raise ValueError("bad value for Bernouilli observation (0, 1)")
|
||||
raise ValueError("bad value for Bernoulli observation (0, 1)")
|
||||
if isinstance(self.gp_link, link_functions.Probit):
|
||||
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
|
||||
Z_hat = std_norm_cdf(z)
|
||||
|
|
@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
|
|||
else:
|
||||
return np.nan
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
|
|
@ -113,102 +113,106 @@ class Bernoulli(Likelihood):
|
|||
.. Note:
|
||||
Each y_i must be in {0, 1}
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = (link_f**y) * ((1.-link_f)**(1.-y))
|
||||
objective = np.where(y, link_f, 1.-link_f)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
|
||||
objective = np.where(y, inv_link_f, 1.-inv_link_f)
|
||||
return np.exp(np.sum(np.log(objective)))
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood function given link(f)
|
||||
Log Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: log likelihood evaluated at points link(f)
|
||||
:returns: log likelihood evaluated at points inverse link of f.
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
|
||||
state = np.seterr(divide='ignore')
|
||||
objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
|
||||
np.seterr(**state)
|
||||
return np.sum(objective)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the pdf at y, given link(f) w.r.t link(f)
|
||||
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: gradient of log likelihood evaluated at points link(f)
|
||||
:returns: gradient of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#grad = (y/link_f) - (1.-y)/(1-link_f)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
|
||||
state = np.seterr(divide='ignore')
|
||||
grad = np.where(y, 1./link_f, -1./(1-link_f))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
|
||||
np.seterr(**state)
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
|
||||
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
|
||||
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
|
||||
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
|
||||
state = np.seterr(divide='ignore')
|
||||
d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
|
||||
np.seterr(**state)
|
||||
return d2logpdf_dlink2
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables passed through inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: third derivative of log likelihood evaluated at points link(f)
|
||||
:returns: third derivative of log likelihood evaluated at points inverse_link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
|
||||
state = np.seterr(divide='ignore')
|
||||
d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
|
||||
np.seterr(**state)
|
||||
return d3logpdf_dlink3
|
||||
|
||||
|
|
|
|||
|
|
@ -16,20 +16,20 @@ class Likelihood(Parameterized):
|
|||
Likelihood base class, used to defing p(y|f).
|
||||
|
||||
All instances use _inverse_ link functions, which can be swapped out. It is
|
||||
expected that inherriting classes define a default inverse link function
|
||||
expected that inheriting classes define a default inverse link function
|
||||
|
||||
To use this class, inherrit and define missing functionality.
|
||||
To use this class, inherit and define missing functionality.
|
||||
|
||||
Inherriting classes *must* implement:
|
||||
Inheriting classes *must* implement:
|
||||
pdf_link : a bound method which turns the output of the link function into the pdf
|
||||
logpdf_link : the logarithm of the above
|
||||
|
||||
To enable use with EP, inherriting classes *must* define:
|
||||
To enable use with EP, inheriting classes *must* define:
|
||||
TODO: a suitable derivative function for any parameters of the class
|
||||
It is also desirable to define:
|
||||
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
|
||||
|
||||
To enable use with Laplace approximation, inherriting classes *must* define:
|
||||
To enable use with Laplace approximation, inheriting classes *must* define:
|
||||
Some derivative functions *AS TODO*
|
||||
|
||||
For exact Gaussian inference, define *JH TODO*
|
||||
|
|
@ -159,7 +159,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
"""
|
||||
Numerical approximation to the predictive variance: V(Y_star)
|
||||
Approximation to the predictive variance: V(Y_star)
|
||||
|
||||
The following variance decomposition is used:
|
||||
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||
|
|
@ -208,28 +208,28 @@ class Likelihood(Parameterized):
|
|||
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
|
||||
return exp_var + var_exp
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def pdf(self, f, y, Y_metadata=None):
|
||||
|
|
@ -247,8 +247,8 @@ class Likelihood(Parameterized):
|
|||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def logpdf(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -265,8 +265,8 @@ class Likelihood(Parameterized):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def dlogpdf_df(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -284,8 +284,8 @@ class Likelihood(Parameterized):
|
|||
:returns: derivative of log likelihood evaluated for this point
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
|
||||
|
|
@ -305,10 +305,10 @@ class Likelihood(Parameterized):
|
|||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
|
||||
|
|
@ -328,12 +328,12 @@ class Likelihood(Parameterized):
|
|||
:returns: third derivative of log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
|
||||
|
|
@ -342,10 +342,10 @@ class Likelihood(Parameterized):
|
|||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([1, 0])
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
||||
|
|
@ -353,12 +353,12 @@ class Likelihood(Parameterized):
|
|||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
||||
|
|
@ -366,14 +366,14 @@ class Likelihood(Parameterized):
|
|||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||
|
|
@ -411,7 +411,10 @@ class Likelihood(Parameterized):
|
|||
#compute the quantiles by sampling!!!
|
||||
N_samp = 1000
|
||||
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
|
||||
#ss_f = s.flatten()
|
||||
#ss_y = self.samples(ss_f, Y_metadata)
|
||||
ss_y = self.samples(s, Y_metadata)
|
||||
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
|
||||
|
||||
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
|
||||
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ class StudentT(Likelihood):
|
|||
gp_link = link_functions.Identity()
|
||||
|
||||
super(StudentT, self).__init__(gp_link, name='Student_T')
|
||||
|
||||
self.sigma2 = Param('t_noise', float(sigma2), Logexp())
|
||||
# sigma2 is not a noise parameter, it is a squared scale.
|
||||
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
|
||||
self.v = Param('deg_free', float(deg_free))
|
||||
self.add_parameter(self.sigma2)
|
||||
self.add_parameter(self.v)
|
||||
|
|
@ -46,23 +46,23 @@ class StudentT(Likelihood):
|
|||
self.sigma2.gradient = grads[0]
|
||||
self.v.gradient = grads[1]
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#Careful gamma(big_number) is infinity!
|
||||
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
||||
/ (np.sqrt(self.v * np.pi * self.sigma2)))
|
||||
|
|
@ -70,15 +70,15 @@ class StudentT(Likelihood):
|
|||
)
|
||||
return np.prod(objective)
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables (link(f))
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
|
|
@ -86,11 +86,11 @@ class StudentT(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#FIXME:
|
||||
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
|
||||
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
#But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
|
||||
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||
- gammaln(self.v * 0.5)
|
||||
|
|
@ -99,15 +99,15 @@ class StudentT(Likelihood):
|
|||
)
|
||||
return np.sum(objective)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
|
||||
:param link_f: latent variables (f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables (f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
|
|
@ -115,12 +115,12 @@ class StudentT(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link(f), w.r.t link(f)
|
||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
|
@ -129,8 +129,8 @@ class StudentT(Likelihood):
|
|||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inv_link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
|
|
@ -141,90 +141,90 @@ class StudentT(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||
((e**2 + self.sigma2*self.v)**3)
|
||||
)
|
||||
return d3lik_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||
return np.sum(dlogpdf_dvar)
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
|
||||
:param link_f: latent variables link_f
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inv_link_f
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
||||
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||
/ ((self.sigma2*self.v + (e**2))**3)
|
||||
)
|
||||
|
|
@ -246,11 +246,12 @@ class StudentT(Likelihood):
|
|||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
return self.gp_link.transf(mu) # only true in link is monotoci, which it is.
|
||||
# The comment here confuses mean and median.
|
||||
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
if self.deg_free <2.:
|
||||
return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom
|
||||
if self.deg_free<=2.:
|
||||
return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
|
||||
else:
|
||||
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue