mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
Added binomial likelihood
Also some changes to pass through Y_metadata, where it had previously been (errorneously) omitted.
This commit is contained in:
parent
89b8b0d298
commit
48821a6b73
5 changed files with 133 additions and 14 deletions
|
|
@ -43,7 +43,7 @@ class SVGP(LatentFunctionInference):
|
||||||
|
|
||||||
|
|
||||||
#quadrature for the likelihood
|
#quadrature for the likelihood
|
||||||
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v)
|
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)
|
||||||
|
|
||||||
#rescale the F term if working on a batch
|
#rescale the F term if working on a batch
|
||||||
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
|
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
|
||||||
|
|
|
||||||
|
|
@ -6,3 +6,4 @@ from poisson import Poisson
|
||||||
from student_t import StudentT
|
from student_t import StudentT
|
||||||
from likelihood import Likelihood
|
from likelihood import Likelihood
|
||||||
from mixed_noise import MixedNoise
|
from mixed_noise import MixedNoise
|
||||||
|
from binomial import Binomial
|
||||||
|
|
|
||||||
125
GPy/likelihoods/binomial.py
Normal file
125
GPy/likelihoods/binomial.py
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||||
|
import link_functions
|
||||||
|
from likelihood import Likelihood
|
||||||
|
from scipy import special
|
||||||
|
|
||||||
|
class Binomial(Likelihood):
|
||||||
|
"""
|
||||||
|
Binomial likelihood
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Y takes values in either {-1, 1} or {0, 1}.
|
||||||
|
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
|
||||||
|
|
||||||
|
.. See also::
|
||||||
|
likelihood.py, for the parent class
|
||||||
|
"""
|
||||||
|
def __init__(self, gp_link=None):
|
||||||
|
if gp_link is None:
|
||||||
|
gp_link = link_functions.Probit()
|
||||||
|
|
||||||
|
super(Binomial, self).__init__(gp_link, 'Binomial')
|
||||||
|
|
||||||
|
def conditional_mean(self, gp, Y_metadata):
|
||||||
|
return self.gp_link(gp)*Y_metadata['trials']
|
||||||
|
|
||||||
|
def pdf_link(self, inv_link_f, y, Y_metadata):
|
||||||
|
"""
|
||||||
|
Likelihood function given inverse link of f.
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||||
|
|
||||||
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
|
:type inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata must contain 'trials'
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
|
.. Note:
|
||||||
|
Each y_i must be in {0, 1}
|
||||||
|
"""
|
||||||
|
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata))
|
||||||
|
|
||||||
|
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Log Likelihood function given inverse link of f.
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
||||||
|
|
||||||
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
|
:type inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata must contain 'trials'
|
||||||
|
:returns: log likelihood evaluated at points inverse link of f.
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
N = Y_metadata['trials']
|
||||||
|
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
|
||||||
|
|
||||||
|
return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
|
||||||
|
|
||||||
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
|
:type inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata must contain 'trials'
|
||||||
|
:returns: gradient of log likelihood evaluated at points inverse link of f.
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
N = Y_metadata['trials']
|
||||||
|
return y/inv_link_f - (N-y)/(1-inv_link_f)
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
|
||||||
|
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
|
||||||
|
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
||||||
|
|
||||||
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
|
:type inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata not used in binomial
|
||||||
|
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
|
||||||
|
"""
|
||||||
|
N = Y_metadata['trials']
|
||||||
|
return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f)
|
||||||
|
|
||||||
|
def samples(self, gp, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
|
:param gp: latent variable
|
||||||
|
"""
|
||||||
|
orig_shape = gp.shape
|
||||||
|
gp = gp.flatten()
|
||||||
|
N = Y_metadata['trials']
|
||||||
|
Ysim = np.random.binomial(N, self.gp_link.transf(gp))
|
||||||
|
return Ysim.reshape(orig_shape)
|
||||||
|
|
||||||
|
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
||||||
|
pass
|
||||||
|
|
@ -131,7 +131,7 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
return z, mean, variance
|
return z, mean, variance
|
||||||
|
|
||||||
def variational_expectations(self, Y, m, v, gh_points=None):
|
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Use Gauss-Hermite Quadrature to compute
|
Use Gauss-Hermite Quadrature to compute
|
||||||
|
|
||||||
|
|
@ -158,9 +158,9 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
|
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
|
||||||
# broadcast needs to be handled carefully.
|
# broadcast needs to be handled carefully.
|
||||||
logp = self.logpdf(X,Y[:,None])
|
logp = self.logpdf(X,Y[:,None], Y_metadata=Y_metadata)
|
||||||
dlogp_dx = self.dlogpdf_df(X, Y[:,None])
|
dlogp_dx = self.dlogpdf_df(X, Y[:,None], Y_metadata=Y_metadata)
|
||||||
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
|
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None], Y_metadata=Y_metadata)
|
||||||
|
|
||||||
#clipping for numerical stability
|
#clipping for numerical stability
|
||||||
#logp = np.clip(logp,-1e9,1e9)
|
#logp = np.clip(logp,-1e9,1e9)
|
||||||
|
|
|
||||||
|
|
@ -64,8 +64,7 @@ class Poisson(Likelihood):
|
||||||
:rtype: float
|
:rtype: float
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
return -link_f + y*np.log(link_f) - special.gammaln(y+1)
|
||||||
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
|
|
||||||
|
|
||||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -83,7 +82,6 @@ class Poisson(Likelihood):
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
|
||||||
return y/link_f - 1
|
return y/link_f - 1
|
||||||
|
|
||||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||||
|
|
@ -107,12 +105,7 @@ class Poisson(Likelihood):
|
||||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
return -y/(link_f**2)
|
||||||
hess = -y/(link_f**2)
|
|
||||||
return hess
|
|
||||||
#d2_df = self.gp_link.d2transf_df2(gp)
|
|
||||||
#transf = self.gp_link.transf(gp)
|
|
||||||
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
|
|
||||||
|
|
||||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue