mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-08 19:42:39 +02:00
some more messing with the likelihood directory
This commit is contained in:
parent
daaad3c30c
commit
83a495645d
10 changed files with 1862 additions and 0 deletions
BIN
GPy/likelihoods/.DS_Store
vendored
Normal file
BIN
GPy/likelihoods/.DS_Store
vendored
Normal file
Binary file not shown.
6
GPy/likelihoods/__init__.py
Normal file
6
GPy/likelihoods/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
from bernoulli import Bernoulli
|
||||||
|
from exponential import Exponential
|
||||||
|
from gaussian import Gaussian
|
||||||
|
from gamma import Gamma
|
||||||
|
from poisson import Poisson
|
||||||
|
from student_t import Student_t
|
||||||
221
GPy/likelihoods/bernoulli.py
Normal file
221
GPy/likelihoods/bernoulli.py
Normal file
|
|
@ -0,0 +1,221 @@
|
||||||
|
# Copyright (c) 2012, 2013 The GPy authors
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from likelihood import Likelihood
|
||||||
|
|
||||||
|
class Bernoulli(Likelihood):
    """
    Bernoulli likelihood

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}

    .. Note::
        The density methods (pdf_link, logpdf_link and their derivatives)
        expect y in {0,1}. _preprocess_values checks for {0,1} labels and
        returns a copy recoded to {-1,1}; EP moment matching accepts either
        coding of the negative class.
        Probit likelihood usually used
    """
    def __init__(self, gp_link=None, analytical_mean=False, analytical_variance=False):
        super(Bernoulli, self).__init__(gp_link, analytical_mean, analytical_variance)
        # Log-concavity is only flagged for links passed in explicitly.
        if isinstance(gp_link, (gp_transformations.Heaviside, gp_transformations.Probit)):
            self.log_concave = True

    def _preprocess_values(self, Y):
        """
        Check that the observations are binary and recode them.

        :param Y: observations, every entry must be 0 or 1
        :returns: copy of Y with every 0 replaced by -1

        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        Y_prep = Y.copy()
        # Elementwise masks (rather than a flattened mask applied to the first
        # axis) so that multi-column Y is handled as well.
        positives = np.count_nonzero(Y == 1)
        negatives = np.count_nonzero(Y == 0)
        assert positives + negatives == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y == 0] = -1
        return Y_prep

    def _moments_match_analytical(self, data_i, tau_i, v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param data_i: observed label; 1 for the positive class, 0 or -1 for
            the negative class (-1 is what _preprocess_values produces)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        :returns: tuple (Z_hat, mu_hat, sigma2_hat)
        :raises ValueError: for any other label or an unsupported link
        :raises FloatingPointError: if the Heaviside moments contain NaN
        """
        if data_i == 1:
            sign = 1.
        elif data_i == 0 or data_i == -1:
            sign = -1.
        else:
            raise ValueError("bad value for Bernouilli observation (0,1)")

        if isinstance(self.gp_link, gp_transformations.Probit):
            scale = np.sqrt(tau_i**2 + tau_i)
            z = sign*v_i/scale
            Z_hat = std_norm_cdf(z)
            phi = std_norm_pdf(z)
            mu_hat = v_i/tau_i + sign*phi/(Z_hat*scale)
            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)

        elif isinstance(self.gp_link, gp_transformations.Heaviside):
            a = sign*v_i/np.sqrt(tau_i)
            Z_hat = std_norm_cdf(a)
            N = std_norm_pdf(a)
            mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
            sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
            if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
                # Previously an undefined name was evaluated here, which
                # surfaced as an unrelated NameError.
                raise FloatingPointError("NaN encountered in Heaviside moment matching")
        else:
            raise ValueError("Exact moment matching not available for link {}".format(type(self.gp_link).__name__))

        return Z_hat, mu_hat, sigma2_hat

    def _predictive_mean_analytical(self, mu, variance):
        """
        Closed-form predictive mean for the Probit and Heaviside links.

        :param mu: mean of the latent Gaussian
        :param variance: variance of the latent Gaussian
        :raises NotImplementedError: for any other link
        """
        if isinstance(self.gp_link, gp_transformations.Probit):
            return stats.norm.cdf(mu/np.sqrt(1+variance))

        elif isinstance(self.gp_link, gp_transformations.Heaviside):
            return stats.norm.cdf(mu/np.sqrt(variance))

        else:
            raise NotImplementedError

    def _predictive_variance_analytical(self, mu, variance, pred_mean):
        """
        Closed-form predictive variance; only available for the Heaviside
        link, for which 0. is returned.

        :raises NotImplementedError: for any other link
        """
        if isinstance(self.gp_link, gp_transformations.Heaviside):
            return 0.
        else:
            raise NotImplementedError

    def pdf_link(self, link_f, y, extra_data=None):
        """
        Likelihood function given link(f)

        .. math::
            p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in bernoulli
        :returns: likelihood evaluated for this point
        :rtype: float

        .. Note:
            Each y_i must be in {0,1}
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        objective = (link_f**y) * ((1.-link_f)**(1.-y))
        # Product over points, accumulated in log space.
        return np.exp(np.sum(np.log(objective)))

    def logpdf_link(self, link_f, y, extra_data=None):
        """
        Log Likelihood function given link(f)

        .. math::
            \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i}))

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in bernoulli
        :returns: log likelihood evaluated at points link(f)
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        # Any label other than 1 is treated as the negative class.
        objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
        return np.sum(objective)

    def dlogpdf_dlink(self, link_f, y, extra_data=None):
        """
        Gradient of the log pdf at y, given link(f) w.r.t link(f)

        .. math::
            \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in bernoulli
        :returns: gradient of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        grad = (y/link_f) - (1.-y)/(1-link_f)
        return grad

    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
        """
        Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
        i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)

        .. math::
            \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in bernoulli
        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
        :rtype: Nx1 array

        .. Note::
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
        return d2logpdf_dlink2

    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

        .. math::
            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data not used in bernoulli
        :returns: third derivative of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
        return d3logpdf_dlink3

    def _mean(self, gp):
        """
        Mean of y given the latent value: the link function applied to gp.
        """
        return self.gp_link.transf(gp)

    def _variance(self, gp):
        """
        Variance of y given the latent value: p*(1-p) with p the link
        function applied to gp.
        """
        p = self.gp_link.transf(gp)
        return p*(1.-p)

    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        :returns: one binomial draw (single trial) per latent value, in the shape of gp
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        ns = np.ones_like(gp, dtype=int)
        Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
        return Ysim.reshape(orig_shape)
|
||||||
156
GPy/likelihoods/exponential.py
Normal file
156
GPy/likelihoods/exponential.py
Normal file
|
|
@ -0,0 +1,156 @@
|
||||||
|
# Copyright (c) 2012, 2013 GPy Authors
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from likelihood import Likelihood
|
||||||
|
|
||||||
|
class Exponential(Likelihood):
    """
    Exponential likelihood, parameterised by its rate

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y_{i}\\lambda(f_{i}))

    The transformed latent value link(f) is used as the rate throughout
    (see pdf_link and samples).
    """
    # NOTE: the base class was previously spelled NoiseDistribution, a name
    # this module never imports; Likelihood is the class imported above.
    def __init__(self, gp_link=None, analytical_mean=False, analytical_variance=False):
        super(Exponential, self).__init__(gp_link, analytical_mean, analytical_variance)

    def _preprocess_values(self, Y):
        """Observations are used unchanged."""
        return Y

    def pdf_link(self, link_f, y, extra_data=None):
        """
        Likelihood function given link(f)

        .. math::
            p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y_{i}\\lambda(f_{i}))

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in exponential distribution
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        objective = link_f*np.exp(-y*link_f)
        # Product over points, accumulated in log space.
        return np.exp(np.sum(np.log(objective)))

    def logpdf_link(self, link_f, y, extra_data=None):
        """
        Log Likelihood Function given link(f)

        .. math::
            \\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})

        :param link_f: latent variables (link(f))
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in exponential distribution
        :returns: log likelihood summed over the points
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        log_objective = np.log(link_f) - y*link_f
        return np.sum(log_objective)

    def dlogpdf_dlink(self, link_f, y, extra_data=None):
        """
        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

        .. math::
            \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}

        :param link_f: latent variables (f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in exponential distribution
        :returns: gradient of log likelihood evaluated at points
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        grad = 1./link_f - y
        return grad

    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
        The hessian will be 0 unless i == j

        .. math::
            \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in exponential distribution
        :returns: Diagonal of hessian matrix (second derivative of log likelihood evaluated at points f)
        :rtype: Nx1 array

        .. Note::
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        hess = -1./(link_f**2)
        return hess

    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

        .. math::
            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in exponential distribution
        :returns: third derivative of log likelihood evaluated at points f
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3lik_dlink3 = 2./(link_f**3)
        return d3lik_dlink3

    def _mean(self, gp):
        """
        Mean of y given the latent value.

        pdf_link and samples both treat link(f) as the rate, so the mean is
        its reciprocal (previously the rate itself was returned).
        """
        return 1./self.gp_link.transf(gp)

    def _variance(self, gp):
        """
        Variance of y given the latent value: the squared reciprocal of the
        rate link(f), consistent with pdf_link and samples.
        """
        return 1./self.gp_link.transf(gp)**2

    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        :returns: one exponential draw per latent value, in the shape of gp
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        # numpy takes the scale, i.e. the reciprocal of the rate.
        Ysim = np.random.exponential(1.0/self.gp_link.transf(gp))
        return Ysim.reshape(orig_shape)
|
||||||
155
GPy/likelihoods/gamma.py
Normal file
155
GPy/likelihoods/gamma.py
Normal file
|
|
@ -0,0 +1,155 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from likelihood import Likelihood
|
||||||
|
|
||||||
|
class Gamma(Likelihood):
    """
    Gamma likelihood

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
        \\alpha_{i} = \\beta \\lambda(f_{i})

    :param beta: rate parameter shared by all observations
    """
    # NOTE: the base class was previously spelled NoiseDistribution, a name
    # this module never imports; Likelihood is the class imported above.
    def __init__(self, gp_link=None, analytical_mean=False, analytical_variance=False, beta=1.):
        self.beta = beta
        super(Gamma, self).__init__(gp_link, analytical_mean, analytical_variance)

    def _preprocess_values(self, Y):
        """Observations are used unchanged."""
        return Y

    def pdf_link(self, link_f, y, extra_data=None):
        """
        Likelihood function given link(f)

        .. math::
            p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
            \\alpha_{i} = \\beta \\lambda(f_{i})

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in gamma distribution
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        # Exponentiate the summed log density so that large alpha does not
        # overflow the gamma function.
        return np.exp(self.logpdf_link(link_f, y, extra_data=extra_data))

    def logpdf_link(self, link_f, y, extra_data=None):
        """
        Log Likelihood Function given link(f)

        .. math::
            \\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
            \\alpha_{i} = \\beta \\lambda(f_{i})

        :param link_f: latent variables (link(f))
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in gamma distribution
        :returns: log likelihood summed over the points
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        alpha = link_f*self.beta
        # gammaln evaluates log Gamma(alpha) directly; taking the log of
        # special.gamma overflows to inf once alpha is large.
        log_objective = alpha*np.log(self.beta) - special.gammaln(alpha) + (alpha - 1)*np.log(y) - self.beta*y
        return np.sum(log_objective)

    def dlogpdf_dlink(self, link_f, y, extra_data=None):
        """
        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

        .. math::
            \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta \\log (\\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\
            \\alpha_{i} = \\beta \\lambda(f_{i})

        :param link_f: latent variables (f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in gamma distribution
        :returns: gradient of log likelihood evaluated at points
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
        return grad

    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
        The hessian will be 0 unless i == j

        .. math::
            \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
            \\alpha_{i} = \\beta \\lambda(f_{i})

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in gamma distribution
        :returns: Diagonal of hessian matrix (second derivative of log likelihood evaluated at points f)
        :rtype: Nx1 array

        .. Note::
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
        return hess

    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

        .. math::
            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}^{2}}\\\\
            \\alpha_{i} = \\beta \\lambda(f_{i})

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra_data which is not used in gamma distribution
        :returns: third derivative of log likelihood evaluated at points f
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
        return d3lik_dlink3

    def _mean(self, gp):
        """
        Mean of y given the latent value: alpha/beta, i.e. link(f).
        """
        return self.gp_link.transf(gp)

    def _variance(self, gp):
        """
        Variance of y given the latent value: alpha/beta^2, i.e. link(f)/beta.
        """
        return self.gp_link.transf(gp)/self.beta
|
||||||
300
GPy/likelihoods/gaussian.py
Normal file
300
GPy/likelihoods/gaussian.py
Normal file
|
|
@ -0,0 +1,300 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from likelihood import Likelihood
|
||||||
|
|
||||||
|
class Gaussian(NoiseDistribution):
|
||||||
|
"""
|
||||||
|
Gaussian likelihood
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||||
|
|
||||||
|
:param variance: variance value of the Gaussian distribution
|
||||||
|
:param N: Number of data points
|
||||||
|
:type N: int
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
    """
    Set up the Gaussian likelihood.

    :param gp_link: link function object, forwarded to the parent constructor
    :param analytical_mean: forwarded to the parent constructor
    :param analytical_variance: forwarded to the parent constructor
    :param variance: noise variance of the Gaussian
    :param D: not used by this constructor
    :param N: number of data points; _set_params uses it to size its cached matrices
    """
    self.variance = variance
    self.N = N
    # _set_params reads self.N and re-stores the variance as a float, so
    # self.N has to be assigned before this call.
    self._set_params(np.asarray(variance))
    super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
    # Only flagged when an Identity link instance is passed in explicitly.
    if isinstance(gp_link , gp_transformations.Identity):
        self.log_concave = True
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.array([self.variance])
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return ['noise_model_variance']
|
||||||
|
|
||||||
|
def _set_params(self, p):
|
||||||
|
self.variance = float(p)
|
||||||
|
self.I = np.eye(self.N)
|
||||||
|
self.covariance_matrix = self.I * self.variance
|
||||||
|
self.Ki = self.I*(1.0 / self.variance)
|
||||||
|
#self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
|
||||||
|
self.ln_det_K = self.N*np.log(self.variance)
|
||||||
|
|
||||||
|
def _gradients(self,partial):
|
||||||
|
return np.zeros(1)
|
||||||
|
#return np.sum(partial)
|
||||||
|
|
||||||
|
def _preprocess_values(self,Y):
|
||||||
|
"""
|
||||||
|
Check if the values of the observations correspond to the values
|
||||||
|
assumed by the likelihood function.
|
||||||
|
"""
|
||||||
|
return Y
|
||||||
|
|
||||||
|
def _moments_match_analytical(self,data_i,tau_i,v_i):
|
||||||
|
"""
|
||||||
|
Moments match of the marginal approximation in EP algorithm
|
||||||
|
|
||||||
|
:param i: number of observation (int)
|
||||||
|
:param tau_i: precision of the cavity distribution (float)
|
||||||
|
:param v_i: mean/variance of the cavity distribution (float)
|
||||||
|
"""
|
||||||
|
sigma2_hat = 1./(1./self.variance + tau_i)
|
||||||
|
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
|
||||||
|
sum_var = self.variance + 1./tau_i
|
||||||
|
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
||||||
|
return Z_hat, mu_hat, sigma2_hat
|
||||||
|
|
||||||
|
def _predictive_mean_analytical(self, mu, sigma):
    """
    Closed-form predictive mean.

    :param mu: latent mean
    :param sigma: latent standard deviation (it is squared below)
    :returns: the predictive variance times the sum of mu/sigma**2 and
        link(mu)/variance
    """
    combined_var = self.predictive_variance(mu, sigma)
    latent_term = mu/sigma**2
    linked_term = self.gp_link.transf(mu)/self.variance
    return combined_var*(latent_term + linked_term)
|
||||||
|
|
||||||
|
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
|
||||||
|
return 1./(1./self.variance + 1./sigma**2)
|
||||||
|
|
||||||
|
def _mass(self, link_f, y, extra_data=None):
|
||||||
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use pdf in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
def _nlog_mass(self, link_f, y, extra_data=None):
|
||||||
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
|
||||||
|
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
|
||||||
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
|
||||||
|
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
|
||||||
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
|
||||||
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Likelihood function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data not used in gaussian
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
#Assumes no covariance, exp, sum, log for numerical stability
|
||||||
|
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Log likelihood function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data not used in gaussian
|
||||||
|
:returns: log likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||||
|
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the pdf at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data not used in gaussian
|
||||||
|
:returns: gradient of log likelihood evaluated at points link(f)
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||||
|
s2_i = (1.0/self.variance)
|
||||||
|
grad = s2_i*y - s2_i*link_f
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Hessian at y, given link_f, w.r.t link_f.
|
||||||
|
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
|
||||||
|
|
||||||
|
The hessian will be 0 unless i == j
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data not used in gaussian
|
||||||
|
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
|
"""
|
||||||
|
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||||
|
hess = -(1.0/self.variance)*np.ones((self.N, 1))
|
||||||
|
return hess
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    """
    Third order derivative of the log-likelihood function at y given link(f) w.r.t link(f)

    .. math::
        \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: accepted for interface compatibility, not used here
    :returns: third derivative of log likelihood evaluated at points link(f)
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    # The derivative is identically zero; allocate the N zeros directly
    # instead of taking the diagonal of an N x N zero matrix.
    num_points = np.atleast_1d(y).shape[0]
    return np.zeros((num_points, 1))
|
||||||
|
|
||||||
|
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
    """
    Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)

    .. math::
        \\frac{d \\ln p(y|\\lambda(f))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{\\sum_{i}(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: accepted for interface compatibility, not used here
    :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
    :rtype: float
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    e = y - link_f
    s_4 = 1.0/(self.variance**2)
    # N must be the number of points in *this* call; self.N may be stale
    # or refer to a different data set.
    num_points = np.atleast_1d(y).shape[0]
    dlik_dsigma = -0.5*num_points/self.variance + 0.5*s_4*np.sum(np.square(e))
    # The residuals are already summed above; the outer sum is kept so the
    # return value is reduced to a scalar as before.
    return np.sum(dlik_dsigma)
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
    """
    Derivative of dlogpdf_dlink w.r.t the variance parameter (noise_variance)

    .. math::
        \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: accepted for interface compatibility, not used here
    :returns: derivative of the log-likelihood gradient w.r.t variance parameter
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    # 1/sigma^4
    inv_var_sq = 1.0/(self.variance**2)
    return inv_var_sq*link_f - inv_var_sq*y
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
    """
    Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)

    .. math::
        \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: accepted for interface compatibility, not used here
    :returns: derivative of the hessian diagonal w.r.t variance parameter
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    s_4 = 1.0/(self.variance**2)
    # Every diagonal entry equals 1/sigma^4, so fill an N x 1 column directly
    # instead of scaling an N x N identity and extracting its diagonal.
    num_points = np.atleast_1d(y).shape[0]
    return s_4*np.ones((num_points, 1))
|
||||||
|
|
||||||
|
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
    """
    Gradient of the log-likelihood w.r.t. the likelihood parameters.

    The variance is the only parameter handled here, so the scalar returned
    by dlogpdf_link_dvar is wrapped into a 1x1 array.
    """
    variance_grad = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
    wrapped = [[variance_grad]]
    return np.asarray(wrapped)
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
    """
    Derivative of dlogpdf_dlink w.r.t. the likelihood parameters.

    Delegates to dlogpdf_dlink_dvar and returns its result unchanged.
    """
    return self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
    """
    Derivative of d2logpdf_dlink2 w.r.t. the likelihood parameters.

    Delegates to d2logpdf_dlink2_dvar and returns its result unchanged.
    """
    return self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
def _mean(self,gp):
|
||||||
|
"""
|
||||||
|
Expected value of y under the Mass (or density) function p(y|f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
E_{p(y|f)}[y]
|
||||||
|
"""
|
||||||
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
|
def _variance(self,gp):
|
||||||
|
"""
|
||||||
|
Variance of y under the Mass (or density) function p(y|f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
Var_{p(y|f)}[y]
|
||||||
|
"""
|
||||||
|
return self.variance
|
||||||
|
|
||||||
|
def samples(self, gp):
    """
    Returns a set of samples of observations based on a given value of the latent variable.

    Each latent value is pushed through the link transformation and one
    normal draw with standard deviation sqrt(variance) is taken around it.

    :param gp: latent variable
    :returns: array of draws with the same shape as ``gp``
    """
    target_shape = gp.shape
    draws = []
    # One draw per latent value, in flattened order, so the random stream
    # is consumed element by element.
    for latent in gp.flatten():
        centre = self.gp_link.transf(latent)
        draws.append(np.random.normal(centre, scale=np.sqrt(self.variance), size=1))
    return np.array(draws).reshape(target_shape)
|
||||||
437
GPy/likelihoods/likelihood.py
Normal file
437
GPy/likelihoods/likelihood.py
Normal file
|
|
@ -0,0 +1,437 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
import pylab as pb
|
||||||
|
from GPy.util.plot import gpplot
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from GPy.util.misc import chain_1, chain_2, chain_3
|
||||||
|
from scipy.integrate import quad
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
class Likelihood(object):
    """
    Likelihood base class

    To use this class, inherit from it and define the missing functionality.

    The minimum required functionality is... TODO
    """
    def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
        """
        :param gp_link: transformation applied to the latent function f
        :type gp_link: gp_transformations.GPTransformation
        :param analytical_mean: use the analytical moment matching / predictive mean
        :type analytical_mean: Boolean
        :param analytical_variance: use the analytical predictive variance
        :type analytical_variance: Boolean
        """
        assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
        self.gp_link = gp_link
        self.analytical_mean = analytical_mean
        self.analytical_variance = analytical_variance
        # Bind the analytical or the quadrature based implementations once,
        # so callers only ever use the public attribute names.
        if self.analytical_mean:
            self.moments_match = self._moments_match_analytical
            self.predictive_mean = self._predictive_mean_analytical
        else:
            self.moments_match = self._moments_match_numerical
            self.predictive_mean = self._predictive_mean_numerical
        if self.analytical_variance:
            self.predictive_variance = self._predictive_variance_analytical
        else:
            self.predictive_variance = self._predictive_variance_numerical

        self.log_concave = False

    def _get_params(self):
        """Parameter values; the base class has none."""
        return np.zeros(0)

    def _get_param_names(self):
        """Parameter names; the base class has none."""
        return []

    def _set_params(self,p):
        """Set parameter values; nothing to do in the base class."""
        pass

    def _gradients(self,partial):
        """Gradients w.r.t. the parameters; empty in the base class."""
        return np.zeros(0)

    def _preprocess_values(self,Y):
        """
        In case it is needed, this function assesses the output values or makes any pertinent transformation on them.

        :param Y: observed output
        :type Y: Nx1 numpy.darray

        """
        return Y

    def _moments_match_analytical(self,obs,tau,v):
        """
        If available, this function computes the moments analytically.
        """
        raise NotImplementedError

    def log_predictive_density(self, y_test, mu_star, var_star):
        """
        Calculation of the log predictive density

        .. math::
            p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*}, \\sigma^{2}_{*}) df_{*}

        :param y_test: test observations (y_{*})
        :type y_test: (Nx1) array
        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
        :type mu_star: (Nx1) array
        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
        :type var_star: (Nx1) array
        :returns: log predictive density of every test point
        :rtype: (Nx1) array
        """
        assert y_test.shape==mu_star.shape
        assert y_test.shape==var_star.shape
        assert y_test.shape[1] == 1
        def integral_generator(y, m, v):
            """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
            def f(f_star):
                return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
            return f

        # Only the integral value is needed; taking element [0] per point
        # (instead of unzipping the list) also copes with zero test points.
        scaled_p_ystar = [quad(integral_generator(y, m, v), -np.inf, np.inf)[0]
                          for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())]
        scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
        # Apply the Gaussian normalisation constant left out of the integrand
        p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
        return np.log(p_ystar)

    def _moments_match_numerical(self,obs,tau,v):
        """
        Calculation of moments using quadrature

        :param obs: observed output
        :param tau: cavity distribution 1st natural parameter (precision)
        :param v: cavity distribution 2nd natural parameter (mu*precision)
        :returns: zeroth moment, mean and variance
        """
        mu = v/tau

        def cavity_weight(f):
            # Unnormalised cavity Gaussian; the constant np.sqrt(2*pi/tau)
            # is applied to the zeroth moment at the end of the function.
            return np.exp(-0.5*tau*np.square(mu-f))

        #Compute first integral for zeroth moment.
        def int_1(f):
            return self.pdf(f, obs)*cavity_weight(f)
        z_scaled = quad(int_1, -np.inf, np.inf)[0]

        #Compute second integral for first moment
        def int_2(f):
            return f*self.pdf(f, obs)*cavity_weight(f)
        mean = quad(int_2, -np.inf, np.inf)[0]
        mean /= z_scaled

        #Compute integral for variance
        def int_3(f):
            return (f**2)*self.pdf(f, obs)*cavity_weight(f)
        Ef2 = quad(int_3, -np.inf, np.inf)[0]
        Ef2 /= z_scaled
        variance = Ef2 - mean**2

        #Add constant to the zeroth moment
        #NOTE: this constant is not needed in the other moments because it cancels out.
        z = z_scaled/np.sqrt(2*np.pi/tau)

        return z, mean, variance

    def _predictive_mean_analytical(self,mu,sigma):
        """
        Predictive mean

        .. math::
            E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) )

        If available, this function computes the predictive mean analytically.
        """
        raise NotImplementedError

    def _predictive_variance_analytical(self,mu,sigma):
        """
        Predictive variance

        .. math::
            V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) )

        If available, this function computes the predictive variance analytically.
        """
        raise NotImplementedError

    def _predictive_mean_numerical(self,mu,variance):
        """
        Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )

        :param mu: mean of posterior
        :param variance: variance of posterior
        :returns: predictive mean, one row per entry of mu
        :rtype: Nx1 array
        """
        def int_mean(f,m,v):
            return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m))
        # Ravel so that quad receives plain scalars; iterating an Nx1 array
        # would hand 1-element arrays to the integrand and make it return an
        # array where quad expects a scalar.
        scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0]
                       for mj,s2j in zip(np.ravel(mu),np.ravel(variance))]
        mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*np.reshape(variance, (-1, 1)))

        return mean

    def _predictive_variance_numerical(self,mu,variance,predictive_mean=None):
        """
        Numerical approximation to the predictive variance: V(Y_star)

        The following variance decomposition is used:
        V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )

        :param mu: mean of posterior
        :param variance: variance of posterior
        :predictive_mean: output's predictive mean, if None the predictive_mean function will be called.
        :returns: predictive variance, one row per entry of mu
        :rtype: Nx1 array
        """
        flat_mu = np.ravel(mu)
        flat_var = np.ravel(variance)
        normalizer = np.sqrt(2*np.pi*np.reshape(variance, (-1, 1)))

        # E( V(Y_star|f_star) )
        def int_var(f,m,v):
            return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m))
        scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0]
                               for mj,s2j in zip(flat_mu,flat_var)]
        exp_var = np.array(scaled_exp_variance)[:,None] / normalizer

        #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2

        #E( E(Y_star|f_star) )**2
        if predictive_mean is None:
            predictive_mean = self.predictive_mean(mu,variance)
        predictive_mean_sq = predictive_mean**2

        #E( E(Y_star|f_star)**2 )
        def int_pred_mean_sq(f,m,v):
            return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
        scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j))[0]
                           for mj,s2j in zip(flat_mu,flat_var)]
        exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer

        var_exp = exp_exp2 - predictive_mean_sq

        # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
        return exp_var + var_exp

    def pdf_link(self, link_f, y, extra_data=None):
        """Likelihood of y given link(f); to be provided by subclasses."""
        raise NotImplementedError

    def logpdf_link(self, link_f, y, extra_data=None):
        """Log likelihood of y given link(f); to be provided by subclasses."""
        raise NotImplementedError

    def dlogpdf_dlink(self, link_f, y, extra_data=None):
        """First derivative of the log likelihood w.r.t. link(f); to be provided by subclasses."""
        raise NotImplementedError

    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
        """Second derivative of the log likelihood w.r.t. link(f); to be provided by subclasses."""
        raise NotImplementedError

    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
        """Third derivative of the log likelihood w.r.t. link(f); to be provided by subclasses."""
        raise NotImplementedError

    def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
        """Derivative of the log likelihood w.r.t. the likelihood parameters; to be provided by subclasses."""
        raise NotImplementedError

    def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
        """Derivative of dlogpdf_dlink w.r.t. the likelihood parameters; to be provided by subclasses."""
        raise NotImplementedError

    def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
        """Derivative of d2logpdf_dlink2 w.r.t. the likelihood parameters; to be provided by subclasses."""
        raise NotImplementedError

    def pdf(self, f, y, extra_data=None):
        """
        Evaluates the link function link(f) then computes the likelihood (pdf) using it

        .. math::
            p(y|\\lambda(f))

        :param f: latent variables f
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra data, passed through to pdf_link
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        link_f = self.gp_link.transf(f)
        return self.pdf_link(link_f, y, extra_data=extra_data)

    def logpdf(self, f, y, extra_data=None):
        """
        Evaluates the link function link(f) then computes the log likelihood (log pdf) using it

        .. math::
            \\log p(y|\\lambda(f))

        :param f: latent variables f
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra data, passed through to logpdf_link
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
        link_f = self.gp_link.transf(f)
        return self.logpdf_link(link_f, y, extra_data=extra_data)

    def dlogpdf_df(self, f, y, extra_data=None):
        """
        Evaluates the link function link(f) then computes the derivative of log likelihood using it
        Uses the Faa di Bruno's formula for the chain rule

        .. math::
            \\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}

        :param f: latent variables f
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra data, passed through to dlogpdf_dlink
        :returns: derivative of log likelihood evaluated for this point
        :rtype: 1xN array
        """
        link_f = self.gp_link.transf(f)
        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
        dlink_df = self.gp_link.dtransf_df(f)
        return chain_1(dlogpdf_dlink, dlink_df)

    def d2logpdf_df2(self, f, y, extra_data=None):
        """
        Evaluates the link function link(f) then computes the second derivative of log likelihood using it
        Uses the Faa di Bruno's formula for the chain rule

        .. math::
            \\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}

        :param f: latent variables f
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra data, passed through to the link-space derivatives
        :returns: second derivative of log likelihood evaluated for this point (diagonal only)
        :rtype: 1xN array
        """
        link_f = self.gp_link.transf(f)
        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
        dlink_df = self.gp_link.dtransf_df(f)
        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
        d2link_df2 = self.gp_link.d2transf_df2(f)
        return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)

    def d3logpdf_df3(self, f, y, extra_data=None):
        """
        Evaluates the link function link(f) then computes the third derivative of log likelihood using it
        Uses the Faa di Bruno's formula for the chain rule

        .. math::
            \\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f))}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f))}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}

        :param f: latent variables f
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param extra_data: extra data, passed through to the link-space derivatives
        :returns: third derivative of log likelihood evaluated for this point
        :rtype: float
        """
        link_f = self.gp_link.transf(f)
        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
        dlink_df = self.gp_link.dtransf_df(f)
        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
        d2link_df2 = self.gp_link.d2transf_df2(f)
        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
        d3link_df3 = self.gp_link.d3transf_df3(f)
        return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)

    def dlogpdf_dtheta(self, f, y, extra_data=None):
        """
        Derivative of the log likelihood w.r.t. the likelihood parameters,
        evaluated at link(f). Returns an empty 1x0 array when the likelihood
        has no parameters.
        """
        if len(self._get_param_names()) > 0:
            link_f = self.gp_link.transf(f)
            return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
        else:
            #Is no parameters so return an empty array for its derivatives
            return np.empty([1, 0])

    def dlogpdf_df_dtheta(self, f, y, extra_data=None):
        """
        Derivative of dlogpdf_df w.r.t. the likelihood parameters (chain rule
        through the link function). Returns an empty Nx0 array when the
        likelihood has no parameters.
        """
        if len(self._get_param_names()) > 0:
            link_f = self.gp_link.transf(f)
            dlink_df = self.gp_link.dtransf_df(f)
            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
            return chain_1(dlogpdf_dlink_dtheta, dlink_df)
        else:
            #Is no parameters so return an empty array for its derivatives
            return np.empty([f.shape[0], 0])

    def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
        """
        Derivative of d2logpdf_df2 w.r.t. the likelihood parameters (chain
        rule through the link function). Returns an empty Nx0 array when the
        likelihood has no parameters.
        """
        if len(self._get_param_names()) > 0:
            link_f = self.gp_link.transf(f)
            dlink_df = self.gp_link.dtransf_df(f)
            d2link_df2 = self.gp_link.d2transf_df2(f)
            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
            return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
        else:
            #Is no parameters so return an empty array for its derivatives
            return np.empty([f.shape[0], 0])

    def _laplace_gradients(self, f, y, extra_data=None):
        """
        Collect the three parameter-gradient terms (dlogpdf_dtheta,
        dlogpdf_df_dtheta, d2logpdf_df2_dtheta) and check that each has one
        column per parameter name.
        """
        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)

        #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
        # ensure we have gradients for every parameter we want to optimize
        assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
        assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
        assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
        return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

    def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000):
        """
        Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.

        :param mu: mean of the latent variable, f, of posterior
        :param var: variance of the latent variable, f, of posterior
        :param full_cov: whether to use the full covariance or just the diagonal
        :type full_cov: Boolean
        :param num_samples: number of samples to use in computing quantiles and
                            possibly mean variance
        :type num_samples: integer
        :param sampling: Whether to use samples for mean and variances anyway
        :type sampling: Boolean
        :returns: predictive mean, predictive variance, lower and upper quantile
                  (the quantiles are NaN unless sampling is used)

        """

        if sampling:
            #Get gp_samples f* using posterior mean and variance
            if not full_cov:
                gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
                                                            size=num_samples).T
            else:
                gp_samples = np.random.multivariate_normal(mu.flatten(), var,
                                                            size=num_samples).T
            #Push gp samples (f*) through likelihood to give p(y*|f*)
            samples = self.samples(gp_samples)
            axis=-1

            #Calculate mean, variance and percentiles from samples
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("WARNING: Using sampling to calculate mean, variance and predictive quantiles.")
            pred_mean = np.mean(samples, axis=axis)[:,None]
            pred_var = np.var(samples, axis=axis)[:,None]
            q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
            q3 = np.percentile(samples, 97.5, axis=axis)[:,None]

        else:

            pred_mean = self.predictive_mean(mu, var)
            pred_var = self.predictive_variance(mu, var, pred_mean)
            print("WARNING: Predictive quantiles are only computed when sampling.")
            q1 = np.repeat(np.nan,pred_mean.size)[:,None]
            q3 = q1.copy()

        return pred_mean, pred_var, q1, q3

    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        """
        raise NotImplementedError
|
||||||
158
GPy/likelihoods/link_functions.py
Normal file
158
GPy/likelihoods/link_functions.py
Normal file
|
|
@ -0,0 +1,158 @@
|
||||||
|
# Copyright (c) 2012, 2013 The GPy authors
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats
|
||||||
|
import scipy as sp
|
||||||
|
import pylab as pb
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
|
||||||
|
|
||||||
|
class GPTransformation(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation

    Base class: subclasses supply the transformation of the latent function
    and its first three derivatives.

    .. note:: Y values allowed depend on the likelihood_function used

    """
    def __init__(self):
        pass

    def transf(self,f):
        """
        Gaussian process transformation function, latent space -> output space
        """
        raise NotImplementedError()

    def dtransf_df(self,f):
        """
        First derivative of transf(f) with respect to f
        """
        raise NotImplementedError()

    def d2transf_df2(self,f):
        """
        Second derivative of transf(f) with respect to f
        """
        raise NotImplementedError()

    def d3transf_df3(self,f):
        """
        Third derivative of transf(f) with respect to f
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
|
class Identity(GPTransformation):
    """
    Identity link: the latent value is used unchanged.

    .. math::

        g(f) = f

    """
    def transf(self,f):
        """Return f unchanged."""
        return f

    def dtransf_df(self,f):
        """First derivative: ones with the shape and dtype of f."""
        first = np.ones_like(f)
        return first

    def d2transf_df2(self,f):
        """Second derivative: zeros with the shape and dtype of f."""
        second = np.zeros_like(f)
        return second

    def d3transf_df3(self,f):
        """Third derivative: zeros with the shape and dtype of f."""
        third = np.zeros_like(f)
        return third
|
||||||
|
|
||||||
|
|
||||||
|
class Probit(GPTransformation):
    """
    Probit link.

    .. math::

        g(f) = \\Phi^{-1} (mu)

    transf applies std_norm_cdf to the latent value and dtransf_df applies
    std_norm_pdf (presumably the standard normal CDF and PDF; both are
    imported from GPy.util.univariate_Gaussian).

    """
    def transf(self,f):
        """Latent value pushed through std_norm_cdf."""
        return std_norm_cdf(f)

    def dtransf_df(self,f):
        """First derivative of transf: std_norm_pdf(f)."""
        return std_norm_pdf(f)

    def d2transf_df2(self,f):
        """Second derivative of transf: -f * std_norm_pdf(f)."""
        #FIXME
        density = std_norm_pdf(f)
        return -f * density

    def d3transf_df3(self,f):
        """Third derivative of transf: -(1 - f^2) * exp(-f^2/2) / sqrt(2*pi)."""
        #FIXME
        f_sq = f**2
        norm_const = 1/(np.sqrt(2*np.pi))
        return -norm_const*np.exp(-0.5*(f_sq))*(1-f_sq)
|
||||||
|
|
||||||
|
class Log(GPTransformation):
    """
    Log link: the transformation applied to the latent value is exp(f).

    .. math::

        g(f) = \\log(\\mu)

    """
    def transf(self,f):
        """exp(f)."""
        return np.exp(f)

    def dtransf_df(self,f):
        """First derivative of exp(f), which is exp(f)."""
        return np.exp(f)

    def d2transf_df2(self,f):
        """Second derivative of exp(f), which is exp(f)."""
        return np.exp(f)

    def d3transf_df3(self,f):
        """Third derivative of exp(f), which is exp(f)."""
        return np.exp(f)
|
||||||
|
|
||||||
|
class Log_ex_1(GPTransformation):
    """
    Softplus transformation, transf(f) = log(1 + exp(f)).

    .. math::

        g(f) = \\log(\\exp(\\mu) - 1)

    All methods are evaluated through np.logaddexp so that large |f| does
    not overflow exp(f) (which previously produced inf from transf and nan
    from the derivatives).

    """
    def _sigmoid(self,f):
        # exp(f)/(1+exp(f)) written as exp(-log(1+exp(-f))): stays finite
        # for large positive f and underflows to 0 for large negative f.
        return np.exp(-np.logaddexp(0., -f))

    def transf(self,f):
        """log(1 + exp(f)), computed as logaddexp(0, f)."""
        return np.logaddexp(0., f)

    def dtransf_df(self,f):
        """First derivative: exp(f)/(1 + exp(f))."""
        return self._sigmoid(f)

    def d2transf_df2(self,f):
        """Second derivative: s*(1 - s) with s = exp(f)/(1 + exp(f))."""
        aux = self._sigmoid(f)
        return aux*(1.-aux)

    def d3transf_df3(self,f):
        """Third derivative: s' - 2*s*s' with s' = s*(1 - s)."""
        aux = self._sigmoid(f)
        daux_df = aux*(1.-aux)
        return daux_df - (2.*aux*daux_df)
|
||||||
|
|
||||||
|
class Reciprocal(GPTransformation):
    """
    Reciprocal transformation, transf(f) = 1/f, with its first three
    derivatives.
    """
    def transf(self,f):
        """1/f."""
        return 1./f

    def dtransf_df(self,f):
        """First derivative: -1/f^2."""
        f_squared = f**2
        return -1./(f_squared)

    def d2transf_df2(self,f):
        """Second derivative: 2/f^3."""
        f_cubed = f**3
        return 2./(f_cubed)

    def d3transf_df3(self,f):
        """Third derivative: -6/f^4."""
        f_fourth = f**4
        return -6./(f_fourth)
|
||||||
|
|
||||||
|
class Heaviside(GPTransformation):
    """
    Step transformation: 1 where f > 0, 0 elsewhere.

    .. math::

        g(f) = I_{x \\in A}

    The step is not differentiable, so the derivative methods raise
    NotImplementedError.

    """
    def transf(self,f):
        """Return 1 where f > 0 and 0 otherwise."""
        return np.where(f>0, 1, 0)

    def dtransf_df(self,f):
        """Not available: raises NotImplementedError."""
        # Call-style raise is valid on both Python 2 and Python 3; the
        # comma form is a SyntaxError on Python 3.
        raise NotImplementedError("This function is not differentiable!")

    def d2transf_df2(self,f):
        """Not available: raises NotImplementedError."""
        raise NotImplementedError("This function is not differentiable!")
|
||||||
152
GPy/likelihoods/poisson.py
Normal file
152
GPy/likelihoods/poisson.py
Normal file
|
|
@ -0,0 +1,152 @@
|
||||||
|
from __future__ import division
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from likelihood import Likelihood
|
||||||
|
|
||||||
|
class Poisson(Likelihood):
|
||||||
|
"""
|
||||||
|
Poisson likelihood
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Y is expected to take values in {0,1,2,...}
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
    """
    Forward the link function and the analytical/numerical switches to the
    Likelihood base class initialiser.
    """
    super(Poisson, self).__init__(gp_link,
                                  analytical_mean=analytical_mean,
                                  analytical_variance=analytical_variance)
|
||||||
|
|
||||||
|
def _preprocess_values(self,Y):
|
||||||
|
return Y
|
||||||
|
|
||||||
|
def pdf_link(self, link_f, y, extra_data=None):
    """
    Likelihood function given link(f)

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: accepted for interface compatibility, not used here
    :returns: product of the Poisson probabilities of all points
    :rtype: float
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    # Per-point Poisson mass with rate link_f, multiplied into one value
    point_probs = stats.poisson.pmf(y,link_f)
    return np.prod(point_probs)
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
    """
    Log Likelihood Function given link(f)

    .. math::
        \\ln p(y_{i}|\\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!

    :param link_f: latent variables (link(f))
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: accepted for interface compatibility, not used here
    :returns: log likelihood summed over all points
    :rtype: float

    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    # gammaln(y+1) is log(y!)
    per_point = -link_f + y*np.log(link_f) - special.gammaln(y+1)
    return np.sum(per_point)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
    """
    Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

    .. math::
        \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in poisson distribution
    :returns: gradient of likelihood evaluated at points
    :rtype: Nx1 array

    """
    rate_shape = np.atleast_1d(link_f).shape
    data_shape = np.atleast_1d(y).shape
    assert rate_shape == data_shape
    count_over_rate = y/link_f
    return count_over_rate - 1
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    """
    Diagonal of the hessian of the log likelihood at y, w.r.t link(f)

    The likelihood factorizes over cases (the distribution for y_i depends
    only on link(f_i), not on link(f_j) for j != i), so every off-diagonal
    entry of the hessian is 0 and only the diagonal is returned.

    .. math::
        \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in poisson distribution
    :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
    :rtype: Nx1 array
    """
    rate_shape = np.atleast_1d(link_f).shape
    data_shape = np.atleast_1d(y).shape
    assert rate_shape == data_shape
    return -y/(link_f**2)
|
||||||
|
#d2_df = self.gp_link.d2transf_df2(gp)
|
||||||
|
#transf = self.gp_link.transf(gp)
|
||||||
|
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    """
    Third order derivative of the log likelihood at y given link(f), w.r.t link(f)

    .. math::
        \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in poisson distribution
    :returns: third derivative of likelihood evaluated at points f
    :rtype: Nx1 array
    """
    rate_shape = np.atleast_1d(link_f).shape
    data_shape = np.atleast_1d(y).shape
    assert rate_shape == data_shape
    return 2*y/(link_f)**3
|
||||||
|
|
||||||
|
def _mean(self, gp):
    """
    Mean of the observations given the latent variable

    Returns the link function applied to ``gp``; for a Poisson distribution
    the mean equals the rate.

    :param gp: latent variable
    """
    rate = self.gp_link.transf(gp)
    return rate
|
||||||
|
|
||||||
|
def _variance(self, gp):
    """
    Variance of the observations given the latent variable

    Returns the link function applied to ``gp``; for a Poisson distribution
    the variance equals the rate.

    :param gp: latent variable
    """
    rate = self.gp_link.transf(gp)
    return rate
|
||||||
|
|
||||||
|
def samples(self, gp):
    """
    Draw one Poisson observation for every entry of the latent variable

    The latent values are flattened, passed through the link function to
    obtain rates, sampled, and reshaped back to the shape of ``gp``.

    :param gp: latent variable
    :returns: sampled observations with the same shape as ``gp``
    """
    rates = self.gp_link.transf(gp.flatten())
    draws = np.random.poisson(rates)
    return draws.reshape(gp.shape)
|
||||||
277
GPy/likelihoods/student_t.py
Normal file
277
GPy/likelihoods/student_t.py
Normal file
|
|
@ -0,0 +1,277 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats, special
|
||||||
|
import scipy as sp
|
||||||
|
import gp_transformations
|
||||||
|
from scipy import stats, integrate
|
||||||
|
from scipy.special import gammaln, gamma
|
||||||
|
from likelihood import Likelihood
|
||||||
|
|
||||||
|
class StudentT(Likelihood):
|
||||||
|
"""
|
||||||
|
Student T likelihood
|
||||||
|
|
||||||
|
For nomenclature see Bayesian Data Analysis 2003 p576
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}}
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, gp_link=None, analytical_mean=True, analytical_variance=True, deg_free=5, sigma2=2):
    """
    Build the Student T likelihood

    :param gp_link: link function object, forwarded to the base class
    :param analytical_mean: flag forwarded to the base class
    :param analytical_variance: flag forwarded to the base class
    :param deg_free: degrees of freedom, stored as ``self.v``
    :param sigma2: scale parameter, stored as ``self.sigma2``
    """
    self.v = deg_free
    self.sigma2 = sigma2

    # Route the initial value through the parameter setter so that
    # sigma2 ends up stored as a plain float.
    initial_params = np.asarray(sigma2)
    self._set_params(initial_params)

    super(StudentT, self).__init__(gp_link, analytical_mean, analytical_variance)
    self.log_concave = False
|
||||||
|
|
||||||
|
def _get_params(self):
    """
    Return the current parameter (``sigma2``) as a numpy array
    """
    current_sigma2 = self.sigma2
    return np.asarray(current_sigma2)
|
||||||
|
|
||||||
|
def _get_param_names(self):
    """
    Return the list of parameter names (a single entry for ``sigma2``)
    """
    names = ["t_noise_std2"]
    return names
|
||||||
|
|
||||||
|
def _set_params(self, x):
    """
    Store the parameter value as ``self.sigma2`` (a plain float)

    :param x: new value of sigma2; a scalar or an array holding exactly one element
    """
    # Pull the single element out with .item() before converting: calling
    # float() directly on an array with ndim > 0 is deprecated in NumPy >= 1.25.
    self.sigma2 = float(np.asarray(x).item())
|
||||||
|
|
||||||
|
@property
def variance(self, extra_data=None):
    """
    Variance of the Student T distribution: sigma2 * v / (v - 2)
    """
    dof_ratio = self.v / float(self.v - 2)
    return dof_ratio * self.sigma2
|
||||||
|
|
||||||
|
def pdf_link(self, link_f, y, extra_data=None):
    """
    Likelihood function given link(f): product of the Student T density over all points

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: likelihood evaluated for this point
    :rtype: float
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    e = y - link_f
    # Ratio of gamma functions is formed in log space, because
    # gamma(big_number) overflows to infinity.
    gamma_ratio = np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
    normaliser = gamma_ratio / (np.sqrt(self.v * np.pi * self.sigma2))
    tail = (1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1))
    return np.prod(normaliser * tail)
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
    """
    Log likelihood function given link(f), summed over all data points

    .. math::
        \\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)

    :param link_f: latent variables (link(f))
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: log likelihood summed over all points
    :rtype: float

    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    e = y - link_f
    # Builtin float is used here: the np.float alias was removed in
    # NumPy 1.24 and raises AttributeError on current releases.
    scaled_sq_err = (1/float(self.v))*((e**2)/self.sigma2)
    objective = (+ gammaln((self.v + 1) * 0.5)
                 - gammaln(self.v * 0.5)
                 - 0.5*np.log(self.sigma2 * self.v * np.pi)
                 - 0.5*(self.v + 1)*np.log(1 + scaled_sq_err)
                 )
    return np.sum(objective)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
    """
    Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

    .. math::
        \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: gradient of likelihood evaluated at points
    :rtype: Nx1 array

    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    residual = y - link_f
    numerator = (self.v + 1) * residual
    denominator = self.v * self.sigma2 + (residual**2)
    return numerator / denominator
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    """
    Diagonal of the hessian of the log likelihood at y, w.r.t link(f)

    The likelihood factorizes over cases (the distribution for y_i depends
    only on link(f_i), not on link(f_j) for j != i), so every off-diagonal
    entry of the hessian is 0 and only the diagonal is returned.

    .. math::
        \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    residual = y - link_f
    numerator = (self.v + 1)*(residual**2 - self.v*self.sigma2)
    denominator = (self.sigma2*self.v + residual**2)**2
    return numerator / denominator
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    """
    Third order derivative of the log likelihood at y given link(f), w.r.t link(f)

    .. math::
        \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)(y_{i} - \\lambda(f_{i}))((y_{i} - \\lambda(f_{i}))^{2} - 3\\sigma^{2} v)}{((y_{i} - \\lambda(f_{i}))^{2} + \\sigma^{2} v)^{3}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: third derivative of likelihood evaluated at points f
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    residual = y - link_f
    numerator = 2*(self.v + 1)*residual*(residual**2 - 3*self.v*self.sigma2)
    denominator = (residual**2 + self.sigma2*self.v)**3
    return numerator / denominator
|
||||||
|
|
||||||
|
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
    """
    Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise), summed over points

    .. math::
        \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
    :rtype: float
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    residual = y - link_f
    pointwise = self.v*(residual**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + residual**2))
    return np.sum(pointwise)
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
    """
    Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)

    .. math::
        \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{df}) = \\frac{-v(v + 1)(y_{i}-\\lambda(f_{i}))}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2} v)^{2}}

    :param link_f: latent variables link_f
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    residual = y - link_f
    numerator = self.v*(self.v+1)*(-residual)
    denominator = (self.sigma2*self.v + residual**2)**2
    return numerator / denominator
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
    """
    Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)

    .. math::
        \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \\lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})^{3}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in student t distribution
    :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    residual = y - link_f
    numerator = self.v*(self.v+1)*(self.sigma2*self.v - 3*(residual**2))
    denominator = (self.sigma2*self.v + (residual**2))**3
    return numerator / denominator
|
||||||
|
|
||||||
|
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
    """
    Gradient of the log likelihood w.r.t the likelihood parameters

    Wraps the scalar returned by ``dlogpdf_link_dvar`` in a 1x1 array.

    :param f: forwarded to ``dlogpdf_link_dvar`` as its first argument
    :param y: data
    :param extra_data: forwarded unchanged
    :returns: 1x1 array holding the derivative w.r.t the variance parameter
    """
    return np.asarray([[self.dlogpdf_link_dvar(f, y, extra_data=extra_data)]])
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
    """
    Derivative of dlogpdf_dlink w.r.t the likelihood parameters

    Returns the result of ``dlogpdf_dlink_dvar`` unchanged.

    :param f: forwarded to ``dlogpdf_dlink_dvar`` as its first argument
    :param y: data
    :param extra_data: forwarded unchanged
    """
    return self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
    """
    Derivative of d2logpdf_dlink2 w.r.t the likelihood parameters

    Returns the result of ``d2logpdf_dlink2_dvar`` unchanged.

    :param f: forwarded to ``d2logpdf_dlink2_dvar`` as its first argument
    :param y: data
    :param extra_data: forwarded unchanged
    """
    return self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
    """
    Compute predictive variance of student_t*normal p(y*|f*)p(f*)

    The value returned is 1/(1/sigma**2 + 1/self.variance); ``mu`` and
    ``predictive_mean`` are accepted but not used.

    The product being approximated is
    (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
    *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))

    :param mu: mean at the latent points (unused)
    :param sigma: spread at the latent points; it is squared before use
    :param predictive_mean: unused
    """
    #FIXME: Not correct
    #We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
    #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
    #The variance of the student t distribution does not depend on f, only on
    #sigma2 and the degrees of freedom (see self.variance)
    #NOTE(review): the identity above is a sum of two variances, whereas the code
    #below combines reciprocals - presumably the reason for the FIXME; confirm.
    inv_sigma_sq = 1/sigma**2
    inv_noise_var = 1/self.variance
    return 1/(inv_sigma_sq + inv_noise_var)
|
||||||
|
|
||||||
|
def _predictive_mean_analytical(self, mu, sigma):
    """
    Compute mean of the prediction

    Currently returns ``mu`` unchanged; ``sigma`` is not used.

    :param mu: mean at the latent points
    :param sigma: unused
    """
    #FIXME: Not correct
    predictive_mean = mu
    return predictive_mean
|
||||||
|
|
||||||
|
def samples(self, gp):
    """
    Draw one Student T observation for every entry of the latent variable

    The latent values are flattened, passed through the link function to
    obtain locations, sampled in a single vectorised call, and reshaped
    back to the shape of ``gp``.

    :param gp: latent variable
    :returns: sampled observations with the same shape as ``gp``
    """
    target_shape = gp.shape
    flat_gp = gp.flatten()
    # Per-point degrees of freedom and scale, so that one rvs call
    # draws a sample for every latent value at once.
    template = np.ones_like(flat_gp)
    dfs = template*self.v
    scales = template*np.sqrt(self.sigma2)
    locations = self.gp_link.transf(flat_gp)
    draws = stats.t.rvs(dfs, loc=locations, scale=scales)
    return draws.reshape(target_shape)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue