some more messing with the likelihood directory

This commit is contained in:
James Hensman 2013-12-04 11:32:12 +00:00
parent daaad3c30c
commit 83a495645d
10 changed files with 1862 additions and 0 deletions

BIN
GPy/likelihoods/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,6 @@
from bernoulli import Bernoulli
from exponential import Exponential
from gaussian import Gaussian
from gamma import Gamma
from poisson import Poisson
from student_t import Student_t

View file

@ -0,0 +1,221 @@
# Copyright (c) 2012, 2013 The GPy authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from likelihood import Likelihood
class Bernoulli(Likelihood):
"""
Bernoulli likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
.. Note::
Y is expected to take values in {-1,1}
Probit likelihood usually used
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
if isinstance(gp_link , (gp_transformations.Heaviside, gp_transformations.Probit)):
self.log_concave = True
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if data_i == 1:
sign = 1.
elif data_i == 0:
sign = -1
else:
raise ValueError("bad value for Bernouilli observation (0,1)")
if isinstance(self.gp_link,gp_transformations.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link,gp_transformations.Heaviside):
a = sign*v_i/np.sqrt(tau_i)
Z_hat = std_norm_cdf(a)
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
stop
else:
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.gp_transformations.__name__))
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,variance):
if isinstance(self.gp_link,gp_transformations.Probit):
return stats.norm.cdf(mu/np.sqrt(1+variance))
elif isinstance(self.gp_link,gp_transformations.Heaviside):
return stats.norm.cdf(mu/np.sqrt(variance))
else:
raise NotImplementedError
def _predictive_variance_analytical(self,mu,variance, pred_mean):
if isinstance(self.gp_link,gp_transformations.Heaviside):
return 0.
else:
raise NotImplementedError
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: likelihood evaluated for this point
:rtype: float
.. Note:
Each y_i must be in {0,1}
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
objective = (link_f**y) * ((1.-link_f)**(1.-y))
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: log likelihood evaluated at points link(f)
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = (y/link_f) - (1.-y)/(1-link_f)
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
return d3logpdf_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
p = self.gp_link.transf(gp)
return p*(1.-p)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

View file

@ -0,0 +1,156 @@
# Copyright (c) 2012, 2013 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from likelihood import Likelihood
class Exponential(NoiseDistribution):
"""
Expoential likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
return Y
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = link_f*np.exp(-y*link_f)
return np.exp(np.sum(np.log(log_objective)))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = 1./link_f - y
#grad = y/(link_f**2) - 1./link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -1./(link_f**2)
#hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2./(link_f**3)
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)**2
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.random.exponential(1.0/self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

155
GPy/likelihoods/gamma.py Normal file
View file

@ -0,0 +1,155 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from likelihood import Likelihood
class Gamma(NoiseDistribution):
"""
Gamma likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
self.beta = beta
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
return Y
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
alpha = link_f*self.beta
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#alpha = self.gp_link.transf(gp)*self.beta
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
alpha = link_f*self.beta
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta (\\log \\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
#old
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
#old
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)/self.beta

300
GPy/likelihoods/gaussian.py Normal file
View file

@ -0,0 +1,300 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from likelihood import Likelihood
class Gaussian(NoiseDistribution):
"""
Gaussian likelihood
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param variance: variance value of the Gaussian distribution
:param N: Number of data points
:type N: int
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
self.variance = variance
self.N = N
self._set_params(np.asarray(variance))
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
if isinstance(gp_link , gp_transformations.Identity):
self.log_concave = True
def _get_params(self):
return np.array([self.variance])
def _get_param_names(self):
return ['noise_model_variance']
def _set_params(self, p):
self.variance = float(p)
self.I = np.eye(self.N)
self.covariance_matrix = self.I * self.variance
self.Ki = self.I*(1.0 / self.variance)
#self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
self.ln_det_K = self.N*np.log(self.variance)
def _gradients(self,partial):
return np.zeros(1)
#return np.sum(partial)
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
"""
return Y
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
sigma2_hat = 1./(1./self.variance + tau_i)
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
sum_var = self.variance + 1./tau_i
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,sigma):
new_sigma2 = self.predictive_variance(mu,sigma)
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
return 1./(1./self.variance + 1./sigma**2)
def _mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use pdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _nlog_mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: likelihood evaluated for this point
:rtype: float
"""
#Assumes no covariance, exp, sum, log for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: log likelihood evaluated for this point
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link_f, w.r.t link_f.
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
hess = -(1.0/self.variance)*np.ones((self.N, 1))
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f
s_4 = 1.0/(self.variance**2)
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
def _mean(self,gp):
"""
Expected value of y under the Mass (or density) function p(y|f)
.. math::
E_{p(y|f)}[y]
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Variance of y under the Mass (or density) function p(y|f)
.. math::
Var_{p(y|f)}[y]
"""
return self.variance
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)

View file

@ -0,0 +1,437 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
import pylab as pb
from GPy.util.plot import gpplot
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from GPy.util.misc import chain_1, chain_2, chain_3
from scipy.integrate import quad
import warnings
class Likelihood(object):
"""
Likelihood base class
To use this class, inherrit and define missing functionality.
The minimum required funciotnality is... TODO
"""
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link
self.analytical_mean = analytical_mean
self.analytical_variance = analytical_variance
if self.analytical_mean:
self.moments_match = self._moments_match_analytical
self.predictive_mean = self._predictive_mean_analytical
else:
self.moments_match = self._moments_match_numerical
self.predictive_mean = self._predictive_mean_numerical
if self.analytical_variance:
self.predictive_variance = self._predictive_variance_analytical
else:
self.predictive_variance = self._predictive_variance_numerical
self.log_concave = False
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass
def _gradients(self,partial):
return np.zeros(0)
def _preprocess_values(self,Y):
"""
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
:param Y: observed output
:type Y: Nx1 numpy.darray
"""
return Y
def _moments_match_analytical(self,obs,tau,v):
"""
If available, this function computes the moments analytically.
"""
raise NotImplementedError
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
def integral_generator(y, m, v):
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(f_star):
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
return f
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
return np.log(p_ystar)
def _moments_match_numerical(self,obs,tau,v):
"""
Calculation of moments using quadrature
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural paramenter (mu*precision)
"""
#Compute first integral for zeroth moment.
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
mu = v/tau
def int_1(f):
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
z_scaled, accuracy = quad(int_1, -np.inf, np.inf)
#Compute second integral for first moment
def int_2(f):
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
mean, accuracy = quad(int_2, -np.inf, np.inf)
mean /= z_scaled
#Compute integral for variance
def int_3(f):
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
Ef2 /= z_scaled
variance = Ef2 - mean**2
#Add constant to the zeroth moment
#NOTE: this constant is not needed in the other moments because it cancells out.
z = z_scaled/np.sqrt(2*np.pi/tau)
return z, mean, variance
def _predictive_mean_analytical(self,mu,sigma):
"""
Predictive mean
.. math::
E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) )
If available, this function computes the predictive mean analytically.
"""
raise NotImplementedError
def _predictive_variance_analytical(self,mu,sigma):
"""
Predictive variance
.. math::
V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) )
If available, this function computes the predictive variance analytically.
"""
raise NotImplementedError
def _predictive_mean_numerical(self,mu,variance):
"""
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
:param mu: mean of posterior
:param sigma: standard deviation of posterior
"""
def int_mean(f,m,v):
return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m))
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _predictive_variance_numerical(self,mu,variance,predictive_mean=None):
"""
Numerical approximation to the predictive variance: V(Y_star)
The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
:param mu: mean of posterior
:param sigma: standard deviation of posterior
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
"""
#sigma2 = sigma**2
normalizer = np.sqrt(2*np.pi*variance)
# E( V(Y_star|f_star) )
def int_var(f,m,v):
return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m))
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
#E( E(Y_star|f_star) )**2
if predictive_mean is None:
predictive_mean = self.predictive_mean(mu,variance)
predictive_mean_sq = predictive_mean**2
#E( E(Y_star|f_star)**2 )
def int_pred_mean_sq(f,m,v,predictive_mean_sq):
return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
var_exp = exp_exp2 - predictive_mean_sq
# V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
return exp_var + var_exp
def pdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
def logpdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, extra_data=None):
raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def pdf(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the likelihood (pdf) using it
.. math:
p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, extra_data=extra_data)
def logpdf(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
.. math:
\\log p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: log likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, extra_data=extra_data)
def dlogpdf_df(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array
"""
link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
def d2logpdf_df2(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array
"""
link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
def d3logpdf_df3(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: third derivative of log likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
def dlogpdf_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000):
"""
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
:param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
:param num_samples: number of samples to use in computing quantiles and
possibly mean variance
:type num_samples: integer
:param sampling: Whether to use samples for mean and variances anyway
:type sampling: Boolean
"""
if sampling:
#Get gp_samples f* using posterior mean and variance
if not full_cov:
gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
size=num_samples).T
else:
gp_samples = np.random.multivariate_normal(mu.flatten(), var,
size=num_samples).T
#Push gp samples (f*) through likelihood to give p(y*|f*)
samples = self.samples(gp_samples)
axis=-1
#Calculate mean, variance and precentiles from samples
print "WARNING: Using sampling to calculate mean, variance and predictive quantiles."
pred_mean = np.mean(samples, axis=axis)[:,None]
pred_var = np.var(samples, axis=axis)[:,None]
q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
q3 = np.percentile(samples, 97.5, axis=axis)[:,None]
else:
pred_mean = self.predictive_mean(mu, var)
pred_var = self.predictive_variance(mu, var, pred_mean)
print "WARNING: Predictive quantiles are only computed when sampling."
q1 = np.repeat(np.nan,pred_mean.size)[:,None]
q3 = q1.copy()
return pred_mean, pred_var, q1, q3
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
raise NotImplementedError

View file

@ -0,0 +1,158 @@
# Copyright (c) 2012, 2013 The GPy authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
class GPTransformation(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.darray)
.. note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self):
pass
def transf(self,f):
"""
Gaussian process tranformation function, latent space -> output space
"""
raise NotImplementedError
def dtransf_df(self,f):
"""
derivative of transf(f) w.r.t. f
"""
raise NotImplementedError
def d2transf_df2(self,f):
"""
second derivative of transf(f) w.r.t. f
"""
raise NotImplementedError
def d3transf_df3(self,f):
"""
third derivative of transf(f) w.r.t. f
"""
raise NotImplementedError
class Identity(GPTransformation):
"""
.. math::
g(f) = f
"""
def transf(self,f):
return f
def dtransf_df(self,f):
return np.ones_like(f)
def d2transf_df2(self,f):
return np.zeros_like(f)
def d3transf_df3(self,f):
return np.zeros_like(f)
class Probit(GPTransformation):
"""
.. math::
g(f) = \\Phi^{-1} (mu)
"""
def transf(self,f):
return std_norm_cdf(f)
def dtransf_df(self,f):
return std_norm_pdf(f)
def d2transf_df2(self,f):
#FIXME
return -f * std_norm_pdf(f)
def d3transf_df3(self,f):
#FIXME
f2 = f**2
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
class Log(GPTransformation):
"""
.. math::
g(f) = \\log(\\mu)
"""
def transf(self,f):
return np.exp(f)
def dtransf_df(self,f):
return np.exp(f)
def d2transf_df2(self,f):
return np.exp(f)
def d3transf_df3(self,f):
return np.exp(f)
class Log_ex_1(GPTransformation):
"""
.. math::
g(f) = \\log(\\exp(\\mu) - 1)
"""
def transf(self,f):
return np.log(1.+np.exp(f))
def dtransf_df(self,f):
return np.exp(f)/(1.+np.exp(f))
def d2transf_df2(self,f):
aux = np.exp(f)/(1.+np.exp(f))
return aux*(1.-aux)
def d3transf_df3(self,f):
aux = np.exp(f)/(1.+np.exp(f))
daux_df = aux*(1.-aux)
return daux_df - (2.*aux*daux_df)
class Reciprocal(GPTransformation):
def transf(self,f):
return 1./f
def dtransf_df(self,f):
return -1./(f**2)
def d2transf_df2(self,f):
return 2./(f**3)
def d3transf_df3(self,f):
return -6./(f**4)
class Heaviside(GPTransformation):
"""
.. math::
g(f) = I_{x \\in A}
"""
def transf(self,f):
#transformation goes here
return np.where(f>0, 1, 0)
def dtransf_df(self,f):
raise NotImplementedError, "This function is not differentiable!"
def d2transf_df2(self,f):
raise NotImplementedError, "This function is not differentiable!"

152
GPy/likelihoods/poisson.py Normal file
View file

@ -0,0 +1,152 @@
from __future__ import division
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from likelihood import Likelihood
class Poisson(Likelihood):
"""
Poisson likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
.. Note::
Y is expected to take values in {0,1,2,...}
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
return Y
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.prod(stats.poisson.pmf(y,link_f))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return y/link_f - 1
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -y/(link_f**2)
return hess
#d2_df = self.gp_link.d2transf_df2(gp)
#transf = self.gp_link.transf(gp)
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2*y/(link_f)**3
return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.random.poisson(self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

View file

@ -0,0 +1,277 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
import scipy as sp
import gp_transformations
from scipy import stats, integrate
from scipy.special import gammaln, gamma
from likelihood import Likelihood
class StudentT(Likelihood):
"""
Student T likelihood
For nomanclature see Bayesian Data Analysis 2003 p576
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
"""
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
self.v = deg_free
self.sigma2 = sigma2
self._set_params(np.asarray(sigma2))
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
self.log_concave = False
def _get_params(self):
return np.asarray(self.sigma2)
def _get_param_names(self):
return ["t_noise_std2"]
def _set_params(self, x):
self.sigma2 = float(x)
@property
def variance(self, extra_data=None):
return (self.v / float(self.v - 2)) * self.sigma2
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
#Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2)))
* ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1)))
)
return np.prod(objective)
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
)
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3)
)
return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param link_f: latent variables link_f
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3)
)
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
"""
Compute predictive variance of student_t*normal p(y*|f*)p(f*)
Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
"""
#FIXME: Not correct
#We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
#Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
#Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
#Which was also given to us as (var)
#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
true_var = 1/(1/sigma**2 + 1/self.variance)
return true_var
def _predictive_mean_analytical(self, mu, sigma):
"""
Compute mean of the prediction
"""
#FIXME: Not correct
return mu
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
#FIXME: Very slow as we are computing a new random variable per input!
#Can't get it to sample all at the same time
#student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
dfs = np.ones_like(gp)*self.v
scales = np.ones_like(gp)*np.sqrt(self.sigma2)
student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
scale=scales)
return student_t_samples.reshape(orig_shape)