diff --git a/GPy/likelihoods/noise_models/bernoulli_noise.py b/GPy/likelihoods/noise_models/bernoulli_noise.py index 7ef8aa82..1d27d48b 100644 --- a/GPy/likelihoods/noise_models/bernoulli_noise.py +++ b/GPy/likelihoods/noise_models/bernoulli_noise.py @@ -11,12 +11,14 @@ from noise_distributions import NoiseDistribution class Bernoulli(NoiseDistribution): """ - Probit likelihood - Y is expected to take values in {-1,1} - ----- - $$ - L(x) = \\Phi (Y_i*f_i) - $$ + Bernoulli likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}} + + .. Note:: + Y is expected to take values in {-1,1} + Probit likelihood usually used """ def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance) @@ -82,7 +84,7 @@ class Bernoulli(NoiseDistribution): Likelihood function given link(f) .. math:: - \\p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}} :param link_f: latent variables link(f) :type link_f: Nx1 array @@ -111,7 +113,7 @@ class Bernoulli(NoiseDistribution): :param y: data :type y: Nx1 array :param extra_data: extra_data not used in bernoulli - :returns: log likelihood evaluated for this point + :returns: log likelihood evaluated at points link(f) :rtype: float """ assert np.asarray(link_f).shape == np.asarray(y).shape @@ -130,8 +132,8 @@ class Bernoulli(NoiseDistribution): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian - :returns: gradient of log likelihood evaluated at points + :param extra_data: extra_data not used in bernoulli + :returns: gradient of log likelihood evaluated at points link(f) :rtype: Nx1 array """ assert np.asarray(link_f).shape == np.asarray(y).shape @@ -151,7 +153,7 @@ class Bernoulli(NoiseDistribution): :type link_f: Nx1 array :param y: data :type y: Nx1 array - 
:param extra_data: extra_data not used in gaussian + :param extra_data: extra_data not used in bernoulli :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :rtype: Nx1 array @@ -174,7 +176,7 @@ class Bernoulli(NoiseDistribution): :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data not used in gaussian + :param extra_data: extra_data not used in bernoulli :returns: third derivative of log likelihood evaluated at points link(f) :rtype: Nx1 array """ diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index 1c5ac1db..63d3a52a 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -12,12 +12,15 @@ class Gaussian(NoiseDistribution): """ Gaussian likelihood - :param mean: mean value of the Gaussian distribution - :param variance: mean value of the Gaussian distribution + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} + + :param variance: variance value of the Gaussian distribution + :param N: Number of data points + :type N: int """ def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None): self.variance = variance - self.D = D self.N = N self._set_params(np.asarray(variance)) super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance) @@ -109,7 +112,6 @@ class Gaussian(NoiseDistribution): #Assumes no covariance, exp, sum, log for numerical stability return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance))))) - def logpdf_link(self, link_f, y, extra_data=None): """ Log likelihood function given link(f) @@ -150,9 +152,11 @@ class Gaussian(NoiseDistribution): def d2logpdf_dlink2(self, link_f, y, extra_data=None): """ - Hessian at y, given link_f, w.r.t 
link_f the hessian will be 0 unless i == j + Hessian at y, given link_f, w.r.t link_f. i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + .. math:: \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}} @@ -193,10 +197,10 @@ class Gaussian(NoiseDistribution): def dlogpdf_link_dvar(self, link_f, y, extra_data=None): """ - Gradient of the negative log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) + Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) .. math:: - \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}} + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}} :param link_f: latent variables link(f) :type link_f: Nx1 array @@ -209,7 +213,7 @@ class Gaussian(NoiseDistribution): assert np.asarray(link_f).shape == np.asarray(y).shape e = y - link_f s_4 = 1.0/(self.variance**2) - dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.dot(e.T, e) + dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.square(e) return np.sum(dlik_dsigma) # Sure about this sum? 
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): @@ -228,8 +232,9 @@ class Gaussian(NoiseDistribution): :rtype: Nx1 array """ assert np.asarray(link_f).shape == np.asarray(y).shape - s_4 = 1.0/(self.variance**2) - dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, link_f) + s_4 = 1./(self.variance**2) + #dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, link_f) + dlik_grad_dsigma = -s_4*y + s_4*link_f return dlik_grad_dsigma def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 82071a50..897986a5 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -12,14 +12,9 @@ import gp_transformations from GPy.util.misc import chain_1, chain_2, chain_3 from scipy.integrate import quad - class NoiseDistribution(object): """ - Likelihood class for doing Expectation propagation - - :param Y: observed output (Nx1 numpy.darray) - - .. note:: Y values allowed depend on the LikelihoodFunction used + Likelihood class for doing approximations """ def __init__(self,gp_link,analytical_mean=False,analytical_variance=False): assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation." diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py index 49de781f..7937a507 100644 --- a/GPy/likelihoods/noise_models/student_t_noise.py +++ b/GPy/likelihoods/noise_models/student_t_noise.py @@ -16,7 +16,7 @@ class StudentT(NoiseDistribution): For nomanclature see Bayesian Data Analysis 2003 p576 .. 
math:: - \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2) + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}} """ def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2): @@ -45,13 +45,13 @@ class StudentT(NoiseDistribution): Likelihood function given link(f) .. math:: - \\ln p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}} :param link_f: latent variables link(f) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: likelihood evaluated for this point :rtype: float """ @@ -69,13 +69,13 @@ class StudentT(NoiseDistribution): Log Likelihood Function given link(f) .. 
math:: - \\ln p(y_{i}|f_{i}) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right) + \\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) :param link_f: latent variables (link(f)) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: likelihood evaluated for this point :rtype: float @@ -94,13 +94,13 @@ class StudentT(NoiseDistribution): Gradient of the log likelihood function at y, given link(f) w.r.t link(f) .. math:: - \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v} + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v} :param link_f: latent variables (f) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: gradient of likelihood evaluated at points :rtype: Nx1 array @@ -112,17 +112,18 @@ class StudentT(NoiseDistribution): def d2logpdf_dlink2(self, link_f, y, extra_data=None): """ - Hessian at y, given link(f), w.r.t link(f) the hessian will be 0 unless i == j + Hessian at y, given link(f), w.r.t link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j .. 
math:: - \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} + \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} :param link_f: latent variables link(f) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -137,16 +138,16 @@ class StudentT(NoiseDistribution): def d3logpdf_dlink3(self, link_f, y, extra_data=None): """ - Third order derivative log-likelihood function at y given f w.r.t f + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) .. math:: - \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v))}{((y_{i} - f_{i}) + \\sigma^{2} v)^3} + \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)((y_{i} - \\lambda(f_{i}))^3 - 3(y_{i} - \\lambda(f_{i})) \\sigma^{2} v)}{((y_{i} - \\lambda(f_{i}))^{2} + \\sigma^{2} v)^3} :param link_f: latent variables link(f) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ @@ -162,13 +163,13 @@ class StudentT(NoiseDistribution): Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) .. 
math:: - \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})} + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})} :param link_f: latent variables link(f) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ @@ -182,13 +183,13 @@ class StudentT(NoiseDistribution): Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) .. math:: - \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \\sigma^2 v)^2} + \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{-v(v + 1)(y_{i}-\\lambda(f_{i}))}{((y_{i}-\\lambda(f_{i}))^2 + \\sigma^2 v)^2} :param link_f: latent variables link_f :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ @@ -202,13 +203,13 @@ class StudentT(NoiseDistribution): Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) .. 
math:: - \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - f_{i})^{2})}{(\\sigma^{2}v + (y_{i} - f_{i})^{2})^{3}} + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \\lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})^{3}} :param link_f: latent variables link(f) :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param extra_data: extra_data which is not used in student t distribution - not used + :param extra_data: extra_data which is not used in student t distribution :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ diff --git a/doc/GPy.likelihoods.noise_models.rst b/doc/GPy.likelihoods.noise_models.rst index c16ee7d1..6fec5aff 100644 --- a/doc/GPy.likelihoods.noise_models.rst +++ b/doc/GPy.likelihoods.noise_models.rst @@ -4,10 +4,10 @@ GPy.likelihoods.noise_models package Submodules ---------- -GPy.likelihoods.noise_models.binomial_noise module --------------------------------------------------- +GPy.likelihoods.noise_models.bernoulli_noise module +--------------------------------------------------- -.. automodule:: GPy.likelihoods.noise_models.binomial_noise +.. automodule:: GPy.likelihoods.noise_models.bernoulli_noise :members: :undoc-members: :show-inheritance: diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst index 2d41d5fc..98b001c0 100644 --- a/doc/GPy.testing.rst +++ b/doc/GPy.testing.rst @@ -36,6 +36,14 @@ GPy.testing.examples_tests module :undoc-members: :show-inheritance: +GPy.testing.gp_transformation_tests module +------------------------------------------ + +.. automodule:: GPy.testing.gp_transformation_tests + :members: + :undoc-members: + :show-inheritance: + GPy.testing.gplvm_tests module ------------------------------