From 91f194cd29874be61c11067552c7034b3ca2ac04 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 4 Oct 2013 16:32:04 +0100 Subject: [PATCH] More doc strings --- GPy/likelihoods/laplace.py | 9 +- GPy/likelihoods/noise_model_constructors.py | 11 +- .../noise_models/gaussian_noise.py | 104 ++++++++++++++---- .../noise_models/student_t_noise.py | 34 +++--- 4 files changed, 110 insertions(+), 48 deletions(-) diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py index 46ca66bb..11b1731b 100644 --- a/GPy/likelihoods/laplace.py +++ b/GPy/likelihoods/laplace.py @@ -203,8 +203,9 @@ class Laplace(likelihood): """ The laplace approximation algorithm, find K and expand hessian For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability - :param K: Covariance matrix evaluated at locations X - :type K: NxD matrix + + :param K: Prior covariance matrix evaluated at locations X + :type K: NxN matrix """ self.K = K.copy() @@ -236,8 +237,8 @@ class Laplace(likelihood): Rasmussen suggests the use of a numerically stable positive definite matrix B Which has a positive diagonal element and can be easyily inverted - :param K: Covariance matrix evaluated at locations X - :type K: NxD matrix + :param K: Prior covariance matrix evaluated at locations X + :type K: NxN matrix :param W: Negative hessian at a point (diagonal matrix) :type W: Vector of diagonal values of hessian (1xN) :param a: Matrix to calculate W12BiW12a diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index 05d8db55..26d07391 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -90,7 +90,9 @@ def gaussian(gp_link=None, variance=2, D=None, N=None): Construct a Gaussian likelihood :param gp_link: a GPy gp_link function - :param variance: scalar, variance + :param variance: variance + :type variance: scalar + :returns: Gaussian noise model: """ if gp_link is None: gp_link = 
noise_models.gp_transformations.Identity() @@ -104,8 +106,11 @@ def student_t(gp_link=None, deg_free=5, sigma2=2): Construct a Student t likelihood :param gp_link: a GPy gp_link function - :param deg_free: scalar, degrees of freedom - :param sigma2: scalar, variance + :param deg_free: degrees of freedom of student-t + :type deg_free: scalar + :param sigma2: variance + :type sigma2: scalar + :returns: Student-T noise model """ if gp_link is None: gp_link = noise_models.gp_transformations.Identity() diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index f4251ff3..2ca6c373 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -117,14 +117,19 @@ class Gaussian(NoiseDistribution): return 0 def lik_function(self, y, f, extra_data=None): - """lik_function $\ln p(y|f)$ - $$\ln p(y_{i}|f_{i}) = \ln $$ + """ + Log likelihood function - :y: data - :f: latent variables f - :extra_data: extra_data which is not used in student t distribution - :returns: float(likelihood evaluated for this point) + .. math:: + \\ln p(y_{i}|f_{i}) = -\\frac{D \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - f_{i})^{T}\\sigma^{-2}(y_{i} - f_{i})}{2} + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: likelihood evaluated for this point + :rtype: float """ assert y.shape == f.shape e = y - f @@ -138,10 +143,16 @@ class Gaussian(NoiseDistribution): """ Gradient of the link function at y, given f w.r.t f - :y: data - :f: latent variables f - :extra_data: extra_data which is not used in student t distribution + .. 
math:: + \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{1}{\\sigma^{2}}(y_{i} - f_{i}) + + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in the Gaussian distribution :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array """ assert y.shape == f.shape @@ -151,16 +162,23 @@ class Gaussian(NoiseDistribution): def d2lik_d2f(self, y, f, extra_data=None): """ - Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j - i.e. second derivative lik_function at y given f f_j w.r.t f and f_j + Hessian at y, given f, w.r.t f. The hessian will be 0 unless i == j + i.e. second derivative lik_function at y given f_{i} f_{j} w.r.t f_{i} and f_{j} - Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases - (the distribution for y_{i} depends only on f_{i} not on f_{j!=i} + .. math:: + \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = -\\frac{1}{\\sigma^{2}} - :y: data - :f: latent variables f - :extra_data: extra_data which is not used in student t distribution - :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in the Gaussian distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. 
Note:: + Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases + (the distribution for y_{i} depends only on f_{i} not on f_{j!=i}) """ assert y.shape == f.shape hess = -(1.0/self.variance)*np.ones((self.N, 1)) @@ -168,9 +186,18 @@ class Gaussian(NoiseDistribution): def d3lik_d3f(self, y, f, extra_data=None): """ - Third order derivative lik_function (log-likelihood ) at y given f f_j w.r.t f and f_j + Third order derivative log-likelihood function at y given f w.r.t f - $$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$ + .. math:: + \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = 0 + + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in the Gaussian distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array """ assert y.shape == f.shape d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? @@ -178,7 +205,18 @@ class Gaussian(NoiseDistribution): def dlik_dvar(self, y, f, extra_data=None): """ - Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) + Gradient of the log-likelihood function at y given f, w.r.t variance parameter (noise_variance) + + .. 
math:: + \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - f_{i})^{2}}{2\\sigma^{4}} + + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in the Gaussian distribution + :returns: derivative of likelihood evaluated at points f w.r.t variance parameter + :rtype: float """ assert y.shape == f.shape e = y - f @@ -188,7 +226,18 @@ class Gaussian(NoiseDistribution): def dlik_df_dvar(self, y, f, extra_data=None): """ - Gradient of the dlik_df w.r.t sigma parameter (standard deviation) + Derivative of the dlik_df w.r.t variance parameter (noise_variance) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{1}{\\sigma^{4}}(-y_{i} + f_{i}) + + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in the Gaussian distribution + :returns: derivative of likelihood evaluated at points f w.r.t variance parameter + :rtype: Nx1 array """ assert y.shape == f.shape s_4 = 1.0/(self.variance**2) @@ -197,9 +246,18 @@ class Gaussian(NoiseDistribution): def d2lik_d2f_dvar(self, y, f, extra_data=None): """ - Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) + Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (noise_variance) - $$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$ + .. 
math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{1}{\\sigma^{4}} + + :param y: data + :type y: Nx1 array + :param f: latent variables f + :type f: Nx1 array + :param extra_data: extra_data which is not used in the Gaussian distribution + :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter + :rtype: Nx1 array """ assert y.shape == f.shape dlik_hess_dsigma = np.diag((1.0/(self.variance**2))*self.I)[:, None] diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py index dc78b582..0ba517a6 100644 --- a/GPy/likelihoods/noise_models/student_t_noise.py +++ b/GPy/likelihoods/noise_models/student_t_noise.py @@ -48,9 +48,9 @@ class StudentT(NoiseDistribution): \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2 :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: likelihood evaluated for this point :rtype: float @@ -73,9 +73,9 @@ class StudentT(NoiseDistribution): \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v} :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: gradient of likelihood evaluated at points :rtype: Nx1 array @@ -95,9 +95,9 @@ class StudentT(NoiseDistribution): \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 
array :param extra_data: extra_data which is not used in student t distribution - not used :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -119,9 +119,9 @@ class StudentT(NoiseDistribution): \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v))}{((y_{i} - f_{i}) + \\sigma^{2} v)^3} :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array @@ -140,12 +140,10 @@ class StudentT(NoiseDistribution): .. math:: \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})} - -\\frac{1}{\\sigma} + \\frac{(1+v)(y_{i}-f_{i})^2}{\\sigma^3 v(1 + \\frac{1}{v}(\\frac{(y_{i} - f_{i})}{\\sigma^2})^2)} - :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float @@ -164,9 +162,9 @@ class StudentT(NoiseDistribution): \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \\sigma^2 v)^2} :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array @@ -178,15 +176,15 @@ class StudentT(NoiseDistribution): def d2lik_d2f_dvar(self, y, f, extra_data=None): """ - Gradient of the hessian (d2lik_d2f) 
w.r.t sigma parameter (standard deviation) + Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (t_noise) .. math:: - \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{2\\sigma v(v + 1)(\\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \\sigma^2 v)^3} + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - f_{i})^{2})}{(\\sigma^{2}v + (y_{i} - f_{i})^{2})^{3}} :param y: data - :type y: Nx1 matrix + :type y: Nx1 array :param f: latent variables f - :type f: Nx1 matrix + :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array