More doc strings

This commit is contained in:
Alan Saul 2013-10-04 16:32:04 +01:00
parent 4925d8a0d9
commit 91f194cd29
4 changed files with 110 additions and 48 deletions

View file

@ -203,8 +203,9 @@ class Laplace(likelihood):
""" """
The laplace approximation algorithm, find K and expand hessian The laplace approximation algorithm, find K and expand hessian
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
:param K: Covariance matrix evaluated at locations X
:type K: NxD matrix :param K: Prior covariance matrix evaluated at locations X
:type K: NxN matrix
""" """
self.K = K.copy() self.K = K.copy()
@ -236,8 +237,8 @@ class Laplace(likelihood):
Rasmussen suggests the use of a numerically stable positive definite matrix B Rasmussen suggests the use of a numerically stable positive definite matrix B
Which has a positive diagonal element and can be easily inverted Which has a positive diagonal element and can be easily inverted
:param K: Covariance matrix evaluated at locations X :param K: Prior covariance matrix evaluated at locations X
:type K: NxD matrix :type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix) :param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN) :type W: Vector of diagonal values of hessian (1xN)
:param a: Matrix to calculate W12BiW12a :param a: Matrix to calculate W12BiW12a

View file

@ -90,7 +90,9 @@ def gaussian(gp_link=None, variance=2, D=None, N=None):
Construct a Gaussian likelihood Construct a Gaussian likelihood
:param gp_link: a GPy gp_link function :param gp_link: a GPy gp_link function
:param variance: scalar, variance :param variance: variance
:type variance: scalar
:returns: Gaussian noise model
""" """
if gp_link is None: if gp_link is None:
gp_link = noise_models.gp_transformations.Identity() gp_link = noise_models.gp_transformations.Identity()
@ -104,8 +106,11 @@ def student_t(gp_link=None, deg_free=5, sigma2=2):
Construct a Student t likelihood Construct a Student t likelihood
:param gp_link: a GPy gp_link function :param gp_link: a GPy gp_link function
:param deg_free: scalar, degrees of freedom :param deg_free: degrees of freedom of student-t
:param sigma2: scalar, variance :type deg_free: scalar
:param sigma2: variance
:type sigma2: scalar
:returns: Student-T noise model
""" """
if gp_link is None: if gp_link is None:
gp_link = noise_models.gp_transformations.Identity() gp_link = noise_models.gp_transformations.Identity()

View file

@ -117,14 +117,19 @@ class Gaussian(NoiseDistribution):
return 0 return 0
def lik_function(self, y, f, extra_data=None): def lik_function(self, y, f, extra_data=None):
"""lik_function $\ln p(y|f)$ """
$$\ln p(y_{i}|f_{i}) = \ln $$ Log likelihood function
:y: data .. math::
:f: latent variables f \\ln p(y_{i}|f_{i}) = -\\frac{D \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - f_{i})^{T}\\sigma^{-2}(y_{i} - f_{i})}{2}
:extra_data: extra_data which is not used in student t distribution
:returns: float(likelihood evaluated for this point)
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: likelihood evaluated for this point
:rtype: float
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
@ -138,10 +143,16 @@ class Gaussian(NoiseDistribution):
""" """
Gradient of the log-likelihood function at y, given f w.r.t f Gradient of the log-likelihood function at y, given f w.r.t f
:y: data .. math::
:f: latent variables f \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{1}{\\sigma^{2}}(y_{i} - f_{i})
:extra_data: extra_data which is not used in student t distribution
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == f.shape
@ -151,16 +162,23 @@ class Gaussian(NoiseDistribution):
def d2lik_d2f(self, y, f, extra_data=None): def d2lik_d2f(self, y, f, extra_data=None):
""" """
Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j Hessian at y, given f, w.r.t f the hessian will be 0 unless i == j
i.e. second derivative lik_function at y given f f_j w.r.t f and f_j i.e. second derivative lik_function at y given f_{i} f_{j} w.r.t f_{i} and f_{j}
.. math::
\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}) (the distribution for y_{i} depends only on f_{i} not on f_{j!=i})
:y: data
:f: latent variables f
:extra_data: extra_data which is not used in student t distribution
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
""" """
assert y.shape == f.shape assert y.shape == f.shape
hess = -(1.0/self.variance)*np.ones((self.N, 1)) hess = -(1.0/self.variance)*np.ones((self.N, 1))
@ -168,9 +186,18 @@ class Gaussian(NoiseDistribution):
def d3lik_d3f(self, y, f, extra_data=None): def d3lik_d3f(self, y, f, extra_data=None):
""" """
Third order derivative lik_function (log-likelihood ) at y given f f_j w.r.t f and f_j Third order derivative log-likelihood function at y given f w.r.t f
$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$ .. math::
\\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = 0
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == f.shape
d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
@ -178,7 +205,18 @@ class Gaussian(NoiseDistribution):
def dlik_dvar(self, y, f, extra_data=None): def dlik_dvar(self, y, f, extra_data=None):
""" """
Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) Gradient of the log-likelihood function at y given f, w.r.t variance parameter (noise_variance)
.. math::
\\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - f_{i})^{2}}{2\\sigma^{4}}
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
@ -188,7 +226,18 @@ class Gaussian(NoiseDistribution):
def dlik_df_dvar(self, y, f, extra_data=None): def dlik_df_dvar(self, y, f, extra_data=None):
""" """
Gradient of the dlik_df w.r.t sigma parameter (standard deviation) Derivative of the dlik_df w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{1}{\\sigma^{4}}(-y_{i} + f_{i})
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == f.shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
@ -197,9 +246,18 @@ class Gaussian(NoiseDistribution):
def d2lik_d2f_dvar(self, y, f, extra_data=None): def d2lik_d2f_dvar(self, y, f, extra_data=None):
""" """
Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (noise_variance)
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$ .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{1}{\\sigma^{4}}
:param y: data
:type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data, which is not used in the Gaussian likelihood
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == f.shape
dlik_hess_dsigma = np.diag((1.0/(self.variance**2))*self.I)[:, None] dlik_hess_dsigma = np.diag((1.0/(self.variance**2))*self.I)[:, None]

View file

@ -48,9 +48,9 @@ class StudentT(NoiseDistribution):
\\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\left(\\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\\sigma\\right) - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2\\right) \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\left(\\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\\sigma\\right) - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2\\right)
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
@ -73,9 +73,9 @@ class StudentT(NoiseDistribution):
\\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v} \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v}
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
@ -95,9 +95,9 @@ class StudentT(NoiseDistribution):
\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}}
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -119,9 +119,9 @@ class StudentT(NoiseDistribution):
\\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v)}{((y_{i} - f_{i})^{2} + \\sigma^{2} v)^3} \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v)}{((y_{i} - f_{i})^{2} + \\sigma^{2} v)^3}
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
@ -140,12 +140,10 @@ class StudentT(NoiseDistribution):
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})} \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})}
-\\frac{1}{\\sigma} + \\frac{(1+v)(y_{i}-f_{i})^2}{\\sigma^3 v(1 + \\frac{1}{v}(\\frac{(y_{i} - f_{i})}{\\sigma^2})^2)}
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
@ -164,9 +162,9 @@ class StudentT(NoiseDistribution):
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-v(v + 1)(y_{i}-f_{i})}{((y_{i}-f_{i})^2 + \\sigma^2 v)^2} \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-v(v + 1)(y_{i}-f_{i})}{((y_{i}-f_{i})^2 + \\sigma^2 v)^2}
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
@ -178,15 +176,15 @@ class StudentT(NoiseDistribution):
def d2lik_d2f_dvar(self, y, f, extra_data=None): def d2lik_d2f_dvar(self, y, f, extra_data=None):
""" """
Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{2\\sigma v(v + 1)(\\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \\sigma^2 v)^3} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - f_{i})^{2})}{(\\sigma^{2}v + (y_{i} - f_{i})^{2})^{3}}
:param y: data :param y: data
:type y: Nx1 matrix :type y: Nx1 array
:param f: latent variables f :param f: latent variables f
:type f: Nx1 matrix :type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array