From 83a495645d1a41039967162443fc8173224017b6 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 4 Dec 2013 11:32:12 +0000 Subject: [PATCH] some more messing with the likelihood directory --- GPy/likelihoods/.DS_Store | Bin 0 -> 15364 bytes GPy/likelihoods/__init__.py | 6 + GPy/likelihoods/bernoulli.py | 221 +++++++++++++++ GPy/likelihoods/exponential.py | 156 +++++++++++ GPy/likelihoods/gamma.py | 155 +++++++++++ GPy/likelihoods/gaussian.py | 300 ++++++++++++++++++++ GPy/likelihoods/likelihood.py | 437 ++++++++++++++++++++++++++++++ GPy/likelihoods/link_functions.py | 158 +++++++++++ GPy/likelihoods/poisson.py | 152 +++++++++++ GPy/likelihoods/student_t.py | 277 +++++++++++++++++++ 10 files changed, 1862 insertions(+) create mode 100644 GPy/likelihoods/.DS_Store create mode 100644 GPy/likelihoods/__init__.py create mode 100644 GPy/likelihoods/bernoulli.py create mode 100644 GPy/likelihoods/exponential.py create mode 100644 GPy/likelihoods/gamma.py create mode 100644 GPy/likelihoods/gaussian.py create mode 100644 GPy/likelihoods/likelihood.py create mode 100644 GPy/likelihoods/link_functions.py create mode 100644 GPy/likelihoods/poisson.py create mode 100644 GPy/likelihoods/student_t.py diff --git a/GPy/likelihoods/.DS_Store b/GPy/likelihoods/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..69fb486161bf8551e4fd68e9704c09ed9e25bf8c GIT binary patch literal 15364 zcmeI1&2G~`5XWcR0-*_26Y(X%!4j9A5WGQD6(p{3;bsyCibWDfaV&*f9-|M%gYXvN zKeL9e*Xtxj0!7tq=EnVtR3?54XRB6D7hUK2$`)WXH`w1X;8_&r~>a!@{b0coI5 zl+uD?d|uM5)>WLxhWEzmzL0W$8dA zTL8!-JeGoc>;pt6DrLWvD=S46Htp`gl&YrOVi>EA_YpUT?3Z$7rBx?m)yb4Sn{tO@ zq<82=(wt1+O0AWE5~vg4y?a~s>?utv`}gnf##u5PXG65$N%a=j*;)$jh&o{{N<6(E zo?efJ^qJ=9Eu(_Q_@uDJf2EY;4WAwOefu`ru)+RjG=Utz#x7cil%U6j=O2&j${E)k z_%I9Fla^?GZ@orMboaWYKWg(H?_H~1X&Z~bOWcpPjae+XynEX%_+CiNTrcj!#uB^| zoYM%gkcMeFG)_cg2GUd9O`+GwyXNu32Hjnv?Rwp!OZYs&XV4s%iqF^WxAawuQ(O8f zkcqjBbQ5Ixc?Yy>hk{*mWB0=7E@F&T^4?+o(6nlkqUi#{56;F_Pb6 z&1L9K3F#U>^>lVv3@gdllh73iqFwrVB5$$5A*?`=5o)Cblzqx}s z|8JUq|KAsP)ej}01pWa5b9C|H;v9y{v-MgReAXV}`h<%Mw_8~$DyZZ*9#W3u@dqEr zpW))RuUK-yeZQ0|EAa&7zyBEEoNMy_wQ_2p`+q)J)%}0%RN;2*|7GH7r393K5>Nt4 dKnW-TC7=Y9fD%vwN 0: + link_f = self.gp_link.transf(f) + return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([1, 0]) + + def dlogpdf_df_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + return chain_1(dlogpdf_dlink_dtheta, dlink_df) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([f.shape[0], 0]) + + def d2logpdf_df2_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + d2link_df2 = self.gp_link.d2transf_df2(f) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([f.shape[0], 0]) + + def _laplace_gradients(self, f, y, extra_data=None): + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data) + dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data) + d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data) + + #Parameters are stacked vertically. Must be listed in same order as 'get_param_names' + # ensure we have gradients for every parameter we want to optimize + assert dlogpdf_dtheta.shape[1] == len(self._get_param_names()) + assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names()) + assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names()) + return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta + + def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000): + """ + Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction. + + :param mu: mean of the latent variable, f, of posterior + :param var: variance of the latent variable, f, of posterior + :param full_cov: whether to use the full covariance or just the diagonal + :type full_cov: Boolean + :param num_samples: number of samples to use in computing quantiles and + possibly mean variance + :type num_samples: integer + :param sampling: Whether to use samples for mean and variances anyway + :type sampling: Boolean + + """ + + if sampling: + #Get gp_samples f* using posterior mean and variance + if not full_cov: + gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()), + size=num_samples).T + else: + gp_samples = np.random.multivariate_normal(mu.flatten(), var, + size=num_samples).T + #Push gp samples (f*) through likelihood to give p(y*|f*) + samples = self.samples(gp_samples) + axis=-1 + + #Calculate mean, variance and precentiles from samples + print "WARNING: Using sampling to calculate mean, variance and predictive quantiles." + pred_mean = np.mean(samples, axis=axis)[:,None] + pred_var = np.var(samples, axis=axis)[:,None] + q1 = np.percentile(samples, 2.5, axis=axis)[:,None] + q3 = np.percentile(samples, 97.5, axis=axis)[:,None] + + else: + + pred_mean = self.predictive_mean(mu, var) + pred_var = self.predictive_variance(mu, var, pred_mean) + print "WARNING: Predictive quantiles are only computed when sampling." + q1 = np.repeat(np.nan,pred_mean.size)[:,None] + q3 = q1.copy() + + return pred_mean, pred_var, q1, q3 + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + raise NotImplementedError diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py new file mode 100644 index 00000000..9c046223 --- /dev/null +++ b/GPy/likelihoods/link_functions.py @@ -0,0 +1,158 @@ +# Copyright (c) 2012, 2013 The GPy authors +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats +import scipy as sp +import pylab as pb +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf + +class GPTransformation(object): + """ + Link function class for doing non-Gaussian likelihoods approximation + + :param Y: observed output (Nx1 numpy.darray) + + .. note:: Y values allowed depend on the likelihood_function used + + """ + def __init__(self): + pass + + def transf(self,f): + """ + Gaussian process tranformation function, latent space -> output space + """ + raise NotImplementedError + + def dtransf_df(self,f): + """ + derivative of transf(f) w.r.t. f + """ + raise NotImplementedError + + def d2transf_df2(self,f): + """ + second derivative of transf(f) w.r.t. f + """ + raise NotImplementedError + + def d3transf_df3(self,f): + """ + third derivative of transf(f) w.r.t. f + """ + raise NotImplementedError + +class Identity(GPTransformation): + """ + .. math:: + + g(f) = f + + """ + def transf(self,f): + return f + + def dtransf_df(self,f): + return np.ones_like(f) + + def d2transf_df2(self,f): + return np.zeros_like(f) + + def d3transf_df3(self,f): + return np.zeros_like(f) + + +class Probit(GPTransformation): + """ + .. math:: + + g(f) = \\Phi^{-1} (mu) + + """ + def transf(self,f): + return std_norm_cdf(f) + + def dtransf_df(self,f): + return std_norm_pdf(f) + + def d2transf_df2(self,f): + #FIXME + return -f * std_norm_pdf(f) + + def d3transf_df3(self,f): + #FIXME + f2 = f**2 + return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) + +class Log(GPTransformation): + """ + .. math:: + + g(f) = \\log(\\mu) + + """ + def transf(self,f): + return np.exp(f) + + def dtransf_df(self,f): + return np.exp(f) + + def d2transf_df2(self,f): + return np.exp(f) + + def d3transf_df3(self,f): + return np.exp(f) + +class Log_ex_1(GPTransformation): + """ + .. math:: + + g(f) = \\log(\\exp(\\mu) - 1) + + """ + def transf(self,f): + return np.log(1.+np.exp(f)) + + def dtransf_df(self,f): + return np.exp(f)/(1.+np.exp(f)) + + def d2transf_df2(self,f): + aux = np.exp(f)/(1.+np.exp(f)) + return aux*(1.-aux) + + def d3transf_df3(self,f): + aux = np.exp(f)/(1.+np.exp(f)) + daux_df = aux*(1.-aux) + return daux_df - (2.*aux*daux_df) + +class Reciprocal(GPTransformation): + def transf(self,f): + return 1./f + + def dtransf_df(self,f): + return -1./(f**2) + + def d2transf_df2(self,f): + return 2./(f**3) + + def d3transf_df3(self,f): + return -6./(f**4) + +class Heaviside(GPTransformation): + """ + + .. math:: + + g(f) = I_{x \\in A} + + """ + def transf(self,f): + #transformation goes here + return np.where(f>0, 1, 0) + + def dtransf_df(self,f): + raise NotImplementedError, "This function is not differentiable!" + + def d2transf_df2(self,f): + raise NotImplementedError, "This function is not differentiable!" diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py new file mode 100644 index 00000000..73a10570 --- /dev/null +++ b/GPy/likelihoods/poisson.py @@ -0,0 +1,152 @@ +from __future__ import division +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from likelihood import Likelihood + +class Poisson(Likelihood): + """ + Poisson likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})} + + .. Note:: + Y is expected to take values in {0,1,2,...} + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + return Y + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.prod(stats.poisson.pmf(y,link_f)) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}! + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1 + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return y/link_f - 1 + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -y/(link_f**2) + return hess + #d2_df = self.gp_link.d2transf_df2(gp) + #transf = self.gp_link.transf(gp) + #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = 2*y/(link_f)**3 + return d3lik_dlink3 + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.random.poisson(self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py new file mode 100644 index 00000000..3336235b --- /dev/null +++ b/GPy/likelihoods/student_t.py @@ -0,0 +1,277 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats, special +import scipy as sp +import gp_transformations +from scipy import stats, integrate +from scipy.special import gammaln, gamma +from likelihood import Likelihood + +class StudentT(Likelihood): + """ + Student T likelihood + + For nomanclature see Bayesian Data Analysis 2003 p576 + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + + """ + def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2): + self.v = deg_free + self.sigma2 = sigma2 + + self._set_params(np.asarray(sigma2)) + super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance) + self.log_concave = False + + def _get_params(self): + return np.asarray(self.sigma2) + + def _get_param_names(self): + return ["t_noise_std2"] + + def _set_params(self, x): + self.sigma2 = float(x) + + @property + def variance(self, extra_data=None): + return (self.v / float(self.v - 2)) * self.sigma2 + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + #Careful gamma(big_number) is infinity! + objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) + / (np.sqrt(self.v * np.pi * self.sigma2))) + * ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1))) + ) + return np.prod(objective) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + objective = (+ gammaln((self.v + 1) * 0.5) + - gammaln(self.v * 0.5) + - 0.5*np.log(self.sigma2 * self.v * np.pi) + - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) + ) + return np.sum(objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / + ((e**2 + self.sigma2*self.v)**3) + ) + return d3lik_dlink3 + + def dlogpdf_link_dvar(self, link_f, y, extra_data=None): + """ + Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of likelihood evaluated at points f w.r.t variance parameter + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) + return np.sum(dlogpdf_dvar) + + def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): + """ + Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} + + :param link_f: latent variables link_f + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of likelihood evaluated at points f w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) + return dlogpdf_dlink_dvar + + def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): + """ + Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) + / ((self.sigma2*self.v + (e**2))**3) + ) + return d2logpdf_dlink2_dvar + + def dlogpdf_link_dtheta(self, f, y, extra_data=None): + dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) + return np.asarray([[dlogpdf_dvar]]) + + def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): + dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) + return dlogpdf_dlink_dvar + + def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): + d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) + return d2logpdf_dlink2_dvar + + def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None): + """ + Compute predictive variance of student_t*normal p(y*|f*)p(f*) + + Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*) + (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2)) + *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2))) + """ + + #FIXME: Not correct + #We want the variance around test points y which comes from int p(y*|f*)p(f*) df* + #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)] + #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this + #Which was also given to us as (var) + #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution + #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom + true_var = 1/(1/sigma**2 + 1/self.variance) + + return true_var + + def _predictive_mean_analytical(self, mu, sigma): + """ + Compute mean of the prediction + """ + #FIXME: Not correct + return mu + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + #FIXME: Very slow as we are computing a new random variable per input! + #Can't get it to sample all at the same time + #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp]) + dfs = np.ones_like(gp)*self.v + scales = np.ones_like(gp)*np.sqrt(self.sigma2) + student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp), + scale=scales) + return student_t_samples.reshape(orig_shape)