diff --git a/GPy/likelihoods/noise_models/gamma_noise.py b/GPy/likelihoods/noise_models/gamma_noise.py index 5229cb4f..2e4e7d15 100644 --- a/GPy/likelihoods/noise_models/gamma_noise.py +++ b/GPy/likelihoods/noise_models/gamma_noise.py @@ -12,11 +12,11 @@ from noise_distributions import NoiseDistribution class Gamma(NoiseDistribution): """ Gamma likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! - $$ + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + """ def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.): self.beta = beta @@ -25,26 +25,120 @@ class Gamma(NoiseDistribution): def _preprocess_values(self,Y): return Y - def _mass(self,gp,obs): + def pdf_link(self, link_f, y, extra_data=None): """ - Mass (or density) function + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance) - alpha = self.gp_link.transf(gp)*self.beta - return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha) + alpha = link_f*self.beta + return (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha) - def _nlog_mass(self,gp,obs): + def logpdf_link(self, link_f, y, extra_data=None): """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + """ - alpha = self.gp_link.transf(gp)*self.beta - return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + #alpha = self.gp_link.transf(gp)*self.beta + #return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) + alpha = link_f*self.beta + return alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y - def _dnlog_mass_dgp(self,gp,obs): - return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) - def _d2nlog_mass_dgp2(self,gp,obs): - return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta + .. math:: + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta (\\log \\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta + #old + #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) + #old + #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) + return d3lik_dlink3 def _mean(self,gp): """ diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py index 155842fd..8d1466fb 100644 --- a/GPy/testing/likelihoods_tests.py +++ b/GPy/testing/likelihoods_tests.py @@ -84,10 +84,8 @@ class TestNoiseModels(object): self.f = np.random.rand(self.N, 1) self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] self.positive_Y = np.exp(self.Y.copy()) - self.integer_Y = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None] - self.integer_Y = np.where(self.integer_Y > 0, self.integer_Y, 0) - print self.integer_Y - print self.Y + tmp = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None] + self.integer_Y = np.where(tmp > 0, tmp, 0) self.var = 0.2 @@ -234,6 +232,12 @@ class TestNoiseModels(object): "Y": self.integer_Y, "laplace": True, "ep": False #Should work though... + }, + "Gamma_default": { + "model": GPy.likelihoods.gamma(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True } }