diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index 95247c03..e626c6a3 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -37,7 +37,7 @@ def exponential(gp_link=None): :param gp_link: a GPy gp_link function """ if gp_link is None: - gp_link = noise_models.gp_transformations.Identity() + gp_link = noise_models.gp_transformations.Log_ex_1() analytical_mean = False analytical_variance = False diff --git a/GPy/likelihoods/noise_models/exponential_noise.py b/GPy/likelihoods/noise_models/exponential_noise.py index 450c11be..8e916353 100644 --- a/GPy/likelihoods/noise_models/exponential_noise.py +++ b/GPy/likelihoods/noise_models/exponential_noise.py @@ -24,24 +24,112 @@ class Exponential(NoiseDistribution): def _preprocess_values(self,Y): return Y - def _mass(self,gp,obs): + def pdf_link(self, link_f, y, extra_data=None): """ - Mass (or density) function - """ - return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp) + Likelihood function given link(f) - def _nlog_mass(self,gp,obs): - """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted - """ - return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp)) + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i})) - def _dnlog_mass_dgp(self,gp,obs): - return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp) + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.exp(np.sum(np.log(link_f*np.exp(-y*link_f)))) + #return np.exp(np.sum(-y/link_f - np.log(link_f) )) - def _d2nlog_mass_dgp2(self,gp,obs): - fgp = self.gp_link.transf(gp) - return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp) + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i}) + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + logpdf_link = np.sum(np.log(link_f) - y*link_f) + #logpdf_link = np.sum(-np.log(link_f) - y/link_f) + return logpdf_link + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = 1./link_f - y + #grad = y/(link_f**2) - 1./link_f + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -1./(link_f**2) + #hess = -2*y/(link_f**3) + 1/(link_f**2) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = 2./(link_f**3) + #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) + return d3lik_dlink3 def _mean(self,gp): """ diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 3cd46013..165f8d2e 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -222,21 +222,12 @@ class NoiseDistribution(object): raise NotImplementedError def dlogpdf_link_dtheta(self, link_f, y, extra_data=None): - """ - Need to check if it should even exist by checking length of getparams - """ raise NotImplementedError def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None): - """ - Need to check if it should even exist by checking length of getparams - """ raise NotImplementedError def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None): - """ - Need to check if it should even exist by checking length of getparams - """ raise NotImplementedError def pdf(self, f, y, extra_data=None): diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py index 7937a507..f268c644 100644 --- a/GPy/likelihoods/noise_models/student_t_noise.py +++ b/GPy/likelihoods/noise_models/student_t_noise.py @@ -55,7 +55,7 @@ class StudentT(NoiseDistribution): :returns: likelihood evaluated for this point :rtype: float """ - assert np.asarray(link_f).shape == np.asarray(y).shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f #Careful gamma(big_number) is infinity! objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) @@ -80,7 +80,7 @@ class StudentT(NoiseDistribution): :rtype: float """ - assert np.asarray(link_f).shape == np.asarray(y).shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f objective = (+ gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5) @@ -105,7 +105,7 @@ class StudentT(NoiseDistribution): :rtype: Nx1 array """ - assert y.shape == link_f.shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad @@ -131,7 +131,7 @@ class StudentT(NoiseDistribution): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert y.shape == link_f.shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess @@ -151,7 +151,7 @@ class StudentT(NoiseDistribution): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert y.shape == link_f.shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) @@ -173,7 +173,7 @@ class StudentT(NoiseDistribution): :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert y.shape == link_f.shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) return np.sum(dlogpdf_dvar) @@ -193,7 +193,7 @@ class StudentT(NoiseDistribution): :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert y.shape == link_f.shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlogpdf_dlink_dvar @@ -213,7 +213,7 @@ class StudentT(NoiseDistribution): :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert y.shape == link_f.shape + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape e = y - link_f d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) @@ -314,3 +314,19 @@ class StudentT(NoiseDistribution): p_025 = mu - p p_975 = mu + p return mu, np.nan*mu, p_025, p_975 + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param size: number of samples to compute + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + f = self.gp_link.transf(gp) + #student_t_samples = stats.t.rvs(self.v, loc=f, + #scale=np.sqrt(self.sigma2), + #size=(num_test_points, num_y_samples, num_f_samples)) + #Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp]) + return Ysim.reshape(orig_shape) diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py index fff5dcac..c3ea6a43 100644 --- a/GPy/testing/likelihoods_tests.py +++ b/GPy/testing/likelihoods_tests.py @@ -83,6 +83,7 @@ class TestNoiseModels(object): self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] self.f = np.random.rand(self.N, 1) self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] + self.positive_Y = np.exp(self.Y.copy()) self.var = 0.2 @@ -216,6 +217,12 @@ class TestNoiseModels(object): "laplace": True, "Y": self.binary_Y, "ep": True + }, + "Exponential_default": { + "model": GPy.likelihoods.exponential(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True, } }