Reimplemented gradients for exponential, seems to work for

laplace now, needs a visual test though
2026-07-08 16:12:15 +02:00 · 2013-10-25 12:21:11 +01:00 · 2013-10-25 12:21:11 +01:00 · 8ef3625832
commit 8ef3625832
parent a46121c430
5 changed files with 134 additions and 32 deletions
--- a/GPy/likelihoods/noise_model_constructors.py
+++ b/GPy/likelihoods/noise_model_constructors.py
@ -37,7 +37,7 @@ def exponential(gp_link=None):
    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
-        gp_link = noise_models.gp_transformations.Identity()
+        gp_link = noise_models.gp_transformations.Log_ex_1()

    analytical_mean = False
    analytical_variance = False
--- a/GPy/likelihoods/noise_models/exponential_noise.py
+++ b/GPy/likelihoods/noise_models/exponential_noise.py
@ -24,24 +24,112 @@ class Exponential(NoiseDistribution):
    def _preprocess_values(self,Y):
        return Y

-    def _mass(self,gp,obs):
+    def pdf_link(self, link_f, y, extra_data=None):
        """
-        Mass (or density) function
-        """
-        return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
+        Likelihood function given link(f)

-    def _nlog_mass(self,gp,obs):
-        """
-        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
-        """
-        return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
+        .. math::
+            p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i}))

-    def _dnlog_mass_dgp(self,gp,obs):
-        return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in exponential distribution
+        :returns: likelihood evaluated for this point
+        :rtype: float
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        return np.exp(np.sum(np.log(link_f*np.exp(-y*link_f))))
+        #return np.exp(np.sum(-y/link_f - np.log(link_f) ))

-    def _d2nlog_mass_dgp2(self,gp,obs):
-        fgp = self.gp_link.transf(gp)
-        return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
+    def logpdf_link(self, link_f, y, extra_data=None):
+        """
+        Log Likelihood Function given link(f)
+
+        .. math::
+            \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
+
+        :param link_f: latent variables (link(f))
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in exponential distribution
+        :returns: likelihood evaluated for this point
+        :rtype: float
+
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        logpdf_link = np.sum(np.log(link_f) - y*link_f)
+        #logpdf_link = np.sum(-np.log(link_f) - y/link_f)
+        return logpdf_link
+
+    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+        """
+        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
+
+        .. math::
+            \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
+
+        :param link_f: latent variables (f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in exponential distribution
+        :returns: gradient of likelihood evaluated at points
+        :rtype: Nx1 array
+
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        grad = 1./link_f - y
+        #grad = y/(link_f**2) - 1./link_f
+        return grad
+
+    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+        """
+        Hessian at y, given link(f), w.r.t link(f)
+        i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
+        The hessian will be 0 unless i == j
+
+        .. math::
+            \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in exponential distribution
+        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
+        :rtype: Nx1 array
+
+        .. Note::
+            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
+            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        hess = -1./(link_f**2)
+        #hess = -2*y/(link_f**3) + 1/(link_f**2)
+        return hess
+
+    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+        """
+        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
+
+        .. math::
+            \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in exponential distribution
+        :returns: third derivative of likelihood evaluated at points f
+        :rtype: Nx1 array
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        d3lik_dlink3 = 2./(link_f**3)
+        #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
+        return d3lik_dlink3

    def _mean(self,gp):
        """
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@ -222,21 +222,12 @@ class NoiseDistribution(object):
        raise NotImplementedError

    def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
-        """
-        Need to check if it should even exist by checking length of getparams
-        """
        raise NotImplementedError

    def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
-        """
-        Need to check if it should even exist by checking length of getparams
-        """
        raise NotImplementedError

    def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
-        """
-        Need to check if it should even exist by checking length of getparams
-        """
        raise NotImplementedError

    def pdf(self, f, y, extra_data=None):
--- a/GPy/likelihoods/noise_models/student_t_noise.py
+++ b/GPy/likelihoods/noise_models/student_t_noise.py
@ -55,7 +55,7 @@ class StudentT(NoiseDistribution):
        :returns: likelihood evaluated for this point
        :rtype: float
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        #Careful gamma(big_number) is infinity!
        objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
@ -80,7 +80,7 @@ class StudentT(NoiseDistribution):
        :rtype: float

        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        objective = (+ gammaln((self.v + 1) * 0.5)
                     - gammaln(self.v * 0.5)
@ -105,7 +105,7 @@ class StudentT(NoiseDistribution):
        :rtype: Nx1 array

        """
-        assert y.shape == link_f.shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
        return grad
@ -131,7 +131,7 @@ class StudentT(NoiseDistribution):
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
-        assert y.shape == link_f.shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
        return hess
@ -151,7 +151,7 @@ class StudentT(NoiseDistribution):
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
-        assert y.shape == link_f.shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
                       ((e**2 + self.sigma2*self.v)**3)
@ -173,7 +173,7 @@ class StudentT(NoiseDistribution):
        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
        :rtype: float
        """
-        assert y.shape == link_f.shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
        return np.sum(dlogpdf_dvar)
@ -193,7 +193,7 @@ class StudentT(NoiseDistribution):
        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
        :rtype: Nx1 array
        """
-        assert y.shape == link_f.shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
        return dlogpdf_dlink_dvar
@ -213,7 +213,7 @@ class StudentT(NoiseDistribution):
        :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
        :rtype: Nx1 array
        """
-        assert y.shape == link_f.shape
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        e = y - link_f
        d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
                              / ((self.sigma2*self.v + (e**2))**3)
@ -314,3 +314,19 @@ class StudentT(NoiseDistribution):
        p_025 = mu - p
        p_975 = mu + p
        return mu, np.nan*mu, p_025, p_975
+
+    def samples(self, gp):
+        """
+        Returns a set of samples of observations based on a given value of the latent variable.
+
+        :param size: number of samples to compute
+        :param gp: latent variable
+        """
+        orig_shape = gp.shape
+        gp = gp.flatten()
+        f = self.gp_link.transf(gp)
+        #student_t_samples = stats.t.rvs(self.v, loc=f,
+                                        #scale=np.sqrt(self.sigma2),
+                                        #size=(num_test_points, num_y_samples, num_f_samples))
+        #Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
+        return Ysim.reshape(orig_shape)
--- a/GPy/testing/likelihoods_tests.py
+++ b/GPy/testing/likelihoods_tests.py
@ -83,6 +83,7 @@ class TestNoiseModels(object):
        self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
        self.f = np.random.rand(self.N, 1)
        self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
+        self.positive_Y = np.exp(self.Y.copy())

        self.var = 0.2

@ -216,6 +217,12 @@ class TestNoiseModels(object):
                            "laplace": True,
                            "Y": self.binary_Y,
                            "ep": True
+                            },
+                        "Exponential_default": {
+                            "model": GPy.likelihoods.exponential(),
+                            "link_f_constraints": [constrain_positive],
+                            "Y": self.positive_Y,
+                            "laplace": True,
                        }
                    }