Merge from upstream

2026-05-15 06:52:39 +02:00 · 2015-03-13 14:23:40 +00:00 · 2015-03-13 14:23:40 +00:00 · 7930eb646f
commit 7930eb646f
parent f6b71629e7 59bd0d3b6e
16 changed files with 214 additions and 103 deletions
--- a/GPy/likelihoods/init.py
+++ b/GPy/likelihoods/init.py
@ -6,3 +6,5 @@ from .poisson import Poisson
 from .student_t import StudentT
 from .likelihood import Likelihood
 from .mixed_noise import MixedNoise
+from .binomial import Binomial
+
--- a/GPy/likelihoods/binomial.py
+++ b/GPy/likelihoods/binomial.py
@ -0,0 +1,125 @@
+# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
+from . import link_functions
+from .likelihood import Likelihood
+from scipy import special
+
+class Binomial(Likelihood):
+    """
+    Binomial likelihood
+
+    .. math::
+        p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
+
+    .. Note::
+        Y takes values in either {-1, 1} or {0, 1}.
+        link function should have the domain [0, 1], e.g. probit (default) or Heaviside
+
+    .. See also::
+        likelihood.py, for the parent class
+    """
+    def __init__(self, gp_link=None):
+        if gp_link is None:
+            gp_link = link_functions.Probit()
+
+        super(Binomial, self).__init__(gp_link, 'Binomial')
+
+    def conditional_mean(self, gp, Y_metadata):
+        return self.gp_link(gp)*Y_metadata['trials']
+
+    def pdf_link(self, inv_link_f, y, Y_metadata):
+        """
+        Likelihood function given inverse link of f.
+
+        .. math::
+            p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
+
+        :param inv_link_f: latent variables inverse link of f.
+        :type inv_link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param Y_metadata: Y_metadata must contain 'trials'
+        :returns: likelihood evaluated for this point
+        :rtype: float
+
+        .. Note:
+            Each y_i must be in {0, 1}
+        """
+        return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata))
+
+    def logpdf_link(self, inv_link_f, y, Y_metadata=None):
+        """
+        Log Likelihood function given inverse link of f.
+
+        .. math::
+            \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
+
+        :param inv_link_f: latent variables inverse link of f.
+        :type inv_link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param Y_metadata: Y_metadata must contain 'trials'
+        :returns: log likelihood evaluated at points inverse link of f.
+        :rtype: float
+        """
+        N = Y_metadata['trials']
+        nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
+
+        return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)
+
+    def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
+        """
+        Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
+
+        :param inv_link_f: latent variables inverse link of f.
+        :type inv_link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param Y_metadata: Y_metadata must contain 'trials'
+        :returns: gradient of log likelihood evaluated at points inverse link of f.
+        :rtype: Nx1 array
+        """
+        N = Y_metadata['trials']
+        return y/inv_link_f - (N-y)/(1-inv_link_f)
+
+    def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
+        """
+        Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
+        i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j  w.r.t inverse link of f_i and inverse link of f_j.
+
+
+        .. math::
+            \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
+
+        :param inv_link_f: latent variables inverse link of f.
+        :type inv_link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param Y_metadata: Y_metadata not used in binomial
+        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
+        :rtype: Nx1 array
+
+        .. Note::
+            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
+            (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
+        """
+        N = Y_metadata['trials']
+        return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f)
+
+    def samples(self, gp, Y_metadata=None):
+        """
+        Returns a set of samples of observations based on a given value of the latent variable.
+
+        :param gp: latent variable
+        """
+        orig_shape = gp.shape
+        gp = gp.flatten()
+        N = Y_metadata['trials']
+        Ysim = np.random.binomial(N, self.gp_link.transf(gp))
+        return Ysim.reshape(orig_shape)
+
+    def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
+        pass
--- a/GPy/likelihoods/exponential.py
+++ b/GPy/likelihoods/exponential.py
@ -57,9 +57,8 @@ class Exponential(Likelihood):
        :rtype: float

        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        log_objective = np.log(link_f) - y*link_f
-        return np.sum(log_objective)
+        return log_objective

    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
@ -77,7 +76,6 @@ class Exponential(Likelihood):
        :rtype: Nx1 array

        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        grad = 1./link_f - y
        #grad = y/(link_f**2) - 1./link_f
        return grad
@ -103,7 +101,6 @@ class Exponential(Likelihood):
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        hess = -1./(link_f**2)
        #hess = -2*y/(link_f**3) + 1/(link_f**2)
        return hess
@ -123,7 +120,6 @@ class Exponential(Likelihood):
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3lik_dlink3 = 2./(link_f**3)
        #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
        return d3lik_dlink3
--- a/GPy/likelihoods/gamma.py
+++ b/GPy/likelihoods/gamma.py
@ -66,12 +66,11 @@ class Gamma(Likelihood):
        :rtype: float

        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        #alpha = self.gp_link.transf(gp)*self.beta
        #return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
        alpha = link_f*self.beta
        log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
-        return np.sum(log_objective)
+        return log_objective

    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
@ -90,7 +89,6 @@ class Gamma(Likelihood):
        :rtype: Nx1 array

        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
        #old
        #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
@ -118,7 +116,6 @@ class Gamma(Likelihood):
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
        #old
        #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
@ -140,6 +137,5 @@ class Gamma(Likelihood):
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
        return d3lik_dlink3
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -130,11 +130,10 @@ class Gaussian(Likelihood):
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
        N = y.shape[0]
-        ln_det_cov = N*np.log(self.variance)
+        ln_det_cov = np.log(self.variance)

-        return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
+        return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi))

    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
@ -151,8 +150,7 @@ class Gaussian(Likelihood):
        :returns: gradient of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
-        s2_i = (1.0/self.variance)
+        s2_i = 1.0/self.variance
        grad = s2_i*y - s2_i*link_f
        return grad

@ -178,9 +176,9 @@ class Gaussian(Likelihood):
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
        N = y.shape[0]
-        hess = -(1.0/self.variance)*np.ones((N, 1))
+        D = link_f.shape[1]
+        hess = -(1.0/self.variance)*np.ones((N, D))
        return hess

    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
@ -198,9 +196,9 @@ class Gaussian(Likelihood):
        :returns: third derivative of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
        N = y.shape[0]
-        d3logpdf_dlink3 = np.zeros((N,1))
+        D = link_f.shape[1]
+        d3logpdf_dlink3 = np.zeros((N,D))
        return d3logpdf_dlink3

    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
@ -218,12 +216,11 @@ class Gaussian(Likelihood):
        :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
        :rtype: float
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
        e = y - link_f
        s_4 = 1.0/(self.variance**2)
        N = y.shape[0]
-        dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
-        return np.sum(dlik_dsigma) # Sure about this sum?
+        dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
+        return dlik_dsigma

    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
        """
@ -240,7 +237,6 @@ class Gaussian(Likelihood):
        :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
        :rtype: Nx1 array
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
        s_4 = 1.0/(self.variance**2)
        dlik_grad_dsigma = -s_4*y + s_4*link_f
        return dlik_grad_dsigma
@ -260,15 +256,15 @@ class Gaussian(Likelihood):
        :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
        :rtype: Nx1 array
        """
-        assert np.asarray(link_f).shape == np.asarray(y).shape
        s_4 = 1.0/(self.variance**2)
        N = y.shape[0]
-        d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
+        D = link_f.shape[1]
+        d2logpdf_dlink2_dvar = np.ones((N, D))*s_4
        return d2logpdf_dlink2_dvar

    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
-        return np.asarray([[dlogpdf_dvar]])
+        return dlogpdf_dvar

    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@ -131,7 +131,7 @@ class Likelihood(Parameterized):

        return z, mean, variance

-    def variational_expectations(self, Y, m, v, gh_points=None):
+    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        """
        Use Gauss-Hermite Quadrature to compute

@ -158,9 +158,9 @@ class Likelihood(Parameterized):

        #evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
        # broadcast needs to be handled carefully.
-        logp = self.logpdf(X,Y[:,None])
-        dlogp_dx = self.dlogpdf_df(X, Y[:,None])
-        d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
+        logp = self.logpdf(X,Y[:,None], Y_metadata=Y_metadata)
+        dlogp_dx = self.dlogpdf_df(X, Y[:,None], Y_metadata=Y_metadata)
+        d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None], Y_metadata=Y_metadata)

        #clipping for numerical stability
        #logp = np.clip(logp,-1e9,1e9)
@ -425,7 +425,7 @@ class Likelihood(Parameterized):
            return np.zeros([f.shape[0], 0])

    def _laplace_gradients(self, f, y, Y_metadata=None):
-        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)

--- a/GPy/likelihoods/poisson.py
+++ b/GPy/likelihoods/poisson.py
@ -64,8 +64,7 @@ class Poisson(Likelihood):
        :rtype: float

        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
+        return -link_f + y*np.log(link_f) - special.gammaln(y+1)

    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
@ -83,7 +82,6 @@ class Poisson(Likelihood):
        :rtype: Nx1 array

        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        return y/link_f - 1

    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
@ -107,12 +105,7 @@ class Poisson(Likelihood):
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        hess = -y/(link_f**2)
-        return hess
-        #d2_df = self.gp_link.d2transf_df2(gp)
-        #transf = self.gp_link.transf(gp)
-        #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
+        return -y/(link_f**2)

    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
@ -129,7 +122,6 @@ class Poisson(Likelihood):
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3lik_dlink3 = 2*y/(link_f)**3
        return d3lik_dlink3

--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@ -86,7 +86,6 @@ class StudentT(Likelihood):
        :rtype: float

        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        #FIXME:
        #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
@ -97,7 +96,7 @@ class StudentT(Likelihood):
                    - 0.5*np.log(self.sigma2 * self.v * np.pi)
                    - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
                    )
-        return np.sum(objective)
+        return objective

    def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
        """
@ -115,7 +114,6 @@ class StudentT(Likelihood):
        :rtype: Nx1 array

        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
        return grad
@ -141,7 +139,6 @@ class StudentT(Likelihood):
            Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
        return hess
@ -161,7 +158,6 @@ class StudentT(Likelihood):
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
                       ((e**2 + self.sigma2*self.v)**3)
@ -183,10 +179,9 @@ class StudentT(Likelihood):
        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
        :rtype: float
        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
-        return np.sum(dlogpdf_dvar)
+        return dlogpdf_dvar

    def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
        """
@ -203,7 +198,6 @@ class StudentT(Likelihood):
        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
        :rtype: Nx1 array
        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
        return dlogpdf_dlink_dvar
@ -223,7 +217,6 @@ class StudentT(Likelihood):
        :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
        :rtype: Nx1 array
        """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        e = y - inv_link_f
        d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
                              / ((self.sigma2*self.v + (e**2))**3)
@ -246,7 +239,7 @@ class StudentT(Likelihood):
        return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

    def predictive_mean(self, mu, sigma, Y_metadata=None):
-        # The comment here confuses mean and median. 
+        # The comment here confuses mean and median.
        return self.gp_link.transf(mu) # only true if link is monotonic, which it is.

    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):