Fixed Bernoulli likelihood divide-by-zero and log-of-zero

Alan Saul 2014-02-12 16:48:57 +00:00
parent c788c463d8
commit 46ce76dee8
5 changed files with 33 additions and 20 deletions
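
For context (an illustration, not part of the commit), here is a minimal NumPy sketch of the boundary failure the old closed-form expressions hit when link_f reaches 0 or 1, and of the np.where pattern the diff below adopts:

import numpy as np

link_f = np.array([0.0, 1.0, 0.5])   # predicted p(y=1), hitting both boundaries
y = np.array([0., 1., 1.])           # labels in {0, 1}

# Old gradient, y/link_f - (1-y)/(1-link_f): at link_f=0 with y=0 the first
# term is 0/0 = nan, although the correct one-sided value is -1/(1-link_f).
old_grad = y/link_f - (1.-y)/(1.-link_f)                  # -> [nan, nan, 2.]

# Fixed form: per element, only the branch the label selects is kept.
state = np.seterr(divide='ignore')                        # silence 1/0 warnings
new_grad = np.where(y, 1./link_f, -1./(1.-link_f))        # -> [-1., 1., 2.]
np.seterr(**state)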

View file

@@ -87,18 +87,22 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
-    bern_noise_model = GPy.likelihoods.bernoulli()
-    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
+    likelihood = GPy.likelihoods.Bernoulli()
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
+    kernel = GPy.kern.rbf(1)

     # Model definition
-    m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
+    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
+    print m

     # Optimize
     if optimize:
         #m.update_likelihood_approximation()
         # Parameters optimization:
-        m.optimize('bfgs', messages=1)
+        try:
+            m.optimize('scg', messages=1)
+        except Exception as e:
+            return m
         #m.pseudo_EM()

     # Plot
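
For reference (not part of the diff), the updated example boils down to the following standalone sketch, assuming the 2014-era GPy API exercised in this commit:

import numpy as np
import GPy  # assumes the 2014-era API this commit targets

# Toy 1-D classification data with labels in {0, 1}.
X = np.random.rand(40, 1)
Y = (X > 0.5).astype(float)

likelihood = GPy.likelihoods.Bernoulli()
laplace_inf = GPy.inference.latent_function_inference.Laplace()
kernel = GPy.kern.rbf(1)

# The Bernoulli likelihood is non-Gaussian, so the Laplace approximation
# is passed explicitly as the inference method.
m = GPy.core.GP(X, Y, kernel=kernel, likelihood=likelihood,
                inference_method=laplace_inf)
m.optimize('scg', messages=1)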

View file

@@ -9,12 +9,12 @@ prior over a finite set of points f. This prior is
 where K is the kernel matrix.

 We also have a likelihood (see GPy.likelihoods) which defines how the data are
 related to the latent function: p(y | f). If the likelihood is also a Gaussian,
 the inference over f is tractable (see exact_gaussian_inference.py).

 If the likelihood object is something other than Gaussian, then exact inference
 is not tractable. We then resort to a Laplace approximation (laplace.py) or
 expectation propagation (ep.py).

 The inference methods return a "Posterior" instance, which is a simple
 structure which contains a summary of the posterior. The model classes can then
@@ -24,7 +24,7 @@ etc.
 """
 from exact_gaussian_inference import ExactGaussianInference
-from laplace import LaplaceInference
+from laplace import Laplace

 expectation_propagation = 'foo' # TODO
 from dtc import DTC
 from fitc import FITC

View file

@@ -17,7 +17,7 @@ from posterior import Posterior
 import warnings
 from scipy import optimize


-class LaplaceInference(object):
+class Laplace(object):
     def __init__(self):
         """
@@ -52,6 +52,7 @@ class LaplaceInference(object):
         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
+        self.f_hat = f_hat

         #Compute hessian and other variables at mode
         log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

View file

@@ -116,7 +116,8 @@ class Bernoulli(Likelihood):
         Each y_i must be in {0, 1}
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        #objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        objective = np.where(y, link_f, 1.-link_f)
         return np.exp(np.sum(np.log(objective)))

     def logpdf_link(self, link_f, y, extra_data=None):
@@ -136,7 +137,9 @@ class Bernoulli(Likelihood):
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         #objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
+        state = np.seterr(divide='ignore')
         objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
+        np.seterr(**state)
         return np.sum(objective)

     def dlogpdf_dlink(self, link_f, y, extra_data=None):
@@ -155,7 +158,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        grad = (y/link_f) - (1.-y)/(1-link_f)
+        #grad = (y/link_f) - (1.-y)/(1-link_f)
+        state = np.seterr(divide='ignore')
+        grad = np.where(y, 1./link_f, -1./(1-link_f))
+        np.seterr(**state)
         return grad

     def d2logpdf_dlink2(self, link_f, y, extra_data=None):
@@ -180,7 +186,10 @@ class Bernoulli(Likelihood):
         (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        state = np.seterr(divide='ignore')
+        d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
+        np.seterr(**state)
         return d2logpdf_dlink2

     def d3logpdf_dlink3(self, link_f, y, extra_data=None):
@@ -199,7 +208,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        state = np.seterr(divide='ignore')
+        d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
+        np.seterr(**state)
         return d3logpdf_dlink3

     def samples(self, gp):
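
One subtlety in the pattern above, as a side note rather than part of the diff: np.where evaluates both branch arrays in full before selecting, so log(0) and 1/0 are still computed for elements the other branch supplies; only the warning needs silencing, and the discarded inf values never reach the result. A small demonstration, including the np.errstate context manager that newer NumPy code usually uses for the same bracket:

import numpy as np

link_f = np.array([0.0, 0.5, 1.0])
y = np.array([0., 1., 1.])

# np.log runs over the whole array, so log(0) fires a divide warning even
# though np.where discards those elements afterwards.
state = np.seterr(divide='ignore')
logpdf = np.where(y, np.log(link_f), np.log(1. - link_f))
np.seterr(**state)
# logpdf == [0., log(0.5), 0.]; the discarded -inf values never surface.

# Equivalent bracket as a context manager:
with np.errstate(divide='ignore'):
    logpdf = np.where(y, np.log(link_f), np.log(1. - link_f))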

View file

@@ -516,9 +516,8 @@ class TestNoiseModels(object):
         Y = Y/Y.max()
         white_var = 1e-6
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)

         #Set constraints
@@ -555,7 +554,6 @@ class TestNoiseModels(object):
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         ep_inf = GPy.inference.latent_function_inference.EP()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)

         for param_num in range(len(param_names)):
@@ -644,13 +642,11 @@ class LaplaceTests(unittest.TestCase):
         m1['variance'] = initial_var_guess
         m1['variance'].constrain_bounded(1e-4, 10)
         m1['rbf'].constrain_bounded(1e-4, 10)
-        m1.ensure_default_constraints()
         m1.randomize()

         gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
-        laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_inf = GPy.inference.latent_function_inference.Laplace()
         m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
-        m2.ensure_default_constraints()
         m2['white'].constrain_fixed(1e-6)
         m2['rbf'].constrain_bounded(1e-4, 10)
         m2['variance'].constrain_bounded(1e-4, 10)