Fixed Bernoulli likelihood divide-by-zero and log-of-zero

Alan Saul 2014-02-12 16:48:57 +00:00
parent c788c463d8
commit 46ce76dee8
5 changed files with 33 additions and 20 deletions
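
For context (an illustration, not part of the commit), here is a minimal NumPy sketch of the boundary failure the old closed-form expressions hit when link_f reaches 0 or 1, and of the np.where pattern the diff below adopts:

import numpy as np

link_f = np.array([0.0, 1.0, 0.5])   # predicted p(y=1), hitting both boundaries
y = np.array([0., 1., 1.])           # labels in {0, 1}

# Old gradient, y/link_f - (1-y)/(1-link_f): at link_f=0 with y=0 the first
# term is 0/0 = nan, although the correct one-sided value is -1/(1-link_f).
old_grad = y/link_f - (1.-y)/(1.-link_f)                  # -> [nan, nan, 2.]

# Fixed form: per element, only the branch the label selects is kept.
state = np.seterr(divide='ignore')                        # silence 1/0 warnings
new_grad = np.where(y, 1./link_f, -1./(1.-link_f))        # -> [-1., 1., 2.]
np.seterr(**state)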

View file

@@ -87,18 +87,22 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
-    bern_noise_model = GPy.likelihoods.bernoulli()
-    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
+    likelihood = GPy.likelihoods.Bernoulli()
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
+    kernel = GPy.kern.rbf(1)

     # Model definition
-    m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
+    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
+    print m

     # Optimize
     if optimize:
         #m.update_likelihood_approximation()
         # Parameters optimization:
-        m.optimize('bfgs', messages=1)
+        try:
+            m.optimize('scg', messages=1)
+        except Exception as e:
+            return m
         #m.pseudo_EM()

     # Plot
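
For reference (not part of the diff), the updated example boils down to the following standalone sketch, assuming the 2014-era GPy API exercised in this commit:

import numpy as np
import GPy  # assumes the 2014-era API this commit targets

# Toy 1-D classification data with labels in {0, 1}.
X = np.random.rand(40, 1)
Y = (X > 0.5).astype(float)

likelihood = GPy.likelihoods.Bernoulli()
laplace_inf = GPy.inference.latent_function_inference.Laplace()
kernel = GPy.kern.rbf(1)

# The Bernoulli likelihood is non-Gaussian, so the Laplace approximation
# is passed explicitly as the inference method.
m = GPy.core.GP(X, Y, kernel=kernel, likelihood=likelihood,
                inference_method=laplace_inf)
m.optimize('scg', messages=1)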

View file

@@ -9,12 +9,12 @@ prior over a finite set of points f. This prior is
 where K is the kernel matrix.

 We also have a likelihood (see GPy.likelihoods) which defines how the data are
 related to the latent function: p(y | f). If the likelihood is also a Gaussian,
 the inference over f is tractable (see exact_gaussian_inference.py).

 If the likelihood object is something other than Gaussian, then exact inference
 is not tractable. We then resort to a Laplace approximation (laplace.py) or
 expectation propagation (ep.py).

 The inference methods return a "Posterior" instance, which is a simple
 structure which contains a summary of the posterior. The model classes can then
@@ -24,7 +24,7 @@ etc.
 """
 from exact_gaussian_inference import ExactGaussianInference
-from laplace import LaplaceInference
+from laplace import Laplace

 expectation_propagation = 'foo' # TODO
 from dtc import DTC
 from fitc import FITC

View file

@@ -17,7 +17,7 @@ from posterior import Posterior
 import warnings
 from scipy import optimize


-class LaplaceInference(object):
+class Laplace(object):
     def __init__(self):
         """
@@ -52,6 +52,7 @@ class LaplaceInference(object):
         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
+        self.f_hat = f_hat

         #Compute hessian and other variables at mode
         log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

View file

@@ -116,7 +116,8 @@ class Bernoulli(Likelihood):
         Each y_i must be in {0, 1}
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        #objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        objective = np.where(y, link_f, 1.-link_f)
         return np.exp(np.sum(np.log(objective)))

     def logpdf_link(self, link_f, y, extra_data=None):
@@ -136,7 +137,9 @@ class Bernoulli(Likelihood):
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         #objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
+        state = np.seterr(divide='ignore')
         objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
+        np.seterr(**state)
         return np.sum(objective)

     def dlogpdf_dlink(self, link_f, y, extra_data=None):
@@ -155,7 +158,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        grad = (y/link_f) - (1.-y)/(1-link_f)
+        #grad = (y/link_f) - (1.-y)/(1-link_f)
+        state = np.seterr(divide='ignore')
+        grad = np.where(y, 1./link_f, -1./(1-link_f))
+        np.seterr(**state)
         return grad

     def d2logpdf_dlink2(self, link_f, y, extra_data=None):
@@ -180,7 +186,10 @@ class Bernoulli(Likelihood):
         (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        state = np.seterr(divide='ignore')
+        d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
+        np.seterr(**state)
         return d2logpdf_dlink2

     def d3logpdf_dlink3(self, link_f, y, extra_data=None):
@@ -199,7 +208,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        state = np.seterr(divide='ignore')
+        d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
+        np.seterr(**state)
         return d3logpdf_dlink3

     def samples(self, gp):
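
One subtlety in the pattern above, as a side note rather than part of the diff: np.where evaluates both branch arrays in full before selecting, so log(0) and 1/0 are still computed for elements the other branch supplies; only the warning needs silencing, and the discarded inf values never reach the result. A small demonstration, including the np.errstate context manager that newer NumPy code usually uses for the same bracket:

import numpy as np

link_f = np.array([0.0, 0.5, 1.0])
y = np.array([0., 1., 1.])

# np.log runs over the whole array, so log(0) fires a divide warning even
# though np.where discards those elements afterwards.
state = np.seterr(divide='ignore')
logpdf = np.where(y, np.log(link_f), np.log(1. - link_f))
np.seterr(**state)
# logpdf == [0., log(0.5), 0.]; the discarded -inf values never surface.

# Equivalent bracket as a context manager:
with np.errstate(divide='ignore'):
    logpdf = np.where(y, np.log(link_f), np.log(1. - link_f))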

View file

@@ -516,9 +516,8 @@ class TestNoiseModels(object):
         Y = Y/Y.max()
         white_var = 1e-6
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)

         #Set constraints
@@ -555,7 +554,6 @@ class TestNoiseModels(object):
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         ep_inf = GPy.inference.latent_function_inference.EP()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)

         for param_num in range(len(param_names)):
@@ -644,13 +642,11 @@ class LaplaceTests(unittest.TestCase):
         m1['variance'] = initial_var_guess
         m1['variance'].constrain_bounded(1e-4, 10)
         m1['rbf'].constrain_bounded(1e-4, 10)
-        m1.ensure_default_constraints()
         m1.randomize()

         gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
-        laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_inf = GPy.inference.latent_function_inference.Laplace()
         m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
-        m2.ensure_default_constraints()
         m2['white'].constrain_fixed(1e-6)
         m2['rbf'].constrain_bounded(1e-4, 10)
         m2['variance'].constrain_bounded(1e-4, 10)