Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-07 02:52:40 +02:00

Fixed bernoulli likelihood divide by 0 and log of 0

parent c788c463d8, commit 46ce76dee8
5 changed files with 33 additions and 20 deletions

@@ -87,18 +87,22 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0

-    bern_noise_model = GPy.likelihoods.bernoulli()
-    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
+    likelihood = GPy.likelihoods.Bernoulli()
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
+    kernel = GPy.kern.rbf(1)

     # Model definition
-    m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
-    print m
+    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)

     # Optimize
     if optimize:
         #m.update_likelihood_approximation()
         # Parameters optimization:
-        m.optimize('bfgs', messages=1)
+        try:
+            m.optimize('scg', messages=1)
+        except Exception as e:
+            return m
         #m.pseudo_EM()

     # Plot
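The example's model construction is now compositional: a generic GP core plus an explicit likelihood and inference method, replacing the GPClassification wrapper. A minimal self-contained sketch of the same pattern, assuming the development-era GPy API this commit targets (Python 2 vintage; hypothetical toy data, and it may not run against modern GPy):

    import numpy as np
    import GPy

    X = np.random.rand(40, 1)                        # hypothetical toy inputs
    Y = (np.random.rand(40, 1) > 0.5).astype(float)  # labels in {0, 1}

    likelihood = GPy.likelihoods.Bernoulli()
    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    kernel = GPy.kern.rbf(1)

    # Generic core GP assembled from its parts, as in the diff above.
    m = GPy.core.GP(X, Y, kernel=kernel, likelihood=likelihood,
                    inference_method=laplace_inf)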
@@ -24,7 +24,7 @@ etc.
 """

 from exact_gaussian_inference import ExactGaussianInference
-from laplace import LaplaceInference
+from laplace import Laplace
 expectation_propagation = 'foo' # TODO
 from dtc import DTC
 from fitc import FITC
@@ -17,7 +17,7 @@ from posterior import Posterior
 import warnings
 from scipy import optimize

-class LaplaceInference(object):
+class Laplace(object):

     def __init__(self):
         """
@@ -52,6 +52,7 @@ class LaplaceInference(object):

         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)

+        self.f_hat = f_hat
         #Compute hessian and other variables at mode
         log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

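Storing the converged mode as self.f_hat makes it available on the inference object after the call; presumably this is for inspection or reuse (e.g. warm-starting a later rasm_mode call), though the commit itself only caches it.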
@@ -116,7 +116,8 @@ class Bernoulli(Likelihood):
         Each y_i must be in {0, 1}
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        #objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        objective = np.where(y, link_f, 1.-link_f)
         return np.exp(np.sum(np.log(objective)))

     def logpdf_link(self, link_f, y, extra_data=None):
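For labels restricted to {0, 1} the power form and the branch select compute the same function, but the select avoids evaluating powers entirely and keeps the two cases explicit. A quick equivalence check (illustrative, not part of the commit):

    import numpy as np

    link_f = np.array([0.2, 0.9, 0.5])
    y = np.array([0., 1., 1.])

    power_form = (link_f**y) * ((1. - link_f)**(1. - y))
    select_form = np.where(y, link_f, 1. - link_f)
    assert np.allclose(power_form, select_form)  # identical for y in {0, 1}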
@@ -136,7 +137,9 @@ class Bernoulli(Likelihood):
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         #objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
+        state = np.seterr(divide='ignore')
         objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
+        np.seterr(**state)
         return np.sum(objective)

     def dlogpdf_dlink(self, link_f, y, extra_data=None):
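Two things are worth noting here. First, the commented-out line carried a genuine bug: its second term used np.log(link_f) where np.log(1.-link_f) was intended. Second, np.where evaluates both branch arrays before selecting, so np.log(0) on the unselected branch still fires a divide-by-zero warning even though its -inf result is discarded; the seterr save/restore bracket silences exactly that. An illustrative run:

    import numpy as np

    link_f = np.array([0., 0.5, 1.])
    y = np.array([0., 1., 1.])

    state = np.seterr(divide='ignore')   # np.log(0) runs on discarded branches
    objective = np.where(y == 1, np.log(link_f), np.log(1 - link_f))
    np.seterr(**state)                   # restore the previous error state

    print(objective)                     # [ 0. -0.6931...  0.] -- no nan, no warning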
@@ -155,7 +158,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        grad = (y/link_f) - (1.-y)/(1-link_f)
+        #grad = (y/link_f) - (1.-y)/(1-link_f)
+        state = np.seterr(divide='ignore')
+        grad = np.where(y, 1./link_f, -1./(1-link_f))
+        np.seterr(**state)
         return grad

     def d2logpdf_dlink2(self, link_f, y, extra_data=None):
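This is the core of the fix. In the replaced one-liner, a point with y = 0 and link_f = 0 evaluates y/link_f as 0/0, which is nan and silently poisons the whole gradient; the branch select only keeps the term that actually applies. An illustrative comparison (invalid='ignore' added here only to silence the 0/0 warning in the old form):

    import numpy as np

    link_f = np.array([0., 0.5])
    y = np.array([0., 1.])

    state = np.seterr(divide='ignore', invalid='ignore')
    combined = (y / link_f) - (1. - y) / (1. - link_f)        # 0/0 -> nan at index 0
    selected = np.where(y, 1. / link_f, -1. / (1. - link_f))  # finite -1. at index 0
    np.seterr(**state)

    print(combined)   # [nan  2.]
    print(selected)   # [-1.  2.]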
@@ -180,7 +186,10 @@ class Bernoulli(Likelihood):
         (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        state = np.seterr(divide='ignore')
+        d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
+        np.seterr(**state)
         return d2logpdf_dlink2

     def d3logpdf_dlink3(self, link_f, y, extra_data=None):
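The two np.where branches are just the second derivative specialised at y = 1 and y = 0: d2/df2 [y*log(f) + (1-y)*log(1-f)] = -y/f**2 - (1-y)/(1-f)**2. A quick symbolic sanity check, using sympy purely for illustration:

    import sympy as sp

    f, y = sp.symbols('f y')
    logpdf = y*sp.log(f) + (1 - y)*sp.log(1 - f)
    d2 = sp.diff(logpdf, f, 2)

    print(sp.simplify(d2.subs(y, 1)))  # -1/f**2
    print(sp.simplify(d2.subs(y, 0)))  # -1/(f - 1)**2, i.e. -1/(1-f)**2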
@@ -199,7 +208,10 @@ class Bernoulli(Likelihood):
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        state = np.seterr(divide='ignore')
+        d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
+        np.seterr(**state)
         return d3logpdf_dlink3

     def samples(self, gp):
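The same save/restore pattern appears in all four methods. A possible tidier variant (not part of this commit) is numpy's errstate context manager, which restores the previous error state even if the body raises:

    import numpy as np

    def d3logpdf_dlink3(link_f, y):
        # Same computation as above, with automatic state restoration.
        with np.errstate(divide='ignore'):
            return np.where(y, 2. / (link_f**3), -2. / ((1. - link_f)**3))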
@@ -516,9 +516,8 @@ class TestNoiseModels(object):
         Y = Y/Y.max()
         white_var = 1e-6
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)

         #Set constraints
@@ -555,7 +554,6 @@ class TestNoiseModels(object):
         kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
         ep_inf = GPy.inference.latent_function_inference.EP()
         m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
-        m.ensure_default_constraints()
         m['white'].constrain_fixed(white_var)

         for param_num in range(len(param_names)):
@@ -644,13 +642,11 @@ class LaplaceTests(unittest.TestCase):
         m1['variance'] = initial_var_guess
         m1['variance'].constrain_bounded(1e-4, 10)
         m1['rbf'].constrain_bounded(1e-4, 10)
-        m1.ensure_default_constraints()
         m1.randomize()

         gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
-        laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_inf = GPy.inference.latent_function_inference.Laplace()
         m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
-        m2.ensure_default_constraints()
         m2['white'].constrain_fixed(1e-6)
         m2['rbf'].constrain_bounded(1e-4, 10)
         m2['variance'].constrain_bounded(1e-4, 10)
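The dropped m.ensure_default_constraints() calls track the same API migration as the LaplaceInference -> Laplace rename: in the refactored parameterization framework these models presumably receive sensible default constraints at construction time, so the explicit call is removed rather than merely renamed.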