diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index f9aaddd1..8637cc35 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -87,18 +87,22 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot= Y = data['Y'][:, 0:1] Y[Y.flatten() == -1] = 0 - bern_noise_model = GPy.likelihoods.bernoulli() - laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model) + likelihood = GPy.likelihoods.Bernoulli() + laplace_inf = GPy.inference.latent_function_inference.Laplace() + kernel = GPy.kern.rbf(1) # Model definition - m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood) - print m + m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf) # Optimize if optimize: #m.update_likelihood_approximation() # Parameters optimization: - m.optimize('bfgs', messages=1) + try: + m.optimize('scg', messages=1) + except Exception as e: + return m + #m.pseudo_EM() # Plot diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index c89f771b..b114b8b9 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -9,12 +9,12 @@ prior over a finite set of points f. This prior is where K is the kernel matrix. We also have a likelihood (see GPy.likelihoods) which defines how the data are -related to the latent function: p(y | f). If the likelihood is also a Gaussian, -the inference over f is tractable (see exact_gaussian_inference.py). +related to the latent function: p(y | f). If the likelihood is also a Gaussian, +the inference over f is tractable (see exact_gaussian_inference.py). If the likelihood object is something other than Gaussian, then exact inference is not tractable. We then resort to a Laplace approximation (laplace.py) or -expectation propagation (ep.py). +expectation propagation (ep.py). The inference methods return a "Posterior" instance, which is a simple structure which contains a summary of the posterior. The model classes can then @@ -24,7 +24,7 @@ etc. """ from exact_gaussian_inference import ExactGaussianInference -from laplace import LaplaceInference +from laplace import Laplace expectation_propagation = 'foo' # TODO from dtc import DTC from fitc import FITC diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 4edb9a1d..1b7bdad8 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -17,7 +17,7 @@ from posterior import Posterior import warnings from scipy import optimize -class LaplaceInference(object): +class Laplace(object): def __init__(self): """ @@ -52,6 +52,7 @@ class LaplaceInference(object): f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) + self.f_hat = f_hat #Compute hessian and other variables at mode log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 00626cd3..388ce173 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -116,7 +116,8 @@ class Bernoulli(Likelihood): Each y_i must be in {0, 1} """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - objective = (link_f**y) * ((1.-link_f)**(1.-y)) + #objective = (link_f**y) * ((1.-link_f)**(1.-y)) + objective = np.where(y, link_f, 1.-link_f) return np.exp(np.sum(np.log(objective))) def logpdf_link(self, link_f, y, extra_data=None): @@ -136,7 +137,9 @@ class Bernoulli(Likelihood): """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape #objective = y*np.log(link_f) + (1.-y)*np.log(link_f) + state = np.seterr(divide='ignore') objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) + np.seterr(**state) return np.sum(objective) def dlogpdf_dlink(self, link_f, y, extra_data=None): @@ -155,7 +158,10 @@ class Bernoulli(Likelihood): :rtype: Nx1 array """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - grad = (y/link_f) - (1.-y)/(1-link_f) + #grad = (y/link_f) - (1.-y)/(1-link_f) + state = np.seterr(divide='ignore') + grad = np.where(y, 1./link_f, -1./(1-link_f)) + np.seterr(**state) return grad def d2logpdf_dlink2(self, link_f, y, extra_data=None): @@ -180,7 +186,10 @@ class Bernoulli(Likelihood): (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) + #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) + state = np.seterr(divide='ignore') + d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f)) + np.seterr(**state) return d2logpdf_dlink2 def d3logpdf_dlink3(self, link_f, y, extra_data=None): @@ -199,7 +208,10 @@ class Bernoulli(Likelihood): :rtype: Nx1 array """ assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) + #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) + state = np.seterr(divide='ignore') + d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3)) + np.seterr(**state) return d3logpdf_dlink3 def samples(self, gp): diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 9920d648..458831a0 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -516,9 +516,8 @@ class TestNoiseModels(object): Y = Y/Y.max() white_var = 1e-6 kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) - laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference() + laplace_likelihood = GPy.inference.latent_function_inference.Laplace() m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood) - m.ensure_default_constraints() m['white'].constrain_fixed(white_var) #Set constraints @@ -555,7 +554,6 @@ class TestNoiseModels(object): kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) ep_inf = GPy.inference.latent_function_inference.EP() m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf) - m.ensure_default_constraints() m['white'].constrain_fixed(white_var) for param_num in range(len(param_names)): @@ -644,13 +642,11 @@ class LaplaceTests(unittest.TestCase): m1['variance'] = initial_var_guess m1['variance'].constrain_bounded(1e-4, 10) m1['rbf'].constrain_bounded(1e-4, 10) - m1.ensure_default_constraints() m1.randomize() gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess) - laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() + laplace_inf = GPy.inference.latent_function_inference.Laplace() m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf) - m2.ensure_default_constraints() m2['white'].constrain_fixed(1e-6) m2['rbf'].constrain_bounded(1e-4, 10) m2['variance'].constrain_bounded(1e-4, 10)