From 0eee4b42d23aae7f4fa861dc8fe5e6bee2c4cd91 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Fri, 18 Oct 2013 14:08:37 +0100
Subject: [PATCH] Fixed a few laplace bits

---
 GPy/examples/classification.py                | 37 ++++++++++++++++++-
 GPy/likelihoods/laplace.py                    | 15 +++++---
 .../noise_models/bernoulli_noise.py           | 26 +++----------
 .../noise_models/student_t_noise.py           |  3 +-
 4 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 0630537b..38559105 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -43,7 +43,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
 
 def toy_linear_1d_classification(seed=default_seed):
     """
-    Simple 1D classification example
+    Simple 1D classification example using EP approximation
 
     :param seed: seed value for data generation (default is 4).
     :type seed: int
@@ -71,6 +71,41 @@ def toy_linear_1d_classification(seed=default_seed):
 
     return m
 
+def toy_linear_1d_classification_laplace(seed=default_seed):
+    """
+    Simple 1D classification example using Laplace approximation
+
+    :param seed: seed value for data generation (default is 4).
+    :type seed: int
+
+    """
+
+    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
+    Y = data['Y'][:, 0:1]
+    Y[Y.flatten() == -1] = 0
+
+    bern_noise_model = GPy.likelihoods.bernoulli()
+    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
+
+    # Model definition
+    m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
+
+    print m
+    # Optimize
+    #m.update_likelihood_approximation()
+    # Parameters optimization:
+    m.optimize(messages=1)
+    #m.pseudo_EM()
+
+    # Plot
+    fig, axes = pb.subplots(2,1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
+    print(m)
+
+    return m
+
+
 def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
     """
     Sparse 1D classification example
diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py
index 33594da8..e6ffd78c 100644
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@@ -1,6 +1,14 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-
+#
+#Parts of this file were influenced by the Matlab GPML framework written by
+#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
+#
+#The GPML code is released under the FreeBSD License.
+#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
+#
+#The code and associated documentation is available from
+#http://gaussianprocess.org/gpml/code.
 
 import numpy as np
 import scipy as sp
@@ -32,7 +40,6 @@ class Laplace(likelihood):
         :param noise_model: likelihood function - subclass of noise_model
         :type noise_model: noise_model
         :param extra_data: additional data used by some likelihood functions,
-                           for example survival likelihoods need censoring data
         """
         self.data = data
         self.noise_model = noise_model
@@ -125,7 +132,6 @@ class Laplace(likelihood):
 
         #len(dlik_dthetaL)
         num_params = len(self._get_param_names())
-        print num_params
         # make space for one derivative for each likelihood parameter
         dL_dthetaL = np.zeros(num_params)
         for thetaL_i in range(num_params):
@@ -140,7 +146,6 @@ class Laplace(likelihood):
             dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
             dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
 
-        print dL_dthetaL
         return dL_dthetaL
 
     def _compute_GP_variables(self):
@@ -265,7 +270,7 @@ class Laplace(likelihood):
         ln_B_det = 2*np.sum(np.log(np.diag(L)))
         return W12BiW12, ln_B_det
 
-    def rasm_mode(self, K, MAX_ITER=100):
+    def rasm_mode(self, K, MAX_ITER=30):
         """
         Rasmussen's numerically stable mode finding
         For nomenclature see Rasmussen & Williams 2006
diff --git a/GPy/likelihoods/noise_models/bernoulli_noise.py b/GPy/likelihoods/noise_models/bernoulli_noise.py
index fc7c5011..7ef8aa82 100644
--- a/GPy/likelihoods/noise_models/bernoulli_noise.py
+++ b/GPy/likelihoods/noise_models/bernoulli_noise.py
@@ -58,6 +58,8 @@ class Bernoulli(NoiseDistribution):
             sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
             if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
                 stop
+        else:
+            raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.gp_transformations.__name__))
 
         return Z_hat, mu_hat, sigma2_hat
 
@@ -75,24 +77,6 @@ class Bernoulli(NoiseDistribution):
         else:
             raise NotImplementedError
 
-    def _mass(self,gp,obs):
-        #NOTE obs must be in {0,1}
-        p = self.gp_link.transf(gp)
-        return p**obs * (1.-p)**(1.-obs)
-
-    def _nlog_mass(self,gp,obs):
-        p = self.gp_link.transf(gp)
-        return obs*np.log(p) + (1.-obs)*np.log(1-p)
-
-    def _dnlog_mass_dgp(self,gp,obs):
-        p = self.gp_link.transf(gp)
-        dp = self.gp_link.dtransf_df(gp)
-        return obs/p * dp - (1.-obs)/(1.-p) * dp
-
-    def _d2nlog_mass_dgp2(self,gp,obs):
-        p = self.gp_link.transf(gp)
-        return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp)
-
     def pdf_link(self, link_f, y, extra_data=None):
         """
         Likelihood function given link(f)
@@ -109,7 +93,7 @@ class Bernoulli(NoiseDistribution):
         :rtype: float
 
         .. Note:
-            Each y_{i} must be in {0,1}
+            Each y_i must be in {0,1}
         """
         assert np.asarray(link_f).shape == np.asarray(y).shape
         objective = (link_f**y) * ((1.-link_f)**(1.-y))
@@ -131,7 +115,8 @@ class Bernoulli(NoiseDistribution):
         :rtype: float
         """
         assert np.asarray(link_f).shape == np.asarray(y).shape
-        objective = np.log(link_f**y) + np.log((1.-link_f)**(1.-y))
+        #objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
+        objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
         return np.sum(objective)
 
     def dlogpdf_dlink(self, link_f, y, extra_data=None):
@@ -222,7 +207,6 @@ class Bernoulli(NoiseDistribution):
     def _d2variance_dgp2(self,gp):
         return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
 
-
     def samples(self, gp):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py
index 56f42ab2..49de781f 100644
--- a/GPy/likelihoods/noise_models/student_t_noise.py
+++ b/GPy/likelihoods/noise_models/student_t_noise.py
@@ -233,7 +233,7 @@ class StudentT(NoiseDistribution):
 
     def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
         """
-        Compute  mean, and conficence interval (percentiles 5 and 95) of the prediction
+        Compute predictive variance of student_t*normal p(y*|f*)p(f*)
 
         Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
         (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
@@ -313,4 +313,3 @@ class StudentT(NoiseDistribution):
         p_025 = mu - p
         p_975 = mu + p
         return mu, np.nan*mu, p_025, p_975
-