From 2b40ee6f7e952b11405d6e2434995c68c9ac71da Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Thu, 31 Jan 2013 15:30:57 +0000
Subject: [PATCH 1/4] predictive_values implemented in EP

---
 GPy/likelihoods/EP.py                   | 11 ++++++++++-
 GPy/likelihoods/Gaussian.py             | 10 ++++++++++
 GPy/likelihoods/likelihood_functions.py |  8 ++++----
 GPy/models/GP.py                        |  3 ++-
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py
index 3e975436..b557a62f 100644
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@@ -18,7 +18,6 @@ class EP:
         self.likelihood_function = likelihood_function
         self.epsilon = epsilon
         self.eta, self.delta = power_ep
-        self.jitter = 1e-12 # TODO: is this needed?
 
         """
         Initial values - Likelihood approximation parameters:
@@ -27,6 +26,16 @@ class EP:
         self.tau_tilde = np.zeros(self.N)
         self.v_tilde = np.zeros(self.N)
 
+    def predictive_values(self,mu,var):
+        return self.likelihood_function.predictive_values(mu,var)
+
+    def _get_params(self):
+        return np.zeros(0)
+    def _get_param_names(self):
+        return []
+    def _set_params(self,p):
+        pass # TODO: the EP likelihood might want to take some parameters...
+
     def _compute_GP_variables(self):
         #Variables to be called from GP
         mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py
index fe954b78..37132cf0 100644
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@@ -29,6 +29,16 @@ class Gaussian:
         self._variance = x
         self.variance = np.eye(self.N)*self._variance
 
+    def predictive_values(self,mu,var):
+        """
+        Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval
+        """
+        mean = mu*self._std + self._mean
+        true_var = (var + self._variance)*self._std**2
+        _5pc = mean + mean - 2.*np.sqrt(var)
+        _95pc = mean + 2.*np.sqrt(var)
+        return mean, _5pc, _95pc
+
     def fit(self):
         """
         No approximations needed
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index b94929d3..68fd276a 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -49,7 +49,7 @@ class probit(likelihood):
 
     def predictive_values(self,mu,var,all=False):
         """
-        Compute  mean, variance, and conficence interval (percentiles 5 and 95) of the  prediction
+        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
         """
         mu = mu.flatten()
         var = var.flatten()
@@ -57,7 +57,7 @@ class probit(likelihood):
         if all:
             p_05 = np.zeros([mu.size])
             p_95 = np.ones([mu.size])
-            return mean, mean*(1-mean),p_05,p_95
+            return mean, p_05, p_95
         else:
             return mean
 
@@ -136,14 +136,14 @@ class poisson(likelihood):
 
     def predictive_values(self,mu,var,all=False):
         """
-        Compute  mean, variance, and conficence interval (percentiles 5 and 95) of the  prediction
+        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
         """
         mean = np.exp(mu*self.scale + self.location)
         if all:
             tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
             p_05 = tmp[:,0]
             p_95 = tmp[:,1]
-            return mean,mean,p_05,p_95
+            return mean,p_05,p_95
         else:
             return mean
 
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index b663ad3e..d20aa290 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -164,9 +164,10 @@ class GP(model):
         """
         #normalise X values
         Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
-        mu, var, phi = self._raw_predict(Xnew, slices, full_cov=full_cov)
+        mu, var = self._raw_predict(Xnew, slices, full_cov=full_cov)
 
         #now push through likelihood TODO
+        mean, _5pc, _95pc = self.likelihood.predictive_values(mu, var)
 
         return mean, _5pc, _95pc
 

From 182c4c7d64d9e85191f0aac45e67c77857b63cf7 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Fri, 1 Feb 2013 13:17:17 +0000
Subject: [PATCH 2/4] So many changes

---
 GPy/core/model.py                       |  9 +--
 GPy/examples/ep_fix.py                  | 50 +++++++-------
 GPy/likelihoods/EP.py                   | 31 ++++++---
 GPy/likelihoods/Gaussian.py             |  2 +-
 GPy/likelihoods/likelihood_functions.py | 74 ++++----------------
 GPy/models/GP.py                        | 89 ++++++++++++++-----------
 GPy/util/plot.py                        |  2 +
 7 files changed, 118 insertions(+), 139 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index c7b61a32..f26bf2ee 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
+from .. import likelihoods
 
 class model(parameterised):
     def __init__(self):
@@ -401,7 +402,7 @@ class model(parameterised):
         :type optimzer: string TODO: valid strings?
 
         """
-        assert self.EP, "EM is not available for gaussian likelihood"
+        assert isinstance(self.likelihood,likelihoods.EP), "EM is not available for Gaussian likelihoods"
         log_change = epsilon + 1.
         self.log_likelihood_record = []
         self.gp_params_record = []
@@ -410,18 +411,18 @@ class model(parameterised):
         last_value = -np.exp(1000)
         while log_change > epsilon or not iteration:
             print 'EM iteration %s' %iteration
-            self.approximate_likelihood()
+            self.update_likelihood_approximation()
             self.optimize(**kwargs)
             new_value = self.log_likelihood()
             log_change = new_value - last_value
             if log_change > epsilon:
                 self.log_likelihood_record.append(new_value)
                 self.gp_params_record.append(self._get_params())
-                self.ep_params_record.append((self.beta,self.Y,self.Z_ep))
+                #self.ep_params_record.append((self.beta,self.Y,self.Z_ep))
                 last_value = new_value
             else:
                 convergence = False
-                self.beta, self.Y,  self.Z_ep = self.ep_params_record[-1]
+                #self.beta, self.Y,  self.Z_ep = self.ep_params_record[-1]
                 self._set_params(self.gp_params_record[-1])
                 print "Log-likelihood decrement: %s \nLast iteration discarded." %log_change
             iteration += 1
diff --git a/GPy/examples/ep_fix.py b/GPy/examples/ep_fix.py
index 8041cc91..83a58bf8 100644
--- a/GPy/examples/ep_fix.py
+++ b/GPy/examples/ep_fix.py
@@ -3,7 +3,8 @@
 
 
 """
-Simple Gaussian Processes classification
+Simple Gaussian Processes classification 1D
+Probit likelihood
 """
 import pylab as pb
 import numpy as np
@@ -12,28 +13,31 @@ pb.ion()
 
 pb.close('all')
 
-model_type='Full'
-inducing=4
-"""Simple 1D classification example.
-:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
-:param seed : seed value for data generation (default is 4).
-:type seed: int
-:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-:type inducing: int
-"""
-data = GPy.util.datasets.toy_linear_1d_classification(seed=0)
-likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
+# Inputs
+N = 30
+X1 = np.random.normal(5,2,N/2)
+X2 = np.random.normal(10,2,N/2)
+X = np.hstack([X1,X2])[:,None]
 
-m = GPy.models.GP(data['X'],likelihood=likelihood)
-#m = GPy.models.GP(data['X'],likelihood.Y)
+# Outputs
+Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
+
+# Kernel object
+kernel = GPy.kern.rbf(1)
+
+# Define likelihood
+distribution = GPy.likelihoods.likelihood_functions.Probit()
+likelihood_object = GPy.likelihoods.EP(Y,distribution)
+
+# Model definition
+m = GPy.models.GP(X,kernel,likelihood=likelihood_object)
 m.ensure_default_constraints()
+m.update_likelihood_approximation()
+#m.checkgrad(verbose=1)
+m.optimize()
+print "Round 2"
+m.update_likelihood_approximation()
 
-# Optimize and plot
-#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
-#    m.approximate_likelihood()
-#m.optimize()
-m.EM()
-
-print m.log_likelihood()
-m.plot(samples=3)
-print(m)
+#m.EPEM()
+#m.plot()
+#print(m)
diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py
index b557a62f..bec81436 100644
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@@ -1,7 +1,7 @@
 import numpy as np
 import random
 from scipy import stats, linalg
-from ..core import model
+#from ..core import model
 from ..util.linalg import pdinv,mdot,jitchol
 from ..util.plot import gpplot
 
@@ -18,6 +18,8 @@ class EP:
         self.likelihood_function = likelihood_function
         self.epsilon = epsilon
         self.eta, self.delta = power_ep
+        self.data = data
+        self.N = self.data.size
 
         """
         Initial values - Likelihood approximation parameters:
@@ -26,6 +28,12 @@ class EP:
         self.tau_tilde = np.zeros(self.N)
         self.v_tilde = np.zeros(self.N)
 
+        #initial values for the GP variables
+        self.Y = np.zeros((self.N,1))
+        self.variance = np.zeros((self.N,self.N))#np.eye(self.N)
+        self.Z = 0
+        self.YYT = None
+
     def predictive_values(self,mu,var):
         return self.likelihood_function.predictive_values(mu,var)
 
@@ -35,6 +43,8 @@ class EP:
         return []
     def _set_params(self,p):
         pass # TODO: the EP likelihood might want to take some parameters...
+    def _gradients(self,partial):
+        return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
 
     def _compute_GP_variables(self):
         #Variables to be called from GP
@@ -42,7 +52,8 @@ class EP:
         sigma_sum = 1./self.tau_ + 1./self.tau_tilde
         mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
         Z_ep = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant
-        self.Y, self.beta, self.Z = self.tau_tilde[:,None], mu_tilde[:,None], Z_ep
+        self.Y, self.beta, self.Z =  mu_tilde[:,None],self.tau_tilde[:,None], Z_ep
+        self.variance = np.diag(1./self.beta.flatten())
 
     def fit_full(self,K):
         """
@@ -53,7 +64,7 @@ class EP:
 
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
         self.mu = np.zeros(self.N)
-        self.Sigma = K.copy()
+        self.Sigma = K.copy() - self.variance.copy()
 
         """
         Initial values - Cavity distribution parameters:
@@ -78,14 +89,14 @@ class EP:
         self.np1 = [self.tau_tilde.copy()]
         self.np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.arange(self.N)
-            random.shuffle(update_order)
+            update_order = np.random.permutation(self.N)
             for i in update_order:
                 #Cavity distribution parameters
                 self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
+                print 1./self.Sigma[i,i],self.tau_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
@@ -96,6 +107,7 @@ class EP:
                 self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
                 self.mu = np.dot(self.Sigma,self.v_tilde)
                 self.iterations += 1
+                print self.tau_tilde[i]
             #Sigma recomptutation with Cholesky decompositon
             Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
             B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
@@ -116,7 +128,7 @@ class EP:
         For nomenclature see ... 2013.
         """
 
-        #TODO: this doesn;t work with uncertain inputs! 
+        #TODO: this doesn;t work with uncertain inputs!
 
         """
         Prior approximation parameters:
@@ -251,14 +263,13 @@ class EP:
         self.np1 = [self.tau_tilde.copy()]
         self.np2 = [self.v_tilde.copy()]
         while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.arange(self.N)
-            random.shuffle(update_order)
+            update_order = np.random.permutation(self.N)
             for i in update_order:
                 #Cavity distribution parameters
                 self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py
index 37132cf0..eec833b8 100644
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@@ -39,7 +39,7 @@ class Gaussian:
         _95pc = mean + 2.*np.sqrt(var)
         return mean, _5pc, _95pc
 
-    def fit(self):
+    def fit_full(self):
         """
         No approximations needed
         """
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 68fd276a..e153ce15 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -7,6 +7,7 @@ from scipy import stats
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
+#from . import EP
 
 class likelihood:
     """
@@ -19,7 +20,7 @@ class likelihood:
         self.location = location
         self.scale = scale
 
-class probit(likelihood):
+class Probit(likelihood):
     """
     Probit likelihood
     Y is expected to take values in {-1,1}
@@ -28,8 +29,6 @@ class probit(likelihood):
     L(x) = \\Phi (Y_i*f_i)
     $$
     """
-    def __init__(self,location=0,scale=1):
-        likelihood.__init__(self,Y,location,scale)
 
     def moments_match(self,data_i,tau_i,v_i):
         """
@@ -47,24 +46,18 @@ class probit(likelihood):
         sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
         return Z_hat, mu_hat, sigma2_hat
 
-    def predictive_values(self,mu,var,all=False):
+    def predictive_values(self,mu,var):
         """
         Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
         """
         mu = mu.flatten()
         var = var.flatten()
         mean = stats.norm.cdf(mu/np.sqrt(1+var))
-        if all:
-            p_05 = np.zeros([mu.size])
-            p_95 = np.ones([mu.size])
-            return mean, p_05, p_95
-        else:
-            return mean
+        p_05 = np.zeros([mu.size])
+        p_95 = np.ones([mu.size])
+        return mean, p_05, p_95
 
-    def _log_likelihood_gradients():
-        return np.zeros(0) # there are no parameters of whcih to compute the gradients
-
-class poisson(likelihood):
+class Poisson(likelihood):
     """
     Poisson likelihood
     Y is expected to take values in {0,1,2,...}
@@ -73,9 +66,6 @@ class poisson(likelihood):
     L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
     $$
     """
-    def __init__(self,Y,location=0,scale=1):
-        assert len(Y[Y<0]) == 0, "Output cannot have negative values"
-        likelihood.__init__(self,Y,location,scale)
 
     def moments_match(self,i,tau_i,v_i):
         """
@@ -134,52 +124,12 @@ class poisson(likelihood):
         sigma2_hat = m2 - mu_hat**2 # Second central moment
         return float(Z_hat), float(mu_hat), float(sigma2_hat)
 
-    def predictive_values(self,mu,var,all=False):
+    def predictive_values(self,mu,var):
         """
         Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
         """
         mean = np.exp(mu*self.scale + self.location)
-        if all:
-            tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
-            p_05 = tmp[:,0]
-            p_95 = tmp[:,1]
-            return mean,p_05,p_95
-        else:
-            return mean
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
-
-    def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
-        assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X,phi,phi.flatten())
-        pb.plot(X_obs,self.Y,'kx',mew=1.5)
-        if samples:
-            phi_samples = np.vstack([np.random.poisson(phi.flatten(),phi.size) for s in range(samples)])
-            pb.plot(X,phi_samples.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
-        if Z is not None:
-            pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
-
-class gaussian(likelihood):
-    """
-    Gaussian likelihood
-    Y is expected to take values in (-inf,inf)
-    """
-    def moments_match(self,i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        s = 1. if self.Y[i] == 0 else 1./self.Y[i]
-        sigma2_hat = 1./(1./sigma**2 + 1./s**2)
-        mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
-        Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
-        return Z_hat, mu_hat, sigma2_hat
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
+        tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
+        p_05 = tmp[:,0]
+        p_95 = tmp[:,1]
+        return mean,p_05,p_95
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 793e2585..49c22364 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -8,6 +8,7 @@ from .. import kern
 from ..core import model
 from ..util.linalg import pdinv,mdot
 from ..util.plot import gpplot, Tango
+from ..likelihoods import EP
 
 class GP(model):
     """
@@ -52,8 +53,10 @@ class GP(model):
             self._Xstd = np.ones((1,self.X.shape[1]))
 
         self.likelihood = likelihood
-        assert self.X.shape[0] == self.likelihood.Y.shape[0]
-        self.N, self.D = self.likelihood.Y.shape
+        #assert self.X.shape[0] == self.likelihood.Y.shape[0]
+        #self.N, self.D = self.likelihood.Y.shape
+        assert self.X.shape[0] == self.likelihood.data.shape[0]
+        self.N, self.D = self.likelihood.data.shape
 
         model.__init__(self)
 
@@ -87,7 +90,11 @@ class GP(model):
         For a Gaussian (or direct: TODO) likelihood, no iteration is required:
         this function does nothing
         """
-        self.likelihood.fit(self.K)
+        self.likelihood.fit_full(self.K)
+        # Recompute K + noise_term
+        self.K = self.kern.K(self.X,slices1=self.Xslices)
+        self.K += self.likelihood.variance
+        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
 
     def _model_fit_term(self):
         """
@@ -119,7 +126,7 @@ class GP(model):
         """
         return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=self.dL_dK)))
 
-    def _raw_predict(self,_Xnew,slices, full_cov=False):
+    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalisation or likelihood
@@ -129,11 +136,11 @@ class GP(model):
         KiKx = np.dot(self.Ki,Kx)
         if full_cov:
             Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
-            var = Kxx - np.dot(KiKx.T,Kx)
+            var = Kxx - np.dot(KiKx.T,Kx) #NOTE is the shape of v right?
         else:
             Kxx = self.kern.Kdiag(_Xnew, slices=slices)
             var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
-        return mu, var
+        return mu, var[:,None]
 
 
     def predict(self,Xnew, slices=None, full_cov=False):
@@ -170,26 +177,11 @@ class GP(model):
 
         return mean, _5pc, _95pc
 
-    def raw_plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
+
+    def _x_frame(self,plot_limits=None,which_data='all',which_functions='all',resolution=None):
         """
-        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
-
-        :param samples: the number of a posteriori samples to plot
-        :param which_data: which if the training data to plot (default all)
-        :type which_data: 'all' or a slice object to slice self.X, self.Y
-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :param which_functions: which of the kernel functions to plot (additively)
-        :type which_functions: list of bools
-        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-
-        Plot the posterior of the GP.
-          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-          - In two dimsensions, a contour-plot shows the mean predicted function
-          - In higher dimensions, we've no implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions using which_data and which_functions
+        Internal helper function for making plots, return a set of new input values to plot as well as lower and upper limits
         """
-
         if which_functions=='all':
             which_functions = [True]*self.kern.Nparts
         if which_data=='all':
@@ -208,28 +200,47 @@ class GP(model):
 
         if self.X.shape[1]==1:
             Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v = self._raw_predict(Xnew,slices=which_functions,full_cov=False)
-            lower, upper = m.flatten() - 2.*np.sqrt(v) , m.flatten()+ 2.*np.sqrt(v)
-            gpplot(Xnew,m,lower,upper)
-            pb.plot(X,Y,'kx',mew=1.5)
-            pb.xlim(xmin,xmax)
         elif self.X.shape[1]==2:
             resolution = resolution or 50
             xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-            Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv = self._raw_predict(Xtest,slices=which_functions,full_cov=False)
-            zz = zz.reshape(resolution,resolution)
-            pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
-            pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
-
+            Xnew = np.vstack((xx.flatten(),yy.flatten())).T
         else:
             raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
+        return Xnew, xmin, xmax
 
-    def plot(self):
+    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+        """
+        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+
+        :param samples: the number of a posteriori samples to plot
+        :param which_data: which if the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
+        :param which_functions: which of the kernel functions to plot (additively)
+        :type which_functions: list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+
+        Plot the posterior of the GP.
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimsensions, a contour-plot shows the mean predicted function
+          - In higher dimensions, we've no implemented this yet !TODO!
+
+        Can plot only part of the data and part of the posterior functions using which_data and which_functions
+        """
         """
         Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
         """
-        pass# TODO!!!!!
+        Xnew, xmin, xmax = self._x_frame()
+        m,v = self._raw_predict(Xnew)
+        if isinstance(self.likelihood,EP):
+            pb.subplot(211)
+        gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v))
+        pb.plot(self.X,self.likelihood.Y,'kx',mew=1.5)
+        pb.xlim(xmin,xmax)
 
+        if isinstance(self.likelihood,EP):
+            pb.subplot(212)
+            phi_m,phi_l,phi_u = self.likelihood.predictive_values(m,v)
+            gpplot(Xnew,phi_m,phi_l,phi_u)
+            pb.plot(self.X,self.likelihood.data,'kx',mew=1.5)
+            pb.xlim(xmin,xmax)
diff --git a/GPy/util/plot.py b/GPy/util/plot.py
index 3b4682e4..bf372869 100644
--- a/GPy/util/plot.py
+++ b/GPy/util/plot.py
@@ -11,6 +11,8 @@ def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.c
         axes = pb.gca()
     mu = mu.flatten()
     x = x.flatten()
+    lower = lower.flatten()
+    upper = upper.flatten()
 
     #here's the mean
     axes.plot(x,mu,color=edgecol,linewidth=2)

From eb04cbed634712aeef55b40fb29ae283bfa4e480 Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Fri, 1 Feb 2013 13:32:13 +0000
Subject: [PATCH 3/4] merged changes in likelihood_functions (James)

---
 GPy/examples/ep_fix.py                  |  4 +-
 GPy/likelihoods/likelihood_functions.py | 69 +------------------------
 GPy/models/GP.py                        |  7 +--
 3 files changed, 5 insertions(+), 75 deletions(-)

diff --git a/GPy/examples/ep_fix.py b/GPy/examples/ep_fix.py
index 83a58bf8..3cb35663 100644
--- a/GPy/examples/ep_fix.py
+++ b/GPy/examples/ep_fix.py
@@ -36,8 +36,8 @@ m.update_likelihood_approximation()
 #m.checkgrad(verbose=1)
 m.optimize()
 print "Round 2"
-m.update_likelihood_approximation()
+#rm.update_likelihood_approximation()
 
 #m.EPEM()
-#m.plot()
+m.plot()
 #print(m)
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 5e2f0b85..39428c70 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -20,11 +20,7 @@ class likelihood_function:
         self.location = location
         self.scale = scale
 
-<<<<<<< HEAD
-class Probit(likelihood):
-=======
 class probit(likelihood_function):
->>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
     """
     Probit likelihood
     Y is expected to take values in {-1,1}
@@ -33,11 +29,6 @@ class probit(likelihood_function):
     L(x) = \\Phi (Y_i*f_i)
     $$
     """
-<<<<<<< HEAD
-=======
-    def __init__(self,location=0,scale=1):
-        likelihood_function.__init__(self,Y,location,scale)
->>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
 
     def moments_match(self,data_i,tau_i,v_i):
         """
@@ -66,11 +57,7 @@ class probit(likelihood_function):
         p_95 = np.ones([mu.size])
         return mean, p_05, p_95
 
-<<<<<<< HEAD
-class Poisson(likelihood):
-=======
-class poisson(likelihood_function):
->>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
+class Poisson(likelihood_function):
     """
     Poisson likelihood
     Y is expected to take values in {0,1,2,...}
@@ -79,13 +66,6 @@ class poisson(likelihood_function):
     L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
     $$
     """
-<<<<<<< HEAD
-=======
-    def __init__(self,Y,location=0,scale=1):
-        assert len(Y[Y<0]) == 0, "Output cannot have negative values"
-        likelihood_function.__init__(self,Y,location,scale)
->>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
-
     def moments_match(self,i,tau_i,v_i):
         """
         Moments match of the marginal approximation in EP algorithm
@@ -148,54 +128,7 @@ class poisson(likelihood_function):
         Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
         """
         mean = np.exp(mu*self.scale + self.location)
-<<<<<<< HEAD
         tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
         p_05 = tmp[:,0]
         p_95 = tmp[:,1]
         return mean,p_05,p_95
-=======
-        if all:
-            tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
-            p_05 = tmp[:,0]
-            p_95 = tmp[:,1]
-            return mean,mean,p_05,p_95
-        else:
-            return mean
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
-
-    def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
-        assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X,phi,phi.flatten())
-        pb.plot(X_obs,self.Y,'kx',mew=1.5)
-        if samples:
-            phi_samples = np.vstack([np.random.poisson(phi.flatten(),phi.size) for s in range(samples)])
-            pb.plot(X,phi_samples.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
-        if Z is not None:
-            pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
-
-class gaussian(likelihood_function):
-    """
-    Gaussian likelihood
-    Y is expected to take values in (-inf,inf)
-    """
-    def moments_match(self,i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        s = 1. if self.Y[i] == 0 else 1./self.Y[i]
-        sigma2_hat = 1./(1./sigma**2 + 1./s**2)
-        mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
-        Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
-        return Z_hat, mu_hat, sigma2_hat
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
->>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index 49c22364..ae192618 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -90,11 +90,8 @@ class GP(model):
         For a Gaussian (or direct: TODO) likelihood, no iteration is required:
         this function does nothing
         """
-        self.likelihood.fit_full(self.K)
-        # Recompute K + noise_term
-        self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.K += self.likelihood.variance
-        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
+        self.likelihood.fit_full(self.kern.compute(self.X))
+        self._set_params(self._get_params()) # update the GP
 
     def _model_fit_term(self):
         """

From f941d629e68ac1611619d3455cc8c628ce59035e Mon Sep 17 00:00:00 2001
From: Ricardo Andrade <acq11ra@sheffield.ac.uk>
Date: Fri, 1 Feb 2013 13:45:55 +0000
Subject: [PATCH 4/4] James' debugging of the EP/GP interface

It seems that the GP-EP algorithm works now.
---
 GPy/examples/ep_fix.py |  4 ++--
 GPy/likelihoods/EP.py  | 11 +++++++----
 GPy/models/GP.py       |  4 ++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/GPy/examples/ep_fix.py b/GPy/examples/ep_fix.py
index 3cb35663..d1747025 100644
--- a/GPy/examples/ep_fix.py
+++ b/GPy/examples/ep_fix.py
@@ -4,7 +4,7 @@
 
 """
 Simple Gaussian Processes classification 1D
-Probit likelihood
+probit likelihood
 """
 import pylab as pb
 import numpy as np
@@ -26,7 +26,7 @@ Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
 kernel = GPy.kern.rbf(1)
 
 # Define likelihood
-distribution = GPy.likelihoods.likelihood_functions.Probit()
+distribution = GPy.likelihoods.likelihood_functions.probit()
 likelihood_object = GPy.likelihoods.EP(Y,distribution)
 
 # Model definition
diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py
index 420b138a..a88059b1 100644
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@@ -27,7 +27,7 @@ class EP(likelihood):
 
         #initial values for the GP variables
         self.Y = np.zeros((self.N,1))
-        self.variance = np.zeros((self.N,self.N))#np.eye(self.N)
+        self.covariance_matrix = np.eye(self.N)
         self.Z = 0
         self.YYT = None
 
@@ -50,8 +50,9 @@ class EP(likelihood):
         mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
         self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
 
-        self.Y =  mu_tilde[:,None]
-        self.precsion = self.tau_tilde[:,None]
+        self.Y = mu_tilde[:,None]
+        self.YYT = np.dot(self.Y,self.Y.T)
+        self.precision = self.tau_tilde
         self.covariance_matrix = np.diag(1./self.precision)
 
     def fit_full(self,K):
@@ -61,9 +62,11 @@ class EP(likelihood):
         """
         #Prior distribution parameters: p(f|X) = N(f|0,K)
 
+        self.tau_tilde = np.zeros(self.N)
+        self.v_tilde = np.zeros(self.N)
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
         self.mu = np.zeros(self.N)
-        self.Sigma = K.copy() - self.variance.copy()
+        self.Sigma = K.copy()
 
         """
         Initial values - Cavity distribution parameters:
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index ae192618..e64da2c9 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -65,7 +65,7 @@ class GP(model):
         self.likelihood._set_params(p[self.kern.Nparam:])
 
         self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.K += self.likelihood.variance
+        self.K += self.likelihood.covariance_matrix
 
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
 
@@ -90,7 +90,7 @@ class GP(model):
         For a Gaussian (or direct: TODO) likelihood, no iteration is required:
         this function does nothing
         """
-        self.likelihood.fit_full(self.kern.compute(self.X))
+        self.likelihood.fit_full(self.kern.K(self.X))
         self._set_params(self._get_params()) # update the GP
 
     def _model_fit_term(self):