diff --git a/GPy/examples/ep_fix.py b/GPy/examples/ep_fix.py
index d1747025..440d00aa 100644
--- a/GPy/examples/ep_fix.py
+++ b/GPy/examples/ep_fix.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 """
 Simple Gaussian Processes classification 1D
 probit likelihood
@@ -19,25 +18,36 @@ X1 = np.random.normal(5,2,N/2)
 X2 = np.random.normal(10,2,N/2)
 X = np.hstack([X1,X2])[:,None]
 
-# Outputs
+# Output
 Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
 
 # Kernel object
 kernel = GPy.kern.rbf(1)
 
-# Define likelihood
+# Likelihood object
 distribution = GPy.likelihoods.likelihood_functions.probit()
-likelihood_object = GPy.likelihoods.EP(Y,distribution)
+likelihood = GPy.likelihoods.EP(Y,distribution)
 
 # Model definition
-m = GPy.models.GP(X,kernel,likelihood=likelihood_object)
-m.ensure_default_constraints()
-m.update_likelihood_approximation()
-#m.checkgrad(verbose=1)
-m.optimize()
-print "Round 2"
-#rm.update_likelihood_approximation()
+m = GPy.models.GP(X,kernel,likelihood=likelihood)
 
-#m.EPEM()
-m.plot()
-#print(m)
+# Model constraints
+m.ensure_default_constraints()
+
+# Optimize model
+"""
+EPEM runs a loop that consists of two steps:
+1) EP likelihood approximation:
+    m.update_likelihood_approximation()
+2) Parameters optimization:
+    m.optimize()
+"""
+m.EPEM()
+
+# Plot
+pb.subplot(211)
+m.plot_GP()
+pb.subplot(212)
+m.plot_output()
+
+print(m)
diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py
index a88059b1..f01a5017 100644
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@@ -65,8 +65,8 @@ class EP(likelihood):
         self.tau_tilde = np.zeros(self.N)
         self.v_tilde = np.zeros(self.N)
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
-        self.mu = np.zeros(self.N)
-        self.Sigma = K.copy()
+        mu = np.zeros(self.N)
+        Sigma = K.copy()
 
         """
         Initial values - Cavity distribution parameters:
@@ -94,29 +94,27 @@ class EP(likelihood):
             update_order = np.random.permutation(self.N)
             for i in update_order:
                 #Cavity distribution parameters
-                self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
-                print 1./self.Sigma[i,i],self.tau_tilde[i]
+                self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
+                self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
                 #Marginal moments
                 self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
+                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
+                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
                 self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
                 self.v_tilde[i] = self.v_tilde[i] + Delta_v
                 #Posterior distribution parameters update
-                si=self.Sigma[:,i].reshape(self.N,1)
-                self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
-                self.mu = np.dot(self.Sigma,self.v_tilde)
+                si=Sigma[:,i].reshape(self.N,1)
+                Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
+                mu = np.dot(Sigma,self.v_tilde)
                 self.iterations += 1
-                print self.tau_tilde[i]
             #Sigma recomptutation with Cholesky decompositon
             Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
             B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
             L = jitchol(B)
             V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
-            self.Sigma = K - np.dot(V.T,V)
-            self.mu = np.dot(self.Sigma,self.v_tilde)
+            Sigma = K - np.dot(V.T,V)
+            mu = np.dot(Sigma,self.v_tilde)
             epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
             epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
             self.np1.append(self.tau_tilde.copy())
@@ -139,7 +137,7 @@ class EP(likelihood):
         """
         Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
         KmnKnm = np.dot(Kmn, Kmn.T)
-        KmmiKmn = np.dot(Kmmi,self.Kmn)
+        KmmiKmn = np.dot(Kmmi,Kmn)
         Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
         LLT0 = Kmm.copy()
 
@@ -221,13 +219,13 @@ class EP(likelihood):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
         """
-        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.P0 = self.Kmn.T
-        self.KmnKnm = np.dot(self.P0.T, self.P0)
-        self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
-        self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
-        self.Diag0 = self.Knn_diag - self.Qnn_diag
-        self.R0 = jitchol(self.Kmmi).T
+        Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
+        P0 = Kmn.T
+        KmnKnm = np.dot(P0.T, P0)
+        KmmiKmn = np.dot(Kmmi,P0.T)
+        Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
+        Diag0 = Knn_diag - Qnn_diag
+        R0 = jitchol(Kmmi).T
 
         """
         Posterior approximation: q(f|y) = N(f| mu, Sigma)
@@ -236,11 +234,11 @@ class EP(likelihood):
         """
         self.w = np.zeros(self.N)
         self.gamma = np.zeros(self.M)
-        self.mu = np.zeros(self.N)
-        self.P = self.P0.copy()
-        self.R = self.R0.copy()
-        self.Diag = self.Diag0.copy()
-        self.Sigma_diag = self.Knn_diag
+        mu = np.zeros(self.N)
+        P = P0.copy()
+        R = R0.copy()
+        Diag = Diag0.copy()
+        Sigma_diag = Knn_diag
 
         """
         Initial values - Cavity distribution parameters:
@@ -268,41 +266,41 @@ class EP(likelihood):
             update_order = np.random.permutation(self.N)
             for i in update_order:
                 #Cavity distribution parameters
-                self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
+                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
+                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
                 self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
+                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
+                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                 self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
                 self.v_tilde[i] = self.v_tilde[i] + Delta_v
                 #Posterior distribution parameters update
-                dtd1 = Delta_tau*self.Diag[i] + 1.
-                dii = self.Diag[i]
-                self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
-                pi_ = self.P[i,:].reshape(1,self.M)
-                self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
-                Rp_i = np.dot(self.R,pi_.T)
-                RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
-                self.R = jitchol(RTR).T
+                dtd1 = Delta_tau*Diag[i] + 1.
+                dii = Diag[i]
+                Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
+                pi_ = P[i,:].reshape(1,self.M)
+                P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
+                Rp_i = np.dot(R,pi_.T)
+                RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
+                R = jitchol(RTR).T
                 self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
-                self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
-                self.RPT = np.dot(self.R,self.P.T)
-                self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
-                self.mu = self.w + np.dot(self.P,self.gamma)
+                self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
+                RPT = np.dot(R,P.T)
+                Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
+                mu = self.w + np.dot(P,self.gamma)
                 self.iterations += 1
             #Sigma recomptutation with Cholesky decompositon
-            self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
-            self.P = (self.Diag / self.Diag0)[:,None] * self.P0
-            self.RPT0 = np.dot(self.R0,self.P0.T)
-            L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
-            self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
-            self.RPT = np.dot(self.R,self.P.T)
-            self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
-            self.w = self.Diag * self.v_tilde
-            self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
-            self.mu = self.w + np.dot(self.P,self.gamma)
+            Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
+            P = (Diag / Diag0)[:,None] * P0
+            RPT0 = np.dot(R0,P0.T)
+            L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
+            R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
+            RPT = np.dot(R,P.T)
+            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
+            self.w = Diag * self.v_tilde
+            self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
+            mu = self.w + np.dot(P,self.gamma)
             epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
             epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
             self.np1.append(self.tau_tilde.copy())
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 39428c70..4f571e14 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -7,7 +7,6 @@ from scipy import stats
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
-#from . import EP
 
 class likelihood_function:
     """
diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index e64da2c9..0c3ea6b7 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -7,7 +7,7 @@ import pylab as pb
 from .. import kern
 from ..core import model
 from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot, Tango
+from ..util.plot import gpplot,x_frame, Tango
 from ..likelihoods import EP
 
 class GP(model):
@@ -175,37 +175,7 @@ class GP(model):
         return mean, _5pc, _95pc
 
 
-    def _x_frame(self,plot_limits=None,which_data='all',which_functions='all',resolution=None):
-        """
-        Internal helper function for making plots, return a set of new input values to plot as well as lower and upper limits
-        """
-        if which_functions=='all':
-            which_functions = [True]*self.kern.Nparts
-        if which_data=='all':
-            which_data = slice(None)
-
-        X = self.X[which_data,:]
-        Y = self.likelihood.Y[which_data,:]
-
-        if plot_limits is None:
-            xmin,xmax = X.min(0),X.max(0)
-            xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
-        elif len(plot_limits)==2:
-            xmin, xmax = plot_limits
-        else:
-            raise ValueError, "Bad limits for plotting"
-
-        if self.X.shape[1]==1:
-            Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-        elif self.X.shape[1]==2:
-            resolution = resolution or 50
-            xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-            Xnew = np.vstack((xx.flatten(),yy.flatten())).T
-        else:
-            raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
-        return Xnew, xmin, xmax
-
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+    def plot_GP(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
         """
         Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
 
@@ -223,21 +193,29 @@ class GP(model):
           - In higher dimensions, we've no implemented this yet !TODO!
 
         Can plot only part of the data and part of the posterior functions using which_data and which_functions
-        """
-        """
         Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
         """
-        Xnew, xmin, xmax = self._x_frame()
-        m,v = self._raw_predict(Xnew)
-        if isinstance(self.likelihood,EP):
-            pb.subplot(211)
+        if which_functions=='all':
+            which_functions = [True]*self.kern.Nparts
+        if which_data=='all':
+            which_data = slice(None)
+
+        Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
+
+        m,v = self._raw_predict(Xnew, slices=which_functions)
         gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v))
-        pb.plot(self.X,self.likelihood.Y,'kx',mew=1.5)
+        pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
         pb.xlim(xmin,xmax)
 
-        if isinstance(self.likelihood,EP):
-            pb.subplot(212)
-            phi_m,phi_l,phi_u = self.likelihood.predictive_values(m,v)
-            gpplot(Xnew,phi_m,phi_l,phi_u)
-            pb.plot(self.X,self.likelihood.data,'kx',mew=1.5)
-            pb.xlim(xmin,xmax)
+    def plot_output(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+        if which_functions=='all':
+            which_functions = [True]*self.kern.Nparts
+        if which_data=='all':
+            which_data = slice(None)
+        Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
+        m, lower, upper = self.predict(Xnew, slices=which_functions)
+        gpplot(Xnew,m, lower, upper)
+        pb.plot(self.X[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
+        ymin,ymax = self.likelihood.data.min()*1.2,self.likelihood.data.max()*1.2
+        pb.xlim(xmin,xmax)
+        pb.ylim(ymin,ymax)
diff --git a/GPy/util/plot.py b/GPy/util/plot.py
index bf372869..60e3e488 100644
--- a/GPy/util/plot.py
+++ b/GPy/util/plot.py
@@ -70,4 +70,24 @@ def align_subplots(N,M,xlim=None, ylim=None):
         else:
             removeUpperTicks()
 
+def x_frame(X,plot_limits=None,resolution=None):
+    """
+    Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
+    """
+    if plot_limits is None:
+        xmin,xmax = X.min(0),X.max(0)
+        xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
+    elif len(plot_limits)==2:
+        xmin, xmax = plot_limits
+    else:
+        raise ValueError, "Bad limits for plotting"
 
+    if X.shape[1]==1:
+        Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
+    elif X.shape[1]==2:
+        resolution = resolution or 50
+        xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
+        Xnew = np.vstack((xx.flatten(),yy.flatten())).T
+    else:
+        raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+    return Xnew, xmin, xmax