diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py
index 023f00ae..5f0eb7ff 100644
--- a/GPy/inference/likelihoods.py
+++ b/GPy/inference/likelihoods.py
@@ -122,36 +122,50 @@ class poisson(likelihood):
         mu = v_i/tau_i
         sigma = np.sqrt(1./tau_i)
         def poisson_norm(f):
+            """
+            Product of the likelihood and the cavity distribution
+            """
             pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
             rate = np.exp( (f*self.scale)+self.location)
             poisson = stats.poisson.pmf(float(self.Y[i]),rate)
             return pdf_norm_f*poisson
 
         def log_pnm(f):
+            """
+            Negative log of poisson_norm (up to an additive constant), so that minimising it locates the mode
+            """
             return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i])
 
-        golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min()
-        golden_B = np.array([np.log(self.Y[i]),mu]).max()
+        """
+        Golden Search and Simpson's Rule
+        --------------------------------
+        Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
+        Golden Search is used to find the mode of the poisson_norm distribution, and the grid for Simpson's Rule is defined around that mode.
+        """
+        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
+
+        #Golden search
+        golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit
+        golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit
         golden_A = (golden_A - self.location)/self.scale
         golden_B = (golden_B - self.location)/self.scale
-        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B))
-        width = 3./np.log(max(self.Y[i],2))
+        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
 
-        # Simpson's approximamtion
-        #TODO explain this algorithm
-        A = opt - width
-        B = opt + width
-        K =  10*int(np.log(max(self.Y[i],150)))
-        h = (B-A)/K
-        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)])
-        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]])
-        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]])
+        # Simpson's approximation
+        width = 3./np.log(max(self.Y[i],2))
+        A = opt - width #Lower limit
+        B = opt + width #Upper limit
+        K =  10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case
+        h = (B-A)/K # length of the intervals
+        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
+        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
+        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
         first = zeroth*x
         second = first*x
-        Z_hat = sum(zeroth)*h/3
-        mu_hat = sum(first)*h/(3*Z_hat)
-        m2 = sum(second)*h/(3*Z_hat)
-        sigma2_hat = m2 - mu_hat**2
+        Z_hat = sum(zeroth)*h/3 # Zero-th moment
+        mu_hat = sum(first)*h/(3*Z_hat) # First moment
+        m2 = sum(second)*h/(3*Z_hat) # Second moment
+        sigma2_hat = m2 - mu_hat**2 # Second central moment
         return float(Z_hat), float(mu_hat), float(sigma2_hat)
 
     def plot1Db(self,X,X_new,F_new,F2_new=None,U=None):
@@ -201,4 +215,3 @@ class gaussian(likelihood):
 
     def _log_likelihood_gradients():
         raise NotImplementedError
-#This is just a test