diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index 88e688b8..b8b9a261 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -5,6 +5,7 @@ import numpy as np
 from ..core.parameterization import Parameterized, Param
 from ..core.parameterization.transformations import Logexp
 
+
 class WarpingFunction(Parameterized):
     """
     abstract function for warping
@@ -14,28 +15,28 @@ class WarpingFunction(Parameterized):
     def __init__(self, name):
         super(WarpingFunction, self).__init__(name=name)
 
-    def f(self,y,psi):
+    def f(self, y, psi):
         """function transformation
-        y is a list of values (GP training data) of shpape [N,1]
+        y is a list of values (GP training data) of shape [N, 1]
         """
         raise NotImplementedError
 
-    def fgrad_y(self,y,psi):
+    def fgrad_y(self, y, psi):
         """gradient of f w.r.t to y"""
         raise NotImplementedError
 
-    def fgrad_y_psi(self,y,psi):
+    def fgrad_y_psi(self, y, psi):
         """gradient of f w.r.t to y"""
         raise NotImplementedError
 
-    def f_inv(self,z,psi):
+    def f_inv(self, z, psi):
         """inverse function transformation"""
         raise NotImplementedError
 
     def _get_param_names(self):
         raise NotImplementedError
 
-    def plot(self,  xmin, xmax):
+    def plot(self, xmin, xmax):
         psi = self.psi
         y = np.arange(xmin, xmax, 0.01)
         f_y = self.f(y)
@@ -47,22 +48,21 @@ class WarpingFunction(Parameterized):
         plt.title('warping function')
         plt.show()
 
+
 class TanhWarpingFunction(WarpingFunction):
 
-    def __init__(self,n_terms=3):
+    def __init__(self, n_terms=3):
         """n_terms specifies the number of tanh terms to be used"""
         self.n_terms = n_terms
         self.num_parameters = 3 * self.n_terms
         super(TanhWarpingFunction, self).__init__(name='warp_tanh')
 
-    def f(self,y,psi):
+    def f(self, y, psi):
         """
         transform y with f using parameter vector psi
         psi = [[a,b,c]]
         ::math::`f = \\sum_{terms} a * tanh(b*(y+c))`
-
         """
-
         #1. check that number of params is consistent
         assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
         assert psi.shape[1] == 3, 'inconsistent parameter dimensions'
@@ -77,25 +77,19 @@ class TanhWarpingFunction(WarpingFunction):
             z += a*np.tanh(b*(y+c))
         return z
 
-
-    def f_inv(self, y, psi, iterations = 10):
+    def f_inv(self, y, psi, iterations=10):
         """
         calculate the numerical inverse of f
-
         :param iterations: number of N.R. iterations
-
         """
 
         y = y.copy()
         z = np.ones_like(y)
-
         for i in range(iterations):
             z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
-
         return z
 
-
-    def fgrad_y(self, y, psi, return_precalc = False):
+    def fgrad_y(self, y, psi, return_precalc=False):
         """
         gradient of f w.r.t to y ([N x 1])
         returns: Nx1 vector of derivatives, unless return_precalc is true,
@@ -118,16 +112,12 @@ class TanhWarpingFunction(WarpingFunction):
             # return GRAD,S.sum(axis=1),R.sum(axis=1),D.sum(axis=1)
             return GRAD, S, R, D
 
-
         return GRAD
 
-
-    def fgrad_y_psi(self, y, psi, return_covar_chain = False):
+    def fgrad_y_psi(self, y, psi, return_covar_chain=False):
         """
         gradient of f w.r.t to y and psi
-
         returns: NxIx3 tensor of partial derivatives
-
         """
 
         # 1. exponentiate the a and b (positive!)
@@ -141,7 +131,6 @@ class TanhWarpingFunction(WarpingFunction):
             gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T
             gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T
 
-
         if return_covar_chain:
             covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 3))
 
@@ -163,7 +152,7 @@ class TanhWarpingFunction(WarpingFunction):
 
 class TanhWarpingFunction_d(WarpingFunction):
 
-    def __init__(self,n_terms=3):
+    def __init__(self, n_terms=3):
         """n_terms specifies the number of tanh terms to be used"""
         self.n_terms = n_terms
         self.num_parameters = 3 * self.n_terms + 1
@@ -177,15 +166,13 @@ class TanhWarpingFunction_d(WarpingFunction):
         self.link_parameter(self.psi)
         self.link_parameter(self.d)
 
-
-    def f(self,y):
+    def f(self, y):
         """
         Transform y with f using parameter vector psi
         psi = [[a,b,c]]
 
         :math:`f = \\sum_{terms} a * tanh(b*(y+c))`
         """
-
         #1. check that number of params is consistent
         # assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
         # assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
@@ -200,23 +187,19 @@ class TanhWarpingFunction_d(WarpingFunction):
             z += a*np.tanh(b*(y+c))
         return z
 
-
     def f_inv(self, z, max_iterations=1000, y=None):
         """
         calculate the numerical inverse of f
 
         :param max_iterations: maximum number of N.R. iterations
-
         """
 
         z = z.copy()
         if y is None:
-            y = np.ones_like(z) * 0.1
-            #y = np.zeros_like(z)
+            y = np.ones_like(z)
 
         it = 0
         update = np.inf
-        #import ipdb; ipdb.set_trace()
 
         while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
             fy = self.f(y)
@@ -224,29 +207,20 @@ class TanhWarpingFunction_d(WarpingFunction):
             update = (fy - z)/fgrady
             y -= update
             it += 1
-            #print it
-            #print y
         if it == max_iterations:
             print("WARNING!!! Maximum number of iterations reached in f_inv ")
-            #print np.abs(update)
-
         return y
 
-
-    def fgrad_y(self, y,return_precalc = False):
+    def fgrad_y(self, y, return_precalc=False):
         """
         gradient of f w.r.t to y ([N x 1])
 
         :returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff
-
         """
-
-
         d = self.d
         mpsi = self.psi
 
         # vectorized version
-
         S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T
         R = np.tanh(S)
         D = 1-R**2
@@ -256,23 +230,17 @@ class TanhWarpingFunction_d(WarpingFunction):
         if return_precalc:
             return GRAD, S, R, D
 
-
         return GRAD
 
-
-    def fgrad_y_psi(self, y, return_covar_chain = False):
+    def fgrad_y_psi(self, y, return_covar_chain=False):
         """
         gradient of f w.r.t to y and psi
 
         :returns: NxIx4 tensor of partial derivatives
-
         """
-
-
         mpsi = self.psi
 
-        w, s, r, d = self.fgrad_y(y, return_precalc = True)
-        #print s
+        w, s, r, d = self.fgrad_y(y, return_precalc=True)
         gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
         for i in range(len(mpsi)):
             a,b,c  = mpsi[i]